├── .gitignore ├── README.MD ├── args.MD ├── config ├── __init__.py ├── constant_map.py ├── reward_config │ ├── __init__.py │ ├── deepspeed.json │ ├── deepspeed_offload.json │ ├── main.py │ ├── reward_config.py │ ├── reward_config_lora.py │ └── reward_config_ptv2.py └── rlhf_config │ ├── __init__.py │ ├── deepspeed.json │ ├── deepspeed_offload.json │ ├── main.py │ ├── rlhf_config.py │ ├── rlhf_config_lora.py │ └── rlhf_config_ptv2.py ├── requirements.txt ├── rlhf_stage2_reward ├── README.MD ├── __init__.py ├── data │ └── make_data_example.py ├── data_processer.py ├── data_utils.py ├── evaluate_dev_lora.py ├── infer_finetuning.py ├── infer_lora_finetuning.py └── train.py └── rlhf_stage3_ppo ├── README.MD ├── __init__.py ├── data └── make_data_example.py ├── data_processer.py ├── data_utils.py ├── infer_finetuning.py ├── infer_lora_finetuning.py ├── reward_weight.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/README.MD -------------------------------------------------------------------------------- /args.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/args.MD -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/__init__.py -------------------------------------------------------------------------------- /config/constant_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/constant_map.py -------------------------------------------------------------------------------- /config/reward_config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/__init__.py -------------------------------------------------------------------------------- /config/reward_config/deepspeed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/deepspeed.json -------------------------------------------------------------------------------- /config/reward_config/deepspeed_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/deepspeed_offload.json -------------------------------------------------------------------------------- /config/reward_config/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/main.py -------------------------------------------------------------------------------- /config/reward_config/reward_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/reward_config.py -------------------------------------------------------------------------------- /config/reward_config/reward_config_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/reward_config_lora.py -------------------------------------------------------------------------------- /config/reward_config/reward_config_ptv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/reward_config_ptv2.py -------------------------------------------------------------------------------- /config/rlhf_config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/__init__.py -------------------------------------------------------------------------------- /config/rlhf_config/deepspeed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/deepspeed.json -------------------------------------------------------------------------------- /config/rlhf_config/deepspeed_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/deepspeed_offload.json -------------------------------------------------------------------------------- /config/rlhf_config/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/main.py -------------------------------------------------------------------------------- /config/rlhf_config/rlhf_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/rlhf_config.py -------------------------------------------------------------------------------- /config/rlhf_config/rlhf_config_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/rlhf_config_lora.py -------------------------------------------------------------------------------- /config/rlhf_config/rlhf_config_ptv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/rlhf_config_ptv2.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/requirements.txt -------------------------------------------------------------------------------- /rlhf_stage2_reward/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/README.MD -------------------------------------------------------------------------------- /rlhf_stage2_reward/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/__init__.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/data/make_data_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/data/make_data_example.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/data_processer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/data_processer.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/data_utils.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/evaluate_dev_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/evaluate_dev_lora.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/infer_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/infer_finetuning.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/infer_lora_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/infer_lora_finetuning.py -------------------------------------------------------------------------------- /rlhf_stage2_reward/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/train.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/README.MD -------------------------------------------------------------------------------- /rlhf_stage3_ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/__init__.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/data/make_data_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/data/make_data_example.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/data_processer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/data_processer.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/data_utils.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/infer_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/infer_finetuning.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/infer_lora_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/infer_lora_finetuning.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/reward_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/reward_weight.py -------------------------------------------------------------------------------- /rlhf_stage3_ppo/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/train.py --------------------------------------------------------------------------------