├── .gitignore
├── README.MD
├── args.MD
├── config
    ├── __init__.py
    ├── constant_map.py
    ├── reward_config
    │   ├── __init__.py
    │   ├── deepspeed.json
    │   ├── deepspeed_offload.json
    │   ├── main.py
    │   ├── reward_config.py
    │   ├── reward_config_lora.py
    │   └── reward_config_ptv2.py
    └── rlhf_config
    │   ├── __init__.py
    │   ├── deepspeed.json
    │   ├── deepspeed_offload.json
    │   ├── main.py
    │   ├── rlhf_config.py
    │   ├── rlhf_config_lora.py
    │   └── rlhf_config_ptv2.py
├── requirements.txt
├── rlhf_stage2_reward
    ├── README.MD
    ├── __init__.py
    ├── data
    │   └── make_data_example.py
    ├── data_processer.py
    ├── data_utils.py
    ├── evaluate_dev_lora.py
    ├── infer_finetuning.py
    ├── infer_lora_finetuning.py
    └── train.py
└── rlhf_stage3_ppo
    ├── README.MD
    ├── __init__.py
    ├── data
        └── make_data_example.py
    ├── data_processer.py
    ├── data_utils.py
    ├── infer_finetuning.py
    ├── infer_lora_finetuning.py
    ├── reward_weight.py
    └── train.py


/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | 


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/README.MD


--------------------------------------------------------------------------------
/args.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/args.MD


--------------------------------------------------------------------------------
/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/__init__.py


--------------------------------------------------------------------------------
/config/constant_map.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/constant_map.py


--------------------------------------------------------------------------------
/config/reward_config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/__init__.py


--------------------------------------------------------------------------------
/config/reward_config/deepspeed.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/deepspeed.json


--------------------------------------------------------------------------------
/config/reward_config/deepspeed_offload.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/deepspeed_offload.json


--------------------------------------------------------------------------------
/config/reward_config/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/main.py


--------------------------------------------------------------------------------
/config/reward_config/reward_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/reward_config.py


--------------------------------------------------------------------------------
/config/reward_config/reward_config_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/reward_config_lora.py


--------------------------------------------------------------------------------
/config/reward_config/reward_config_ptv2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/reward_config/reward_config_ptv2.py


--------------------------------------------------------------------------------
/config/rlhf_config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/__init__.py


--------------------------------------------------------------------------------
/config/rlhf_config/deepspeed.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/deepspeed.json


--------------------------------------------------------------------------------
/config/rlhf_config/deepspeed_offload.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/deepspeed_offload.json


--------------------------------------------------------------------------------
/config/rlhf_config/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/main.py


--------------------------------------------------------------------------------
/config/rlhf_config/rlhf_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/rlhf_config.py


--------------------------------------------------------------------------------
/config/rlhf_config/rlhf_config_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/rlhf_config_lora.py


--------------------------------------------------------------------------------
/config/rlhf_config/rlhf_config_ptv2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/config/rlhf_config/rlhf_config_ptv2.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/requirements.txt


--------------------------------------------------------------------------------
/rlhf_stage2_reward/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/README.MD


--------------------------------------------------------------------------------
/rlhf_stage2_reward/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/__init__.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/data/make_data_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/data/make_data_example.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/data_processer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/data_processer.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/data_utils.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/evaluate_dev_lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/evaluate_dev_lora.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/infer_finetuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/infer_finetuning.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/infer_lora_finetuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/infer_lora_finetuning.py


--------------------------------------------------------------------------------
/rlhf_stage2_reward/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage2_reward/train.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/README.MD


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/__init__.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/data/make_data_example.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/data/make_data_example.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/data_processer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/data_processer.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/data_utils.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/infer_finetuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/infer_finetuning.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/infer_lora_finetuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/infer_lora_finetuning.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/reward_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/reward_weight.py


--------------------------------------------------------------------------------
/rlhf_stage3_ppo/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ssbuild/chatglm_rlhf/HEAD/rlhf_stage3_ppo/train.py


--------------------------------------------------------------------------------