├── README.md
├── chatglm_tuning
    ├── README.md
    ├── chatglm
    │   ├── __init__.py
    │   ├── configuration_chatglm.py
    │   ├── modeling_chatglm.py
    │   └── tokenization_chatglm.py
    ├── lora_ddp
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
    ├── lora_deepspeed
    │   ├── README.md
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── deepspeed_config.json
    │   ├── deepspeed_config.yaml
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
    ├── lora_fsdp
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── fsdp_config.yaml
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
    ├── lora_shared_ddp
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
    └── lora_single_gpu
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
├── data
    ├── README.md
    └── trans_chinese_alpaca_data.json
├── images
    └── deepspeed_config.png
├── install_env.sh
├── llama_tuning
    ├── README.md
    ├── lora_ddp
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
    ├── lora_deepspeed
    │   ├── README.md
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── deepspeed_config.json
    │   ├── deepspeed_config.yaml
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
    └── lora_single_gpu
    │   ├── config.py
    │   ├── data_helper.py
    │   ├── generate.py
    │   ├── metric.py
    │   ├── model.py
    │   ├── run.sh
    │   ├── trainer.py
    │   └── utils.py
└── rlhf_tuning
    ├── README.md
    ├── step1_supervised_finetuning
        ├── main.py
        └── run.sh
    ├── step2_reward_model_finetuning
        ├── main.py
        └── run.sh
    ├── step3_rlhf_finetuning
        ├── main.py
        ├── ppo_trainer.py
        ├── rlhf_engine.py
        └── run.sh
    └── utils
        ├── data
            ├── data_utils.py
            └── raw_datasets.py
        ├── ds_utils.py
        ├── model
            ├── model_utils.py
            └── reward_model.py
        ├── module
            └── lora.py
        └── utils.py


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/README.md


--------------------------------------------------------------------------------
/chatglm_tuning/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/README.md


--------------------------------------------------------------------------------
/chatglm_tuning/chatglm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/__init__.py


--------------------------------------------------------------------------------
/chatglm_tuning/chatglm/configuration_chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/configuration_chatglm.py


--------------------------------------------------------------------------------
/chatglm_tuning/chatglm/modeling_chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/modeling_chatglm.py


--------------------------------------------------------------------------------
/chatglm_tuning/chatglm/tokenization_chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/tokenization_chatglm.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/config.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/data_helper.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/generate.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/metric.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/model.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/run.sh


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/trainer.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_ddp/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/utils.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/README.md


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/config.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/data_helper.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/deepspeed_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/deepspeed_config.json


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/deepspeed_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/deepspeed_config.yaml


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/generate.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/metric.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/model.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/run.sh:
--------------------------------------------------------------------------------
1 | accelerate launch trainer.py  # start trainer.py through the Accelerate launcher; no --config_file is passed, so Accelerate's default config is what applies


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/trainer.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_deepspeed/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/utils.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/config.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/data_helper.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/fsdp_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/fsdp_config.yaml


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/generate.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/metric.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/model.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/run.sh:
--------------------------------------------------------------------------------
1 | export CUDA_VISIBLE_DEVICES=0,1  # export is required: a bare assignment on its own line is not inherited by the python child processes
2 | python -m torch.distributed.launch --nproc_per_node=2 trainer.py  # one worker process per visible GPU (2 total)


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/trainer.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_fsdp/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/utils.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/config.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/data_helper.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/generate.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/metric.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/model.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/run.sh:
--------------------------------------------------------------------------------
1 | export CUDA_VISIBLE_DEVICES=0,1  # export is required: a bare assignment on its own line is not inherited by the python child processes
2 | python -m torch.distributed.launch --nproc_per_node=2 trainer.py  # one worker process per visible GPU (2 total)


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/trainer.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_shared_ddp/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/utils.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/config.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/data_helper.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/generate.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/metric.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/model.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/run.sh:
--------------------------------------------------------------------------------
1 | python trainer.py  # run trainer.py directly with the `python` found on PATH (no distributed launcher)


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/trainer.py


--------------------------------------------------------------------------------
/chatglm_tuning/lora_single_gpu/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/utils.py


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/data/README.md


--------------------------------------------------------------------------------
/data/trans_chinese_alpaca_data.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/data/trans_chinese_alpaca_data.json


--------------------------------------------------------------------------------
/images/deepspeed_config.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/images/deepspeed_config.png


--------------------------------------------------------------------------------
/install_env.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/install_env.sh


--------------------------------------------------------------------------------
/llama_tuning/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/README.md


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/config.py


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/data_helper.py


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/generate.py


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/metric.py


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/model.py


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/run.sh


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/trainer.py


--------------------------------------------------------------------------------
/llama_tuning/lora_ddp/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/utils.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/README.md


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/config.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/data_helper.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/deepspeed_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/deepspeed_config.json


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/deepspeed_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/deepspeed_config.yaml


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/generate.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/metric.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/model.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/run.sh:
--------------------------------------------------------------------------------
1 | accelerate launch trainer.py  # start trainer.py through the Accelerate launcher; no --config_file is passed, so Accelerate's default config is what applies


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/trainer.py


--------------------------------------------------------------------------------
/llama_tuning/lora_deepspeed/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/utils.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/config.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/data_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/data_helper.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/generate.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/metric.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/model.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/run.sh:
--------------------------------------------------------------------------------
1 | python trainer.py  # run trainer.py directly with the `python` found on PATH (no distributed launcher)


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/trainer.py


--------------------------------------------------------------------------------
/llama_tuning/lora_single_gpu/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/utils.py


--------------------------------------------------------------------------------
/rlhf_tuning/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/README.md


--------------------------------------------------------------------------------
/rlhf_tuning/step1_supervised_finetuning/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step1_supervised_finetuning/main.py


--------------------------------------------------------------------------------
/rlhf_tuning/step1_supervised_finetuning/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step1_supervised_finetuning/run.sh


--------------------------------------------------------------------------------
/rlhf_tuning/step2_reward_model_finetuning/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step2_reward_model_finetuning/main.py


--------------------------------------------------------------------------------
/rlhf_tuning/step2_reward_model_finetuning/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step2_reward_model_finetuning/run.sh


--------------------------------------------------------------------------------
/rlhf_tuning/step3_rlhf_finetuning/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/main.py


--------------------------------------------------------------------------------
/rlhf_tuning/step3_rlhf_finetuning/ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/ppo_trainer.py


--------------------------------------------------------------------------------
/rlhf_tuning/step3_rlhf_finetuning/rlhf_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/rlhf_engine.py


--------------------------------------------------------------------------------
/rlhf_tuning/step3_rlhf_finetuning/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/run.sh


--------------------------------------------------------------------------------
/rlhf_tuning/utils/data/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/data/data_utils.py


--------------------------------------------------------------------------------
/rlhf_tuning/utils/data/raw_datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/data/raw_datasets.py


--------------------------------------------------------------------------------
/rlhf_tuning/utils/ds_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/ds_utils.py


--------------------------------------------------------------------------------
/rlhf_tuning/utils/model/model_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/model/model_utils.py


--------------------------------------------------------------------------------
/rlhf_tuning/utils/model/reward_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/model/reward_model.py


--------------------------------------------------------------------------------
/rlhf_tuning/utils/module/lora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/module/lora.py


--------------------------------------------------------------------------------
/rlhf_tuning/utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/utils.py


--------------------------------------------------------------------------------