├── README.md ├── chatglm_tuning ├── README.md ├── chatglm │ ├── __init__.py │ ├── configuration_chatglm.py │ ├── modeling_chatglm.py │ └── tokenization_chatglm.py ├── lora_ddp │ ├── config.py │ ├── data_helper.py │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py ├── lora_deepspeed │ ├── README.md │ ├── config.py │ ├── data_helper.py │ ├── deepspeed_config.json │ ├── deepspeed_config.yaml │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py ├── lora_fsdp │ ├── config.py │ ├── data_helper.py │ ├── fsdp_config.yaml │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py ├── lora_shared_ddp │ ├── config.py │ ├── data_helper.py │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py └── lora_single_gpu │ ├── config.py │ ├── data_helper.py │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py ├── data ├── README.md └── trans_chinese_alpaca_data.json ├── images └── deepspeed_config.png ├── install_env.sh ├── llama_tuning ├── README.md ├── lora_ddp │ ├── config.py │ ├── data_helper.py │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py ├── lora_deepspeed │ ├── README.md │ ├── config.py │ ├── data_helper.py │ ├── deepspeed_config.json │ ├── deepspeed_config.yaml │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py └── lora_single_gpu │ ├── config.py │ ├── data_helper.py │ ├── generate.py │ ├── metric.py │ ├── model.py │ ├── run.sh │ ├── trainer.py │ └── utils.py └── rlhf_tuning ├── README.md ├── step1_supervised_finetuning ├── main.py └── run.sh ├── step2_reward_model_finetuning ├── main.py └── run.sh ├── step3_rlhf_finetuning ├── main.py ├── ppo_trainer.py ├── rlhf_engine.py └── run.sh └── utils ├── data ├── data_utils.py └── raw_datasets.py ├── ds_utils.py ├── model ├── model_utils.py └── reward_model.py ├── module └── lora.py └── utils.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/README.md -------------------------------------------------------------------------------- /chatglm_tuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/README.md -------------------------------------------------------------------------------- /chatglm_tuning/chatglm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/__init__.py -------------------------------------------------------------------------------- /chatglm_tuning/chatglm/configuration_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/configuration_chatglm.py -------------------------------------------------------------------------------- /chatglm_tuning/chatglm/modeling_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/modeling_chatglm.py -------------------------------------------------------------------------------- /chatglm_tuning/chatglm/tokenization_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/chatglm/tokenization_chatglm.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/config.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/data_helper.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/generate.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/metric.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/model.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/run.sh -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_ddp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_ddp/utils.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/README.md -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/config.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/data_helper.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/deepspeed_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/deepspeed_config.json -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/deepspeed_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/deepspeed_config.yaml -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/generate.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/metric.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/model.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/run.sh: -------------------------------------------------------------------------------- 1 | accelerate launch trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_deepspeed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_deepspeed/utils.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/config.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/data_helper.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/fsdp_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/fsdp_config.yaml -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/generate.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/metric.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/model.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1 2 | python -m torch.distributed.launch --nproc_per_node=2 trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_fsdp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_fsdp/utils.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/config.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/data_helper.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/generate.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/metric.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/model.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1 2 | python -m torch.distributed.launch --nproc_per_node=2 trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_shared_ddp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_shared_ddp/utils.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/config.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/data_helper.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/generate.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/metric.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/model.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/run.sh: -------------------------------------------------------------------------------- 1 | python trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/trainer.py -------------------------------------------------------------------------------- /chatglm_tuning/lora_single_gpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/chatglm_tuning/lora_single_gpu/utils.py -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/data/README.md -------------------------------------------------------------------------------- /data/trans_chinese_alpaca_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/data/trans_chinese_alpaca_data.json -------------------------------------------------------------------------------- /images/deepspeed_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/images/deepspeed_config.png -------------------------------------------------------------------------------- /install_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/install_env.sh -------------------------------------------------------------------------------- /llama_tuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/README.md -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/config.py -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/data_helper.py -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/generate.py -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/metric.py -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/model.py -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/run.sh -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/trainer.py -------------------------------------------------------------------------------- /llama_tuning/lora_ddp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_ddp/utils.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/README.md -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/config.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/data_helper.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/deepspeed_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/deepspeed_config.json -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/deepspeed_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/deepspeed_config.yaml -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/generate.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/metric.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/model.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/run.sh: -------------------------------------------------------------------------------- 1 | accelerate launch trainer.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/trainer.py -------------------------------------------------------------------------------- /llama_tuning/lora_deepspeed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_deepspeed/utils.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/config.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/data_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/data_helper.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/generate.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/metric.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/model.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/run.sh: -------------------------------------------------------------------------------- 1 | python trainer.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/trainer.py -------------------------------------------------------------------------------- /llama_tuning/lora_single_gpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/llama_tuning/lora_single_gpu/utils.py -------------------------------------------------------------------------------- /rlhf_tuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/README.md -------------------------------------------------------------------------------- /rlhf_tuning/step1_supervised_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step1_supervised_finetuning/main.py -------------------------------------------------------------------------------- /rlhf_tuning/step1_supervised_finetuning/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step1_supervised_finetuning/run.sh -------------------------------------------------------------------------------- /rlhf_tuning/step2_reward_model_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step2_reward_model_finetuning/main.py -------------------------------------------------------------------------------- /rlhf_tuning/step2_reward_model_finetuning/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step2_reward_model_finetuning/run.sh -------------------------------------------------------------------------------- /rlhf_tuning/step3_rlhf_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/main.py -------------------------------------------------------------------------------- /rlhf_tuning/step3_rlhf_finetuning/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/ppo_trainer.py -------------------------------------------------------------------------------- /rlhf_tuning/step3_rlhf_finetuning/rlhf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/rlhf_engine.py -------------------------------------------------------------------------------- /rlhf_tuning/step3_rlhf_finetuning/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/step3_rlhf_finetuning/run.sh -------------------------------------------------------------------------------- /rlhf_tuning/utils/data/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/data/data_utils.py -------------------------------------------------------------------------------- /rlhf_tuning/utils/data/raw_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/data/raw_datasets.py -------------------------------------------------------------------------------- /rlhf_tuning/utils/ds_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/ds_utils.py -------------------------------------------------------------------------------- /rlhf_tuning/utils/model/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/model/model_utils.py -------------------------------------------------------------------------------- /rlhf_tuning/utils/model/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/model/reward_model.py -------------------------------------------------------------------------------- /rlhf_tuning/utils/module/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/module/lora.py -------------------------------------------------------------------------------- /rlhf_tuning/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangxinyang227/LLM-tuning/HEAD/rlhf_tuning/utils/utils.py --------------------------------------------------------------------------------