├── .gitignore ├── README.md ├── environment.yml ├── images ├── algorithm.png ├── alpacaeval_result.png └── framework.png ├── results ├── dpo.json ├── ppo.json ├── remax.json └── sft.json ├── step1_supervised_finetuning ├── README.md ├── evaluation_scripts │ └── run_prompt.sh ├── main.py ├── prompt_eval.py └── training_scripts │ ├── llama2 │ └── run_llama2_7b.sh │ └── opt │ └── run_opt_1.3b.sh ├── step2_reward_model_finetuning ├── README.md ├── evaluation_scripts │ └── run_eval.sh ├── main.py ├── rw_eval.py └── training_scripts │ ├── README.md │ ├── llama2 │ └── run_llama2_7b.sh │ └── opt │ └── run_opt_350m.sh ├── step3_rlhf_finetuning ├── README.md ├── main.py ├── perf.py ├── remax_trainer.py ├── rlhf_engine.py └── training_scripts │ ├── llama2 │ └── run_llama2_7b.sh │ └── opt │ └── run_opt_1.3b.sh ├── tests └── test_training.py └── utils ├── data ├── data_utils.py └── raw_datasets.py ├── ds_utils.py ├── gpu_utils.py ├── model ├── model_utils.py └── reward_model.py ├── module └── lora.py ├── perf.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/README.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/environment.yml -------------------------------------------------------------------------------- /images/algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/images/algorithm.png -------------------------------------------------------------------------------- /images/alpacaeval_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/images/alpacaeval_result.png -------------------------------------------------------------------------------- /images/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/images/framework.png -------------------------------------------------------------------------------- /results/dpo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/results/dpo.json -------------------------------------------------------------------------------- /results/ppo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/results/ppo.json -------------------------------------------------------------------------------- /results/remax.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/results/remax.json -------------------------------------------------------------------------------- /results/sft.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/results/sft.json -------------------------------------------------------------------------------- /step1_supervised_finetuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step1_supervised_finetuning/README.md -------------------------------------------------------------------------------- /step1_supervised_finetuning/evaluation_scripts/run_prompt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step1_supervised_finetuning/evaluation_scripts/run_prompt.sh -------------------------------------------------------------------------------- /step1_supervised_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step1_supervised_finetuning/main.py -------------------------------------------------------------------------------- /step1_supervised_finetuning/prompt_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step1_supervised_finetuning/prompt_eval.py -------------------------------------------------------------------------------- /step1_supervised_finetuning/training_scripts/llama2/run_llama2_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step1_supervised_finetuning/training_scripts/llama2/run_llama2_7b.sh -------------------------------------------------------------------------------- /step1_supervised_finetuning/training_scripts/opt/run_opt_1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step1_supervised_finetuning/training_scripts/opt/run_opt_1.3b.sh -------------------------------------------------------------------------------- /step2_reward_model_finetuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/README.md -------------------------------------------------------------------------------- /step2_reward_model_finetuning/evaluation_scripts/run_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/evaluation_scripts/run_eval.sh -------------------------------------------------------------------------------- /step2_reward_model_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/main.py -------------------------------------------------------------------------------- /step2_reward_model_finetuning/rw_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/rw_eval.py -------------------------------------------------------------------------------- /step2_reward_model_finetuning/training_scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/training_scripts/README.md -------------------------------------------------------------------------------- /step2_reward_model_finetuning/training_scripts/llama2/run_llama2_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/training_scripts/llama2/run_llama2_7b.sh -------------------------------------------------------------------------------- /step2_reward_model_finetuning/training_scripts/opt/run_opt_350m.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step2_reward_model_finetuning/training_scripts/opt/run_opt_350m.sh -------------------------------------------------------------------------------- /step3_rlhf_finetuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/README.md -------------------------------------------------------------------------------- /step3_rlhf_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/main.py -------------------------------------------------------------------------------- /step3_rlhf_finetuning/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/perf.py -------------------------------------------------------------------------------- /step3_rlhf_finetuning/remax_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/remax_trainer.py -------------------------------------------------------------------------------- /step3_rlhf_finetuning/rlhf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/rlhf_engine.py -------------------------------------------------------------------------------- /step3_rlhf_finetuning/training_scripts/llama2/run_llama2_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/training_scripts/llama2/run_llama2_7b.sh -------------------------------------------------------------------------------- /step3_rlhf_finetuning/training_scripts/opt/run_opt_1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/step3_rlhf_finetuning/training_scripts/opt/run_opt_1.3b.sh -------------------------------------------------------------------------------- /tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/tests/test_training.py -------------------------------------------------------------------------------- /utils/data/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/data/data_utils.py -------------------------------------------------------------------------------- /utils/data/raw_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/data/raw_datasets.py -------------------------------------------------------------------------------- /utils/ds_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/ds_utils.py -------------------------------------------------------------------------------- /utils/gpu_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/gpu_utils.py -------------------------------------------------------------------------------- /utils/model/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/model/model_utils.py -------------------------------------------------------------------------------- /utils/model/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/model/reward_model.py -------------------------------------------------------------------------------- /utils/module/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/module/lora.py -------------------------------------------------------------------------------- /utils/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/perf.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liziniu/ReMax/HEAD/utils/utils.py --------------------------------------------------------------------------------