├── R3_math ├── data │ ├── gsm8k_nl_mix_example.json │ ├── gsm8k_python_mix_example.json │ ├── gsm8k_test.json │ ├── svamp_nl_mix_example.json │ └── svamp_test.json ├── default_config_deepspeed.yaml ├── main.py ├── requirements.txt ├── scripts │ ├── R3_cot_gsm8k.sh │ ├── R3_cot_svamp.sh │ └── R3_pot_gsm8k.sh └── src │ ├── modeling_rl.py │ ├── python_engine.py │ ├── python_stdout_engine.py │ └── utils.py ├── R3_others ├── data │ ├── MNLI │ │ ├── mnli_mix_example.json │ │ ├── mnli_test.json │ │ └── mnli_train_example.json │ ├── SNLI │ │ ├── snli_mix_example.json │ │ ├── snli_test.json │ │ └── snli_train_example.json │ ├── boardgame │ │ ├── boardgame_mix_example.json │ │ ├── boardgame_test.json │ │ └── boardgame_train_example.json │ ├── gsm8k_cot │ │ ├── gsm8k_nl_train_example.json │ │ └── gsm8k_test.json │ ├── gsm8k_pot │ │ ├── gsm8k_python_train_example.json │ │ └── gsm8k_test.json │ ├── raceHigh │ │ ├── raceHigh_mix_example.json │ │ ├── raceHigh_test.json │ │ └── raceHigh_train_example.json │ └── svamp_cot │ │ ├── svamp_nl_train_example.json │ │ └── svamp_test.json ├── dschat │ ├── __init__.py │ ├── rlhf │ │ ├── ppo_trainer.py │ │ └── rlhf_engine.py │ └── utils │ │ ├── data │ │ ├── data_utils.py │ │ └── raw_datasets.py │ │ ├── ds_utils.py │ │ ├── model │ │ ├── model_utils.py │ │ └── reward_model.py │ │ ├── module │ │ └── lora.py │ │ ├── perf.py │ │ └── utils.py ├── requirements.txt └── scripts │ ├── eval │ ├── eval_batch.py │ ├── eval_single.sh │ ├── llama2.py │ └── output_mnli.py │ ├── step1_supervised_finetuning │ ├── R3_sft.sh │ └── main.py │ └── step3_rlhf_finetuning │ ├── R3_mix.sh │ └── main.py ├── README.md └── src └── figures └── main.png /R3_math/data/gsm8k_nl_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/data/gsm8k_nl_mix_example.json -------------------------------------------------------------------------------- /R3_math/data/gsm8k_python_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/data/gsm8k_python_mix_example.json -------------------------------------------------------------------------------- /R3_math/data/gsm8k_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/data/gsm8k_test.json -------------------------------------------------------------------------------- /R3_math/data/svamp_nl_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/data/svamp_nl_mix_example.json -------------------------------------------------------------------------------- /R3_math/data/svamp_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/data/svamp_test.json -------------------------------------------------------------------------------- /R3_math/default_config_deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/default_config_deepspeed.yaml -------------------------------------------------------------------------------- /R3_math/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/main.py -------------------------------------------------------------------------------- /R3_math/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/requirements.txt -------------------------------------------------------------------------------- /R3_math/scripts/R3_cot_gsm8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/scripts/R3_cot_gsm8k.sh -------------------------------------------------------------------------------- /R3_math/scripts/R3_cot_svamp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/scripts/R3_cot_svamp.sh -------------------------------------------------------------------------------- /R3_math/scripts/R3_pot_gsm8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/scripts/R3_pot_gsm8k.sh -------------------------------------------------------------------------------- /R3_math/src/modeling_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/src/modeling_rl.py -------------------------------------------------------------------------------- /R3_math/src/python_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/src/python_engine.py -------------------------------------------------------------------------------- /R3_math/src/python_stdout_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/src/python_stdout_engine.py -------------------------------------------------------------------------------- /R3_math/src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_math/src/utils.py -------------------------------------------------------------------------------- /R3_others/data/MNLI/mnli_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/MNLI/mnli_mix_example.json -------------------------------------------------------------------------------- /R3_others/data/MNLI/mnli_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/MNLI/mnli_test.json -------------------------------------------------------------------------------- /R3_others/data/MNLI/mnli_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/MNLI/mnli_train_example.json -------------------------------------------------------------------------------- /R3_others/data/SNLI/snli_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/SNLI/snli_mix_example.json -------------------------------------------------------------------------------- /R3_others/data/SNLI/snli_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/SNLI/snli_test.json -------------------------------------------------------------------------------- /R3_others/data/SNLI/snli_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/SNLI/snli_train_example.json -------------------------------------------------------------------------------- /R3_others/data/boardgame/boardgame_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/boardgame/boardgame_mix_example.json -------------------------------------------------------------------------------- /R3_others/data/boardgame/boardgame_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/boardgame/boardgame_test.json -------------------------------------------------------------------------------- /R3_others/data/boardgame/boardgame_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/boardgame/boardgame_train_example.json -------------------------------------------------------------------------------- /R3_others/data/gsm8k_cot/gsm8k_nl_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/gsm8k_cot/gsm8k_nl_train_example.json -------------------------------------------------------------------------------- /R3_others/data/gsm8k_cot/gsm8k_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/gsm8k_cot/gsm8k_test.json -------------------------------------------------------------------------------- /R3_others/data/gsm8k_pot/gsm8k_python_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/gsm8k_pot/gsm8k_python_train_example.json -------------------------------------------------------------------------------- /R3_others/data/gsm8k_pot/gsm8k_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/gsm8k_pot/gsm8k_test.json -------------------------------------------------------------------------------- /R3_others/data/raceHigh/raceHigh_mix_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/raceHigh/raceHigh_mix_example.json -------------------------------------------------------------------------------- /R3_others/data/raceHigh/raceHigh_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/raceHigh/raceHigh_test.json -------------------------------------------------------------------------------- /R3_others/data/raceHigh/raceHigh_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/raceHigh/raceHigh_train_example.json -------------------------------------------------------------------------------- /R3_others/data/svamp_cot/svamp_nl_train_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/svamp_cot/svamp_nl_train_example.json -------------------------------------------------------------------------------- /R3_others/data/svamp_cot/svamp_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/data/svamp_cot/svamp_test.json -------------------------------------------------------------------------------- /R3_others/dschat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R3_others/dschat/rlhf/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/rlhf/ppo_trainer.py -------------------------------------------------------------------------------- /R3_others/dschat/rlhf/rlhf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/rlhf/rlhf_engine.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/data/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/data/data_utils.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/data/raw_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/data/raw_datasets.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/ds_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/ds_utils.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/model/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/model/model_utils.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/model/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/model/reward_model.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/module/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/module/lora.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/perf.py -------------------------------------------------------------------------------- /R3_others/dschat/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/dschat/utils/utils.py -------------------------------------------------------------------------------- /R3_others/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/requirements.txt -------------------------------------------------------------------------------- /R3_others/scripts/eval/eval_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/eval/eval_batch.py -------------------------------------------------------------------------------- /R3_others/scripts/eval/eval_single.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/eval/eval_single.sh -------------------------------------------------------------------------------- /R3_others/scripts/eval/llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/eval/llama2.py -------------------------------------------------------------------------------- /R3_others/scripts/eval/output_mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/eval/output_mnli.py -------------------------------------------------------------------------------- /R3_others/scripts/step1_supervised_finetuning/R3_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/step1_supervised_finetuning/R3_sft.sh -------------------------------------------------------------------------------- /R3_others/scripts/step1_supervised_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/step1_supervised_finetuning/main.py -------------------------------------------------------------------------------- /R3_others/scripts/step3_rlhf_finetuning/R3_mix.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/step3_rlhf_finetuning/R3_mix.sh -------------------------------------------------------------------------------- /R3_others/scripts/step3_rlhf_finetuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/R3_others/scripts/step3_rlhf_finetuning/main.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/README.md -------------------------------------------------------------------------------- /src/figures/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/LLM-Reverse-Curriculum-RL/HEAD/src/figures/main.png --------------------------------------------------------------------------------