├── README.md ├── infer.py ├── log ├── log.png └── trainer_state.json ├── recipes ├── DeepSeek-R1-Distill-Qwen-7B │ └── grpo │ │ └── config_demo.yaml ├── Qwen2.5-1.5B-Instruct │ ├── grpo │ │ └── config_demo.yaml │ └── sft │ │ └── config_demo.yaml ├── Qwen2.5-Math-7B │ └── grpo │ │ └── config_simple_rl.yaml ├── README.md └── accelerate_configs │ ├── ddp.yaml │ ├── zero2.yaml │ └── zero3.yaml ├── run_test.sh ├── src └── open_r1 │ ├── __init__.py │ ├── configs.py │ ├── evaluate.py │ ├── generate.py │ ├── grpo.py │ ├── rewards.py │ ├── sft.py │ └── utils │ ├── __init__.py │ ├── callbacks.py │ ├── evaluation.py │ ├── hub.py │ └── upload_details.py └── visualization.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/README.md -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/infer.py -------------------------------------------------------------------------------- /log/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/log/log.png -------------------------------------------------------------------------------- /log/trainer_state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/log/trainer_state.json -------------------------------------------------------------------------------- /recipes/DeepSeek-R1-Distill-Qwen-7B/grpo/config_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/DeepSeek-R1-Distill-Qwen-7B/grpo/config_demo.yaml -------------------------------------------------------------------------------- /recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml -------------------------------------------------------------------------------- /recipes/Qwen2.5-1.5B-Instruct/sft/config_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/Qwen2.5-1.5B-Instruct/sft/config_demo.yaml -------------------------------------------------------------------------------- /recipes/Qwen2.5-Math-7B/grpo/config_simple_rl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/Qwen2.5-Math-7B/grpo/config_simple_rl.yaml -------------------------------------------------------------------------------- /recipes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/README.md -------------------------------------------------------------------------------- /recipes/accelerate_configs/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/accelerate_configs/ddp.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/accelerate_configs/zero2.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/recipes/accelerate_configs/zero3.yaml -------------------------------------------------------------------------------- /run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/run_test.sh -------------------------------------------------------------------------------- /src/open_r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/__init__.py -------------------------------------------------------------------------------- /src/open_r1/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/configs.py -------------------------------------------------------------------------------- /src/open_r1/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/evaluate.py -------------------------------------------------------------------------------- /src/open_r1/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/generate.py -------------------------------------------------------------------------------- /src/open_r1/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/grpo.py -------------------------------------------------------------------------------- /src/open_r1/rewards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/rewards.py -------------------------------------------------------------------------------- /src/open_r1/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/sft.py -------------------------------------------------------------------------------- /src/open_r1/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/open_r1/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/utils/callbacks.py -------------------------------------------------------------------------------- /src/open_r1/utils/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/utils/evaluation.py -------------------------------------------------------------------------------- /src/open_r1/utils/hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/utils/hub.py -------------------------------------------------------------------------------- /src/open_r1/utils/upload_details.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/src/open_r1/utils/upload_details.py -------------------------------------------------------------------------------- /visualization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mryangkaitong/deepseek-r1-gsm8k/HEAD/visualization.py --------------------------------------------------------------------------------