├── .gitignore ├── LICENSE ├── README.md ├── recipes ├── Qwen2.5-1.5B-Instruct │ └── grpo │ │ ├── config_demo.yaml │ │ └── config_enhanced_demo.yaml └── accelerate_configs │ ├── ddp.yaml │ ├── fsdp.yaml │ ├── zero2.yaml │ └── zero3.yaml ├── setup.cfg ├── setup.py ├── src └── r1 │ ├── __init__.py │ ├── callbacks.py │ ├── configs.py │ ├── enhanced_grpo_trainer.py │ ├── grpo.py │ ├── rewards.py │ ├── sft.py │ ├── trainer.py │ └── utils │ ├── __init__.py │ ├── callbacks.py │ ├── data.py │ ├── formatting.py │ ├── import_utils.py │ ├── logging.py │ ├── model_utils.py │ └── wandb_logging.py ├── test_enhanced_features.py └── train_enhanced_demo.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/README.md -------------------------------------------------------------------------------- /recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml -------------------------------------------------------------------------------- /recipes/Qwen2.5-1.5B-Instruct/grpo/config_enhanced_demo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/recipes/Qwen2.5-1.5B-Instruct/grpo/config_enhanced_demo.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/recipes/accelerate_configs/ddp.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/recipes/accelerate_configs/fsdp.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/recipes/accelerate_configs/zero2.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/recipes/accelerate_configs/zero3.yaml -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/setup.py -------------------------------------------------------------------------------- /src/r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/__init__.py -------------------------------------------------------------------------------- /src/r1/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/callbacks.py -------------------------------------------------------------------------------- /src/r1/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/configs.py -------------------------------------------------------------------------------- /src/r1/enhanced_grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/enhanced_grpo_trainer.py -------------------------------------------------------------------------------- /src/r1/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/grpo.py -------------------------------------------------------------------------------- /src/r1/rewards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/rewards.py -------------------------------------------------------------------------------- /src/r1/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/sft.py -------------------------------------------------------------------------------- /src/r1/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/trainer.py -------------------------------------------------------------------------------- /src/r1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/__init__.py -------------------------------------------------------------------------------- /src/r1/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/callbacks.py -------------------------------------------------------------------------------- /src/r1/utils/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/data.py -------------------------------------------------------------------------------- /src/r1/utils/formatting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/formatting.py -------------------------------------------------------------------------------- /src/r1/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/import_utils.py -------------------------------------------------------------------------------- /src/r1/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/logging.py -------------------------------------------------------------------------------- /src/r1/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/model_utils.py -------------------------------------------------------------------------------- /src/r1/utils/wandb_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/src/r1/utils/wandb_logging.py -------------------------------------------------------------------------------- /test_enhanced_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/test_enhanced_features.py -------------------------------------------------------------------------------- /train_enhanced_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadl/R1/HEAD/train_enhanced_demo.py --------------------------------------------------------------------------------