├── .gitignore ├── LICENSE ├── README.md ├── examples └── deepscaler │ ├── prepare_dataset.py │ ├── reasoning_eval.py │ ├── train_grpo_r1_distill_1b_8k.bash │ └── train_grpo_r1_distill_1b_8k.slurm ├── main_ppo.py ├── nanoverl ├── __init__.py ├── config │ └── ppo_trainer.yaml ├── data │ └── __init__.py └── rewards │ ├── __init__.py │ ├── deepscaler_rule_reward.py │ └── reward_types.py └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/README.md -------------------------------------------------------------------------------- /examples/deepscaler/prepare_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/examples/deepscaler/prepare_dataset.py -------------------------------------------------------------------------------- /examples/deepscaler/reasoning_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/examples/deepscaler/reasoning_eval.py -------------------------------------------------------------------------------- /examples/deepscaler/train_grpo_r1_distill_1b_8k.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/examples/deepscaler/train_grpo_r1_distill_1b_8k.bash -------------------------------------------------------------------------------- /examples/deepscaler/train_grpo_r1_distill_1b_8k.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/examples/deepscaler/train_grpo_r1_distill_1b_8k.slurm -------------------------------------------------------------------------------- /main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/main_ppo.py -------------------------------------------------------------------------------- /nanoverl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanoverl/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/nanoverl/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /nanoverl/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanoverl/rewards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/nanoverl/rewards/__init__.py -------------------------------------------------------------------------------- /nanoverl/rewards/deepscaler_rule_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/nanoverl/rewards/deepscaler_rule_reward.py -------------------------------------------------------------------------------- /nanoverl/rewards/reward_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/nanoverl/rewards/reward_types.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalazf99/nanoverl/HEAD/pyproject.toml --------------------------------------------------------------------------------