├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── labeler.yml └── workflows │ ├── _run_test.yml │ ├── build-test-publish-wheel.yml │ ├── cherry-pick-release-commit.yml │ ├── cicd-main.yml │ ├── copyright-check.yml │ ├── labeler.yaml │ ├── release-freeze.yml │ ├── release.yaml │ └── semantic-pull-request.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .python-version ├── 3rdparty ├── Megatron-LM-workspace │ ├── is_megatron_installed.py │ ├── pyproject.toml │ └── setup.py └── NeMo-workspace │ ├── is_nemo_installed.py │ ├── pyproject.toml │ └── setup.py ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── codecov.yml ├── docker ├── Dockerfile └── README.md ├── docs ├── adding-new-models.md ├── assets │ ├── actor-wg-worker-vc.png │ ├── aime_training_progress.png │ ├── deepscaler_training_progress.png │ ├── ray-debug-step1.png │ ├── ray-debug-step2.png │ ├── ray-debug-step3.png │ ├── ray-debug-step4.png │ ├── sft-openmathinstruct2-train-loss.png │ ├── sft-openmathinstruct2-train1M-loss.png │ └── val-log.png ├── autodoc2_docstrings_parser.py ├── cluster.md ├── conf.py ├── debugging.md ├── design-docs │ ├── chat-datasets.md │ ├── checkpointing.md │ ├── design-and-philosophy.md │ ├── fsdp2-parallel-plan.md │ ├── generation.md │ ├── logger.md │ ├── loss-functions.md │ ├── padding.md │ └── uv.md ├── docker.md ├── documentation.md ├── guides │ ├── dpo.md │ ├── eval.md │ ├── grpo-deepscaler.md │ ├── grpo.md │ ├── sft-openmathinstruct2.md │ └── sft.md ├── helpers.py ├── index.md ├── local-workstation.md ├── model-quirks.md ├── project.json ├── testing.md └── versions1.json ├── examples ├── __init__.py ├── configs │ ├── dpo.yaml │ ├── eval.yaml │ ├── grpo-deepscaler-1.5b-16K.yaml │ ├── grpo-deepscaler-1.5b-8K.yaml │ ├── grpo_deepscaler-1.5b-24K.yaml │ ├── grpo_math_1B.yaml │ ├── grpo_math_8B.yaml │ ├── grpo_sliding_puzzle.yaml │ ├── recipes │ │ └── llm │ │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml │ │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml │ │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml │ │ │ ├── dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml │ │ │ ├── grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml │ │ │ ├── grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml │ │ │ ├── grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml │ │ │ ├── grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml │ │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml │ │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml │ │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml │ │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml │ │ │ ├── grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml │ │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml │ │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml │ │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml │ │ │ ├── sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml │ │ │ └── sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml │ ├── sft.yaml │ └── sft_openmathinstruct2.yaml ├── convert_dcp_to_hf.py ├── custom_parallel.py ├── prompts │ ├── cot.txt │ └── math.txt ├── run_dpo.py ├── run_eval.py ├── run_grpo_math.py ├── run_grpo_sliding_puzzle.py └── run_sft.py ├── mypy.ini ├── nemo_rl ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── dpo.py │ ├── grpo.py │ ├── interfaces.py │ ├── loss_functions.py │ ├── sft.py │ └── utils.py ├── converters │ ├── __init__.py │ ├── huggingface │ │ ├── __init__.py │ │ └── vllm_export.py │ └── megatron │ │ ├── __init__.py │ 
│ └── vllm_export.py ├── data │ ├── __init__.py │ ├── datasets.py │ ├── hf_datasets │ │ ├── __init__.py │ │ ├── chat_templates.py │ │ ├── deepscaler.py │ │ ├── dpo.py │ │ ├── helpsteer3.py │ │ ├── oasst.py │ │ ├── openmathinstruct2.py │ │ ├── prompt_response_dataset.py │ │ └── squad.py │ ├── interfaces.py │ └── llm_message_utils.py ├── distributed │ ├── __init__.py │ ├── batched_data_dict.py │ ├── collectives.py │ ├── model_utils.py │ ├── named_sharding.py │ ├── ray_actor_environment_registry.py │ ├── virtual_cluster.py │ └── worker_groups.py ├── environments │ ├── __init__.py │ ├── games │ │ └── sliding_puzzle.py │ ├── interfaces.py │ ├── math_environment.py │ ├── metrics.py │ └── utils.py ├── evals │ ├── __init__.py │ └── eval.py ├── experience │ ├── __init__.py │ └── rollouts.py ├── metrics │ ├── __init__.py │ └── metrics_utils.py ├── models │ ├── __init__.py │ ├── dtensor │ │ ├── __init__.py │ │ └── parallelize.py │ ├── generation │ │ ├── __init__.py │ │ ├── interfaces.py │ │ ├── vllm.py │ │ └── vllm_backend.py │ ├── huggingface │ │ ├── __init__.py │ │ └── common.py │ ├── megatron │ │ ├── __init__.py │ │ └── common.py │ └── policy │ │ ├── __init__.py │ │ ├── dtensor_policy_worker.py │ │ ├── fsdp1_policy_worker.py │ │ ├── hf_policy.py │ │ ├── interfaces.py │ │ └── utils.py ├── package_info.py └── utils │ ├── __init__.py │ ├── checkpoint.py │ ├── config.py │ ├── logger.py │ ├── native_checkpoint.py │ ├── nvml.py │ ├── timer.py │ └── venvs.py ├── pyproject.toml ├── ray.sub ├── tests ├── README.md ├── __init__.py ├── check_metrics.py ├── functional │ ├── dpo.sh │ ├── eval.sh │ ├── grpo.sh │ ├── grpo_multiturn.sh │ ├── sft.sh │ └── test_mcore_extra_installed_correctly.sh ├── json_dump_tb_logs.py ├── run_functional_in_docker.sh ├── run_unit.sh ├── run_unit_in_docker.sh ├── test_suites │ ├── README.md │ ├── llm │ │ ├── common.env │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.sh │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh │ │ ├── dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh │ │ ├── grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh │ │ ├── grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh │ │ ├── grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh │ │ ├── grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh │ │ ├── grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh │ │ ├── performance │ │ │ └── .gitkeep │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.sh │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh │ │ ├── sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh │ │ └── sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh │ ├── nightly.txt │ ├── nightly_performance.txt │ ├── release.txt │ └── release_performance.txt └── unit │ ├── __init__.py │ ├── algorithms │ ├── test_dpo.py │ ├── test_grpo.py │ ├── test_loss_functions.py │ ├── test_sft.py │ └── test_utils.py │ ├── conftest.py │ ├── data │ ├── hf_datasets │ │ ├── test_dpo_dataset.py │ │ ├── test_helpsteer.py │ │ ├── test_prompt_response.py │ │ └── test_squad.py │ ├── test_data_processor.py │ ├── test_datasets.py │ └── test_llm_message_utils.py │ ├── distributed │ ├── __init__.py │ ├── test_batched_data_dict.py │ ├── test_cluster_visualization.py │ ├── test_collectives.py │ ├── test_named_sharding.py │ ├── test_virtual_cluster.py │ 
└── test_worker_groups.py │ ├── environments │ └── test_math_environment.py │ ├── experience │ └── test_rollouts.py │ ├── models │ ├── generation │ │ ├── test_vllm_generation.py │ │ └── test_vllm_large_model.py │ ├── huggingface │ │ └── test_common.py │ └── policy │ │ ├── test_dtensor_worker.py │ │ └── test_fsdp1_worker.py │ ├── test_envs.py │ ├── test_meta.py │ ├── test_recipes_and_test_suites.py │ ├── test_utils.py │ └── utils │ ├── test_checkpoint.py │ ├── test_config.py │ ├── test_logger.py │ ├── test_native_checkpoint.py │ ├── test_pynvml.py │ ├── test_timer.py │ └── test_venvs.py ├── tools ├── autoformat.sh ├── code_snapshot.sh ├── copyright.sh ├── find_available_port_ranges.py ├── launch └── package_release_runs.sh └── uv.lock /.dockerignore: -------------------------------------------------------------------------------- 1 | # Adding to .gitignore helps reduce the size of your working_dir 2 | 3 | .git 4 | *.out 5 | *.log 6 | *.tar 7 | *.tar.gz 8 | .venv 9 | venv 10 | venvs 11 | __pycache__/ 12 | _build/ 13 | build/ 14 | apidocs/ 15 | dist/ 16 | *.egg-info/ 17 | *.vscode/ 18 | release_run* 19 | ckpts/ 20 | 21 | # Test 22 | coverage.json 23 | .coverage* 24 | test_assets/ 25 | 26 | # Cache 27 | uv_cache/ 28 | hf_home/ 29 | hf_datasets_cache/ 30 | *logs/ 31 | datasets/ 32 | wandb/ 33 | checkpoints/ 34 | results/ 35 | code_snapshots/ 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | **Steps/Code to reproduce bug** 15 | 16 | Please list *minimal* steps or code snippet for us to be able to reproduce the bug. 17 | 18 | A helpful guide on how to craft a minimal bug report http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports. 19 | 20 | 21 | **Expected behavior** 22 | 23 | A clear and concise description of what you expected to happen. 24 | 25 | **Environment overview (please complete the following information)** 26 | 27 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Colab)] 28 | - Method of install: [pip install or from source]. Please specify exact commands you used to install. 29 | - If method of install is [Docker], provide `docker pull` & `docker run` commands used 30 | 31 | **Environment details** 32 | 33 | If NVIDIA docker image is used you don't need to specify these. 34 | Otherwise, please provide: 35 | - OS version 36 | - PyTorch version 37 | - Python version 38 | 39 | **Additional context** 40 | 41 | Add any other context about the problem here. 42 | Example: GPU model 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature request 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | 12 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 13 | 14 | **Describe the solution you'd like** 15 | 16 | A clear and concise description of what you want to happen.
17 | Provide a code snippet on how new APIs/changes would be used by others. 18 | 19 | **Describe alternatives you've considered** 20 | 21 | A clear and concise description of any alternative solutions or features you've considered. 22 | 23 | **Additional context** 24 | 25 | Add any other context or screenshots about the feature request here -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do ? 2 | 3 | **Add a one line overview of what this PR aims to accomplish.** 4 | 5 | # Issues 6 | List issues that this PR closes ([syntax](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword)): 7 | 8 | 9 | # Usage 10 | * **You can potentially add a usage example below** 11 | 12 | ```python 13 | # Add a code snippet demonstrating how to use this 14 | ``` 15 | 16 | # Before your PR is "Ready for review" 17 | **Pre checks**: 18 | - [ ] Make sure you read and followed [Contributor guidelines](/NVIDIA/NeMo-RL/blob/main/CONTRIBUTING.md) 19 | - [ ] Did you write any new necessary tests? 20 | - [ ] Did you run the unit tests and functional tests locally? Visit our [Testing Guide](/NVIDIA/NeMo-RL/blob/main/docs/testing.md) for how to run tests 21 | - [ ] Did you add or update any necessary documentation? Visit our [Document Development Guide](/NVIDIA/NeMo-RL/blob/main/docs/documentation.md) for how to write, build and test the docs. 22 | 23 | # Additional Information 24 | * ... 25 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | documentation: 15 | - docs/** 16 | 17 | CI: 18 | - .github/**/* 19 | -------------------------------------------------------------------------------- /.github/workflows/build-test-publish-wheel.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Build, test, and publish a PyPi wheel (to testpypi). 
16 | 17 | on: 18 | push: 19 | branches: 20 | - main 21 | - "r**" 22 | 23 | defaults: 24 | run: 25 | shell: bash -x -e -u -o pipefail {0} 26 | 27 | jobs: 28 | build-test-publish-wheel: 29 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.33.0 30 | with: 31 | dry-run: true 32 | python-package: nemo_rl 33 | packaging: uv 34 | secrets: 35 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 36 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 37 | SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} 38 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 39 | -------------------------------------------------------------------------------- /.github/workflows/cherry-pick-release-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: Create PR to main with cherry-pick from release 15 | 16 | on: 17 | push: 18 | branches: 19 | - main 20 | 21 | jobs: 22 | cherry-pick: 23 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.31.0 24 | secrets: 25 | PAT: ${{ secrets.PAT }} 26 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 27 | SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} 28 | -------------------------------------------------------------------------------- /.github/workflows/copyright-check.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: Copyright check 15 | 16 | on: 17 | pull_request: 18 | 19 | jobs: 20 | copyright-check: 21 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.2.0 22 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: "Pull Request Labeler" 15 | on: 16 | - pull_request_target 17 | 18 | jobs: 19 | triage: 20 | permissions: 21 | contents: read 22 | pull-requests: write 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/labeler@v4 26 | with: 27 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 28 | -------------------------------------------------------------------------------- /.github/workflows/release-freeze.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: "Code freeze" 15 | 16 | on: 17 | workflow_dispatch: 18 | inputs: 19 | release-type: 20 | type: choice 21 | description: Type of release 22 | options: 23 | - major 24 | - minor 25 | freeze-commit: 26 | type: string 27 | description: Commit SHA to use for cut-off 28 | required: false 29 | default: main 30 | dry-run: 31 | type: boolean 32 | description: Dry-run of code-freeze 33 | required: false 34 | default: true 35 | jobs: 36 | code-freeze: 37 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.5 38 | with: 39 | library-name: NeMo-RL 40 | python-package: nemo_rl 41 | release-type: ${{ inputs.release-type }} 42 | freeze-commit: ${{ inputs.freeze-commit }} 43 | dry-run: ${{ inputs.dry-run }} 44 | secrets: 45 | SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} 46 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 47 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | name: "Release NeMo-RL" 15 | 16 | on: 17 | workflow_dispatch: 18 | inputs: 19 | release-ref: 20 | description: Ref (SHA or branch name) to release 21 | required: true 22 | type: string 23 | dry-run: 24 | description: Do not publish a wheel and GitHub release. 25 | required: true 26 | default: true 27 | type: boolean 28 | create-gh-release: 29 | description: Create a GitHub release 30 | required: true 31 | default: true 32 | type: boolean 33 | version-bump-branch: 34 | description: Branch for version bump 35 | required: true 36 | type: string 37 | 38 | jobs: 39 | release: 40 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_release_library.yml@v0.34.0 41 | with: 42 | release-ref: ${{ inputs.release-ref }} 43 | python-package: nemo_rl 44 | library-name: NeMo-RL 45 | dry-run: ${{ inputs.dry-run }} 46 | version-bump-branch: ${{ inputs.version-bump-branch }} 47 | create-gh-release: ${{ inputs.create-gh-release }} 48 | packaging: uv 49 | secrets: 50 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 51 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 52 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 53 | SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} 54 | PAT: ${{ secrets.PAT }} 55 | -------------------------------------------------------------------------------- /.github/workflows/semantic-pull-request.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | name: Validate PR title 15 | 16 | on: 17 | pull_request_target: 18 | types: 19 | - opened 20 | - edited 21 | - synchronize 22 | - reopened 23 | pull_request: 24 | types: 25 | - opened 26 | - edited 27 | - synchronize 28 | - reopened 29 | 30 | defaults: 31 | run: 32 | shell: bash -x -e -u -o pipefail {0} 33 | 34 | permissions: 35 | pull-requests: read 36 | 37 | jobs: 38 | semantic-pull-request: 39 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_semantic_pull_request.yml@v0.31.0 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Adding to .gitignore helps reduce the size of your working_dir 2 | 3 | .git 4 | *.out 5 | *.log 6 | *.tar 7 | *.tar.gz 8 | .venv 9 | venv 10 | venvs 11 | __pycache__/ 12 | _build/ 13 | build/ 14 | apidocs/ 15 | dist/ 16 | *.egg-info/ 17 | *.vscode/ 18 | release_run* 19 | ckpts/ 20 | 21 | # Test 22 | coverage.json 23 | .coverage* 24 | test_assets/ 25 | 26 | # Cache 27 | uv_cache/ 28 | hf_home/ 29 | hf_datasets_cache/ 30 | *logs/ 31 | datasets/ 32 | docker/* 33 | !docker/Dockerfile 34 | !docker/README.md 35 | wandb/ 36 | checkpoints/ 37 | results/ 38 | code_snapshots/ 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "3rdparty/NeMo"] 2 | path = 3rdparty/NeMo-workspace/NeMo 3 | url = https://github.com/NVIDIA/NeMo.git 4 | branch = terryk/hemil/automodel-custom-loop-with-sahil-patch 5 | shallow = true 6 | [submodule "3rdparty/Megatron-LM"] 7 | path = 3rdparty/Megatron-LM-workspace/Megatron-LM 8 | url = https://github.com/terrykong/Megatron-LM.git 9 | branch = terryk/main-2025-05-01-with-sahil-patch 10 | shallow = true 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | # only include python files 7 | files: \.py$ 8 | - id: trailing-whitespace 9 | # only include python files 10 | files: \.py$ 11 | 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: "v0.9.9" # Use the appropriate version 14 | hooks: 15 | - id: ruff 16 | args: ["--fix"] 17 | - id: ruff 18 | args: ["check", "--select", "I", "--fix"] 19 | - id: ruff-format 20 | 21 | - repo: local 22 | hooks: 23 | - id: no-underscore-md 24 | name: "Disallow '_' in Markdown filenames" 25 | language: system 26 | entry: | 27 | bash -c ' 28 | # Report the offending files 29 | echo "[pre-commit] ERROR: Found Markdown files with underscores:" >&2 30 | for file in "$@"; do 31 | echo " - $file (use hyphens instead)" >&2 32 | done 33 | exit 1 34 | ' 35 | files: '.*\/[^\/]*_[^\/]*\.md$' 36 | exclude: '^\.github/' 37 | types: [file] 38 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /3rdparty/Megatron-LM-workspace/is_megatron_installed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | try: 15 | from megatron.core import parallel_state # noqa: F401 16 | 17 | INSTALLED = True 18 | except ImportError: 19 | INSTALLED = False 20 | 21 | print(f"Megatron {INSTALLED=}") 22 | -------------------------------------------------------------------------------- /3rdparty/Megatron-LM-workspace/pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | 3 | [build-system] 4 | requires = [ 5 | "setuptools", 6 | "pybind11", 7 | ] 8 | 9 | [project] 10 | name = "megatron-core" 11 | dynamic = ["dependencies", "version"] 12 | description = "Megatron Core - a library for efficient and scalable training of transformer based models" 13 | authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] 14 | maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -------------------------------------------------------------------------------- /3rdparty/NeMo-workspace/is_nemo_installed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import contextlib 15 | import io 16 | 17 | try: 18 | with ( 19 | contextlib.redirect_stdout(io.StringIO()), 20 | contextlib.redirect_stderr(io.StringIO()), 21 | ): 22 | # Silence the logging because NeMo is very verbose 23 | from nemo.tron.init import initialize_megatron # noqa: F401 24 | INSTALLED = True 25 | except ImportError: 26 | INSTALLED = False 27 | print(f"NeMo {INSTALLED=}") 28 | -------------------------------------------------------------------------------- /3rdparty/NeMo-workspace/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | 4 | [project] 5 | name = "nemo-tron" 6 | dynamic = ["dependencies", "version"] 7 | authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] 8 | description = "Standalone packaging for the NeMo Tron sub-module." 
9 | requires-python = ">=3.10" 10 | # Dependencies will be managed in setup.py 11 | -------------------------------------------------------------------------------- /3rdparty/NeMo-workspace/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | import setuptools 17 | 18 | # --- Configuration Start --- 19 | final_packages = [] 20 | final_package_dir = {} 21 | 22 | # --- nemo package conditional section --- 23 | nemo_package_source_dir = "NeMo/nemo" 24 | nemo_package_name = "nemo" 25 | 26 | if os.path.exists(nemo_package_source_dir): 27 | final_packages.append(nemo_package_name) 28 | final_package_dir[nemo_package_name] = nemo_package_source_dir 29 | # --- End of nemo package conditional section --- 30 | 31 | setuptools.setup( 32 | name="nemo-tron", # Must match [project].name in pyproject.toml 33 | version="0.0.0", # Must match [project].version in pyproject.toml 34 | description="Standalone packaging for the NeMo Tron sub-module.", # Can be sourced from pyproject.toml too 35 | author="NVIDIA", 36 | author_email="nemo-toolkit@nvidia.com", 37 | packages=final_packages, 38 | package_dir=final_package_dir, 39 | py_modules=["is_nemo_installed"], 40 | install_requires=[ 41 | "lightning", 42 | "wget", 43 | "onnx", 44 | "fiddle", 45 | "cloudpickle", 46 | "braceexpand", 47 | "webdataset", 48 | "h5py", 49 | "ijson", 50 | "matplotlib", 51 | "scikit-learn", 52 | "nemo-run", 53 | "hatchling", 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | patch: false 5 | project: false 6 | fixes: 7 | - "/opt/nemo-rl/::" 8 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04 2 | FROM ${BASE_IMAGE} AS base 3 | 4 | # It is more convenient for users to run as root 5 | USER root 6 | 7 | RUN apt-get update && apt-get install -y --no-install-recommends \ 8 | jq \ 9 | curl \ 10 | git \ 11 | && rm -rf /var/lib/apt/lists/* && \ 12 | apt-get clean 13 | 14 | # Install uv and python 15 | ARG UV_VERSION=0.7.2 16 | ARG PYTHON_VERSION=3.12 17 | ENV PATH="/root/.local/bin:$PATH" 18 | RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \ 19 | uv python install ${PYTHON_VERSION} 20 | 21 | # Disable usage stats by default for users who are sensitive to sharing usage. 22 | # Users are encouraged to enable if the wish. 
23 | ENV RAY_USAGE_STATS_ENABLED=0 24 | 25 | FROM base AS hermetic 26 | 27 | WORKDIR /opt/nemo-rl 28 | 29 | # First copy only the dependency files 30 | COPY pyproject.toml uv.lock ./ 31 | COPY --link 3rdparty/ ./3rdparty/ 32 | 33 | # Variables to control the build of TE. If there are issues with parallelization, consider 34 | # setting these to 1. 35 | ARG MAX_JOBS 36 | ARG NVTE_BUILD_THREADS_PER_JOB 37 | 38 | ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv 39 | ENV UV_LINK_MODE=copy 40 | 41 | # Create and activate virtual environment 42 | RUN <<"EOF" bash -exu 43 | uv venv ${UV_PROJECT_ENVIRONMENT} 44 | # uv sync has a more reliable resolver than simple uv pip install which can fail 45 | 46 | # Sync each training + inference backend one at a time (since they may conflict) 47 | # to warm the uv cache, then at the end just sync the default dependencies. 48 | # Do everything in one layer to prevent large layers. 49 | 50 | # The venv is symlinked to avoid bloating the layer size 51 | uv sync --link-mode symlink --locked --extra vllm --no-install-project 52 | uv sync --link-mode symlink --locked --extra mcore --no-install-project --no-build-isolation 53 | uv sync --link-mode symlink --locked --all-groups --no-install-project 54 | EOF 55 | 56 | ENV PATH="/opt/nemo_rl_venv/bin:$PATH" 57 | 58 | FROM hermetic AS release 59 | 60 | ARG NEMO_RL_COMMIT 61 | ARG NVIDIA_BUILD_ID 62 | ARG NVIDIA_BUILD_REF 63 | ENV NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-} 64 | ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID:-} 65 | ENV NVIDIA_BUILD_REF=${NVIDIA_BUILD_REF:-} 66 | LABEL com.nvidia.build.id="${NVIDIA_BUILD_ID}" 67 | LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}" 68 | 69 | COPY . /opt/nemo-rl 70 | 71 | # Make hermetic the default target instead of release since that's the recommended container 72 | FROM hermetic 73 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Building the Docker Container 2 | NOTE: *We use `docker buildx` instead of `docker build` for these containers* 3 | 4 | This directory contains the `Dockerfile` for NeMo-RL Docker images. 5 | You can build two types of images: 6 | - A **base image**: A minimal image where Python dependencies can be specified at runtime. 7 | - A **hermetic image**: An image that includes default dependencies for offline use. 8 | 9 | 10 | For detailed instructions on building these images, please see [docs/docker.md](../docs/docker.md). 
-------------------------------------------------------------------------------- /docs/assets/actor-wg-worker-vc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/actor-wg-worker-vc.png -------------------------------------------------------------------------------- /docs/assets/aime_training_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/aime_training_progress.png -------------------------------------------------------------------------------- /docs/assets/deepscaler_training_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/deepscaler_training_progress.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step1.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step2.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step3.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step4.png -------------------------------------------------------------------------------- /docs/assets/sft-openmathinstruct2-train-loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/sft-openmathinstruct2-train-loss.png -------------------------------------------------------------------------------- /docs/assets/sft-openmathinstruct2-train1M-loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/sft-openmathinstruct2-train1M-loss.png -------------------------------------------------------------------------------- /docs/assets/val-log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/val-log.png -------------------------------------------------------------------------------- /docs/autodoc2_docstrings_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from docutils import nodes 15 | from myst_parser.parsers.sphinx_ import MystParser 16 | from sphinx.ext.napoleon.docstring import GoogleDocstring 17 | 18 | 19 | class NapoleonParser(MystParser): 20 | def parse(self, input_string: str, document: nodes.document) -> None: 21 | # Get the Sphinx configuration 22 | config = document.settings.env.config 23 | 24 | # Process with Google style 25 | google_parsed = str(GoogleDocstring(input_string, config)) 26 | 27 | return super().parse(google_parsed, document) 28 | 29 | 30 | Parser = NapoleonParser 31 | -------------------------------------------------------------------------------- /docs/design-docs/chat-datasets.md: -------------------------------------------------------------------------------- 1 | # Data Format 2 | 3 | This guide outlines the required data format for Hugging Face chat datasets and demonstrates how to use chat templates with Hugging Face tokenizers to add special tokens or task-specific information. 4 | 5 | ## Hugging Face Chat Datasets 6 | 7 | Hugging Face chat datasets are expected to have the following structure: Each example in the dataset should be a dictionary with a `messages` key. The `messages` should be a list of dictionaries, each with a `role` and `content` key. The `role` typically has one of the following values: `system`, `user`, and `assistant`. For example: 8 | 9 | ```json 10 | { 11 | "messages": [ 12 | { 13 | "role": "system", 14 | "content": "This is a helpful system message." 15 | }, 16 | { 17 | "role": "user", 18 | "content": "This is a user's question" 19 | }, 20 | { 21 | "role": "assistant", 22 | "content": "This is the assistant's response." 23 | } 24 | ] 25 | } 26 | ``` 27 | 28 | ## Chat Templates 29 | 30 | Formatting the data in this way allows us to take advantage of the Hugging Face tokenizers' `apply_chat_template` functionality to combine the messages. Chat templates can be used to add special tokens or task-specific information to each example in the dataset. Refer to the [HuggingFace apply_chat_template documentation](https://huggingface.co/docs/transformers/main/en/chat_templating#applychattemplate) for details. 31 | 32 | By default, `apply_chat_template` attempts to apply the `chat_template` associated with the tokenizer. However, in some cases, users might want to specify their own chat template. Also, note that many tokenizers do not have associated `chat_template`s, in which case an explicit chat template is required. Users can specify an explicit chat template string using Jinja format and can pass that string to `apply_chat_template`. 
33 | The following is an example using a simple template which prepends a role header to each turn: 34 | 35 | ```{testcode} 36 | from transformers import AutoTokenizer 37 | 38 | example_template = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{{ content }}{% endfor %}" 39 | 40 | example_input = [ 41 | { 42 | 'role': 'user', 43 | 'content': 'Hello!' 44 | }, 45 | { 46 | 'role': 'assistant', 47 | 'content': 'Hi there!' 48 | } 49 | ] 50 | tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B") 51 | output = tokenizer.apply_chat_template(example_input, chat_template=example_template, tokenize=False) 52 | 53 | ## this is the output string we expect 54 | expected_output = '<|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there!<|eot_id|>' 55 | assert output == expected_output 56 | ``` 57 | 58 | 59 | ```{testoutput} 60 | :hide: 61 | ``` 62 | 63 | For more details on creating chat templates, refer to the [Hugging Face documentation](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template). -------------------------------------------------------------------------------- /docs/design-docs/checkpointing.md: -------------------------------------------------------------------------------- 1 | # Checkpointing with Hugging Face Models 2 | 3 | NeMo RL provides two checkpoint formats for Hugging Face models: Torch distributed and Hugging Face format. Torch distributed is used by default for efficiency, and Hugging Face format is provided for compatibility with Hugging Face's `AutoModel.from_pretrained` API. Note that Hugging Face format checkpoints save only the model weights, ignoring the optimizer states. It is recommended to use Torch distributed format to save intermediate checkpoints and to save a Hugging Face checkpoint only at the end of training. 4 | 5 | A checkpoint converter is provided to convert a Torch distributed checkpoint to Hugging Face format after training: 6 | 7 | ```sh 8 | uv run examples/convert_dcp_to_hf.py --config= --dcp-ckpt-path= --hf-ckpt-path= 9 | ``` 10 | 11 | Usually Hugging Face checkpoints keep the weights and tokenizer together (which we also recommend for provenance). You can copy it afterwards. Here's an end-to-end example: 12 | 13 | ```sh 14 | # Change to your appropriate checkpoint directory 15 | CKPT_DIR=results/sft/step_10 16 | 17 | uv run examples/convert_dcp_to_hf.py --config=$CKPT_DIR/config.yaml --dcp-ckpt-path=$CKPT_DIR/policy/weights --hf-ckpt-path=${CKPT_DIR}-hf 18 | rsync -ahP $CKPT_DIR/policy/tokenizer ${CKPT_DIR}-hf/ 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/design-docs/fsdp2-parallel-plan.md: -------------------------------------------------------------------------------- 1 | # FSDP2 Parallel Plan 2 | 3 | This guide outlines the parallelization strategy for Fully Sharded Data Parallel version 2 (FSDP2) training in NeMo RL. 4 | 5 | ## Fallback Priority 6 | 7 | NeMo RL supports three parallelization strategies, applied in the following order of fallback priority: 8 | 9 | ### 1. Custom Parallel Plan 10 | 11 | Your user-defined custom parallel plans always take precedence when available. For detailed implementation and usage, refer to the [Custom Parallel Plan Example](#custom-parallel-plan-example). 12 | 13 | ### 2.
Optimized Parallel Plan 14 | 15 | Optimized parallel plans are available for specific model architectures. They may offer superior performance compared to Hugging Face's tensor parallel implementation. This approach is used if no custom parallel plan is specified and the model class supports optimized parallelization. 16 | 17 | ### 3. Hugging Face Tensor Parallel Plan 18 | 19 | The Hugging Face tensor parallel plan is the default. It's available for most models via `._tp_plan` and is used when neither a custom nor an optimized parallel plan is available. 20 | 21 | ## Custom Parallel Plan Example 22 | 23 | A custom parallel plan should be defined in a separate file, such as the example provided in `examples/custom_parallel.py`. 24 | 25 | To implement the custom parallel plan, either update the value of `custom_parallel_plan` in the `yaml` file directly, or pass the override via the command line. For example: 26 | 27 | ```bash 28 | uv run examples/run_grpo_math.py \ 29 | policy.dtensor_cfg.custom_parallel_plan=examples.custom_parallel.custom_parallel_plan 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/docker.md: -------------------------------------------------------------------------------- 1 | # Build Docker Images 2 | 3 | This guide provides two methods for building Docker images: the base image, ideal for specifying Python dependencies at runtime, and the hermetic image, which includes default dependencies for offline use. 4 | 5 | ## Base Image 6 | 7 | If you only need the base image with ray + uv, you can build it like so: 8 | 9 | ```sh 10 | cd docker/ 11 | docker buildx build --target base -t nemo_rl -f Dockerfile .. 12 | ``` 13 | 14 | This is **our recommendation** as it is a small image and allows you to specify your Python dependencies at runtime. 15 | 16 | ## Hermetic Image 17 | 18 | The Docker image build without a target stage will include all of the default dependencies to get started. 19 | 20 | ```sh 21 | cd docker/ 22 | docker buildx build -t nemo_rl -f Dockerfile .. 23 | ``` 24 | 25 | This image sets up the Python environment for you, so you do not have to use `uv` if you don't need 26 | any other packages. 27 | 28 | This image is useful in situations where you may not have network connectivity to re-download packages. 29 | -------------------------------------------------------------------------------- /docs/documentation.md: -------------------------------------------------------------------------------- 1 | # Documentation Development 2 | 3 | - [Documentation Development](#documentation-development) 4 | - [Build the Documentation](#build-the-documentation) 5 | - [Live Building](#live-building) 6 | - [Run Tests in Python Docstrings](#run-tests-in-python-docstrings) 7 | - [Write Tests in Python Docstrings](#write-tests-in-python-docstrings) 8 | - [Documentation Version](#documentation-version) 9 | 10 | 11 | ## Build the Documentation 12 | 13 | The following sections describe how to set up and build the NeMo RL documentation. 14 | 15 | Switch to the documentation source folder and generate HTML output. 16 | 17 | ```sh 18 | cd docs/ 19 | uv run --group docs sphinx-build . _build/html 20 | ``` 21 | 22 | * The resulting HTML files are generated in a `_build/html` folder that is created under the project `docs/` folder. 23 | * The generated python API docs are placed in `apidocs` under the `docs/` folder. 
24 | 25 | ## Live Building 26 | 27 | When writing documentation, it can be helpful to serve the documentation and have it update live while you edit. 28 | 29 | To do so, run: 30 | 31 | ```sh 32 | cd docs/ 33 | uv run --group docs sphinx-autobuild . _build/html --port 12345 --host 0.0.0.0 34 | ``` 35 | 36 | Open a web browser and go to `http://${HOST_WHERE_SPHINX_COMMAND_RUN}:12345` to view the output. 37 | 38 | 39 | ## Run Tests in Python Docstrings 40 | 41 | We also run tests in our Python docstrings. You can run them with: 42 | 43 | ```sh 44 | cd docs/ 45 | uv run --group docs sphinx-build -b doctest . _build/doctest 46 | ``` 47 | 48 | ## Write Tests in Python Docstrings 49 | 50 | Any code in triple backtick blocks with the `{doctest}` directive will be tested. The format follows Python's doctest module syntax, where `>>>` indicates Python input and the following line shows the expected output. Here's an example: 51 | 52 | ```python 53 | def add(x: int, y: int) -> int: 54 | """ 55 | Adds two integers together. 56 | 57 | Args: 58 | x (int): The first integer to add. 59 | y (int): The second integer to add. 60 | 61 | Returns: 62 | int: The sum of x and y. 63 | 64 | Examples: 65 | ```{doctest} 66 | >>> from nemo_rl.made_up_package import add 67 | >>> add(1, 2) 68 | 3 69 | ``` 70 | 71 | """ 72 | return x + y 73 | ``` 74 | 75 | ## Documentation Version 76 | 77 | The three files below control the version switcher. Before you attempt to publish a new version of the documentation, update these files to match the latest version numbers. 78 | 79 | * docs/versions1.json 80 | * docs/project.json 81 | * docs/conf.py 82 | 83 | -------------------------------------------------------------------------------- /docs/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import json 16 | import tempfile 17 | 18 | 19 | def make_dpo_dataset(): 20 | train_file = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) 21 | val_file = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) 22 | 23 | # Write train data 24 | train_data = [ 25 | {"context": "What is 2+2?", "chosen": "4", "rejected": "5"}, 26 | {"context": "What is 3*3?", "chosen": "9", "rejected": "6"}, 27 | ] 28 | for item in train_data: 29 | lines = train_file.write(json.dumps(item) + "\n") 30 | train_file.flush() 31 | 32 | # Write validation data 33 | val_data = [ 34 | {"context": "What is 4+4?", "chosen": "8", "rejected": "7"}, 35 | {"context": "What is 5*5?", "chosen": "25", "rejected": "20"}, 36 | ] 37 | for item in val_data: 38 | lines = val_file.write(json.dumps(item) + "\n") 39 | val_file.flush() 40 | 41 | return train_file, val_file 42 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | :relative-docs: docs/ 3 | ``` 4 | 5 | ```{toctree} 6 | :caption: 🖥️ Environment Start 7 | :hidden: 8 | 9 | local-workstation.md 10 | cluster.md 11 | 12 | ``` 13 | 14 | ```{toctree} 15 | :caption: 🚀 E2E Examples 16 | :hidden: 17 | 18 | guides/grpo-deepscaler.md 19 | guides/sft-openmathinstruct2.md 20 | ``` 21 | 22 | ```{toctree} 23 | :caption: 📚 Guides 24 | :hidden: 25 | 26 | adding-new-models.md 27 | guides/sft.md 28 | guides/dpo.md 29 | guides/grpo.md 30 | guides/grpo-deepscaler.md 31 | guides/eval.md 32 | model-quirks.md 33 | ``` 34 | 35 | ```{toctree} 36 | :caption: 🐳 Containers 37 | :hidden: 38 | 39 | docker.md 40 | ``` 41 | 42 | ```{toctree} 43 | :caption: 🛠️ Development 44 | :hidden: 45 | 46 | testing.md 47 | documentation.md 48 | debugging.md 49 | apidocs/index.rst 50 | ``` 51 | 52 | ```{toctree} 53 | :caption: 📐 Design Docs 54 | :hidden: 55 | 56 | design-docs/design-and-philosophy.md 57 | design-docs/padding.md 58 | design-docs/logger.md 59 | design-docs/uv.md 60 | design-docs/chat-datasets.md 61 | design-docs/generation.md 62 | design-docs/checkpointing.md 63 | design-docs/loss-functions.md 64 | design-docs/fsdp2-parallel-plan.md 65 | ``` 66 | -------------------------------------------------------------------------------- /docs/local-workstation.md: -------------------------------------------------------------------------------- 1 | # Run on Your Local Workstation 2 | 3 | When launching examples locally with `uv`, {py:class}`init_ray() ` will first attempt to connect to an existing cluster. If none is found, it will start a local one and connect to it using all available GPU and CPU resources on your node. 4 | 5 | To launch a job outside of a container, simply run: 6 | 7 | ```sh 8 | uv run examples/run_grpo_math.py 9 | ``` 10 | 11 | In the logs, you will see that Ray has started a local cluster instance, along with details on the resources made available to it: 12 | ``` 13 | 2025-03-17 13:37:45,360 INFO worker.py:1841 -- Started a local Ray instance. 14 | ... 
15 | INFO:nemo_rl.distributed.virtual_cluster:Started local cluster with: {'node:__internal_head__': 1.0, 'CPU': 24.0, 'object_store_memory': 80448493977.0, 'accelerator_type:RTX': 1.0, 'memory': 177713152615.0, 'GPU': 1.0, 'node:10.0.0.1': 1.0} 16 | ``` 17 | 18 | To have more precise control over the GPUs Ray uses locally, please use `CUDA_VISIBLE_DEVICES`: 19 | 20 | ```sh 21 | # Use the 0th and 3rd indexed GPU (for a total of 2 GPUs) 22 | CUDA_VISIBLE_DEVICES=0,3 uv run examples/run_grpo_math.py 23 | ``` 24 | 25 | We also allow multiple colocated local clusters, which are uniquely identified by the values in 26 | `CUDA_VISIBLE_DEVICES`. Concretely: 27 | 28 | ```sh 29 | # (1) Start a fresh cluster on GPU=0 30 | CUDA_VISIBLE_DEVICES=0 uv run examples/run_grpo_math.py 31 | 32 | # (2) While (1) is running, this will start a new cluster using GPUs 1 and 2 without interfering with (1) 33 | # Ensure that the CUDA_VISIBLE_DEVICES do not overlap already running jobs. 34 | CUDA_VISIBLE_DEVICES=1,2 uv run examples/run_grpo_math.py 35 | ``` 36 | -------------------------------------------------------------------------------- /docs/model-quirks.md: -------------------------------------------------------------------------------- 1 | # Model Quirks 2 | 3 | This document outlines special cases and model-specific behaviors that require custom handling in NeMo RL. These special cases are controlled by the `ModelFlag` enum. 4 | 5 | ## Gemma-3 6 | 7 | ### Tied Weights 8 | 9 | Weight tying between the embedding layer (`model.embed_tokens`) and output layer (`lm_head`) is currently not respected when using the FSDP1 policy or the DTensor policy when TP > 1 (See [this issue](https://github.com/NVIDIA/NeMo-RL/issues/227)). To avoid errors when training these models, we only allow training models with tied weights using the DTensor policy with TP=1. For Llama-3 and Qwen2.5 models, weight-tying is only enabled for the smaller models (< 2B), which can typically be trained without tensor parallelism. For Gemma-3, all model sizes have weight-tying enabled, including the larger models which require tensor parallelism. To support training of these models, we specially handle the Gemma-3 models by allowing training using the DTensor policy with TP > 1. 10 | 11 | **Special Handling:** 12 | - We skip the tied weights check for all Gemma-3 models when using the DTensor policy, allowing training using TP > 1. 13 | - We exclude `model.embed_tokens` and `lm_head` from the DTensor tensor parallel plan to maintain weight tying correctly. 14 | 15 | ### vLLM Initialization 16 | 17 | Gemma-3 models have a specific issue with vLLM dummy weight initialization due to a vLLM bug where [a `normalizer` buffer is created](https://github.com/vllm-project/vllm/blob/964472b9667508b1d4a7ed92068ff81740ae0036/vllm/model_executor/models/gemma3.py#L372) that is not present in the Hugging Face model. This causes the `normalizer` buffer to be set to dummy weights at initialization and then never updated with the correct values during model refit. As a workaround for this issue, we do not use dummy weight initialization for vLLM with Gemma-3 models and instead use the `load_format="auto"` setting to load the full weights at initialization. 18 | 19 | **Special Handling:** 20 | - We automatically use `load_format="auto"` for Gemma-3 models when initializing vLLM. 21 | - This avoids issues with dummy weight initialization, where the dummy weights for this buffer would never get overwritten during refit.
22 | -------------------------------------------------------------------------------- /docs/project.json: -------------------------------------------------------------------------------- 1 | {"name": "nemo-rl", "version": "0.2.1"} -------------------------------------------------------------------------------- /docs/versions1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "preferred": true, 4 | "version": "0.2.1", 5 | "url": "../0.2.1" 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /examples/configs/eval.yaml: -------------------------------------------------------------------------------- 1 | # Evaluation Configuration 2 | eval: 3 | metric: "pass@1" # only pass@1 is supported now 4 | num_tests_per_prompt: 1 # every prompt will be tested num_tests_per_prompt times and use the average score as the final score 5 | seed: 42 6 | 7 | generation: 8 | backend: "vllm" # only vllm is supported for evaluation 9 | max_new_tokens: ${generation.vllm_cfg.max_model_len} 10 | temperature: 0.0 11 | top_p: 1.0 12 | top_k: -1 # -1 means disable 13 | num_prompts_per_step: -1 # -1 means pass all prompts at once 14 | model_name: "Qwen/Qwen2.5-Math-1.5B-Instruct" 15 | stop_token_ids: null 16 | stop_strings: null 17 | vllm_cfg: 18 | async_engine: false 19 | precision: "bfloat16" 20 | tensor_parallel_size: 1 21 | pipeline_parallel_size: 1 22 | gpu_memory_utilization: 0.9 23 | max_model_len: 2048 24 | 25 | tokenizer: 26 | name: ${generation.model_name} ## specify if you'd like to use a tokenizer different from the model's default 27 | chat_template: "default" 28 | 29 | data: 30 | max_input_seq_length: ${generation.vllm_cfg.max_model_len} # useless since we directly use prompts in evaluation 31 | prompt_file: null 32 | system_prompt_file: null 33 | dataset_name: "HuggingFaceH4/aime_2024" 34 | dataset_key: "train" 35 | problem_key: "problem" 36 | solution_key: "answer" 37 | 38 | env: 39 | math: 40 | num_workers: 8 41 | 42 | cluster: 43 | gpus_per_node: 1 44 | num_nodes: 1 45 | -------------------------------------------------------------------------------- /examples/configs/grpo-deepscaler-1.5b-16K.yaml: -------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo-deepscaler-1.5b-8K.yaml" 3 | 4 | loss_fn: 5 | reference_policy_kl_penalty: 0.001 6 | ratio_clip_max: 0.28 7 | 8 | 9 | policy: 10 | max_total_sequence_length: 16384 11 | 12 | 13 | dynamic_batching: 14 | enabled: False -------------------------------------------------------------------------------- /examples/configs/grpo_deepscaler-1.5b-24K.yaml: 
-------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo-deepscaler-1.5b-8K.yaml" 3 | 4 | loss_fn: 5 | reference_policy_kl_penalty: 0.0001 6 | ratio_clip_min: 0.2 7 | ratio_clip_max: 0.28 8 | 9 | policy: 10 | max_total_sequence_length: 24576 11 | 12 | dtensor_cfg: 13 | enabled: true 14 | cpu_offload: true 15 | sequence_parallel: true 16 | activation_checkpointing: true 17 | tensor_parallel_size: 4 18 | custom_parallel_plan: null 19 | 20 | dynamic_batching: 21 | enabled: False 22 | 23 | optimizer: 24 | name: "torch.optim.AdamW" 25 | kwargs: 26 | lr: 5.0e-7 27 | 28 | generation: 29 | backend: "vllm" 30 | max_new_tokens: ${policy.max_total_sequence_length} 31 | temperature: 1.0 32 | top_p: 1.0 33 | top_k: null 34 | stop_token_ids: null 35 | stop_strings: null 36 | vllm_cfg: 37 | precision: ${policy.precision} 38 | tensor_parallel_size: 1 39 | pipeline_parallel_size: 1 40 | gpu_memory_utilization: 0.8 41 | max_model_len: ${policy.max_total_sequence_length} 42 | # For most cases, use "dummy" to load the initial weights, since they will be overwritten during refit 43 | # For Gemma models, we need to use "auto" due to a vllm bug 44 | load_format: dummy 45 | -------------------------------------------------------------------------------- /examples/configs/grpo_math_8B.yaml: -------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo_math_1B.yaml" 3 | 4 | grpo: 5 | num_prompts_per_step: 64 6 | num_generations_per_prompt: 32 7 | 8 | policy: 9 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 10 | tokenizer: 11 | name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default 12 | train_global_batch_size: 512 13 | train_micro_batch_size: 1 14 | generation_batch_size: 32 # Only used when generating using HF backend 15 | logprob_batch_size: 2 16 | max_total_sequence_length: 4096 17 | precision: "bfloat16" 18 | fsdp_offload_enabled: false 19 | activation_checkpointing_enabled: false 20 | 21 | dtensor_cfg: 22 | enabled: True 23 | 24 | dynamic_batching: 25 | train_mb_tokens: 4096 26 | logprob_mb_tokens: 8192 27 | 28 | optimizer: 29 | name: "torch.optim.AdamW" 30 | kwargs: 31 | lr: 3.0e-7 32 | weight_decay: 0.01 33 | betas: [0.9, 0.999] 34 | eps: 1e-8 35 | 36 | scheduler: 37 | - name: "torch.optim.lr_scheduler.LinearLR" 38 | kwargs: 39 | start_factor: 0.1 40 | end_factor: 1.0 41 | # The scheduler iteration is per GRPO step and is decoupled from the optimizer step (may be >=1 per GRPO step) 42 | total_iters: 13 43 | - name: "torch.optim.lr_scheduler.ConstantLR" 44 | kwargs: 45 | factor: 1.0 46 | total_iters: 10000000000 47 | - milestones: [13] 48 | 49 | generation: 50 | backend: "vllm" 51 | max_new_tokens: ${policy.max_total_sequence_length} 52 | temperature: 1.0 53 | top_p: 1.0 54 | top_k: null 55 | stop_token_ids: null 56 | stop_strings: null 57 | vllm_cfg: 58 | tensor_parallel_size: 1 59 | gpu_memory_utilization: 0.6 60 | max_model_len: ${policy.max_total_sequence_length} 61 | 62 | cluster: 63 | gpus_per_node: 8 64 | num_nodes: 1 65 | -------------------------------------------------------------------------------- /examples/configs/grpo_sliding_puzzle.yaml: -------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo_math_1B.yaml" 3 | 4 | grpo: 5 | num_prompts_per_step: 32 6 | num_generations_per_prompt: 16 7 | max_rollout_turns: 50 # 
Maximum turns allowed per rollout 8 | max_num_steps: 10000 9 | 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: "results/grpo-sliding-puzzle" 13 | metric_name: "val_reward" 14 | higher_is_better: true 15 | keep_top_k: 3 16 | save_period: 10 17 | 18 | policy: 19 | model_name: "Qwen/Qwen2.5-1.5B-Instruct" 20 | max_total_sequence_length: 3072 21 | 22 | generation: 23 | backend: "vllm" 24 | max_new_tokens: ${policy.max_total_sequence_length} 25 | temperature: 1.0 26 | # Setting top_p/top_k to 0.999/10000 to strip out Qwen's special/illegal tokens 27 | # https://github.com/NVIDIA/NeMo-RL/issues/237 28 | top_p: 0.999 29 | top_k: 10000 30 | stop_token_ids: null 31 | stop_strings: null 32 | vllm_cfg: 33 | async_engine: false 34 | tensor_parallel_size: 1 35 | pipeline_parallel_size: 1 36 | gpu_memory_utilization: 0.6 37 | max_model_len: ${policy.max_total_sequence_length} 38 | 39 | data: 40 | add_system_prompt: false 41 | 42 | env: 43 | sliding_puzzle_game: 44 | cfg: 45 | game_config: 46 | size: 5 # Size of the puzzle (e.g., 2 for 2x2, 3 for 3x3) 47 | shuffle_moves: 15 # Number of random moves to shuffle the solved state 48 | max_moves: 50 # Maximum moves allowed per episode 49 | 50 | logger: 51 | log_dir: "logs" # Base directory for all logs 52 | num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal 53 | wandb_enabled: false 54 | tensorboard_enabled: false 55 | monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard 56 | wandb: 57 | project: "grpo-dev" 58 | name: "grpo-dev-sliding_puzzle" 59 | tensorboard: {} 60 | gpu_monitoring: 61 | collection_interval: 10 # How often to collect GPU usage metrics (in seconds) 62 | flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) 63 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 2 3 | max_num_steps: 20 4 | val_period: 50 5 | val_batches: 16 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0.01 16 | 17 | checkpointing: 18 | enabled: false 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 10000 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | train_global_batch_size: 256 30 | train_micro_batch_size: 1 31 | max_total_sequence_length: 2048 32 | precision: "bfloat16" 33 | fsdp_offload_enabled: false 34 | activation_checkpointing_enabled: false 35 | 36 | dtensor_cfg: 37 | enabled: false 38 | cpu_offload: False 39 | sequence_parallel: false 40 | activation_checkpointing: false 41 | tensor_parallel_size: 1 42 | custom_parallel_plan: null 43 | 44 | dynamic_batching: 45 | enabled: False 46 | 47 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 48 | max_grad_norm: 1.0 49 | 50 | optimizer: 51 | name: "torch.optim.AdamW" 52 | kwargs: 53 | lr: 5.0e-6 54 | weight_decay: 0.1 55 | betas: [0.9, 0.98] 56 | eps: 1e-8 57 | foreach: False 58 | fused: False 59 | 60 | scheduler: 61 | - name: "torch.optim.lr_scheduler.LinearLR" 62 
| kwargs: 63 | start_factor: 0.000000001 64 | end_factor: 1.0 65 | total_iters: 1 66 | - name: "torch.optim.lr_scheduler.ConstantLR" 67 | kwargs: 68 | factor: 1.0 69 | total_iters: 10000000000 70 | - milestones: [1] 71 | 72 | data: 73 | dataset_name: "HelpSteer3" 74 | max_input_seq_length: ${policy.max_total_sequence_length} 75 | 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 4 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 1 3 | max_num_steps: 150 4 | val_period: 50 5 | val_batches: 16 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0.01 16 | 17 | checkpointing: 18 | enabled: true 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 10000 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | train_global_batch_size: 256 30 | train_micro_batch_size: 1 31 | max_total_sequence_length: 2048 32 | precision: "bfloat16" 33 | fsdp_offload_enabled: false 34 | activation_checkpointing_enabled: false 35 | 36 | dtensor_cfg: 37 | enabled: true 38 | cpu_offload: False 39 | sequence_parallel: false 40 | activation_checkpointing: false 41 | tensor_parallel_size: 1 42 | custom_parallel_plan: null 43 | 44 | dynamic_batching: 45 | enabled: False 46 | 47 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 48 | max_grad_norm: 1.0 49 | 50 | optimizer: 51 | name: "torch.optim.AdamW" 52 | kwargs: 53 | lr: 5.0e-6 54 | weight_decay: 0.1 55 | betas: [0.9, 0.98] 56 | eps: 1e-8 57 | foreach: False 58 | fused: False 59 | 60 | scheduler: 61 | - name: "torch.optim.lr_scheduler.LinearLR" 62 | kwargs: 63 | start_factor: 0.000000001 64 | end_factor: 1.0 65 | total_iters: 1 66 | - name: "torch.optim.lr_scheduler.ConstantLR" 67 | kwargs: 68 | factor: 1.0 69 | total_iters: 10000000000 70 | - milestones: [1] 71 | 72 | data: 73 | dataset_name: "HelpSteer3" 74 | max_input_seq_length: ${policy.max_total_sequence_length} 75 | 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 4 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 2 3 | max_num_steps: 20 4 | val_period: 50 5 | val_batches: 16 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 
42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0.01 16 | 17 | checkpointing: 18 | enabled: true 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 10000 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | train_global_batch_size: 256 30 | train_micro_batch_size: 1 31 | max_total_sequence_length: 2048 32 | precision: "bfloat16" 33 | fsdp_offload_enabled: false 34 | activation_checkpointing_enabled: false 35 | 36 | dtensor_cfg: 37 | enabled: true 38 | cpu_offload: False 39 | sequence_parallel: false 40 | activation_checkpointing: false 41 | tensor_parallel_size: 2 42 | custom_parallel_plan: null 43 | 44 | dynamic_batching: 45 | enabled: False 46 | 47 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 48 | max_grad_norm: 1.0 49 | 50 | optimizer: 51 | name: "torch.optim.AdamW" 52 | kwargs: 53 | lr: 5.0e-6 54 | weight_decay: 0.1 55 | betas: [0.9, 0.98] 56 | eps: 1e-8 57 | foreach: False 58 | fused: False 59 | 60 | scheduler: 61 | - name: "torch.optim.lr_scheduler.LinearLR" 62 | kwargs: 63 | start_factor: 0.000000001 64 | end_factor: 1.0 65 | total_iters: 1 66 | - name: "torch.optim.lr_scheduler.ConstantLR" 67 | kwargs: 68 | factor: 1.0 69 | total_iters: 10000000000 70 | - milestones: [1] 71 | 72 | data: 73 | dataset_name: "HelpSteer3" 74 | max_input_seq_length: ${policy.max_total_sequence_length} 75 | 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 4 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 1 3 | max_num_steps: 150 4 | val_period: 25 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0 16 | 17 | checkpointing: 18 | enabled: true 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 50 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.2-1B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | 30 | train_global_batch_size: 128 31 | train_micro_batch_size: 2 32 | max_total_sequence_length: 1024 33 | precision: "bfloat16" 34 | fsdp_offload_enabled: false 35 | activation_checkpointing_enabled: false 36 | 37 | dtensor_cfg: 38 | enabled: true 39 | cpu_offload: False 40 | sequence_parallel: false 41 | activation_checkpointing: false 42 | tensor_parallel_size: 1 43 | custom_parallel_plan: null 44 | 45 | dynamic_batching: 46 | enabled: False 47 | 48 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 49 | max_grad_norm: 1.0 50 | 51 | optimizer: 52 | name: "torch.optim.AdamW" 53 | 
kwargs: 54 | lr: 5.0e-6 55 | weight_decay: 0.1 56 | betas: [0.9, 0.98] 57 | eps: 1e-5 58 | foreach: False 59 | fused: False 60 | 61 | scheduler: 62 | - name: "torch.optim.lr_scheduler.LinearLR" 63 | kwargs: 64 | start_factor: 0.1 65 | end_factor: 1.0 66 | total_iters: 20 67 | - name: "torch.optim.lr_scheduler.ConstantLR" 68 | kwargs: 69 | factor: 1.0 70 | total_iters: 10000000000 71 | - milestones: [20] 72 | 73 | data: 74 | dataset_name: "HelpSteer3" 75 | max_input_seq_length: ${policy.max_total_sequence_length} 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 1 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml: -------------------------------------------------------------------------------- 1 | grpo: 2 | num_prompts_per_step: 64 3 | num_generations_per_prompt: 32 4 | max_rollout_turns: 1 5 | max_num_steps: 30 6 | normalize_rewards: true 7 | use_leave_one_out_baseline: true 8 | val_period: 10 9 | val_at_start: false 10 | max_val_samples: 256 11 | val_batch_size: 256 12 | loss_fn: 13 | reference_policy_kl_penalty: 0.01 14 | ratio_clip_min: 0.2 15 | ratio_clip_max: 0.2 16 | ratio_clip_c: null 17 | use_on_policy_kl_approximation: false 18 | use_importance_sampling_correction: false 19 | token_level_loss: true 20 | checkpointing: 21 | enabled: true 22 | checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 23 | metric_name: val_reward 24 | higher_is_better: true 25 | keep_top_k: 3 26 | save_period: 10 27 | policy: 28 | model_name: Qwen/Qwen2.5-7B-Instruct 29 | tokenizer: 30 | name: Qwen/Qwen2.5-7B-Instruct 31 | train_global_batch_size: 512 32 | train_micro_batch_size: 1 33 | generation_batch_size: 32 34 | logprob_batch_size: 2 35 | max_total_sequence_length: 4096 36 | precision: bfloat16 37 | fsdp_offload_enabled: false 38 | activation_checkpointing_enabled: false 39 | dtensor_cfg: 40 | enabled: false 41 | cpu_offload: false 42 | sequence_parallel: false 43 | activation_checkpointing: false 44 | tensor_parallel_size: 1 45 | custom_parallel_plan: null 46 | dynamic_batching: 47 | enabled: False 48 | make_sequence_length_divisible_by: 1 49 | max_grad_norm: 1 50 | optimizer: 51 | name: torch.optim.AdamW 52 | kwargs: 53 | lr: 3e-07 54 | weight_decay: 0.01 55 | betas: 56 | - 0.9 57 | - 0.999 58 | eps: 1e-08 59 | foreach: false 60 | fused: false 61 | scheduler: 62 | - name: torch.optim.lr_scheduler.LinearLR 63 | kwargs: 64 | start_factor: 0.1 65 | end_factor: 1 66 | total_iters: 13 67 | - name: torch.optim.lr_scheduler.ConstantLR 68 | kwargs: 69 | factor: 1 70 | total_iters: 10000000000 71 | - milestones: 72 | - 13 73 | generation: 74 | backend: vllm 75 | max_new_tokens: 4096 76 | temperature: 1 77 | top_p: 1 78 | top_k: null 79 | stop_token_ids: 80 | - 151645 81 | stop_strings: null 82 | vllm_cfg: 83 | async_engine: false 84 | precision: ${policy.precision} 85 | tensor_parallel_size: 1 86 | pipeline_parallel_size: 1 87 | gpu_memory_utilization: 0.6 88 | max_model_len: 4096 89 | data: 90 | max_input_seq_length: 4096 91 | prompt_file: examples/prompts/cot.txt 92 | system_prompt_file: null 93 | dataset_name: OpenMathInstruct-2 94 | env: 95 | math: 96 | num_workers: 8 97 | logger: 98 | log_dir: 
logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 99 | num_val_samples_to_print: 0 100 | wandb_enabled: true 101 | tensorboard_enabled: true 102 | monitor_gpus: true 103 | wandb: 104 | project: nemo-rl 105 | name: grpo-qwen2.5-7b-instruct-4n8g-fsdp1 106 | tensorboard: {} 107 | gpu_monitoring: 108 | collection_interval: 10 109 | flush_interval: 10 110 | cluster: 111 | gpus_per_node: 8 112 | num_nodes: 4 113 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 250 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp1 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.1-8B-Instruct 19 | tokenizer: 20 | name: meta-llama/Llama-3.1-8B-Instruct 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: false 30 | cpu_offload: false 31 | sequence_parallel: false 32 | activation_checkpointing: false 33 | tensor_parallel_size: 1 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 1 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.1-8b-instruct-1n8g-fsdp1 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 2730 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.1-8B-Instruct 19 | tokenizer: 20 | name: meta-llama/Llama-3.1-8B-Instruct 21 | chat_template: '{% for message in 
messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: false 32 | activation_checkpointing: false 33 | tensor_parallel_size: 1 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 1 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 350 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.1-8B-Instruct 19 | tokenizer: 20 | name: meta-llama/Llama-3.1-8B-Instruct 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: true 32 | activation_checkpointing: false 33 | tensor_parallel_size: 2 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 2 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: 
logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 500 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.2-1b-1n8g-fsdp2tp1 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.2-1B 19 | tokenizer: 20 | name: meta-llama/Llama-3.2-1B 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: false 32 | activation_checkpointing: false 33 | tensor_parallel_size: 1 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 1 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.2-1b-1n8g-fsdp2tp1 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 20 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: Qwen/Qwen2.5-32B 19 | tokenizer: 20 | name: Qwen/Qwen2.5-32B 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + 
message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 16000 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: true 32 | activation_checkpointing: true 33 | tensor_parallel_size: 8 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 8 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 16000 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 4 72 | -------------------------------------------------------------------------------- /examples/configs/sft.yaml: -------------------------------------------------------------------------------- 1 | # SFT Algorithm Configuration 2 | sft: 3 | ## total number of steps to train will equal 4 | ## min((max_num_epochs * len(train_dataloader)), max_num_steps) 5 | max_num_epochs: 1 6 | max_num_steps: 60 7 | 8 | val_period: 10 9 | val_batches: 8 10 | val_global_batch_size: 32 11 | val_micro_batch_size: 1 12 | val_at_start: true 13 | seed: 42 14 | 15 | checkpointing: 16 | enabled: true 17 | checkpoint_dir: "results/sft" 18 | metric_name: "val_loss" 19 | higher_is_better: false 20 | keep_top_k: 3 21 | save_period: 10 22 | 23 | policy: 24 | model_name: "meta-llama/Llama-3.2-1B" 25 | tokenizer: 26 | name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default 27 | chat_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" 28 | train_global_batch_size: 32 29 | train_micro_batch_size: 1 30 | max_total_sequence_length: 1024 31 | precision: "bfloat16" 32 | fsdp_offload_enabled: false 33 | activation_checkpointing_enabled: false 34 | 35 | dtensor_cfg: 36 | enabled: true 37 | cpu_offload: False 38 | sequence_parallel: false 39 | activation_checkpointing: false 40 | tensor_parallel_size: 1 41 | custom_parallel_plan: null 42 | 43 | dynamic_batching: 44 | enabled: false 45 | 46 | # makes the training sequence length divisible by the tensor parallel size 47 | # this is useful for sequence parallel training 48 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 49 | max_grad_norm: 1.0 50 | 51 | optimizer: 52 | name: "torch.optim.AdamW" 53 | kwargs: 54 | lr: 5.0e-6 55 | weight_decay: 0.1 56 | betas: [0.9, 0.98] 57 | 
eps: 1e-5 58 | # when using Dtensor, we need to set foreach 59 | # and fused to False 60 | foreach: False 61 | fused: False 62 | 63 | data: 64 | max_input_seq_length: ${policy.max_total_sequence_length} 65 | dataset_name: "squad" 66 | add_bos: true 67 | add_eos: true 68 | add_generation_prompt: false 69 | 70 | logger: 71 | log_dir: "logs" # Base directory for all logs 72 | wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running 73 | tensorboard_enabled: true 74 | monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard 75 | wandb: 76 | project: "sft-dev" 77 | name: "sft-dev-${data.dataset_name}" 78 | tensorboard: 79 | log_dir: "tb_logs-sft-dev-${data.dataset_name}" 80 | gpu_monitoring: 81 | collection_interval: 10 # How often to collect GPU usage metrics (in seconds) 82 | flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) 83 | 84 | cluster: 85 | gpus_per_node: 1 86 | num_nodes: 1 87 | -------------------------------------------------------------------------------- /examples/configs/sft_openmathinstruct2.yaml: -------------------------------------------------------------------------------- 1 | # SFT Algorithm Configuration 2 | sft: 3 | max_num_epochs: 1 4 | max_num_steps: 1000000 5 | val_period: 500 6 | val_batches: 4 7 | val_global_batch_size: 128 8 | val_micro_batch_size: 2 9 | val_at_start: true 10 | seed: 42 11 | 12 | checkpointing: 13 | enabled: true 14 | checkpoint_dir: "results/sft_openmathinstruct2" 15 | metric_name: "val_loss" 16 | higher_is_better: false 17 | keep_top_k: 100 18 | save_period: 500 19 | 20 | policy: 21 | model_name: "meta-llama/Llama-3.1-8B" 22 | tokenizer: 23 | name: meta-llama/Llama-3.1-8B-Instruct ## specify if you'd like to use a tokenizer different from the model's default 24 | train_global_batch_size: 512 25 | train_micro_batch_size: 2 26 | max_total_sequence_length: 4096 27 | precision: "bfloat16" 28 | fsdp_offload_enabled: false 29 | activation_checkpointing_enabled: false 30 | 31 | dtensor_cfg: 32 | enabled: true 33 | cpu_offload: False 34 | sequence_parallel: false 35 | activation_checkpointing: false 36 | tensor_parallel_size: 4 37 | custom_parallel_plan: null 38 | 39 | # makes the training sequence length divisible by the tensor parallel size 40 | # this is useful for sequence parallel training 41 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 42 | max_grad_norm: null 43 | 44 | optimizer: 45 | name: "torch.optim.AdamW" 46 | kwargs: 47 | lr: 2e-5 48 | weight_decay: 0.01 49 | betas: [0.9, 0.98] 50 | eps: 1e-8 51 | # when using Dtensor, we need to set foreach 52 | # and fused to False 53 | foreach: False 54 | fused: False 55 | 56 | data: 57 | max_input_seq_length: ${policy.max_total_sequence_length} 58 | dataset_name: "openmathinstruct2" 59 | prompt_file: examples/prompts/math.txt 60 | split: "train_1M" 61 | add_bos: true 62 | add_eos: true 63 | add_generation_prompt: true 64 | output_key: 'generated_solution' 65 | 66 | logger: 67 | log_dir: "logs" # Base directory for all logs 68 | wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running 69 | tensorboard_enabled: true 70 | monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard 71 | wandb: 72 | project: "sft-dev" 73 | name: "openmathinstruct-nemorl-1M_train" 74 | tensorboard: 75 | log_dir: "tb_logs-openmathinstruct-nemorl-1M_train" 76 | gpu_monitoring: 77 | collection_interval: 10 # How often to collect GPU usage 
metrics (in seconds) 78 | flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) 79 | 80 | cluster: 81 | gpus_per_node: 8 82 | num_nodes: 1 83 | -------------------------------------------------------------------------------- /examples/convert_dcp_to_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | import yaml 18 | 19 | from nemo_rl.utils.native_checkpoint import convert_dcp_to_hf 20 | 21 | 22 | def parse_args(): 23 | """Parse command line arguments.""" 24 | parser = argparse.ArgumentParser( 25 | description="Convert Torch DCP checkpoint to HF checkpoint" 26 | ) 27 | parser.add_argument( 28 | "--config", 29 | type=str, 30 | default=None, 31 | help="Path to config.yaml file in the checkpoint directory", 32 | ) 33 | parser.add_argument( 34 | "--dcp-ckpt-path", type=str, default=None, help="Path to DCP checkpoint" 35 | ) 36 | parser.add_argument( 37 | "--hf-ckpt-path", type=str, default=None, help="Path to save HF checkpoint" 38 | ) 39 | # Parse known args for the script 40 | args = parser.parse_args() 41 | 42 | return args 43 | 44 | 45 | def main(): 46 | """Main entry point.""" 47 | args = parse_args() 48 | 49 | with open(args.config, "r") as f: 50 | config = yaml.safe_load(f) 51 | 52 | model_name_or_path = config["policy"]["model_name"] 53 | # TODO: After the following PR gets merged: 54 | # https://github.com/NVIDIA/NeMo-RL/pull/148/files 55 | # tokenizer should be copied from policy/tokenizer/* instead of relying on the model name 56 | # We can expose a arg at the top level --tokenizer_path to plumb that through. 57 | # This is more stable than relying on the current NeMo-RL get_tokenizer() which can 58 | # change release to release. 59 | tokenizer_name_or_path = config["policy"]["model_name"] 60 | 61 | hf_ckpt = convert_dcp_to_hf( 62 | dcp_ckpt_path=args.dcp_ckpt_path, 63 | hf_ckpt_path=args.hf_ckpt_path, 64 | model_name_or_path=model_name_or_path, 65 | tokenizer_name_or_path=tokenizer_name_or_path, 66 | ) 67 | print(f"Saved HF checkpoint to: {hf_ckpt}") 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /examples/custom_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from torch.distributed.tensor.parallel import ColwiseParallel, RowwiseParallel 16 | from torch.distributed.tensor.placement_types import Replicate, Shard 17 | 18 | custom_parallel_plan = { 19 | "model.embed_tokens": RowwiseParallel(input_layouts=Replicate()), 20 | "model.layers.*.self_attn.q_proj": ColwiseParallel(), 21 | "model.layers.*.self_attn.k_proj": ColwiseParallel(), 22 | "model.layers.*.self_attn.v_proj": ColwiseParallel(), 23 | "model.layers.*.self_attn.o_proj": RowwiseParallel(), 24 | "model.layers.*.mlp.up_proj": ColwiseParallel(), 25 | "model.layers.*.mlp.gate_proj": ColwiseParallel(), 26 | "model.layers.*.mlp.down_proj": RowwiseParallel(), 27 | "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), 28 | } 29 | -------------------------------------------------------------------------------- /examples/prompts/cot.txt: -------------------------------------------------------------------------------- 1 | Think step-by-step to solve the following problem. Output your answer inside of \\boxed{{}} tags.: 2 | {} 3 | 4 | Let's think step-by-step -------------------------------------------------------------------------------- /examples/prompts/math.txt: -------------------------------------------------------------------------------- 1 | Solve the following math problem. Make sure to put the answer (and only answer) inside \\boxed{{}}. 2 | 3 | {} 4 | 5 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | disable_error_code = no-redef 3 | allow_redefinition = True 4 | check_untyped_defs = True 5 | python_version = 3.12 6 | 7 | [mypy-pynvml.*] 8 | ignore_missing_imports = True 9 | 10 | [mypy-hydra._internal.*] 11 | ignore_missing_imports = True 12 | 13 | [mypy-hydra.core.override_parser.*] 14 | ignore_missing_imports = True 15 | 16 | [mypy-datasets.*] 17 | ignore_missing_imports = True 18 | 19 | [mypy-transformers.*] 20 | ignore_missing_imports = True 21 | 22 | [mypy-vllm.*] 23 | ignore_missing_imports = True 24 | 25 | [mypy-math_verify.*] 26 | ignore_missing_imports = True 27 | 28 | [mypy-torchdata.*] 29 | ignore_missing_imports = True 30 | -------------------------------------------------------------------------------- /nemo_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | import sys 16 | from pathlib import Path 17 | 18 | """ 19 | This is a work around to ensure whenever NeMo RL is imported, that we 20 | add Megatron-LM to the python path. This is because the only sub-package 21 | that's officially installed is megatron.core. So we add the whole repo into 22 | the path so we can access megatron.{training,legacy,inference,...} 23 | 24 | Since users may pip install NeMo RL, this is a convenience so they do not 25 | have to manually run with PYTHONPATH=3rdparty/Megatron-LM-workspace/Megatron-LM. 26 | """ 27 | megatron_path = ( 28 | Path(__file__).parent.parent / "3rdparty" / "Megatron-LM-workspace" / "Megatron-LM" 29 | ) 30 | if megatron_path.exists() and str(megatron_path) not in sys.path: 31 | sys.path.append(str(megatron_path)) 32 | 33 | from nemo_rl.package_info import ( 34 | __contact_emails__, 35 | __contact_names__, 36 | __description__, 37 | __download_url__, 38 | __homepage__, 39 | __keywords__, 40 | __license__, 41 | __package_name__, 42 | __repository_url__, 43 | __shortversion__, 44 | __version__, 45 | ) 46 | 47 | os.environ["RAY_USAGE_STATS_ENABLED"] = "0" 48 | -------------------------------------------------------------------------------- /nemo_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/algorithms/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/converters/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/converters/huggingface/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/huggingface/vllm_export.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /nemo_rl/converters/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/converters/megatron/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/megatron/vllm_export.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo_rl/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, TypedDict 16 | 17 | 18 | class DataConfig(TypedDict): 19 | max_input_seq_length: int 20 | prompt_file: str 21 | system_prompt_file: Optional[str] 22 | dataset_name: str 23 | val_dataset_name: Optional[str] 24 | add_bos: Optional[bool] 25 | add_eos: Optional[bool] 26 | input_key: Optional[str] 27 | output_key: Optional[str] 28 | 29 | 30 | class MathDataConfig(DataConfig): 31 | problem_key: str 32 | solution_key: str 33 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES 16 | from nemo_rl.data.hf_datasets.dpo import DPODataset 17 | from nemo_rl.data.hf_datasets.helpsteer3 import HelpSteer3Dataset 18 | from nemo_rl.data.hf_datasets.oasst import OasstDataset 19 | from nemo_rl.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset 20 | from nemo_rl.data.hf_datasets.prompt_response_dataset import ( 21 | PromptResponseDataset, 22 | ) 23 | from nemo_rl.data.hf_datasets.squad import SquadDataset 24 | 25 | __all__ = [ 26 | "DPODataset", 27 | "HelpSteer3Dataset", 28 | "OasstDataset", 29 | "OpenMathInstruct2Dataset", 30 | "PromptResponseDataset", 31 | "SquadDataset", 32 | "COMMON_CHAT_TEMPLATES", 33 | ] 34 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/chat_templates.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ## a reference to frequently used chat templates for convenience 16 | class COMMON_CHAT_TEMPLATES: 17 | ### simple template which prepends a role header to the content 18 | simple_role_header = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}" 19 | 20 | ### passthrough template which just concatenates the content of the messages with no special tokens 21 | passthrough_prompt_response = ( 22 | "{% for message in messages %}{{ message['content'] }}{% endfor %}" 23 | ) 24 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/deepscaler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from typing import Any 17 | 18 | from datasets import Dataset, load_dataset 19 | 20 | from nemo_rl.data.interfaces import TaskDataSpec 21 | 22 | 23 | def format_math(data: dict[str, str | float | int]) -> dict[str, list[Any] | str]: 24 | return { 25 | "messages": [ 26 | { 27 | "role": "user", 28 | "content": data["problem"], 29 | }, 30 | { 31 | "role": "assistant", 32 | "content": data["answer"], 33 | }, 34 | ], 35 | # For the v0.1 release, NeMo RL datasets require a task_name key so that users can map a task processor per unique task. 36 | "task_name": "math", 37 | } 38 | 39 | 40 | def prepare_deepscaler_dataset(seed: int = 42) -> dict[str, Dataset | None]: 41 | """Load the DeepScaleR dataset for training and the AIME 2024 dataset for validation.""" 42 | # Load the original dataset for training 43 | train_ds = load_dataset("agentica-org/DeepScaleR-Preview-Dataset", split="train") 44 | 45 | # Load the HuggingFaceH4/aime_2024 dataset for validation 46 | val_ds = load_dataset("HuggingFaceH4/aime_2024", split="train") 47 | 48 | # Shuffle the training dataset with the specified seed 49 | train_ds = train_ds.shuffle(seed=seed) 50 | 51 | # Format the examples, removing original columns 52 | train_formatted = train_ds.map(format_math, remove_columns=train_ds.column_names) 53 | val_formatted = val_ds.map(format_math, remove_columns=val_ds.column_names) 54 | 55 | # Compute accuracy 16 times per sample (matching the DeepScaleR evaluation setting) 56 | val_repeated = [] 57 | for _ in range(16): 58 | val_repeated.extend(val_formatted) 59 | val_formatted = val_formatted.from_list(val_repeated) 60 | 61 | return { 62 | "train": train_formatted, 63 | "validation": val_formatted, 64 | } 65 | 66 | 67 | class DeepScalerDataset: 68 | def __init__(self, seed: int = 42) -> None: 69 | """Initialize the DeepScaler dataset with train/validation split. 70 | 71 | Args: 72 | seed: Random seed for reproducible shuffling 73 | """ 74 | self.formatted_ds = prepare_deepscaler_dataset(seed=seed) 75 | 76 | self.task_spec = TaskDataSpec( 77 | task_name="DeepScaler", 78 | ) 79 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/dpo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from datasets import load_dataset 15 | 16 | from nemo_rl.data.interfaces import TaskDataSpec 17 | 18 | 19 | class DPODataset: 20 | """Dataset class for Direct Preference Optimization (DPO) training. 21 | 22 | This class handles loading of preference data for DPO training.
23 | The input JSON files should contain examples with the following structure: 24 | { 25 | "prompt": str, # The input prompt/context 26 | "chosen_response": str, # The preferred/winning response 27 | "rejected_response": str # The non-preferred/losing response 28 | } 29 | 30 | Args: 31 | train_data_path (str): Path to the JSON file containing training data 32 | val_data_path (str): Path to the JSON file containing validation data 33 | 34 | """ 35 | 36 | def __init__(self, train_data_path: str, val_data_path: str): 37 | self.formatted_ds = { 38 | "train": load_dataset("json", data_files=train_data_path, split="train"), 39 | "validation": load_dataset("json", data_files=val_data_path, split="train"), 40 | } 41 | 42 | self.task_spec = TaskDataSpec( 43 | task_name="DPO", 44 | ) 45 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/helpsteer3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any 15 | 16 | from absl import logging 17 | from datasets import load_dataset 18 | 19 | from nemo_rl.data.interfaces import TaskDataSpec 20 | 21 | 22 | def format_helpsteer3(data: dict[str, Any]) -> dict[str, str | dict[str, str]]: 23 | response_1 = data["response1"] 24 | response_2 = data["response2"] 25 | overall_preference = data["overall_preference"] 26 | 27 | if overall_preference < 0: 28 | chosen = response_1 29 | rejected = response_2 30 | elif overall_preference == 0: 31 | logging.log_every_n( 32 | logging.WARNING, 33 | "Preference is 0 for some examples! Setting chosen and rejected to response 1 since we don't know which response is better", 34 | 1000, 35 | ) 36 | chosen = response_1 37 | rejected = response_1 38 | else: 39 | chosen = response_2 40 | rejected = response_1 41 | 42 | return { 43 | "prompt": data["context"], 44 | "chosen_response": chosen, 45 | "rejected_response": rejected, 46 | } 47 | 48 | 49 | class HelpSteer3Dataset: 50 | """HelpSteer3 preference dataset for DPO training.""" 51 | 52 | def __init__(self) -> None: 53 | ds = load_dataset("nvidia/HelpSteer3", "preference") 54 | self.formatted_ds = ds.map(format_helpsteer3) 55 | 56 | self.task_spec = TaskDataSpec( 57 | task_name="HelpSteer3", 58 | ) 59 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/prompt_response_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
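To make the on-disk format described in the DPODataset docstring above concrete, here is a minimal usage sketch (not part of the repository files; the record and temporary paths are invented for illustration):

```python
# Minimal sketch: build tiny JSON-lines files in the expected DPO format and load them.
import json
import tempfile
from pathlib import Path

from nemo_rl.data.hf_datasets.dpo import DPODataset

record = {
    "prompt": "What is the capital of France?",
    "chosen_response": "The capital of France is Paris.",
    "rejected_response": "I don't know.",
}

tmp_dir = Path(tempfile.mkdtemp())
train_path = tmp_dir / "train.jsonl"
val_path = tmp_dir / "val.jsonl"
for path in (train_path, val_path):
    path.write_text(json.dumps(record) + "\n")

dataset = DPODataset(train_data_path=str(train_path), val_data_path=str(val_path))
print(dataset.formatted_ds["train"][0]["chosen_response"])
```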
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Any 16 | 17 | from datasets import load_dataset 18 | 19 | from nemo_rl.data.interfaces import TaskDataSpec 20 | 21 | 22 | class PromptResponseDataset: 23 | def __init__( 24 | self, 25 | train_ds_path: str, 26 | val_ds_path: str, 27 | input_key: str = "input", 28 | output_key: str = "output", 29 | ): 30 | train_original_dataset = load_dataset("json", data_files=train_ds_path)["train"] 31 | val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] 32 | 33 | self.input_key = input_key 34 | self.output_key = output_key 35 | 36 | formatted_train_dataset = train_original_dataset.map(self.add_messages_key) 37 | formatted_val_dataset = val_original_dataset.map(self.add_messages_key) 38 | 39 | self.formatted_ds = { 40 | "train": formatted_train_dataset, 41 | "validation": formatted_val_dataset, 42 | } 43 | 44 | self.task_spec = TaskDataSpec( 45 | "json_dataset", 46 | ) 47 | 48 | def add_messages_key( 49 | self, example: dict[str, Any] 50 | ) -> dict[str, list[dict[str, Any]]]: 51 | return { 52 | "messages": [ 53 | {"role": "user", "content": example[self.input_key]}, 54 | {"role": "assistant", "content": example[self.output_key]}, 55 | ] 56 | } 57 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/squad.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
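As a usage sketch for the PromptResponseDataset above (not part of the repository files; the field names and record are invented), custom input/output keys are mapped into a chat-style `messages` list:

```python
# Minimal sketch: PromptResponseDataset with custom input/output keys.
import json
import tempfile
from pathlib import Path

from nemo_rl.data.hf_datasets.prompt_response_dataset import PromptResponseDataset

tmp_dir = Path(tempfile.mkdtemp())
data_path = tmp_dir / "data.jsonl"
data_path.write_text(json.dumps({"question": "2 + 2 = ?", "answer": "4"}) + "\n")

ds = PromptResponseDataset(
    train_ds_path=str(data_path),
    val_ds_path=str(data_path),
    input_key="question",
    output_key="answer",
)

# Each example now carries a "messages" list built from the two configured keys.
print(ds.formatted_ds["train"][0]["messages"])
```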
14 | 15 | 16 | from typing import Any 17 | 18 | from datasets import load_dataset 19 | 20 | from nemo_rl.data.interfaces import TaskDataSpec 21 | 22 | 23 | def format_squad(data: dict[str, Any]) -> dict[str, list[dict[str, str]]]: 24 | return { 25 | "messages": [ 26 | { 27 | "role": "system", 28 | "content": data["context"], 29 | }, 30 | { 31 | "role": "user", 32 | "content": data["question"], 33 | }, 34 | { 35 | "role": "assistant", 36 | "content": data["answers"]["text"][0], 37 | }, 38 | ] 39 | } 40 | 41 | 42 | class SquadDataset: 43 | def __init__(self) -> None: 44 | original_ds = load_dataset("rajpurkar/squad") 45 | self.formatted_ds = original_ds.map(format_squad) 46 | self.task_spec = TaskDataSpec( 47 | task_name="SQuAD", 48 | ) 49 | -------------------------------------------------------------------------------- /nemo_rl/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/distributed/__init__.py -------------------------------------------------------------------------------- /nemo_rl/distributed/ray_actor_environment_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES 16 | 17 | ACTOR_ENVIRONMENT_REGISTRY: dict[str, str] = { 18 | "nemo_rl.models.generation.vllm.VllmGenerationWorker": PY_EXECUTABLES.VLLM, 19 | "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker": PY_EXECUTABLES.BASE, 20 | "nemo_rl.models.policy.fsdp1_policy_worker.FSDP1PolicyWorker": PY_EXECUTABLES.BASE, 21 | "nemo_rl.environments.math_environment.MathEnvironment": PY_EXECUTABLES.SYSTEM, 22 | "nemo_rl.environments.games.sliding_puzzle.SlidingPuzzleEnv": PY_EXECUTABLES.SYSTEM, 23 | } 24 | 25 | 26 | def get_actor_python_env(actor_class_fqn: str) -> str: 27 | if actor_class_fqn in ACTOR_ENVIRONMENT_REGISTRY: 28 | return ACTOR_ENVIRONMENT_REGISTRY[actor_class_fqn] 29 | else: 30 | raise ValueError( 31 | f"No actor environment registered for {actor_class_fqn}. " 32 | f"You're attempting to create an actor ({actor_class_fqn}) " 33 | "without specifying a python environment for it. Please either " 34 | "specify a python environment in the registry " 35 | "(nemo_rl.distributed.ray_actor_environment_registry.ACTOR_ENVIRONMENT_REGISTRY) " 36 | "or pass a py_executable to the RayWorkerBuilder. If you're unsure about which " 37 | "environment to use, a good default is PY_EXECUTABLES.SYSTEM for ray actors that " 38 | "don't have special dependencies. If you do have special dependencies (say, you're " 39 | "adding a new generation framework or training backend), you'll need to specify the " 40 | "appropriate environment. See uv.md for more details."
41 | ) 42 | -------------------------------------------------------------------------------- /nemo_rl/environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/environments/__init__.py -------------------------------------------------------------------------------- /nemo_rl/environments/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torch 15 | 16 | 17 | def calculate_pass_rate_per_prompt( 18 | prompts: torch.Tensor, is_correct: torch.Tensor 19 | ) -> float: 20 | """Compute the fraction of prompts that have at least one correct answer (reward > 0). 21 | 22 | prompts: Tensor (b, s) of prompts the model used. May be on any device 23 | is_correct: Tensor (b,) of bool-valued correctness labels. May be on any device 24 | 25 | Returns: 26 | pass rate: float 27 | """ 28 | unique_prompts = torch.unique(prompts, dim=0) 29 | 30 | correct_prompt_ct = 0 31 | for i in range(len(unique_prompts)): 32 | is_matching_prompt = (prompts == unique_prompts[i]).all(1) 33 | if torch.any(is_correct[is_matching_prompt] > 0): 34 | correct_prompt_ct += 1 35 | 36 | return correct_prompt_ct / len(unique_prompts) 37 | -------------------------------------------------------------------------------- /nemo_rl/environments/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any 15 | 16 | 17 | def chunk_list_to_workers(to_chunk: list[Any], num_workers: int) -> list[list[Any]]: 18 | """Chunk a list into a list of lists, where each sublist is assigned to a worker. Keeps ordering of elements. 19 | 20 | If the list is not divisible by the number of workers, the last worker may have fewer elements. 21 | If there are more workers than elements, the first len(to_chunk) workers will have a single element each, 22 | and the remaining workers will have empty lists. 23 | 24 | Args: 25 | to_chunk: The list to be chunked. 26 | num_workers: The number of workers to distribute the list to. 27 | 28 | Returns: 29 | A list of lists, where each sublist contains elements assigned to a worker.
30 | 31 | Examples: 32 | ```{doctest} 33 | >>> from nemo_rl.environments.utils import chunk_list_to_workers 34 | >>> chunk_list_to_workers([1, 2, 3, 4, 5], 3) 35 | [[1, 2], [3, 4], [5]] 36 | ``` 37 | """ 38 | if not to_chunk: 39 | return [[] for _ in range(num_workers)] 40 | 41 | # Handle case where we have more workers than elements 42 | if len(to_chunk) <= num_workers: 43 | result = [[item] for item in to_chunk] 44 | # Add empty lists for remaining workers 45 | result.extend([[] for _ in range(num_workers - len(to_chunk))]) 46 | return result 47 | 48 | # Calculate chunk size (ceiling division to ensure all elements are covered) 49 | chunk_size = (len(to_chunk) + num_workers - 1) // num_workers 50 | 51 | # Create chunks 52 | chunks = [] 53 | for i in range(0, len(to_chunk), chunk_size): 54 | chunks.append(to_chunk[i : i + chunk_size]) 55 | 56 | # If we somehow ended up with more chunks than workers (shouldn't happen with ceiling division) 57 | # merge the last chunks 58 | if len(chunks) > num_workers: 59 | chunks[num_workers - 1 :] = [sum(chunks[num_workers - 1 :], [])] 60 | 61 | return chunks 62 | -------------------------------------------------------------------------------- /nemo_rl/evals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/evals/__init__.py -------------------------------------------------------------------------------- /nemo_rl/experience/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/experience/__init__.py -------------------------------------------------------------------------------- /nemo_rl/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/metrics/__init__.py -------------------------------------------------------------------------------- /nemo_rl/metrics/metrics_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
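To make the edge cases described in the chunk_list_to_workers docstring above concrete, a small sketch (the outputs shown in comments follow directly from the rules stated there):

```python
# Sketch of the chunking edge cases documented above.
from nemo_rl.environments.utils import chunk_list_to_workers

# Evenly divisible: equal-sized chunks, original ordering preserved.
print(chunk_list_to_workers([1, 2, 3, 4, 5, 6], num_workers=3))  # [[1, 2], [3, 4], [5, 6]]

# More workers than elements: one element each, then empty lists.
print(chunk_list_to_workers([1, 2], num_workers=4))  # [[1], [2], [], []]

# Empty input: every worker receives an empty list.
print(chunk_list_to_workers([], num_workers=3))  # [[], [], []]
```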
14 | -------------------------------------------------------------------------------- /nemo_rl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/dtensor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/dtensor/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/generation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import cast 15 | 16 | from transformers import PreTrainedTokenizerBase 17 | 18 | from nemo_rl.models.generation.interfaces import GenerationConfig 19 | from nemo_rl.models.generation.vllm import VllmConfig 20 | 21 | TokenizerType = PreTrainedTokenizerBase 22 | 23 | 24 | def configure_generation_config( 25 | config: GenerationConfig, tokenizer: TokenizerType, is_eval: bool = False 26 | ) -> GenerationConfig: 27 | """Apply tokenizer- and backend-specific settings to a generation config.""" 28 | # tokenizer setting 29 | config["pad_token_id"] = tokenizer.pad_token_id 30 | if config["stop_token_ids"] is None: 31 | config["stop_token_ids"] = [tokenizer.eos_token_id] 32 | 33 | # vllm setting 34 | if config["backend"] == "vllm": 35 | config = cast(VllmConfig, config) 36 | # set load_format 37 | config["vllm_cfg"]["load_format"] = "auto" if is_eval else "dummy" 38 | 39 | # set skip_tokenizer_init 40 | if is_eval or config["stop_strings"] is not None: 41 | config["vllm_cfg"]["skip_tokenizer_init"] = False 42 | else: 43 | config["vllm_cfg"]["skip_tokenizer_init"] = True 44 | 45 | return config 46 | -------------------------------------------------------------------------------- /nemo_rl/models/generation/vllm_backend.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
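A hedged sketch of how configure_generation_config above is typically called (the config dict is hand-written for illustration and omits most GenerationConfig keys; the tokenizer name is only an example):

```python
# Sketch: fill in tokenizer- and backend-dependent generation settings.
from transformers import AutoTokenizer

from nemo_rl.models.generation import configure_generation_config

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")

# Hand-written fragment; real configs come from the YAML recipes and contain more keys.
config = {
    "backend": "vllm",
    "stop_token_ids": None,
    "stop_strings": None,
    "vllm_cfg": {},
}

config = configure_generation_config(config, tokenizer, is_eval=True)
print(config["pad_token_id"], config["stop_token_ids"])
# For eval, real weights are loaded and the vLLM tokenizer is kept.
print(config["vllm_cfg"]["load_format"], config["vllm_cfg"]["skip_tokenizer_init"])
```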
14 | import torch 15 | 16 | try: 17 | import vllm # noqa: F401 18 | except ImportError: 19 | raise ImportError( 20 | "vLLM is not installed. Please check that the py_executable in the runtime_env of VllmGenerationWorker " 21 | "covers the vllm dependency. You may have to update nemo_rl/distributed/ray_actor_environment_registry.py. " 22 | "If you are working interactively, you can install by running `uv sync --extra vllm` anywhere in the repo." 23 | ) 24 | 25 | 26 | class VllmInternalWorkerExtension: 27 | def report_device_id(self) -> str: 28 | from nemo_rl.utils.nvml import get_device_uuid 29 | 30 | return get_device_uuid(self.device.index) 31 | 32 | def update_weights_from_ipc_handles(self, ipc_handles): 33 | """Update weights from IPC handles. 34 | 35 | Args: 36 | ipc_handles (dict): Dictionary mapping device UUIDs to parameter IPC handles. 37 | 38 | Returns: 39 | bool: True if weights were successfully updated. 40 | """ 41 | try: 42 | # Get handles for this device 43 | device_uuid = self.report_device_id() 44 | handles = ipc_handles[device_uuid] 45 | device_id = self.device.index 46 | weights = [] 47 | 48 | # Process each handle to get the tensor 49 | for name, handle in handles: 50 | func, args = handle 51 | list_args = list(args) 52 | # Update device ID to match the current device 53 | list_args[6] = device_id 54 | tensor = func(*list_args) 55 | weights.append((name, tensor)) 56 | 57 | # Load weights into the model 58 | self.model_runner.model.load_weights(weights=weights) 59 | torch.cuda.synchronize() 60 | return True 61 | except Exception as e: 62 | print( 63 | f"Error in VllmInternalWorkerExtension.update_weights_from_ipc_handles: {e}" 64 | ) 65 | return False 66 | -------------------------------------------------------------------------------- /nemo_rl/models/huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/huggingface/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/huggingface/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from enum import Enum, auto 16 | 17 | from transformers import AutoConfig 18 | 19 | 20 | class ModelFlag(Enum): 21 | """Enum that defines special flags for model-specific behaviors. 22 | 23 | This enum provides a way to identify models that require special handling or 24 | configuration in different parts of the NeMo RL codebase. 25 | 26 | Flags: 27 | SKIP_DTENSOR_TIED_WEIGHTS_CHECK: Models that should skip the tied weights check 28 | for the DTensor Policy even without setting the 29 | NRL_SKIP_TIED_WEIGHT_CHECK flag. 30 | VLLM_LOAD_FORMAT_AUTO: Models that should use the "auto" load format when initializing 31 | VLLM. 
32 | 33 | Each flag has a `matches` method that determines if the flag applies to a given model_name. 34 | """ 35 | 36 | SKIP_DTENSOR_TIED_WEIGHTS_CHECK = auto() 37 | VLLM_LOAD_FORMAT_AUTO = auto() 38 | 39 | def matches(self, model_name: str) -> bool: 40 | match self: 41 | case ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK: 42 | return is_gemma_model(model_name) 43 | case ModelFlag.VLLM_LOAD_FORMAT_AUTO: 44 | return is_gemma_model(model_name) 45 | case _: 46 | raise ValueError(f"Unknown ModelFlag: {self}") 47 | 48 | 49 | def is_gemma_model(model_name: str) -> bool: 50 | hf_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True) 51 | return hasattr(hf_config, "model_type") and hf_config.model_type in [ 52 | "gemma2", 53 | "gemma3", 54 | "gemma3_text", 55 | ] 56 | -------------------------------------------------------------------------------- /nemo_rl/models/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/megatron/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/megatron/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo_rl/models/policy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
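For example (a sketch only; the checkpoint names are illustrative, and ModelFlag.matches fetches each model's config from the Hugging Face Hub, so gated models such as Gemma require authenticated access):

```python
# Sketch: querying model-specific behavior flags.
from nemo_rl.models.huggingface.common import ModelFlag

# Gemma models need the "auto" load format when vLLM is initialized.
if ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches("google/gemma-3-1b-it"):
    print("Use the 'auto' load format for this model.")

# Non-Gemma models fall through to the default behavior.
print(ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK.matches("Qwen/Qwen2.5-1.5B-Instruct"))
```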
14 | 15 | from typing import Any, NotRequired, Optional, TypedDict, Union 16 | 17 | from nemo_rl.models.generation.interfaces import GenerationConfig 18 | 19 | 20 | class DTensorConfig(TypedDict): 21 | enabled: bool 22 | cpu_offload: bool 23 | sequence_parallel: bool 24 | activation_checkpointing: bool 25 | tensor_parallel_size: int 26 | custom_parallel_plan: str 27 | 28 | 29 | class TokenizerConfig(TypedDict): 30 | name: str 31 | chat_template: str 32 | 33 | 34 | class PytorchOptimizerConfig(TypedDict): 35 | name: str 36 | kwargs: dict[str, Any] 37 | 38 | 39 | class SinglePytorchSchedulerConfig(TypedDict): 40 | name: str 41 | kwargs: dict[str, Any] 42 | 43 | 44 | SchedulerMilestones = dict[str, list[int]] 45 | 46 | 47 | class DynamicBatchingConfig(TypedDict): 48 | # dynamic_batching improves performance by ensuring logprob and training microbatches 49 | # have a sufficient number of tokens to maximize GPU utilization. Specifically, variable-length 50 | # responses are sorted by sequence length and bucketed into microbatches whose total 51 | # token count is approximately 'train_mb_tokens' and 'logprob_mb_tokens' for the 52 | # training and logprob stages, respectively. 53 | enabled: bool 54 | train_mb_tokens: int 55 | logprob_mb_tokens: int 56 | sequence_length_round: int 57 | 58 | 59 | class PolicyConfig(TypedDict): 60 | model_name: str 61 | tokenizer: TokenizerConfig 62 | train_global_batch_size: int 63 | train_micro_batch_size: int 64 | learning_rate: float 65 | logprob_batch_size: int 66 | generation: Optional[GenerationConfig] 67 | generation_batch_size: NotRequired[ 68 | int 69 | ] # used in static batched (framework) generation 70 | precision: str 71 | dtensor_cfg: DTensorConfig 72 | dynamic_batching: DynamicBatchingConfig 73 | make_sequence_length_divisible_by: int 74 | max_total_sequence_length: int 75 | max_grad_norm: Optional[Union[float, int]] 76 | fsdp_offload_enabled: bool 77 | activation_checkpointing_enabled: bool 78 | optimizer: NotRequired[PytorchOptimizerConfig] = None 79 | scheduler: NotRequired[list[SinglePytorchSchedulerConfig] | SchedulerMilestones] = ( 80 | None 81 | ) 82 | -------------------------------------------------------------------------------- /nemo_rl/package_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
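To illustrate how the dynamic batching fields above fit together, a hedged sketch with invented values (real settings live in the YAML recipes under examples/configs/, not in code):

```python
# Sketch only: example values for DynamicBatchingConfig.
from nemo_rl.models.policy import DynamicBatchingConfig

dynamic_batching: DynamicBatchingConfig = {
    "enabled": True,
    # Pack variable-length responses until a training microbatch holds roughly this many tokens.
    "train_mb_tokens": 8192,
    # Same idea for the logprob stage, which can usually fit more tokens per microbatch.
    "logprob_mb_tokens": 16384,
    # Round padded sequence lengths up to a multiple of this value.
    "sequence_length_round": 64,
}

print(dynamic_batching["train_mb_tokens"])
```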
14 | 15 | 16 | MAJOR = 0 17 | MINOR = 3 18 | PATCH = 0 19 | PRE_RELEASE = "rc0" 20 | 21 | # Use the following formatting: (major, minor, patch, pre-release) 22 | VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) 23 | 24 | __shortversion__ = ".".join(map(str, VERSION[:3])) 25 | __version__ = ".".join(map(str, VERSION[:3])) + "".join(VERSION[3:]) 26 | 27 | __package_name__ = "nemo_rl" 28 | __contact_names__ = "NVIDIA" 29 | __contact_emails__ = "nemo-toolkit@nvidia.com" 30 | __homepage__ = "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/" 31 | __repository_url__ = "https://github.com/NVIDIA/NeMo-RL" 32 | __download_url__ = "https://github.com/NVIDIA/NeMo-RL/releases" 33 | __description__ = "NeMo-RL - a toolkit for model alignment" 34 | __license__ = "Apache2" 35 | __keywords__ = "deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, language, reinforcement learning, RLHF, preference modeling, SteerLM, DPO" 36 | -------------------------------------------------------------------------------- /nemo_rl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/utils/__init__.py -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | ## Launching Release Tests 4 | 5 | ```sh 6 | # Assuming in NeMo RL project root 7 | 8 | cd tools/ 9 | 10 | IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... 11 | 12 | # DRYRUN=1 to get a rough estimate of compute 13 | DRYRUN=1 IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... 14 | 15 | # DRYRUN=2 will create a codesnapshot with a fully hermetic example 16 | DRYRUN=2 IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... 17 | 18 | # Run all (Caution: this will use a lot of compute; consider listing out the jobs) 19 | IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ../../recipes/**/*.sh 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/functional/dpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_dpo.py \ 22 | policy.model_name=Qwen/Qwen3-0.6B \ 23 | cluster.gpus_per_node=2 \ 24 | dpo.max_num_steps=3 \ 25 | dpo.val_batches=1 \ 26 | dpo.val_global_batch_size=8 \ 27 | policy.train_global_batch_size=8 \ 28 | logger.tensorboard_enabled=true \ 29 | logger.log_dir=$LOG_DIR \ 30 | logger.wandb_enabled=false \ 31 | checkpointing.enabled=false \ 32 | $@ \ 33 | 2>&1 | tee $RUN_LOG 34 | 35 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 36 | 37 | # TODO: threshold set higher since test is flaky 38 | # https://github.com/NVIDIA/NeMo-RL/issues/370 39 | uv run tests/check_metrics.py $JSON_METRICS \ 40 | 'data["train/loss"]["3"] < 0.8' 41 | 42 | -------------------------------------------------------------------------------- /tests/functional/eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_eval.py \ 22 | cluster.gpus_per_node=2 \ 23 | $@ \ 24 | 2>&1 | tee $RUN_LOG 25 | 26 | cat $RUN_LOG | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > $JSON_METRICS 27 | 28 | uv run tests/check_metrics.py $JSON_METRICS \ 29 | 'data["score"] == 0.1' \ 30 | -------------------------------------------------------------------------------- /tests/functional/grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_grpo_math.py \ 22 | policy.model_name=Qwen/Qwen3-0.6B \ 23 | grpo.num_prompts_per_step=2 \ 24 | grpo.num_generations_per_prompt=4 \ 25 | policy.train_global_batch_size=4 \ 26 | policy.train_micro_batch_size=1 \ 27 | cluster.gpus_per_node=2 \ 28 | grpo.max_num_steps=2 \ 29 | logger.tensorboard_enabled=true \ 30 | logger.log_dir=$LOG_DIR \ 31 | logger.wandb_enabled=false \ 32 | checkpointing.enabled=false \ 33 | $@ \ 34 | 2>&1 | tee $RUN_LOG 35 | 36 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 37 | 38 | uv run tests/check_metrics.py $JSON_METRICS \ 39 | 'max(data["train/token_mult_prob_error"]) < 1.05' \ 40 | 41 | -------------------------------------------------------------------------------- /tests/functional/grpo_multiturn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ 22 | policy.model_name=Qwen/Qwen3-0.6B \ 23 | cluster.gpus_per_node=2 \ 24 | grpo.max_rollout_turns=5 \ 25 | grpo.max_num_steps=3 \ 26 | grpo.num_prompts_per_step=2 \ 27 | grpo.num_generations_per_prompt=4 \ 28 | policy.max_total_sequence_length=1024 \ 29 | policy.train_global_batch_size=4 \ 30 | policy.train_micro_batch_size=1 \ 31 | policy.generation.top_p=0.99 \ 32 | policy.generation.top_k=8000 \ 33 | logger.tensorboard_enabled=true \ 34 | logger.log_dir=$LOG_DIR \ 35 | logger.wandb_enabled=false \ 36 | checkpointing.enabled=false \ 37 | $@ \ 38 | 2>&1 | tee $RUN_LOG 39 | 40 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 41 | 42 | uv run tests/check_metrics.py $JSON_METRICS \ 43 | 'max(data["train/token_mult_prob_error"]) < 1.1' \ 44 | 45 | -------------------------------------------------------------------------------- /tests/functional/sft.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # clean up checkpoint directory on exit 4 | trap "rm -rf /tmp/sft_checkpoints" EXIT 5 | 6 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 7 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
8 | # Mark the current repo as safe, since wandb fetches metadata about the repo 9 | git config --global --add safe.directory $PROJECT_ROOT 10 | 11 | set -eou pipefail 12 | 13 | EXP_NAME=$(basename $0 .sh) 14 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 15 | LOG_DIR=$EXP_DIR/logs 16 | JSON_METRICS=$EXP_DIR/metrics.json 17 | RUN_LOG=$EXP_DIR/run.log 18 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 19 | 20 | rm -rf $EXP_DIR $LOG_DIR 21 | mkdir -p $EXP_DIR $LOG_DIR 22 | 23 | cd $PROJECT_ROOT 24 | uv run $PROJECT_ROOT/examples/run_sft.py \ 25 | policy.model_name=Qwen/Qwen3-0.6B \ 26 | cluster.gpus_per_node=2 \ 27 | sft.max_num_steps=3 \ 28 | sft.val_batches=1 \ 29 | sft.val_period=3 \ 30 | logger.tensorboard_enabled=true \ 31 | logger.log_dir=$LOG_DIR \ 32 | logger.wandb_enabled=false \ 33 | checkpointing.enabled=true \ 34 | checkpointing.save_period=3 \ 35 | checkpointing.checkpoint_dir=/tmp/sft_checkpoints \ 36 | $@ \ 37 | 2>&1 | tee $RUN_LOG 38 | 39 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 40 | 41 | uv run tests/check_metrics.py $JSON_METRICS \ 42 | 'data["train/loss"]["3"] < 5.9' \ 43 | 44 | -------------------------------------------------------------------------------- /tests/functional/test_mcore_extra_installed_correctly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eoux pipefail 3 | 4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 5 | cd $SCRIPT_DIR 6 | 7 | uv sync 8 | # Only the first call with --extra mcore is invoked with --reinstall, in case submodules were recently updated/downloaded 9 | uv run --reinstall --extra mcore --no-build-isolation python <<"EOF" 10 | import torch 11 | import transformer_engine.pytorch as te 12 | from transformer_engine.common import recipe 13 | 14 | # Set dimensions. 15 | in_features = 768 16 | out_features = 3072 17 | hidden_size = 2048 18 | 19 | # Initialize model and inputs. 20 | model = te.Linear(in_features, out_features, bias=True) 21 | inp = torch.randn(hidden_size, in_features, device="cuda") 22 | 23 | # TODO: Disabling FP8 testing since CI machines may not support FP8 24 | ## Create an FP8 recipe. Note: All input args are optional. 25 | #fp8_recipe = recipe.DelayedScaling(margin=0, fp8_format=recipe.Format.E4M3) 26 | # 27 | ## Enable autocasting for the forward pass 28 | #with te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe): 29 | # out = model(inp) 30 | 31 | out = model(inp) 32 | 33 | loss = out.sum() 34 | loss.backward() 35 | print("[TE hello world successful]") 36 | EOF 37 | 38 | uv run --extra mcore --no-build-isolation python <<"EOF" 39 | import is_megatron_installed 40 | import is_nemo_installed 41 | assert is_megatron_installed.INSTALLED, "Megatron is not installed. Please check if the submodule has been initialized. May need to run `git submodule update --init --recursive`" 42 | assert is_nemo_installed.INSTALLED, "NeMo is not installed. Please check if the submodule has been initialized.
May need to run `git submodule update --init --recursive`" 43 | 44 | # This must be the first import to get all of the megatron non-core packages added to the path 45 | import nemo_rl 46 | import megatron.core 47 | from megatron.training.utils import get_ltor_masks_and_position_ids 48 | from nemo.tron.init import initialize_megatron 49 | from nemo.tron.config import ( 50 | ConfigContainer, 51 | TrainingConfig, 52 | LoggerConfig, 53 | OptimizerConfig, 54 | SchedulerConfig, 55 | CheckpointConfig, 56 | DistributedDataParallelConfig, 57 | ) 58 | from nemo.tron.utils.common_utils import get_rank_safe 59 | from nemo.tron.config import TokenizerConfig 60 | from nemo.tron.model import get_model_from_config 61 | from nemo.tron.checkpointing import checkpoint_exists, load_checkpoint 62 | from nemo.tron.init import initialize_megatron, set_jit_fusion_options 63 | from nemo.tron.setup import _init_checkpointing_context, _update_model_config_funcs 64 | from nemo.tron.state import GlobalState 65 | from nemo.tron.optim import setup_optimizer 66 | from nemo.tron import fault_tolerance 67 | from nemo.tron.tokenizers.tokenizer import build_tokenizer 68 | from nemo.tron.utils.train_utils import ( 69 | calc_params_l2_norm, 70 | logical_and_across_model_parallel_group, 71 | reduce_max_stat_across_model_parallel_group, 72 | ) 73 | from nemo.tron.train import train_step 74 | from nemo.tron.setup import HAVE_FSDP2 75 | print("[Nemo/Mcore imports successful]") 76 | EOF 77 | 78 | # Sync just to return the environment to the original base state 79 | uv sync 80 | echo Success 81 | -------------------------------------------------------------------------------- /tests/run_functional_in_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 3 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/..) 4 | 5 | set -eou pipefail 6 | 7 | # Ensure Docker is installed 8 | if ! command -v docker &> /dev/null; then 9 | echo "Error: Docker is not installed or not in PATH." 10 | exit 1 11 | fi 12 | 13 | # CONTAINER is expected to be set as an environment variable 14 | if [[ -z "${CONTAINER:-}" ]]; then 15 | echo "Error: CONTAINER environment variable is not set." 16 | echo "Usage: CONTAINER= $0