├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── labeler.yml └── workflows │ ├── _run_test.yml │ ├── build-test-publish-wheel.yml │ ├── cherry-pick-release-commit.yml │ ├── cicd-main.yml │ ├── copyright-check.yml │ ├── labeler.yaml │ ├── release-freeze.yml │ ├── release.yaml │ └── semantic-pull-request.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .python-version ├── 3rdparty ├── Megatron-LM-workspace │ ├── is_megatron_installed.py │ ├── pyproject.toml │ └── setup.py └── NeMo-workspace │ ├── is_nemo_installed.py │ ├── pyproject.toml │ └── setup.py ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── codecov.yml ├── docker ├── Dockerfile └── README.md ├── docs ├── adding-new-models.md ├── assets │ ├── actor-wg-worker-vc.png │ ├── aime_training_progress.png │ ├── deepscaler_training_progress.png │ ├── ray-debug-step1.png │ ├── ray-debug-step2.png │ ├── ray-debug-step3.png │ ├── ray-debug-step4.png │ ├── sft-openmathinstruct2-train-loss.png │ ├── sft-openmathinstruct2-train1M-loss.png │ └── val-log.png ├── autodoc2_docstrings_parser.py ├── cluster.md ├── conf.py ├── debugging.md ├── design-docs │ ├── chat-datasets.md │ ├── checkpointing.md │ ├── design-and-philosophy.md │ ├── fsdp2-parallel-plan.md │ ├── generation.md │ ├── logger.md │ ├── loss-functions.md │ ├── padding.md │ └── uv.md ├── docker.md ├── documentation.md ├── guides │ ├── dpo.md │ ├── eval.md │ ├── grpo-deepscaler.md │ ├── grpo.md │ ├── sft-openmathinstruct2.md │ └── sft.md ├── helpers.py ├── index.md ├── local-workstation.md ├── model-quirks.md ├── project.json ├── testing.md └── versions1.json ├── examples ├── __init__.py ├── configs │ ├── dpo.yaml │ ├── eval.yaml │ ├── grpo-deepscaler-1.5b-16K.yaml │ ├── grpo-deepscaler-1.5b-8K.yaml │ ├── grpo_deepscaler-1.5b-24K.yaml │ ├── grpo_math_1B.yaml │ ├── grpo_math_8B.yaml │ ├── grpo_sliding_puzzle.yaml │ ├── recipes │ │ └── llm │ │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml │ │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml │ │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml │ │ │ ├── dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml │ │ │ ├── grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml │ │ │ ├── grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml │ │ │ ├── grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml │ │ │ ├── grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml │ │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml │ │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml │ │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml │ │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml │ │ │ ├── grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml │ │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml │ │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml │ │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml │ │ │ ├── sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml │ │ │ └── sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml │ ├── sft.yaml │ └── sft_openmathinstruct2.yaml ├── convert_dcp_to_hf.py ├── custom_parallel.py ├── prompts │ ├── cot.txt │ └── math.txt ├── run_dpo.py ├── run_eval.py ├── run_grpo_math.py ├── run_grpo_sliding_puzzle.py └── run_sft.py ├── mypy.ini ├── nemo_rl ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── dpo.py │ ├── grpo.py │ ├── interfaces.py │ ├── loss_functions.py │ ├── sft.py │ └── utils.py ├── converters │ ├── __init__.py │ ├── huggingface │ │ ├── __init__.py │ │ └── vllm_export.py │ └── megatron │ │ ├── __init__.py │ 
│ └── vllm_export.py ├── data │ ├── __init__.py │ ├── datasets.py │ ├── hf_datasets │ │ ├── __init__.py │ │ ├── chat_templates.py │ │ ├── deepscaler.py │ │ ├── dpo.py │ │ ├── helpsteer3.py │ │ ├── oasst.py │ │ ├── openmathinstruct2.py │ │ ├── prompt_response_dataset.py │ │ └── squad.py │ ├── interfaces.py │ └── llm_message_utils.py ├── distributed │ ├── __init__.py │ ├── batched_data_dict.py │ ├── collectives.py │ ├── model_utils.py │ ├── named_sharding.py │ ├── ray_actor_environment_registry.py │ ├── virtual_cluster.py │ └── worker_groups.py ├── environments │ ├── __init__.py │ ├── games │ │ └── sliding_puzzle.py │ ├── interfaces.py │ ├── math_environment.py │ ├── metrics.py │ └── utils.py ├── evals │ ├── __init__.py │ └── eval.py ├── experience │ ├── __init__.py │ └── rollouts.py ├── metrics │ ├── __init__.py │ └── metrics_utils.py ├── models │ ├── __init__.py │ ├── dtensor │ │ ├── __init__.py │ │ └── parallelize.py │ ├── generation │ │ ├── __init__.py │ │ ├── interfaces.py │ │ ├── vllm.py │ │ └── vllm_backend.py │ ├── huggingface │ │ ├── __init__.py │ │ └── common.py │ ├── megatron │ │ ├── __init__.py │ │ └── common.py │ └── policy │ │ ├── __init__.py │ │ ├── dtensor_policy_worker.py │ │ ├── fsdp1_policy_worker.py │ │ ├── hf_policy.py │ │ ├── interfaces.py │ │ └── utils.py ├── package_info.py └── utils │ ├── __init__.py │ ├── checkpoint.py │ ├── config.py │ ├── logger.py │ ├── native_checkpoint.py │ ├── nvml.py │ ├── timer.py │ └── venvs.py ├── pyproject.toml ├── ray.sub ├── tests ├── README.md ├── __init__.py ├── check_metrics.py ├── functional │ ├── dpo.sh │ ├── eval.sh │ ├── grpo.sh │ ├── grpo_multiturn.sh │ ├── sft.sh │ └── test_mcore_extra_installed_correctly.sh ├── json_dump_tb_logs.py ├── run_functional_in_docker.sh ├── run_unit.sh ├── run_unit_in_docker.sh ├── test_suites │ ├── README.md │ ├── llm │ │ ├── common.env │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.sh │ │ ├── dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh │ │ ├── dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh │ │ ├── grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh │ │ ├── grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh │ │ ├── grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh │ │ ├── grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh │ │ ├── grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh │ │ ├── grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh │ │ ├── grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh │ │ ├── performance │ │ │ └── .gitkeep │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.sh │ │ ├── sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh │ │ ├── sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh │ │ └── sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh │ ├── nightly.txt │ ├── nightly_performance.txt │ ├── release.txt │ └── release_performance.txt └── unit │ ├── __init__.py │ ├── algorithms │ ├── test_dpo.py │ ├── test_grpo.py │ ├── test_loss_functions.py │ ├── test_sft.py │ └── test_utils.py │ ├── conftest.py │ ├── data │ ├── hf_datasets │ │ ├── test_dpo_dataset.py │ │ ├── test_helpsteer.py │ │ ├── test_prompt_response.py │ │ └── test_squad.py │ ├── test_data_processor.py │ ├── test_datasets.py │ └── test_llm_message_utils.py │ ├── distributed │ ├── __init__.py │ ├── test_batched_data_dict.py │ ├── test_cluster_visualization.py │ ├── test_collectives.py │ ├── test_named_sharding.py │ ├── test_virtual_cluster.py │ 
└── test_worker_groups.py │ ├── environments │ └── test_math_environment.py │ ├── experience │ └── test_rollouts.py │ ├── models │ ├── generation │ │ ├── test_vllm_generation.py │ │ └── test_vllm_large_model.py │ ├── huggingface │ │ └── test_common.py │ └── policy │ │ ├── test_dtensor_worker.py │ │ └── test_fsdp1_worker.py │ ├── test_envs.py │ ├── test_meta.py │ ├── test_recipes_and_test_suites.py │ ├── test_utils.py │ └── utils │ ├── test_checkpoint.py │ ├── test_config.py │ ├── test_logger.py │ ├── test_native_checkpoint.py │ ├── test_pynvml.py │ ├── test_timer.py │ └── test_venvs.py ├── tools ├── autoformat.sh ├── code_snapshot.sh ├── copyright.sh ├── find_available_port_ranges.py ├── launch └── package_release_runs.sh └── uv.lock /.dockerignore: -------------------------------------------------------------------------------- 1 | # Adding to .gitignore helps reduce the size of your working_dir 2 | 3 | .git 4 | *.out 5 | *.log 6 | *.tar 7 | *.tar.gz 8 | .venv 9 | venv 10 | venvs 11 | __pycache__/ 12 | _build/ 13 | build/ 14 | apidocs/ 15 | dist/ 16 | *.egg-info/ 17 | *.vscode/ 18 | release_run* 19 | ckpts/ 20 | 21 | # Test 22 | coverage.json 23 | .coverage* 24 | test_assets/ 25 | 26 | # Cache 27 | uv_cache/ 28 | hf_home/ 29 | hf_datasets_cache/ 30 | *logs/ 31 | datasets/ 32 | wandb/ 33 | checkpoints/ 34 | results/ 35 | code_snapshots/ 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | **Steps/Code to reproduce bug** 15 | 16 | Please list *minimal* steps or code snippet for us to be able to reproduce the bug. 17 | 18 | A helpful guide on how to craft a minimal bug report http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports. 19 | 20 | 21 | **Expected behavior** 22 | 23 | A clear and concise description of what you expected to happen. 24 | 25 | **Environment overview (please complete the following information)** 26 | 27 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Colab)] 28 | - Method of install: [pip install or from source]. Please specify exact commands you used to install. 29 | - If method of install is [Docker], provide `docker pull` & `docker run` commands used 30 | 31 | **Environment details** 32 | 33 | If NVIDIA docker image is used you don't need to specify these. 34 | Otherwise, please provide: 35 | - OS version 36 | - PyTorch version 37 | - Python version 38 | 39 | **Additional context** 40 | 41 | Add any other context about the problem here. 42 | Example: GPU model 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature request 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | 12 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 13 | 14 | **Describe the solution you'd like** 15 | 16 | A clear and concise description of what you want to happen.
17 | Provide a code snippet on how new APIs/changes would be used by others. 18 | 19 | **Describe alternatives you've considered** 20 | 21 | A clear and concise description of any alternative solutions or features you've considered. 22 | 23 | **Additional context** 24 | 25 | Add any other context or screenshots about the feature request here -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do ? 2 | 3 | **Add a one line overview of what this PR aims to accomplish.** 4 | 5 | # Issues 6 | List issues that this PR closes ([syntax](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword)): 7 | 8 | 9 | # Usage 10 | * **You can potentially add a usage example below** 11 | 12 | ```python 13 | # Add a code snippet demonstrating how to use this 14 | ``` 15 | 16 | # Before your PR is "Ready for review" 17 | **Pre checks**: 18 | - [ ] Make sure you read and followed [Contributor guidelines](/NVIDIA/NeMo-RL/blob/main/CONTRIBUTING.md) 19 | - [ ] Did you write any new necessary tests? 20 | - [ ] Did you run the unit tests and functional tests locally? Visit our [Testing Guide](/NVIDIA/NeMo-RL/blob/main/docs/testing.md) for how to run tests 21 | - [ ] Did you add or update any necessary documentation? Visit our [Document Development Guide](/NVIDIA/NeMo-RL/blob/main/docs/documentation.md) for how to write, build and test the docs. 22 | 23 | # Additional Information 24 | * ... 25 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | documentation: 15 | - docs/** 16 | 17 | CI: 18 | - .github/**/* 19 | -------------------------------------------------------------------------------- /.github/workflows/build-test-publish-wheel.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Build, test, and publish a PyPi wheel (to testpypi). 
16 | 17 | on: 18 | push: 19 | branches: 20 | - main 21 | - "r**" 22 | 23 | defaults: 24 | run: 25 | shell: bash -x -e -u -o pipefail {0} 26 | 27 | jobs: 28 | build-test-publish-wheel: 29 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.33.0 30 | with: 31 | dry-run: true 32 | python-package: nemo_rl 33 | packaging: uv 34 | secrets: 35 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 36 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 37 | SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} 38 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 39 | -------------------------------------------------------------------------------- /.github/workflows/cherry-pick-release-commit.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: Create PR to main with cherry-pick from release 15 | 16 | on: 17 | push: 18 | branches: 19 | - main 20 | 21 | jobs: 22 | cherry-pick: 23 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.31.0 24 | secrets: 25 | PAT: ${{ secrets.PAT }} 26 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 27 | SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} 28 | -------------------------------------------------------------------------------- /.github/workflows/copyright-check.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: Copyright check 15 | 16 | on: 17 | pull_request: 18 | 19 | jobs: 20 | copyright-check: 21 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.2.0 22 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: "Pull Request Labeler" 15 | on: 16 | - pull_request_target 17 | 18 | jobs: 19 | triage: 20 | permissions: 21 | contents: read 22 | pull-requests: write 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/labeler@v4 26 | with: 27 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 28 | -------------------------------------------------------------------------------- /.github/workflows/release-freeze.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | name: "Code freeze" 15 | 16 | on: 17 | workflow_dispatch: 18 | inputs: 19 | release-type: 20 | type: choice 21 | description: Type of release 22 | options: 23 | - major 24 | - minor 25 | freeze-commit: 26 | type: string 27 | description: Commit SHA to use for cut-off 28 | required: false 29 | default: main 30 | dry-run: 31 | type: boolean 32 | description: Dry-run of code-freeze 33 | required: false 34 | default: true 35 | jobs: 36 | code-freeze: 37 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.5 38 | with: 39 | library-name: NeMo-RL 40 | python-package: nemo_rl 41 | release-type: ${{ inputs.release-type }} 42 | freeze-commit: ${{ inputs.freeze-commit }} 43 | dry-run: ${{ inputs.dry-run }} 44 | secrets: 45 | SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} 46 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 47 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | name: "Release NeMo-RL" 15 | 16 | on: 17 | workflow_dispatch: 18 | inputs: 19 | release-ref: 20 | description: Ref (SHA or branch name) to release 21 | required: true 22 | type: string 23 | dry-run: 24 | description: Do not publish a wheel and GitHub release. 25 | required: true 26 | default: true 27 | type: boolean 28 | create-gh-release: 29 | description: Create a GitHub release 30 | required: true 31 | default: true 32 | type: boolean 33 | version-bump-branch: 34 | description: Branch for version bump 35 | required: true 36 | type: string 37 | 38 | jobs: 39 | release: 40 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_release_library.yml@v0.34.0 41 | with: 42 | release-ref: ${{ inputs.release-ref }} 43 | python-package: nemo_rl 44 | library-name: NeMo-RL 45 | dry-run: ${{ inputs.dry-run }} 46 | version-bump-branch: ${{ inputs.version-bump-branch }} 47 | create-gh-release: ${{ inputs.create-gh-release }} 48 | packaging: uv 49 | secrets: 50 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 51 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 52 | SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} 53 | SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }} 54 | PAT: ${{ secrets.PAT }} 55 | -------------------------------------------------------------------------------- /.github/workflows/semantic-pull-request.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | name: Validate PR title 15 | 16 | on: 17 | pull_request_target: 18 | types: 19 | - opened 20 | - edited 21 | - synchronize 22 | - reopened 23 | pull_request: 24 | types: 25 | - opened 26 | - edited 27 | - synchronize 28 | - reopened 29 | 30 | defaults: 31 | run: 32 | shell: bash -x -e -u -o pipefail {0} 33 | 34 | permissions: 35 | pull-requests: read 36 | 37 | jobs: 38 | semantic-pull-request: 39 | uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_semantic_pull_request.yml@v0.31.0 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Adding to .gitignore helps reduce the size of your working_dir 2 | 3 | .git 4 | *.out 5 | *.log 6 | *.tar 7 | *.tar.gz 8 | .venv 9 | venv 10 | venvs 11 | __pycache__/ 12 | _build/ 13 | build/ 14 | apidocs/ 15 | dist/ 16 | *.egg-info/ 17 | *.vscode/ 18 | release_run* 19 | ckpts/ 20 | 21 | # Test 22 | coverage.json 23 | .coverage* 24 | test_assets/ 25 | 26 | # Cache 27 | uv_cache/ 28 | hf_home/ 29 | hf_datasets_cache/ 30 | *logs/ 31 | datasets/ 32 | docker/* 33 | !docker/Dockerfile 34 | !docker/README.md 35 | wandb/ 36 | checkpoints/ 37 | results/ 38 | code_snapshots/ 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "3rdparty/NeMo"] 2 | path = 3rdparty/NeMo-workspace/NeMo 3 | url = https://github.com/NVIDIA/NeMo.git 4 | branch = terryk/hemil/automodel-custom-loop-with-sahil-patch 5 | shallow = true 6 | [submodule "3rdparty/Megatron-LM"] 7 | path = 3rdparty/Megatron-LM-workspace/Megatron-LM 8 | url = https://github.com/terrykong/Megatron-LM.git 9 | branch = terryk/main-2025-05-01-with-sahil-patch 10 | shallow = true 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | # only include python files 7 | files: \.py$ 8 | - id: trailing-whitespace 9 | # only include python files 10 | files: \.py$ 11 | 12 | - repo: https://github.com/astral-sh/ruff-pre-commit 13 | rev: "v0.9.9" # Use the appropriate version 14 | hooks: 15 | - id: ruff 16 | args: ["--fix"] 17 | - id: ruff 18 | args: ["check", "--select", "I", "--fix"] 19 | - id: ruff-format 20 | 21 | - repo: local 22 | hooks: 23 | - id: no-underscore-md 24 | name: "Disallow '_' in Markdown filenames" 25 | language: system 26 | entry: | 27 | bash -c ' 28 | # Report the offending files 29 | echo "[pre-commit] ERROR: Found Markdown files with underscores:" >&2 30 | for file in "$@"; do 31 | echo " - $file (use hyphens instead)" >&2 32 | done 33 | exit 1 34 | ' 35 | files: '.*\/[^\/]*_[^\/]*\.md$' 36 | exclude: '^\.github/' 37 | types: [file] 38 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /3rdparty/Megatron-LM-workspace/is_megatron_installed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | try: 15 | from megatron.core import parallel_state # noqa: F401 16 | 17 | INSTALLED = True 18 | except ImportError: 19 | INSTALLED = False 20 | 21 | print(f"Megatron {INSTALLED=}") 22 | -------------------------------------------------------------------------------- /3rdparty/Megatron-LM-workspace/pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | 3 | [build-system] 4 | requires = [ 5 | "setuptools", 6 | "pybind11", 7 | ] 8 | 9 | [project] 10 | name = "megatron-core" 11 | dynamic = ["dependencies", "version"] 12 | description = "Megatron Core - a library for efficient and scalable training of transformer based models" 13 | authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] 14 | maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -------------------------------------------------------------------------------- /3rdparty/NeMo-workspace/is_nemo_installed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import contextlib 15 | import io 16 | 17 | try: 18 | with ( 19 | contextlib.redirect_stdout(io.StringIO()), 20 | contextlib.redirect_stderr(io.StringIO()), 21 | ): 22 | # Silence the logging because NeMo is very verbose 23 | from nemo.tron.init import initialize_megatron # noqa: F401 24 | INSTALLED = True 25 | except ImportError: 26 | INSTALLED = False 27 | print(f"NeMo {INSTALLED=}") 28 | -------------------------------------------------------------------------------- /3rdparty/NeMo-workspace/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | 4 | [project] 5 | name = "nemo-tron" 6 | dynamic = ["dependencies", "version"] 7 | authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] 8 | description = "Standalone packaging for the NeMo Tron sub-module." 
9 | requires-python = ">=3.10" 10 | # Dependencies will be managed in setup.py 11 | -------------------------------------------------------------------------------- /3rdparty/NeMo-workspace/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | import setuptools 17 | 18 | # --- Configuration Start --- 19 | final_packages = [] 20 | final_package_dir = {} 21 | 22 | # --- nemo package conditional section --- 23 | nemo_package_source_dir = "NeMo/nemo" 24 | nemo_package_name = "nemo" 25 | 26 | if os.path.exists(nemo_package_source_dir): 27 | final_packages.append(nemo_package_name) 28 | final_package_dir[nemo_package_name] = nemo_package_source_dir 29 | # --- End of nemo package conditional section --- 30 | 31 | setuptools.setup( 32 | name="nemo-tron", # Must match [project].name in pyproject.toml 33 | version="0.0.0", # Must match [project].version in pyproject.toml 34 | description="Standalone packaging for the NeMo Tron sub-module.", # Can be sourced from pyproject.toml too 35 | author="NVIDIA", 36 | author_email="nemo-toolkit@nvidia.com", 37 | packages=final_packages, 38 | package_dir=final_package_dir, 39 | py_modules=["is_nemo_installed"], 40 | install_requires=[ 41 | "lightning", 42 | "wget", 43 | "onnx", 44 | "fiddle", 45 | "cloudpickle", 46 | "braceexpand", 47 | "webdataset", 48 | "h5py", 49 | "ijson", 50 | "matplotlib", 51 | "scikit-learn", 52 | "nemo-run", 53 | "hatchling", 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | patch: false 5 | project: false 6 | fixes: 7 | - "/opt/nemo-rl/::" 8 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04 2 | FROM ${BASE_IMAGE} AS base 3 | 4 | # It is more convenient for users to run as root 5 | USER root 6 | 7 | RUN apt-get update && apt-get install -y --no-install-recommends \ 8 | jq \ 9 | curl \ 10 | git \ 11 | && rm -rf /var/lib/apt/lists/* && \ 12 | apt-get clean 13 | 14 | # Install uv and python 15 | ARG UV_VERSION=0.7.2 16 | ARG PYTHON_VERSION=3.12 17 | ENV PATH="/root/.local/bin:$PATH" 18 | RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \ 19 | uv python install ${PYTHON_VERSION} 20 | 21 | # Disable usage stats by default for users who are sensitive to sharing usage. 22 | # Users are encouraged to enable if the wish. 
23 | ENV RAY_USAGE_STATS_ENABLED=0 24 | 25 | FROM base AS hermetic 26 | 27 | WORKDIR /opt/nemo-rl 28 | 29 | # First copy only the dependency files 30 | COPY pyproject.toml uv.lock ./ 31 | COPY --link 3rdparty/ ./3rdparty/ 32 | 33 | # Variables to control the build of TE. If there are issues with parallelization, consider 34 | # setting these to 1. 35 | ARG MAX_JOBS 36 | ARG NVTE_BUILD_THREADS_PER_JOB 37 | 38 | ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv 39 | ENV UV_LINK_MODE=copy 40 | 41 | # Create and activate virtual environment 42 | RUN <<"EOF" bash -exu 43 | uv venv ${UV_PROJECT_ENVIRONMENT} 44 | # uv sync has a more reliable resolver than simple uv pip install which can fail 45 | 46 | # Sync each training + inference backend one at a time (since they may conflict) 47 | # to warm the uv cache, then at the end just sync the default dependencies. 48 | # Do everything in one layer to prevent large layers. 49 | 50 | # The venv is symlinked to avoid bloating the layer size 51 | uv sync --link-mode symlink --locked --extra vllm --no-install-project 52 | uv sync --link-mode symlink --locked --extra mcore --no-install-project --no-build-isolation 53 | uv sync --link-mode symlink --locked --all-groups --no-install-project 54 | EOF 55 | 56 | ENV PATH="/opt/nemo_rl_venv/bin:$PATH" 57 | 58 | FROM hermetic AS release 59 | 60 | ARG NEMO_RL_COMMIT 61 | ARG NVIDIA_BUILD_ID 62 | ARG NVIDIA_BUILD_REF 63 | ENV NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-} 64 | ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID:-} 65 | ENV NVIDIA_BUILD_REF=${NVIDIA_BUILD_REF:-} 66 | LABEL com.nvidia.build.id="${NVIDIA_BUILD_ID}" 67 | LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}" 68 | 69 | COPY . /opt/nemo-rl 70 | 71 | # Make hermetic the default target instead of release since that's the recommended container 72 | FROM hermetic 73 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Building the Docker Container 2 | NOTE: *We use `docker buildx` instead of `docker build` for these containers* 3 | 4 | This directory contains the `Dockerfile` for NeMo-RL Docker images. 5 | You can build two types of images: 6 | - A **base image**: A minimal image where Python dependencies can be specified at runtime. 7 | - A **hermetic image**: An image that includes default dependencies for offline use. 8 | 9 | 10 | For detailed instructions on building these images, please see [docs/docker.md](../docs/docker.md). 
-------------------------------------------------------------------------------- /docs/assets/actor-wg-worker-vc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/actor-wg-worker-vc.png -------------------------------------------------------------------------------- /docs/assets/aime_training_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/aime_training_progress.png -------------------------------------------------------------------------------- /docs/assets/deepscaler_training_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/deepscaler_training_progress.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step1.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step2.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step3.png -------------------------------------------------------------------------------- /docs/assets/ray-debug-step4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/ray-debug-step4.png -------------------------------------------------------------------------------- /docs/assets/sft-openmathinstruct2-train-loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/sft-openmathinstruct2-train-loss.png -------------------------------------------------------------------------------- /docs/assets/sft-openmathinstruct2-train1M-loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/sft-openmathinstruct2-train1M-loss.png -------------------------------------------------------------------------------- /docs/assets/val-log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/docs/assets/val-log.png -------------------------------------------------------------------------------- /docs/autodoc2_docstrings_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from docutils import nodes 15 | from myst_parser.parsers.sphinx_ import MystParser 16 | from sphinx.ext.napoleon.docstring import GoogleDocstring 17 | 18 | 19 | class NapoleonParser(MystParser): 20 | def parse(self, input_string: str, document: nodes.document) -> None: 21 | # Get the Sphinx configuration 22 | config = document.settings.env.config 23 | 24 | # Process with Google style 25 | google_parsed = str(GoogleDocstring(input_string, config)) 26 | 27 | return super().parse(google_parsed, document) 28 | 29 | 30 | Parser = NapoleonParser 31 | -------------------------------------------------------------------------------- /docs/design-docs/chat-datasets.md: -------------------------------------------------------------------------------- 1 | # Data Format 2 | 3 | This guide outlines the required data format for Hugging Face chat datasets and demonstrates how to use chat templates with Hugging Face tokenizers to add special tokens or task-specific information. 4 | 5 | ## Hugging Face Chat Datasets 6 | 7 | Hugging Face chat datasets are expected to have the following structure: Each example in the dataset should be a dictionary with a `messages` key. The `messages` should be a list of dictionaries, each with a `role` and `content` key. The `role` typically has one of the following values: `system`, `user`, and `assistant`. For example: 8 | 9 | ```json 10 | { 11 | "messages": [ 12 | { 13 | "role": "system", 14 | "content": "This is a helpful system message." 15 | }, 16 | { 17 | "role": "user", 18 | "content": "This is a user's question" 19 | }, 20 | { 21 | "role": "assistant", 22 | "content": "This is the assistant's response." 23 | } 24 | ] 25 | } 26 | ``` 27 | 28 | ## Chat Templates 29 | 30 | Formatting the data in this way allows us to take advantage of the Hugging Face tokenizers' `apply_chat_template` functionality to combine the messages. Chat templates can be used to add special tokens or task-specific information to each example in the dataset. Refer to the [HuggingFace apply_chat_template documentation](https://huggingface.co/docs/transformers/main/en/chat_templating#applychattemplate) for details. 31 | 32 | By default, `apply_chat_template` attempts to apply the `chat_template` associated with the tokenizer. However, in some cases, users might want to specify their own chat template. Also, note that many tokenizers do not have associated `chat_template`s, in which case an explicit chat template is required. Users can specify an explicit chat template string using Jinja format and can pass that string to `apply_chat_template`. 
33 | The following is an example using a simple template which prepends a role header to each turn: 34 | 35 | ```{testcode} 36 | from transformers import AutoTokenizer 37 | 38 | example_template = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{{ content }}{% endfor %}" 39 | 40 | example_input = [ 41 | { 42 | 'role': 'user', 43 | 'content': 'Hello!' 44 | }, 45 | { 46 | 'role': 'assistant', 47 | 'content': 'Hi there!' 48 | } 49 | ] 50 | tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B") 51 | output = tokenizer.apply_chat_template(example_input, chat_template=example_template, tokenize=False) 52 | 53 | ## this is the output string we expect 54 | expected_output = '<|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there!<|eot_id|>' 55 | assert output == expected_output 56 | ``` 57 | 58 | 59 | ```{testoutput} 60 | :hide: 61 | ``` 62 | 63 | For more details on creating chat templates, refer to the [Hugging Face documentation](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template). -------------------------------------------------------------------------------- /docs/design-docs/checkpointing.md: -------------------------------------------------------------------------------- 1 | # Checkpointing with Hugging Face Models 2 | 3 | NeMo RL provides two checkpoint formats for Hugging Face models: Torch distributed and Hugging Face format. Torch distributed is used by default for efficiency, and Hugging Face format is provided for compatibility with Hugging Face's `AutoModel.from_pretrained` API. Note that Hugging Face format checkpoints save only the model weights, ignoring the optimizer states. It is recommended to use Torch distributed format to save intermediate checkpoints and to save a Hugging Face checkpoint only at the end of training. 4 | 5 | A checkpoint converter is provided to convert a Torch distributed checkpoint to Hugging Face format after training: 6 | 7 | ```sh 8 | uv run examples/convert_dcp_to_hf.py --config= --dcp-ckpt-path= --hf-ckpt-path= 9 | ``` 10 | 11 | Usually Hugging Face checkpoints keep the weights and tokenizer together (which we also recommend for provenance). You can copy it afterwards. Here's an end-to-end example: 12 | 13 | ```sh 14 | # Change to your appropriate checkpoint directory 15 | CKPT_DIR=results/sft/step_10 16 | 17 | uv run examples/convert_dcp_to_hf.py --config=$CKPT_DIR/config.yaml --dcp-ckpt-path=$CKPT_DIR/policy/weights --hf-ckpt-path=${CKPT_DIR}-hf 18 | rsync -ahP $CKPT_DIR/policy/tokenizer ${CKPT_DIR}-hf/ 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/design-docs/fsdp2-parallel-plan.md: -------------------------------------------------------------------------------- 1 | # FSDP2 Parallel Plan 2 | 3 | This guide outlines the parallelization strategy for Fully Sharded Data Parallel version 2 (FSDP2) training in NeMo RL. 4 | 5 | ## Fallback Priority 6 | 7 | NeMo RL supports three parallelization strategies, applied in the following order of fallback priority: 8 | 9 | ### 1. Custom Parallel Plan 10 | 11 | Your user-defined custom parallel plans always take precedence when available. For detailed implementation and usage, refer to the [Custom Parallel Plan Example](#custom-parallel-plan-example). 12 | 13 | ### 2.
Optimized Parallel Plan 14 | 15 | Optimized parallel plans are available for specific model architectures. They may offer superior performance compared to Hugging Face's tensor parallel implementation. This approach is used if no custom parallel plan is specified and the model class supports optimized parallelization. 16 | 17 | ### 3. Hugging Face Tensor Parallel Plan 18 | 19 | The Hugging Face tensor parallel plan is the default. It's available for most models via `._tp_plan` and is used when neither a custom nor an optimized parallel plan is available. 20 | 21 | ## Custom Parallel Plan Example 22 | 23 | A custom parallel plan should be defined in a separate file, such as the example provided in `examples/custom_parallel.py`. 24 | 25 | To implement the custom parallel plan, either update the value of `custom_parallel_plan` in the `yaml` file directly, or pass the override via the command line. For example: 26 | 27 | ```bash 28 | uv run examples/run_grpo_math.py \ 29 | policy.dtensor_cfg.custom_parallel_plan=examples.custom_parallel.custom_parallel_plan 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/docker.md: -------------------------------------------------------------------------------- 1 | # Build Docker Images 2 | 3 | This guide provides two methods for building Docker images: the base image, ideal for specifying Python dependencies at runtime, and the hermetic image, which includes default dependencies for offline use. 4 | 5 | ## Base Image 6 | 7 | If you only need the base image with ray + uv, you can build it like so: 8 | 9 | ```sh 10 | cd docker/ 11 | docker buildx build --target base -t nemo_rl -f Dockerfile .. 12 | ``` 13 | 14 | This is **our recommendation** as it is a small image and allows you to specify your Python dependencies at runtime. 15 | 16 | ## Hermetic Image 17 | 18 | The Docker image build without a target stage will include all of the default dependencies to get started. 19 | 20 | ```sh 21 | cd docker/ 22 | docker buildx build -t nemo_rl -f Dockerfile .. 23 | ``` 24 | 25 | This image sets up the Python environment for you, so you do not have to use `uv` if you don't need 26 | any other packages. 27 | 28 | This image is useful in situations where you may not have network connectivity to re-download packages. 29 | -------------------------------------------------------------------------------- /docs/documentation.md: -------------------------------------------------------------------------------- 1 | # Documentation Development 2 | 3 | - [Documentation Development](#documentation-development) 4 | - [Build the Documentation](#build-the-documentation) 5 | - [Live Building](#live-building) 6 | - [Run Tests in Python Docstrings](#run-tests-in-python-docstrings) 7 | - [Write Tests in Python Docstrings](#write-tests-in-python-docstrings) 8 | - [Documentation Version](#documentation-version) 9 | 10 | 11 | ## Build the Documentation 12 | 13 | The following sections describe how to set up and build the NeMo RL documentation. 14 | 15 | Switch to the documentation source folder and generate HTML output. 16 | 17 | ```sh 18 | cd docs/ 19 | uv run --group docs sphinx-build . _build/html 20 | ``` 21 | 22 | * The resulting HTML files are generated in a `_build/html` folder that is created under the project `docs/` folder. 23 | * The generated python API docs are placed in `apidocs` under the `docs/` folder. 
24 | 25 | ## Live Building 26 | 27 | When writing documentation, it can be helpful to serve the documentation and have it update live while you edit. 28 | 29 | To do so, run: 30 | 31 | ```sh 32 | cd docs/ 33 | uv run --group docs sphinx-autobuild . _build/html --port 12345 --host 0.0.0.0 34 | ``` 35 | 36 | Open a web browser and go to `http://${HOST_WHERE_SPHINX_COMMAND_RUN}:12345` to view the output. 37 | 38 | 39 | ## Run Tests in Python Docstrings 40 | 41 | We also run tests in our Python docstrings. You can run them with: 42 | 43 | ```sh 44 | cd docs/ 45 | uv run --group docs sphinx-build -b doctest . _build/doctest 46 | ``` 47 | 48 | ## Write Tests in Python Docstrings 49 | 50 | Any code in triple backtick blocks with the `{doctest}` directive will be tested. The format follows Python's doctest module syntax, where `>>>` indicates Python input and the following line shows the expected output. Here's an example: 51 | 52 | ```python 53 | def add(x: int, y: int) -> int: 54 | """ 55 | Adds two integers together. 56 | 57 | Args: 58 | x (int): The first integer to add. 59 | y (int): The second integer to add. 60 | 61 | Returns: 62 | int: The sum of x and y. 63 | 64 | Examples: 65 | ```{doctest} 66 | >>> from nemo_rl.made_up_package import add 67 | >>> add(1, 2) 68 | 3 69 | ``` 70 | 71 | """ 72 | return x + y 73 | ``` 74 | 75 | ## Documentation Version 76 | 77 | The three files below control the version switcher. Before you attempt to publish a new version of the documentation, update these files to match the latest version numbers. 78 | 79 | * docs/versions1.json 80 | * docs/project.json 81 | * docs/conf.py 82 | 83 | -------------------------------------------------------------------------------- /docs/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import json 16 | import tempfile 17 | 18 | 19 | def make_dpo_dataset(): 20 | train_file = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) 21 | val_file = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) 22 | 23 | # Write train data 24 | train_data = [ 25 | {"context": "What is 2+2?", "chosen": "4", "rejected": "5"}, 26 | {"context": "What is 3*3?", "chosen": "9", "rejected": "6"}, 27 | ] 28 | for item in train_data: 29 | lines = train_file.write(json.dumps(item) + "\n") 30 | train_file.flush() 31 | 32 | # Write validation data 33 | val_data = [ 34 | {"context": "What is 4+4?", "chosen": "8", "rejected": "7"}, 35 | {"context": "What is 5*5?", "chosen": "25", "rejected": "20"}, 36 | ] 37 | for item in val_data: 38 | lines = val_file.write(json.dumps(item) + "\n") 39 | val_file.flush() 40 | 41 | return train_file, val_file 42 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | :relative-docs: docs/ 3 | ``` 4 | 5 | ```{toctree} 6 | :caption: 🖥️ Environment Start 7 | :hidden: 8 | 9 | local-workstation.md 10 | cluster.md 11 | 12 | ``` 13 | 14 | ```{toctree} 15 | :caption: 🚀 E2E Examples 16 | :hidden: 17 | 18 | guides/grpo-deepscaler.md 19 | guides/sft-openmathinstruct2.md 20 | ``` 21 | 22 | ```{toctree} 23 | :caption: 📚 Guides 24 | :hidden: 25 | 26 | adding-new-models.md 27 | guides/sft.md 28 | guides/dpo.md 29 | guides/grpo.md 30 | guides/grpo-deepscaler.md 31 | guides/eval.md 32 | model-quirks.md 33 | ``` 34 | 35 | ```{toctree} 36 | :caption: 🐳 Containers 37 | :hidden: 38 | 39 | docker.md 40 | ``` 41 | 42 | ```{toctree} 43 | :caption: 🛠️ Development 44 | :hidden: 45 | 46 | testing.md 47 | documentation.md 48 | debugging.md 49 | apidocs/index.rst 50 | ``` 51 | 52 | ```{toctree} 53 | :caption: 📐 Design Docs 54 | :hidden: 55 | 56 | design-docs/design-and-philosophy.md 57 | design-docs/padding.md 58 | design-docs/logger.md 59 | design-docs/uv.md 60 | design-docs/chat-datasets.md 61 | design-docs/generation.md 62 | design-docs/checkpointing.md 63 | design-docs/loss-functions.md 64 | design-docs/fsdp2-parallel-plan.md 65 | ``` 66 | -------------------------------------------------------------------------------- /docs/local-workstation.md: -------------------------------------------------------------------------------- 1 | # Run on Your Local Workstation 2 | 3 | When launching examples locally with `uv`, {py:class}`init_ray() ` will first attempt to connect to an existing cluster. If none is found, it will start a local one and connect to it using all available GPU and CPU resources on your node. 4 | 5 | To launch a job outside of a container, simply run: 6 | 7 | ```sh 8 | uv run examples/run_grpo_math.py 9 | ``` 10 | 11 | In the logs, you will see that Ray has started a local cluster instance, along with details on the resources made available to it: 12 | ``` 13 | 2025-03-17 13:37:45,360 INFO worker.py:1841 -- Started a local Ray instance. 14 | ... 
15 | INFO:nemo_rl.distributed.virtual_cluster:Started local cluster with: {'node:__internal_head__': 1.0, 'CPU': 24.0, 'object_store_memory': 80448493977.0, 'accelerator_type:RTX': 1.0, 'memory': 177713152615.0, 'GPU': 1.0, 'node:10.0.0.1': 1.0} 16 | ``` 17 | 18 | To have more precise control over the GPUs Ray uses locally, please use `CUDA_VISIBLE_DEVICES`: 19 | 20 | ```sh 21 | # Use the 0th and 3rd indexed GPU (for a total of 2 GPUs) 22 | CUDA_VISIBLE_DEVICES=0,3 uv run examples/run_grpo_math.py 23 | ``` 24 | 25 | We also allow multiple colocated local clusters, which are uniquely identified by the values in 26 | `CUDA_VISIBLE_DEVICES`. Concretely: 27 | 28 | ```sh 29 | # (1) Start a fresh cluster on GPU=0 30 | CUDA_VISIBLE_DEVICES=0 uv run examples/run_grpo_math.py 31 | 32 | # (2) While (1) is running, this will start a new cluster using GPUs 1 and 2 without interfering with (1) 33 | # Ensure that the CUDA_VISIBLE_DEVICES do not overlap already running jobs. 34 | CUDA_VISIBLE_DEVICES=1,2 uv run examples/run_grpo_math.py 35 | ``` 36 | -------------------------------------------------------------------------------- /docs/model-quirks.md: -------------------------------------------------------------------------------- 1 | # Model Quirks 2 | 3 | This document outlines special cases and model-specific behaviors that require custom handling in NeMo RL. These special cases are controlled by the `ModelFlag` enum. 4 | 5 | ## Gemma-3 6 | 7 | ### Tied Weights 8 | 9 | Weight tying between the embedding layer (`model.embed_tokens`) and output layer (`lm_head`) is currently not respected when using the FSDP1 policy or the DTensor policy when TP > 1 (See [this issue](https://github.com/NVIDIA/NeMo-RL/issues/227)). To avoid errors when training these models, we only allow training models with tied weights using the DTensor policy with TP=1. For Llama-3 and Qwen2.5 models, weight-tying is only enabled for the smaller models (< 2B), which can typically be trained without tensor parallelism. For Gemma-3, all model sizes have weight-tying enabled, including the larger models which require tensor parallelism. To support training of these models, we specially handle the Gemma-3 models by allowing training using the DTensor policy with TP > 1. 10 | 11 | **Special Handling:** 12 | - We skip the tied weights check for all Gemma-3 models when using the DTensor policy, allowing training using TP > 1. 13 | - We exclude `model.embed_tokens` and `lm_head` from the DTensor tensor parallel plan to maintain weight tying correctly. 14 | 15 | ### vLLM Initialization 16 | 17 | Gemma-3 models have a specific issue with vLLM dummy weight initialization due to a vLLM bug where [a `normalizer` buffer is created](https://github.com/vllm-project/vllm/blob/964472b9667508b1d4a7ed92068ff81740ae0036/vllm/model_executor/models/gemma3.py#L372) that is not present in the Hugging Face model. This causes the `normalizer` buffer to be set to dummy weights at initialization and then never updated with the correct values during model refit. As a workaround for this issue, we do not use dummy weight initialization for vLLM with Gemma-3 models and instead use the `load_format="auto"` setting to load the full weights at initialization. 18 | 19 | **Special Handling:** 20 | - We automatically use `load_format="auto"` for Gemma-3 models when initializing vLLM. 21 | - This avoids issues with dummy weight initialization, where the dummy weights for this buffer would never get overwritten during refit.
22 | -------------------------------------------------------------------------------- /docs/project.json: -------------------------------------------------------------------------------- 1 | {"name": "nemo-rl", "version": "0.2.1"} -------------------------------------------------------------------------------- /docs/versions1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "preferred": true, 4 | "version": "0.2.1", 5 | "url": "../0.2.1" 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /examples/configs/eval.yaml: -------------------------------------------------------------------------------- 1 | # Evaluation Configuration 2 | eval: 3 | metric: "pass@1" # only pass@1 is supported now 4 | num_tests_per_prompt: 1 # every prompt will be tested num_tests_per_prompt times and use the average score as the final score 5 | seed: 42 6 | 7 | generation: 8 | backend: "vllm" # only vllm is supported for evaluation 9 | max_new_tokens: ${generation.vllm_cfg.max_model_len} 10 | temperature: 0.0 11 | top_p: 1.0 12 | top_k: -1 # -1 means disable 13 | num_prompts_per_step: -1 # -1 means pass all prompts at once 14 | model_name: "Qwen/Qwen2.5-Math-1.5B-Instruct" 15 | stop_token_ids: null 16 | stop_strings: null 17 | vllm_cfg: 18 | async_engine: false 19 | precision: "bfloat16" 20 | tensor_parallel_size: 1 21 | pipeline_parallel_size: 1 22 | gpu_memory_utilization: 0.9 23 | max_model_len: 2048 24 | 25 | tokenizer: 26 | name: ${generation.model_name} ## specify if you'd like to use a tokenizer different from the model's default 27 | chat_template: "default" 28 | 29 | data: 30 | max_input_seq_length: ${generation.vllm_cfg.max_model_len} # useless since we directly use prompts in evaluation 31 | prompt_file: null 32 | system_prompt_file: null 33 | dataset_name: "HuggingFaceH4/aime_2024" 34 | dataset_key: "train" 35 | problem_key: "problem" 36 | solution_key: "answer" 37 | 38 | env: 39 | math: 40 | num_workers: 8 41 | 42 | cluster: 43 | gpus_per_node: 1 44 | num_nodes: 1 45 | -------------------------------------------------------------------------------- /examples/configs/grpo-deepscaler-1.5b-16K.yaml: -------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo-deepscaler-1.5b-8K.yaml" 3 | 4 | loss_fn: 5 | reference_policy_kl_penalty: 0.001 6 | ratio_clip_max: 0.28 7 | 8 | 9 | policy: 10 | max_total_sequence_length: 16384 11 | 12 | 13 | dynamic_batching: 14 | enabled: False -------------------------------------------------------------------------------- /examples/configs/grpo_deepscaler-1.5b-24K.yaml: 
-------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo-deepscaler-1.5b-8K.yaml" 3 | 4 | loss_fn: 5 | reference_policy_kl_penalty: 0.0001 6 | ratio_clip_min: 0.2 7 | ratio_clip_max: 0.28 8 | 9 | policy: 10 | max_total_sequence_length: 24576 11 | 12 | dtensor_cfg: 13 | enabled: true 14 | cpu_offload: true 15 | sequence_parallel: true 16 | activation_checkpointing: true 17 | tensor_parallel_size: 4 18 | custom_parallel_plan: null 19 | 20 | dynamic_batching: 21 | enabled: False 22 | 23 | optimizer: 24 | name: "torch.optim.AdamW" 25 | kwargs: 26 | lr: 5.0e-7 27 | 28 | generation: 29 | backend: "vllm" 30 | max_new_tokens: ${policy.max_total_sequence_length} 31 | temperature: 1.0 32 | top_p: 1.0 33 | top_k: null 34 | stop_token_ids: null 35 | stop_strings: null 36 | vllm_cfg: 37 | precision: ${policy.precision} 38 | tensor_parallel_size: 1 39 | pipeline_parallel_size: 1 40 | gpu_memory_utilization: 0.8 41 | max_model_len: ${policy.max_total_sequence_length} 42 | # For most cases, use "dummy" to load the initial weights, since they will be overwritten during refit 43 | # For Gemma models, we need to use "auto" due to a vllm bug 44 | load_format: dummy 45 | -------------------------------------------------------------------------------- /examples/configs/grpo_math_8B.yaml: -------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo_math_1B.yaml" 3 | 4 | grpo: 5 | num_prompts_per_step: 64 6 | num_generations_per_prompt: 32 7 | 8 | policy: 9 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 10 | tokenizer: 11 | name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default 12 | train_global_batch_size: 512 13 | train_micro_batch_size: 1 14 | generation_batch_size: 32 # Only used when generating using HF backend 15 | logprob_batch_size: 2 16 | max_total_sequence_length: 4096 17 | precision: "bfloat16" 18 | fsdp_offload_enabled: false 19 | activation_checkpointing_enabled: false 20 | 21 | dtensor_cfg: 22 | enabled: True 23 | 24 | dynamic_batching: 25 | train_mb_tokens: 4096 26 | logprob_mb_tokens: 8192 27 | 28 | optimizer: 29 | name: "torch.optim.AdamW" 30 | kwargs: 31 | lr: 3.0e-7 32 | weight_decay: 0.01 33 | betas: [0.9, 0.999] 34 | eps: 1e-8 35 | 36 | scheduler: 37 | - name: "torch.optim.lr_scheduler.LinearLR" 38 | kwargs: 39 | start_factor: 0.1 40 | end_factor: 1.0 41 | # The scheduler iteration is per GRPO step and is decoupled from the optimizer step (may be >=1 per GRPO step) 42 | total_iters: 13 43 | - name: "torch.optim.lr_scheduler.ConstantLR" 44 | kwargs: 45 | factor: 1.0 46 | total_iters: 10000000000 47 | - milestones: [13] 48 | 49 | generation: 50 | backend: "vllm" 51 | max_new_tokens: ${policy.max_total_sequence_length} 52 | temperature: 1.0 53 | top_p: 1.0 54 | top_k: null 55 | stop_token_ids: null 56 | stop_strings: null 57 | vllm_cfg: 58 | tensor_parallel_size: 1 59 | gpu_memory_utilization: 0.6 60 | max_model_len: ${policy.max_total_sequence_length} 61 | 62 | cluster: 63 | gpus_per_node: 8 64 | num_nodes: 1 65 | -------------------------------------------------------------------------------- /examples/configs/grpo_sliding_puzzle.yaml: -------------------------------------------------------------------------------- 1 | # GRPO Algorithm Configuration 2 | defaults: "grpo_math_1B.yaml" 3 | 4 | grpo: 5 | num_prompts_per_step: 32 6 | num_generations_per_prompt: 16 7 | max_rollout_turns: 50 # 
Maximum turns allowed per rollout 8 | max_num_steps: 10000 9 | 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: "results/grpo-sliding-puzzle" 13 | metric_name: "val_reward" 14 | higher_is_better: true 15 | keep_top_k: 3 16 | save_period: 10 17 | 18 | policy: 19 | model_name: "Qwen/Qwen2.5-1.5B-Instruct" 20 | max_total_sequence_length: 3072 21 | 22 | generation: 23 | backend: "vllm" 24 | max_new_tokens: ${policy.max_total_sequence_length} 25 | temperature: 1.0 26 | # Setting top_p/top_k to 0.999/10000 to strip out Qwen's special/illegal tokens 27 | # https://github.com/NVIDIA/NeMo-RL/issues/237 28 | top_p: 0.999 29 | top_k: 10000 30 | stop_token_ids: null 31 | stop_strings: null 32 | vllm_cfg: 33 | async_engine: false 34 | tensor_parallel_size: 1 35 | pipeline_parallel_size: 1 36 | gpu_memory_utilization: 0.6 37 | max_model_len: ${policy.max_total_sequence_length} 38 | 39 | data: 40 | add_system_prompt: false 41 | 42 | env: 43 | sliding_puzzle_game: 44 | cfg: 45 | game_config: 46 | size: 5 # Size of the puzzle (e.g., 2 for 2x2, 3 for 3x3) 47 | shuffle_moves: 15 # Number of random moves to shuffle the solved state 48 | max_moves: 50 # Maximum moves allowed per episode 49 | 50 | logger: 51 | log_dir: "logs" # Base directory for all logs 52 | num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal 53 | wandb_enabled: false 54 | tensorboard_enabled: false 55 | monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard 56 | wandb: 57 | project: "grpo-dev" 58 | name: "grpo-dev-sliding_puzzle" 59 | tensorboard: {} 60 | gpu_monitoring: 61 | collection_interval: 10 # How often to collect GPU usage metrics (in seconds) 62 | flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) 63 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 2 3 | max_num_steps: 20 4 | val_period: 50 5 | val_batches: 16 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0.01 16 | 17 | checkpointing: 18 | enabled: false 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 10000 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | train_global_batch_size: 256 30 | train_micro_batch_size: 1 31 | max_total_sequence_length: 2048 32 | precision: "bfloat16" 33 | fsdp_offload_enabled: false 34 | activation_checkpointing_enabled: false 35 | 36 | dtensor_cfg: 37 | enabled: false 38 | cpu_offload: False 39 | sequence_parallel: false 40 | activation_checkpointing: false 41 | tensor_parallel_size: 1 42 | custom_parallel_plan: null 43 | 44 | dynamic_batching: 45 | enabled: False 46 | 47 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 48 | max_grad_norm: 1.0 49 | 50 | optimizer: 51 | name: "torch.optim.AdamW" 52 | kwargs: 53 | lr: 5.0e-6 54 | weight_decay: 0.1 55 | betas: [0.9, 0.98] 56 | eps: 1e-8 57 | foreach: False 58 | fused: False 59 | 60 | scheduler: 61 | - name: "torch.optim.lr_scheduler.LinearLR" 62 
| kwargs: 63 | start_factor: 0.000000001 64 | end_factor: 1.0 65 | total_iters: 1 66 | - name: "torch.optim.lr_scheduler.ConstantLR" 67 | kwargs: 68 | factor: 1.0 69 | total_iters: 10000000000 70 | - milestones: [1] 71 | 72 | data: 73 | dataset_name: "HelpSteer3" 74 | max_input_seq_length: ${policy.max_total_sequence_length} 75 | 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 4 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 1 3 | max_num_steps: 150 4 | val_period: 50 5 | val_batches: 16 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0.01 16 | 17 | checkpointing: 18 | enabled: true 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 10000 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | train_global_batch_size: 256 30 | train_micro_batch_size: 1 31 | max_total_sequence_length: 2048 32 | precision: "bfloat16" 33 | fsdp_offload_enabled: false 34 | activation_checkpointing_enabled: false 35 | 36 | dtensor_cfg: 37 | enabled: true 38 | cpu_offload: False 39 | sequence_parallel: false 40 | activation_checkpointing: false 41 | tensor_parallel_size: 1 42 | custom_parallel_plan: null 43 | 44 | dynamic_batching: 45 | enabled: False 46 | 47 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 48 | max_grad_norm: 1.0 49 | 50 | optimizer: 51 | name: "torch.optim.AdamW" 52 | kwargs: 53 | lr: 5.0e-6 54 | weight_decay: 0.1 55 | betas: [0.9, 0.98] 56 | eps: 1e-8 57 | foreach: False 58 | fused: False 59 | 60 | scheduler: 61 | - name: "torch.optim.lr_scheduler.LinearLR" 62 | kwargs: 63 | start_factor: 0.000000001 64 | end_factor: 1.0 65 | total_iters: 1 66 | - name: "torch.optim.lr_scheduler.ConstantLR" 67 | kwargs: 68 | factor: 1.0 69 | total_iters: 10000000000 70 | - milestones: [1] 71 | 72 | data: 73 | dataset_name: "HelpSteer3" 74 | max_input_seq_length: ${policy.max_total_sequence_length} 75 | 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 4 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 2 3 | max_num_steps: 20 4 | val_period: 50 5 | val_batches: 16 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 
42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0.01 16 | 17 | checkpointing: 18 | enabled: true 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 10000 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.1-8B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | train_global_batch_size: 256 30 | train_micro_batch_size: 1 31 | max_total_sequence_length: 2048 32 | precision: "bfloat16" 33 | fsdp_offload_enabled: false 34 | activation_checkpointing_enabled: false 35 | 36 | dtensor_cfg: 37 | enabled: true 38 | cpu_offload: False 39 | sequence_parallel: false 40 | activation_checkpointing: false 41 | tensor_parallel_size: 2 42 | custom_parallel_plan: null 43 | 44 | dynamic_batching: 45 | enabled: False 46 | 47 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 48 | max_grad_norm: 1.0 49 | 50 | optimizer: 51 | name: "torch.optim.AdamW" 52 | kwargs: 53 | lr: 5.0e-6 54 | weight_decay: 0.1 55 | betas: [0.9, 0.98] 56 | eps: 1e-8 57 | foreach: False 58 | fused: False 59 | 60 | scheduler: 61 | - name: "torch.optim.lr_scheduler.LinearLR" 62 | kwargs: 63 | start_factor: 0.000000001 64 | end_factor: 1.0 65 | total_iters: 1 66 | - name: "torch.optim.lr_scheduler.ConstantLR" 67 | kwargs: 68 | factor: 1.0 69 | total_iters: 10000000000 70 | - milestones: [1] 71 | 72 | data: 73 | dataset_name: "HelpSteer3" 74 | max_input_seq_length: ${policy.max_total_sequence_length} 75 | 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 4 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml: -------------------------------------------------------------------------------- 1 | dpo: 2 | max_num_epochs: 1 3 | max_num_steps: 150 4 | val_period: 25 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: false 9 | seed: 42 10 | 11 | reference_policy_kl_penalty: 0.05 12 | preference_average_log_probs: False 13 | sft_average_log_probs: ${.preference_average_log_probs} 14 | preference_loss_weight: 1 15 | sft_loss_weight: 0 16 | 17 | checkpointing: 18 | enabled: true 19 | checkpoint_dir: "results/dpo" 20 | metric_name: "val_loss" 21 | higher_is_better: false 22 | keep_top_k: 3 23 | save_period: 50 24 | 25 | policy: 26 | model_name: "meta-llama/Llama-3.2-1B-Instruct" 27 | tokenizer: 28 | name: ${policy.model_name} 29 | 30 | train_global_batch_size: 128 31 | train_micro_batch_size: 2 32 | max_total_sequence_length: 1024 33 | precision: "bfloat16" 34 | fsdp_offload_enabled: false 35 | activation_checkpointing_enabled: false 36 | 37 | dtensor_cfg: 38 | enabled: true 39 | cpu_offload: False 40 | sequence_parallel: false 41 | activation_checkpointing: false 42 | tensor_parallel_size: 1 43 | custom_parallel_plan: null 44 | 45 | dynamic_batching: 46 | enabled: False 47 | 48 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 49 | max_grad_norm: 1.0 50 | 51 | optimizer: 52 | name: "torch.optim.AdamW" 53 | 
kwargs: 54 | lr: 5.0e-6 55 | weight_decay: 0.1 56 | betas: [0.9, 0.98] 57 | eps: 1e-5 58 | foreach: False 59 | fused: False 60 | 61 | scheduler: 62 | - name: "torch.optim.lr_scheduler.LinearLR" 63 | kwargs: 64 | start_factor: 0.1 65 | end_factor: 1.0 66 | total_iters: 20 67 | - name: "torch.optim.lr_scheduler.ConstantLR" 68 | kwargs: 69 | factor: 1.0 70 | total_iters: 10000000000 71 | - milestones: [20] 72 | 73 | data: 74 | dataset_name: "HelpSteer3" 75 | max_input_seq_length: ${policy.max_total_sequence_length} 76 | logger: 77 | log_dir: "logs" 78 | wandb_enabled: true 79 | tensorboard_enabled: true 80 | monitor_gpus: true 81 | wandb: 82 | project: nemo-rl 83 | name: dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 84 | tensorboard: {} 85 | gpu_monitoring: 86 | collection_interval: 10 87 | flush_interval: 10 88 | 89 | cluster: 90 | gpus_per_node: 8 91 | num_nodes: 1 92 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml: -------------------------------------------------------------------------------- 1 | grpo: 2 | num_prompts_per_step: 64 3 | num_generations_per_prompt: 32 4 | max_rollout_turns: 1 5 | max_num_steps: 30 6 | normalize_rewards: true 7 | use_leave_one_out_baseline: true 8 | val_period: 10 9 | val_at_start: false 10 | max_val_samples: 256 11 | val_batch_size: 256 12 | loss_fn: 13 | reference_policy_kl_penalty: 0.01 14 | ratio_clip_min: 0.2 15 | ratio_clip_max: 0.2 16 | ratio_clip_c: null 17 | use_on_policy_kl_approximation: false 18 | use_importance_sampling_correction: false 19 | token_level_loss: true 20 | checkpointing: 21 | enabled: true 22 | checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 23 | metric_name: val_reward 24 | higher_is_better: true 25 | keep_top_k: 3 26 | save_period: 10 27 | policy: 28 | model_name: Qwen/Qwen2.5-7B-Instruct 29 | tokenizer: 30 | name: Qwen/Qwen2.5-7B-Instruct 31 | train_global_batch_size: 512 32 | train_micro_batch_size: 1 33 | generation_batch_size: 32 34 | logprob_batch_size: 2 35 | max_total_sequence_length: 4096 36 | precision: bfloat16 37 | fsdp_offload_enabled: false 38 | activation_checkpointing_enabled: false 39 | dtensor_cfg: 40 | enabled: false 41 | cpu_offload: false 42 | sequence_parallel: false 43 | activation_checkpointing: false 44 | tensor_parallel_size: 1 45 | custom_parallel_plan: null 46 | dynamic_batching: 47 | enabled: False 48 | make_sequence_length_divisible_by: 1 49 | max_grad_norm: 1 50 | optimizer: 51 | name: torch.optim.AdamW 52 | kwargs: 53 | lr: 3e-07 54 | weight_decay: 0.01 55 | betas: 56 | - 0.9 57 | - 0.999 58 | eps: 1e-08 59 | foreach: false 60 | fused: false 61 | scheduler: 62 | - name: torch.optim.lr_scheduler.LinearLR 63 | kwargs: 64 | start_factor: 0.1 65 | end_factor: 1 66 | total_iters: 13 67 | - name: torch.optim.lr_scheduler.ConstantLR 68 | kwargs: 69 | factor: 1 70 | total_iters: 10000000000 71 | - milestones: 72 | - 13 73 | generation: 74 | backend: vllm 75 | max_new_tokens: 4096 76 | temperature: 1 77 | top_p: 1 78 | top_k: null 79 | stop_token_ids: 80 | - 151645 81 | stop_strings: null 82 | vllm_cfg: 83 | async_engine: false 84 | precision: ${policy.precision} 85 | tensor_parallel_size: 1 86 | pipeline_parallel_size: 1 87 | gpu_memory_utilization: 0.6 88 | max_model_len: 4096 89 | data: 90 | max_input_seq_length: 4096 91 | prompt_file: examples/prompts/cot.txt 92 | system_prompt_file: null 93 | dataset_name: OpenMathInstruct-2 94 | env: 95 | math: 96 | num_workers: 8 97 | logger: 98 | log_dir: 
logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 99 | num_val_samples_to_print: 0 100 | wandb_enabled: true 101 | tensorboard_enabled: true 102 | monitor_gpus: true 103 | wandb: 104 | project: nemo-rl 105 | name: grpo-qwen2.5-7b-instruct-4n8g-fsdp1 106 | tensorboard: {} 107 | gpu_monitoring: 108 | collection_interval: 10 109 | flush_interval: 10 110 | cluster: 111 | gpus_per_node: 8 112 | num_nodes: 4 113 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 250 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp1 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.1-8B-Instruct 19 | tokenizer: 20 | name: meta-llama/Llama-3.1-8B-Instruct 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: false 30 | cpu_offload: false 31 | sequence_parallel: false 32 | activation_checkpointing: false 33 | tensor_parallel_size: 1 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 1 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.1-8b-instruct-1n8g-fsdp1 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 2730 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.1-8B-Instruct 19 | tokenizer: 20 | name: meta-llama/Llama-3.1-8B-Instruct 21 | chat_template: '{% for message in 
messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: false 32 | activation_checkpointing: false 33 | tensor_parallel_size: 1 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 1 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 350 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.1-8B-Instruct 19 | tokenizer: 20 | name: meta-llama/Llama-3.1-8B-Instruct 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: true 32 | activation_checkpointing: false 33 | tensor_parallel_size: 2 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 2 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: 
logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 500 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-llama3.2-1b-1n8g-fsdp2tp1 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: meta-llama/Llama-3.2-1B 19 | tokenizer: 20 | name: meta-llama/Llama-3.2-1B 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 1024 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: false 32 | activation_checkpointing: false 33 | tensor_parallel_size: 1 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 1 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 1024 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-llama3.2-1b-1n8g-fsdp2tp1 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 1 72 | -------------------------------------------------------------------------------- /examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml: -------------------------------------------------------------------------------- 1 | sft: 2 | max_num_epochs: 1 3 | max_num_steps: 20 4 | val_period: 10 5 | val_batches: 8 6 | val_global_batch_size: 32 7 | val_micro_batch_size: 1 8 | val_at_start: true 9 | seed: 42 10 | checkpointing: 11 | enabled: true 12 | checkpoint_dir: results/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt 13 | metric_name: val_loss 14 | higher_is_better: false 15 | keep_top_k: 3 16 | save_period: 10 17 | policy: 18 | model_name: Qwen/Qwen2.5-32B 19 | tokenizer: 20 | name: Qwen/Qwen2.5-32B 21 | chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + 
message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' 22 | train_global_batch_size: 32 23 | train_micro_batch_size: 1 24 | max_total_sequence_length: 16000 25 | precision: bfloat16 26 | fsdp_offload_enabled: false 27 | activation_checkpointing_enabled: false 28 | dtensor_cfg: 29 | enabled: true 30 | cpu_offload: false 31 | sequence_parallel: true 32 | activation_checkpointing: true 33 | tensor_parallel_size: 8 34 | custom_parallel_plan: null 35 | dynamic_batching: 36 | enabled: False 37 | make_sequence_length_divisible_by: 8 38 | max_grad_norm: 1 39 | optimizer: 40 | name: torch.optim.AdamW 41 | kwargs: 42 | lr: 5e-06 43 | weight_decay: 0.1 44 | betas: 45 | - 0.9 46 | - 0.98 47 | eps: 1e-05 48 | foreach: false 49 | fused: false 50 | data: 51 | max_input_seq_length: 16000 52 | dataset_name: squad 53 | add_bos: true 54 | add_eos: true 55 | add_generation_prompt: false 56 | logger: 57 | log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt 58 | wandb_enabled: true 59 | tensorboard_enabled: true 60 | monitor_gpus: true 61 | wandb: 62 | project: nemo-rl 63 | name: sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt 64 | tensorboard: 65 | log_dir: tb_logs-sft-dev-squad 66 | gpu_monitoring: 67 | collection_interval: 10 68 | flush_interval: 10 69 | cluster: 70 | gpus_per_node: 8 71 | num_nodes: 4 72 | -------------------------------------------------------------------------------- /examples/configs/sft.yaml: -------------------------------------------------------------------------------- 1 | # SFT Algorithm Configuration 2 | sft: 3 | ## total number of steps to train will equal 4 | ## min((max_num_epochs * len(train_dataloader)), max_num_steps) 5 | max_num_epochs: 1 6 | max_num_steps: 60 7 | 8 | val_period: 10 9 | val_batches: 8 10 | val_global_batch_size: 32 11 | val_micro_batch_size: 1 12 | val_at_start: true 13 | seed: 42 14 | 15 | checkpointing: 16 | enabled: true 17 | checkpoint_dir: "results/sft" 18 | metric_name: "val_loss" 19 | higher_is_better: false 20 | keep_top_k: 3 21 | save_period: 10 22 | 23 | policy: 24 | model_name: "meta-llama/Llama-3.2-1B" 25 | tokenizer: 26 | name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default 27 | chat_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" 28 | train_global_batch_size: 32 29 | train_micro_batch_size: 1 30 | max_total_sequence_length: 1024 31 | precision: "bfloat16" 32 | fsdp_offload_enabled: false 33 | activation_checkpointing_enabled: false 34 | 35 | dtensor_cfg: 36 | enabled: true 37 | cpu_offload: False 38 | sequence_parallel: false 39 | activation_checkpointing: false 40 | tensor_parallel_size: 1 41 | custom_parallel_plan: null 42 | 43 | dynamic_batching: 44 | enabled: false 45 | 46 | # makes the training sequence length divisible by the tensor parallel size 47 | # this is useful for sequence parallel training 48 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 49 | max_grad_norm: 1.0 50 | 51 | optimizer: 52 | name: "torch.optim.AdamW" 53 | kwargs: 54 | lr: 5.0e-6 55 | weight_decay: 0.1 56 | betas: [0.9, 0.98] 57 | 
eps: 1e-5 58 | # when using Dtensor, we need to set foreach 59 | # and fused to False 60 | foreach: False 61 | fused: False 62 | 63 | data: 64 | max_input_seq_length: ${policy.max_total_sequence_length} 65 | dataset_name: "squad" 66 | add_bos: true 67 | add_eos: true 68 | add_generation_prompt: false 69 | 70 | logger: 71 | log_dir: "logs" # Base directory for all logs 72 | wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running 73 | tensorboard_enabled: true 74 | monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard 75 | wandb: 76 | project: "sft-dev" 77 | name: "sft-dev-${data.dataset_name}" 78 | tensorboard: 79 | log_dir: "tb_logs-sft-dev-${data.dataset_name}" 80 | gpu_monitoring: 81 | collection_interval: 10 # How often to collect GPU usage metrics (in seconds) 82 | flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) 83 | 84 | cluster: 85 | gpus_per_node: 1 86 | num_nodes: 1 87 | -------------------------------------------------------------------------------- /examples/configs/sft_openmathinstruct2.yaml: -------------------------------------------------------------------------------- 1 | # SFT Algorithm Configuration 2 | sft: 3 | max_num_epochs: 1 4 | max_num_steps: 1000000 5 | val_period: 500 6 | val_batches: 4 7 | val_global_batch_size: 128 8 | val_micro_batch_size: 2 9 | val_at_start: true 10 | seed: 42 11 | 12 | checkpointing: 13 | enabled: true 14 | checkpoint_dir: "results/sft_openmathinstruct2" 15 | metric_name: "val_loss" 16 | higher_is_better: false 17 | keep_top_k: 100 18 | save_period: 500 19 | 20 | policy: 21 | model_name: "meta-llama/Llama-3.1-8B" 22 | tokenizer: 23 | name: meta-llama/Llama-3.1-8B-Instruct ## specify if you'd like to use a tokenizer different from the model's default 24 | train_global_batch_size: 512 25 | train_micro_batch_size: 2 26 | max_total_sequence_length: 4096 27 | precision: "bfloat16" 28 | fsdp_offload_enabled: false 29 | activation_checkpointing_enabled: false 30 | 31 | dtensor_cfg: 32 | enabled: true 33 | cpu_offload: False 34 | sequence_parallel: false 35 | activation_checkpointing: false 36 | tensor_parallel_size: 4 37 | custom_parallel_plan: null 38 | 39 | # makes the training sequence length divisible by the tensor parallel size 40 | # this is useful for sequence parallel training 41 | make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} 42 | max_grad_norm: null 43 | 44 | optimizer: 45 | name: "torch.optim.AdamW" 46 | kwargs: 47 | lr: 2e-5 48 | weight_decay: 0.01 49 | betas: [0.9, 0.98] 50 | eps: 1e-8 51 | # when using Dtensor, we need to set foreach 52 | # and fused to False 53 | foreach: False 54 | fused: False 55 | 56 | data: 57 | max_input_seq_length: ${policy.max_total_sequence_length} 58 | dataset_name: "openmathinstruct2" 59 | prompt_file: examples/prompts/math.txt 60 | split: "train_1M" 61 | add_bos: true 62 | add_eos: true 63 | add_generation_prompt: true 64 | output_key: 'generated_solution' 65 | 66 | logger: 67 | log_dir: "logs" # Base directory for all logs 68 | wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running 69 | tensorboard_enabled: true 70 | monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard 71 | wandb: 72 | project: "sft-dev" 73 | name: "openmathinstruct-nemorl-1M_train" 74 | tensorboard: 75 | log_dir: "tb_logs-openmathinstruct-nemorl-1M_train" 76 | gpu_monitoring: 77 | collection_interval: 10 # How often to collect GPU usage 
metrics (in seconds) 78 | flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) 79 | 80 | cluster: 81 | gpus_per_node: 8 82 | num_nodes: 1 83 | -------------------------------------------------------------------------------- /examples/convert_dcp_to_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | import yaml 18 | 19 | from nemo_rl.utils.native_checkpoint import convert_dcp_to_hf 20 | 21 | 22 | def parse_args(): 23 | """Parse command line arguments.""" 24 | parser = argparse.ArgumentParser( 25 | description="Convert Torch DCP checkpoint to HF checkpoint" 26 | ) 27 | parser.add_argument( 28 | "--config", 29 | type=str, 30 | default=None, 31 | help="Path to config.yaml file in the checkpoint directory", 32 | ) 33 | parser.add_argument( 34 | "--dcp-ckpt-path", type=str, default=None, help="Path to DCP checkpoint" 35 | ) 36 | parser.add_argument( 37 | "--hf-ckpt-path", type=str, default=None, help="Path to save HF checkpoint" 38 | ) 39 | # Parse known args for the script 40 | args = parser.parse_args() 41 | 42 | return args 43 | 44 | 45 | def main(): 46 | """Main entry point.""" 47 | args = parse_args() 48 | 49 | with open(args.config, "r") as f: 50 | config = yaml.safe_load(f) 51 | 52 | model_name_or_path = config["policy"]["model_name"] 53 | # TODO: After the following PR gets merged: 54 | # https://github.com/NVIDIA/NeMo-RL/pull/148/files 55 | # tokenizer should be copied from policy/tokenizer/* instead of relying on the model name 56 | # We can expose a arg at the top level --tokenizer_path to plumb that through. 57 | # This is more stable than relying on the current NeMo-RL get_tokenizer() which can 58 | # change release to release. 59 | tokenizer_name_or_path = config["policy"]["model_name"] 60 | 61 | hf_ckpt = convert_dcp_to_hf( 62 | dcp_ckpt_path=args.dcp_ckpt_path, 63 | hf_ckpt_path=args.hf_ckpt_path, 64 | model_name_or_path=model_name_or_path, 65 | tokenizer_name_or_path=tokenizer_name_or_path, 66 | ) 67 | print(f"Saved HF checkpoint to: {hf_ckpt}") 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /examples/custom_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from torch.distributed.tensor.parallel import ColwiseParallel, RowwiseParallel 16 | from torch.distributed.tensor.placement_types import Replicate, Shard 17 | 18 | custom_parallel_plan = { 19 | "model.embed_tokens": RowwiseParallel(input_layouts=Replicate()), 20 | "model.layers.*.self_attn.q_proj": ColwiseParallel(), 21 | "model.layers.*.self_attn.k_proj": ColwiseParallel(), 22 | "model.layers.*.self_attn.v_proj": ColwiseParallel(), 23 | "model.layers.*.self_attn.o_proj": RowwiseParallel(), 24 | "model.layers.*.mlp.up_proj": ColwiseParallel(), 25 | "model.layers.*.mlp.gate_proj": ColwiseParallel(), 26 | "model.layers.*.mlp.down_proj": RowwiseParallel(), 27 | "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), 28 | } 29 | -------------------------------------------------------------------------------- /examples/prompts/cot.txt: -------------------------------------------------------------------------------- 1 | Think step-by-step to solve the following problem. Output your answer inside of \\boxed{{}} tags.: 2 | {} 3 | 4 | Let's think step-by-step -------------------------------------------------------------------------------- /examples/prompts/math.txt: -------------------------------------------------------------------------------- 1 | Solve the following math problem. Make sure to put the answer (and only answer) inside \\boxed{{}}. 2 | 3 | {} 4 | 5 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | disable_error_code = no-redef 3 | allow_redefinition = True 4 | check_untyped_defs = True 5 | python_version = 3.12 6 | 7 | [mypy-pynvml.*] 8 | ignore_missing_imports = True 9 | 10 | [mypy-hydra._internal.*] 11 | ignore_missing_imports = True 12 | 13 | [mypy-hydra.core.override_parser.*] 14 | ignore_missing_imports = True 15 | 16 | [mypy-datasets.*] 17 | ignore_missing_imports = True 18 | 19 | [mypy-transformers.*] 20 | ignore_missing_imports = True 21 | 22 | [mypy-vllm.*] 23 | ignore_missing_imports = True 24 | 25 | [mypy-math_verify.*] 26 | ignore_missing_imports = True 27 | 28 | [mypy-torchdata.*] 29 | ignore_missing_imports = True 30 | -------------------------------------------------------------------------------- /nemo_rl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | import sys 16 | from pathlib import Path 17 | 18 | """ 19 | This is a work around to ensure whenever NeMo RL is imported, that we 20 | add Megatron-LM to the python path. This is because the only sub-package 21 | that's officially installed is megatron.core. So we add the whole repo into 22 | the path so we can access megatron.{training,legacy,inference,...} 23 | 24 | Since users may pip install NeMo RL, this is a convenience so they do not 25 | have to manually run with PYTHONPATH=3rdparty/Megatron-LM-workspace/Megatron-LM. 26 | """ 27 | megatron_path = ( 28 | Path(__file__).parent.parent / "3rdparty" / "Megatron-LM-workspace" / "Megatron-LM" 29 | ) 30 | if megatron_path.exists() and str(megatron_path) not in sys.path: 31 | sys.path.append(str(megatron_path)) 32 | 33 | from nemo_rl.package_info import ( 34 | __contact_emails__, 35 | __contact_names__, 36 | __description__, 37 | __download_url__, 38 | __homepage__, 39 | __keywords__, 40 | __license__, 41 | __package_name__, 42 | __repository_url__, 43 | __shortversion__, 44 | __version__, 45 | ) 46 | 47 | os.environ["RAY_USAGE_STATS_ENABLED"] = "0" 48 | -------------------------------------------------------------------------------- /nemo_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/algorithms/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/converters/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/converters/huggingface/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/huggingface/vllm_export.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /nemo_rl/converters/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/converters/megatron/__init__.py -------------------------------------------------------------------------------- /nemo_rl/converters/megatron/vllm_export.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo_rl/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, TypedDict 16 | 17 | 18 | class DataConfig(TypedDict): 19 | max_input_seq_length: int 20 | prompt_file: str 21 | system_prompt_file: Optional[str] 22 | dataset_name: str 23 | val_dataset_name: Optional[str] 24 | add_bos: Optional[bool] 25 | add_eos: Optional[bool] 26 | input_key: Optional[str] 27 | output_key: Optional[str] 28 | 29 | 30 | class MathDataConfig(DataConfig): 31 | problem_key: str 32 | solution_key: str 33 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES 16 | from nemo_rl.data.hf_datasets.dpo import DPODataset 17 | from nemo_rl.data.hf_datasets.helpsteer3 import HelpSteer3Dataset 18 | from nemo_rl.data.hf_datasets.oasst import OasstDataset 19 | from nemo_rl.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset 20 | from nemo_rl.data.hf_datasets.prompt_response_dataset import ( 21 | PromptResponseDataset, 22 | ) 23 | from nemo_rl.data.hf_datasets.squad import SquadDataset 24 | 25 | __all__ = [ 26 | "DPODataset", 27 | "HelpSteer3Dataset", 28 | "OasstDataset", 29 | "OpenMathInstruct2Dataset", 30 | "PromptResponseDataset", 31 | "SquadDataset", 32 | "COMMON_CHAT_TEMPLATES", 33 | ] 34 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/chat_templates.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ## a reference to frequently used chat templates for convenience 16 | class COMMON_CHAT_TEMPLATES: 17 | ### simple template which prepends a role header to the content 18 | simple_role_header = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}" 19 | 20 | ### passthrough template which just concatenates the content of the messages with no special tokens 21 | passthrough_prompt_response = ( 22 | "{% for message in messages %}{{ message['content'] }}{% endfor %}" 23 | ) 24 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/deepscaler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from typing import Any 17 | 18 | from datasets import Dataset, load_dataset 19 | 20 | from nemo_rl.data.interfaces import TaskDataSpec 21 | 22 | 23 | def format_math(data: dict[str, str | float | int]) -> dict[str, list[Any] | str]: 24 | return { 25 | "messages": [ 26 | { 27 | "role": "user", 28 | "content": data["problem"], 29 | }, 30 | { 31 | "role": "assistant", 32 | "content": data["answer"], 33 | }, 34 | ], 35 | # For the v0.1 release, NeMo RL datasets require a task_name key so that users can map a task processor per unique task. 36 | "task_name": "math", 37 | } 38 | 39 | 40 | def prepare_deepscaler_dataset(seed: int = 42) -> dict[str, Dataset | None]: 41 | """Load the DeepScaleR dataset for training and the AIME 2024 dataset for validation.""" 42 | # Load the original dataset for training 43 | train_ds = load_dataset("agentica-org/DeepScaleR-Preview-Dataset", split="train") 44 | 45 | # Load the HuggingFaceH4/aime_2024 dataset for validation 46 | val_ds = load_dataset("HuggingFaceH4/aime_2024", split="train") 47 | 48 | # Shuffle the training dataset with the specified seed 49 | train_ds = train_ds.shuffle(seed=seed) 50 | 51 | # Format the examples, removing original columns 52 | train_formatted = train_ds.map(format_math, remove_columns=train_ds.column_names) 53 | val_formatted = val_ds.map(format_math, remove_columns=val_ds.column_names) 54 | 55 | # Compute accuracy 16 times per sample (matching the DeepScaleR evaluation setting) 56 | val_repeated = [] 57 | for _ in range(16): 58 | val_repeated.extend(val_formatted) 59 | val_formatted = val_formatted.from_list(val_repeated) 60 | 61 | return { 62 | "train": train_formatted, 63 | "validation": val_formatted, 64 | } 65 | 66 | 67 | class DeepScalerDataset: 68 | def __init__(self, seed: int = 42) -> None: 69 | """Initialize the DeepScaler dataset with train/validation split. 70 | 71 | Args: 72 | seed: Random seed for reproducible shuffling 73 | """ 74 | self.formatted_ds = prepare_deepscaler_dataset(seed=seed) 75 | 76 | self.task_spec = TaskDataSpec( 77 | task_name="DeepScaler", 78 | ) 79 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/dpo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from datasets import load_dataset 15 | 16 | from nemo_rl.data.interfaces import TaskDataSpec 17 | 18 | 19 | class DPODataset: 20 | """Dataset class for Direct Preference Optimization (DPO) training. 21 | 22 | This class handles loading of preference data for DPO training.
23 | The input JSON files should contain examples with the following structure: 24 | { 25 | "prompt": str, # The input prompt/context 26 | "chosen_response": str, # The preferred/winning response 27 | "rejected_response": str # The non-preferred/losing response 28 | } 29 | 30 | Args: 31 | train_data_path (str): Path to the JSON file containing training data 32 | val_data_path (str): Path to the JSON file containing validation data 33 | 34 | """ 35 | 36 | def __init__(self, train_data_path: str, val_data_path: str): 37 | self.formatted_ds = { 38 | "train": load_dataset("json", data_files=train_data_path, split="train"), 39 | "validation": load_dataset("json", data_files=val_data_path, split="train"), 40 | } 41 | 42 | self.task_spec = TaskDataSpec( 43 | task_name="DPO", 44 | ) 45 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/helpsteer3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any 15 | 16 | from absl import logging 17 | from datasets import load_dataset 18 | 19 | from nemo_rl.data.interfaces import TaskDataSpec 20 | 21 | 22 | def format_helpsteer3(data: dict[str, Any]) -> dict[str, str | dict[str, str]]: 23 | response_1 = data["response1"] 24 | response_2 = data["response2"] 25 | overall_preference = data["overall_preference"] 26 | 27 | if overall_preference < 0: 28 | chosen = response_1 29 | rejected = response_2 30 | elif overall_preference == 0: 31 | logging.log_every_n( 32 | logging.WARNING, 33 | "Preference is 0 for some examples! Setting chosen and rejected to response 1 since we don't know which response is better", 34 | 1000, 35 | ) 36 | chosen = response_1 37 | rejected = response_1 38 | else: 39 | chosen = response_2 40 | rejected = response_1 41 | 42 | return { 43 | "prompt": data["context"], 44 | "chosen_response": chosen, 45 | "rejected_response": rejected, 46 | } 47 | 48 | 49 | class HelpSteer3Dataset: 50 | """HelpSteer3 preference dataset for DPO training.""" 51 | 52 | def __init__(self) -> None: 53 | ds = load_dataset("nvidia/HelpSteer3", "preference") 54 | self.formatted_ds = ds.map(format_helpsteer3) 55 | 56 | self.task_spec = TaskDataSpec( 57 | task_name="HelpSteer3", 58 | ) 59 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/prompt_response_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
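To make the on-disk format described in the DPODataset docstring above concrete, here is a minimal usage sketch (not part of the repository files; the record and temporary paths are invented for illustration):

```python
# Minimal sketch: build tiny JSON-lines files in the expected DPO format and load them.
import json
import tempfile
from pathlib import Path

from nemo_rl.data.hf_datasets.dpo import DPODataset

record = {
    "prompt": "What is the capital of France?",
    "chosen_response": "The capital of France is Paris.",
    "rejected_response": "I don't know.",
}

tmp_dir = Path(tempfile.mkdtemp())
train_path = tmp_dir / "train.jsonl"
val_path = tmp_dir / "val.jsonl"
for path in (train_path, val_path):
    path.write_text(json.dumps(record) + "\n")

dataset = DPODataset(train_data_path=str(train_path), val_data_path=str(val_path))
print(dataset.formatted_ds["train"][0]["chosen_response"])
```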
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Any 16 | 17 | from datasets import load_dataset 18 | 19 | from nemo_rl.data.interfaces import TaskDataSpec 20 | 21 | 22 | class PromptResponseDataset: 23 | def __init__( 24 | self, 25 | train_ds_path: str, 26 | val_ds_path: str, 27 | input_key: str = "input", 28 | output_key: str = "output", 29 | ): 30 | train_original_dataset = load_dataset("json", data_files=train_ds_path)["train"] 31 | val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] 32 | 33 | self.input_key = input_key 34 | self.output_key = output_key 35 | 36 | formatted_train_dataset = train_original_dataset.map(self.add_messages_key) 37 | formatted_val_dataset = val_original_dataset.map(self.add_messages_key) 38 | 39 | self.formatted_ds = { 40 | "train": formatted_train_dataset, 41 | "validation": formatted_val_dataset, 42 | } 43 | 44 | self.task_spec = TaskDataSpec( 45 | "json_dataset", 46 | ) 47 | 48 | def add_messages_key( 49 | self, example: dict[str, Any] 50 | ) -> dict[str, list[dict[str, Any]]]: 51 | return { 52 | "messages": [ 53 | {"role": "user", "content": example[self.input_key]}, 54 | {"role": "assistant", "content": example[self.output_key]}, 55 | ] 56 | } 57 | -------------------------------------------------------------------------------- /nemo_rl/data/hf_datasets/squad.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
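As a usage sketch for the PromptResponseDataset above (not part of the repository files; the field names and record are invented), custom input/output keys are mapped into a chat-style `messages` list:

```python
# Minimal sketch: PromptResponseDataset with custom input/output keys.
import json
import tempfile
from pathlib import Path

from nemo_rl.data.hf_datasets.prompt_response_dataset import PromptResponseDataset

tmp_dir = Path(tempfile.mkdtemp())
data_path = tmp_dir / "data.jsonl"
data_path.write_text(json.dumps({"question": "2 + 2 = ?", "answer": "4"}) + "\n")

ds = PromptResponseDataset(
    train_ds_path=str(data_path),
    val_ds_path=str(data_path),
    input_key="question",
    output_key="answer",
)

# Each example now carries a "messages" list built from the two configured keys.
print(ds.formatted_ds["train"][0]["messages"])
```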
14 | 15 | 16 | from typing import Any 17 | 18 | from datasets import load_dataset 19 | 20 | from nemo_rl.data.interfaces import TaskDataSpec 21 | 22 | 23 | def format_squad(data: dict[str, Any]) -> dict[str, list[dict[str, str]]]: 24 | return { 25 | "messages": [ 26 | { 27 | "role": "system", 28 | "content": data["context"], 29 | }, 30 | { 31 | "role": "user", 32 | "content": data["question"], 33 | }, 34 | { 35 | "role": "assistant", 36 | "content": data["answers"]["text"][0], 37 | }, 38 | ] 39 | } 40 | 41 | 42 | class SquadDataset: 43 | def __init__(self) -> None: 44 | original_ds = load_dataset("rajpurkar/squad") 45 | self.formatted_ds = original_ds.map(format_squad) 46 | self.task_spec = TaskDataSpec( 47 | task_name="SQuAD", 48 | ) 49 | -------------------------------------------------------------------------------- /nemo_rl/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/distributed/__init__.py -------------------------------------------------------------------------------- /nemo_rl/distributed/ray_actor_environment_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES 16 | 17 | ACTOR_ENVIRONMENT_REGISTRY: dict[str, str] = { 18 | "nemo_rl.models.generation.vllm.VllmGenerationWorker": PY_EXECUTABLES.VLLM, 19 | "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker": PY_EXECUTABLES.BASE, 20 | "nemo_rl.models.policy.fsdp1_policy_worker.FSDP1PolicyWorker": PY_EXECUTABLES.BASE, 21 | "nemo_rl.environments.math_environment.MathEnvironment": PY_EXECUTABLES.SYSTEM, 22 | "nemo_rl.environments.games.sliding_puzzle.SlidingPuzzleEnv": PY_EXECUTABLES.SYSTEM, 23 | } 24 | 25 | 26 | def get_actor_python_env(actor_class_fqn: str) -> str: 27 | if actor_class_fqn in ACTOR_ENVIRONMENT_REGISTRY: 28 | return ACTOR_ENVIRONMENT_REGISTRY[actor_class_fqn] 29 | else: 30 | raise ValueError( 31 | f"No actor environment registered for {actor_class_fqn}. " 32 | f"You're attempting to create an actor ({actor_class_fqn}) " 33 | "without specifying a python environment for it. Please either " 34 | "specify a python environment in the registry " 35 | "(nemo_rl.distributed.ray_actor_environment_registry.ACTOR_ENVIRONMENT_REGISTRY) " 36 | "or pass a py_executable to the RayWorkerBuilder. If you're unsure about which " 37 | "environment to use, a good default is PY_EXECUTABLES.SYSTEM for ray actors that " 38 | "don't have special dependencies. If you do have special dependencies (say, you're " 39 | "adding a new generation framework or training backend), you'll need to specify the " 40 | "appropriate environment. See uv.md for more details."
41 | ) 42 | -------------------------------------------------------------------------------- /nemo_rl/environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/environments/__init__.py -------------------------------------------------------------------------------- /nemo_rl/environments/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torch 15 | 16 | 17 | def calculate_pass_rate_per_prompt( 18 | prompts: torch.Tensor, is_correct: torch.Tensor 19 | ) -> float: 20 | """Compute the fraction of prompts that have at least one correct answer (reward > 0). 21 | 22 | prompts: Tensor (b, s) of prompts the model used. May be on any device 23 | is_correct: Tensor (b,) of bool-valued correctness labels. May be on any device 24 | 25 | Returns: 26 | pass rate: float 27 | """ 28 | unique_prompts = torch.unique(prompts, dim=0) 29 | 30 | correct_prompt_ct = 0 31 | for i in range(len(unique_prompts)): 32 | is_matching_prompt = (prompts == unique_prompts[i]).all(1) 33 | if torch.any(is_correct[is_matching_prompt] > 0): 34 | correct_prompt_ct += 1 35 | 36 | return correct_prompt_ct / len(unique_prompts) 37 | -------------------------------------------------------------------------------- /nemo_rl/environments/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any 15 | 16 | 17 | def chunk_list_to_workers(to_chunk: list[Any], num_workers: int) -> list[list[Any]]: 18 | """Chunk a list into a list of lists, where each sublist is assigned to a worker. Keeps ordering of elements. 19 | 20 | If the list is not divisible by the number of workers, the last worker may have fewer elements. 21 | If there are more workers than elements, the first len(to_chunk) workers will have a single element each, 22 | and the remaining workers will have empty lists. 23 | 24 | Args: 25 | to_chunk: The list to be chunked. 26 | num_workers: The number of workers to distribute the list to. 27 | 28 | Returns: 29 | A list of lists, where each sublist contains elements assigned to a worker.
30 | 31 | Examples: 32 | ```{doctest} 33 | >>> from nemo_rl.environments.utils import chunk_list_to_workers 34 | >>> chunk_list_to_workers([1, 2, 3, 4, 5], 3) 35 | [[1, 2], [3, 4], [5]] 36 | ``` 37 | """ 38 | if not to_chunk: 39 | return [[] for _ in range(num_workers)] 40 | 41 | # Handle case where we have more workers than elements 42 | if len(to_chunk) <= num_workers: 43 | result = [[item] for item in to_chunk] 44 | # Add empty lists for remaining workers 45 | result.extend([[] for _ in range(num_workers - len(to_chunk))]) 46 | return result 47 | 48 | # Calculate chunk size (ceiling division to ensure all elements are covered) 49 | chunk_size = (len(to_chunk) + num_workers - 1) // num_workers 50 | 51 | # Create chunks 52 | chunks = [] 53 | for i in range(0, len(to_chunk), chunk_size): 54 | chunks.append(to_chunk[i : i + chunk_size]) 55 | 56 | # If we somehow ended up with more chunks than workers (shouldn't happen with ceiling division) 57 | # merge the last chunks 58 | if len(chunks) > num_workers: 59 | chunks[num_workers - 1 :] = [sum(chunks[num_workers - 1 :], [])] 60 | 61 | return chunks 62 | -------------------------------------------------------------------------------- /nemo_rl/evals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/evals/__init__.py -------------------------------------------------------------------------------- /nemo_rl/experience/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/experience/__init__.py -------------------------------------------------------------------------------- /nemo_rl/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/metrics/__init__.py -------------------------------------------------------------------------------- /nemo_rl/metrics/metrics_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
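To make the edge cases described in the chunk_list_to_workers docstring above concrete, a small sketch (the outputs shown in comments follow directly from the rules stated there):

```python
# Sketch of the chunking edge cases documented above.
from nemo_rl.environments.utils import chunk_list_to_workers

# Evenly divisible: equal-sized chunks, original ordering preserved.
print(chunk_list_to_workers([1, 2, 3, 4, 5, 6], num_workers=3))  # [[1, 2], [3, 4], [5, 6]]

# More workers than elements: one element each, then empty lists.
print(chunk_list_to_workers([1, 2], num_workers=4))  # [[1], [2], [], []]

# Empty input: every worker receives an empty list.
print(chunk_list_to_workers([], num_workers=3))  # [[], [], []]
```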
14 | -------------------------------------------------------------------------------- /nemo_rl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/dtensor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/dtensor/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/generation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import cast 15 | 16 | from transformers import PreTrainedTokenizerBase 17 | 18 | from nemo_rl.models.generation.interfaces import GenerationConfig 19 | from nemo_rl.models.generation.vllm import VllmConfig 20 | 21 | TokenizerType = PreTrainedTokenizerBase 22 | 23 | 24 | def configure_generation_config( 25 | config: GenerationConfig, tokenizer: TokenizerType, is_eval: bool = False 26 | ) -> GenerationConfig: 27 | """Apply tokenizer- and backend-specific settings to a generation config.""" 28 | # tokenizer setting 29 | config["pad_token_id"] = tokenizer.pad_token_id 30 | if config["stop_token_ids"] is None: 31 | config["stop_token_ids"] = [tokenizer.eos_token_id] 32 | 33 | # vllm setting 34 | if config["backend"] == "vllm": 35 | config = cast(VllmConfig, config) 36 | # set load_format 37 | config["vllm_cfg"]["load_format"] = "auto" if is_eval else "dummy" 38 | 39 | # set skip_tokenizer_init 40 | if is_eval or config["stop_strings"] is not None: 41 | config["vllm_cfg"]["skip_tokenizer_init"] = False 42 | else: 43 | config["vllm_cfg"]["skip_tokenizer_init"] = True 44 | 45 | return config 46 | -------------------------------------------------------------------------------- /nemo_rl/models/generation/vllm_backend.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
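A hedged sketch of how configure_generation_config above is typically called (the config dict is hand-written for illustration and omits most GenerationConfig keys; the tokenizer name is only an example):

```python
# Sketch: fill in tokenizer- and backend-dependent generation settings.
from transformers import AutoTokenizer

from nemo_rl.models.generation import configure_generation_config

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")

# Hand-written fragment; real configs come from the YAML recipes and contain more keys.
config = {
    "backend": "vllm",
    "stop_token_ids": None,
    "stop_strings": None,
    "vllm_cfg": {},
}

config = configure_generation_config(config, tokenizer, is_eval=True)
print(config["pad_token_id"], config["stop_token_ids"])
# For eval, real weights are loaded and the vLLM tokenizer is kept.
print(config["vllm_cfg"]["load_format"], config["vllm_cfg"]["skip_tokenizer_init"])
```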
14 | import torch 15 | 16 | try: 17 | import vllm # noqa: F401 18 | except ImportError: 19 | raise ImportError( 20 | "vLLM is not installed. Please check that the py_executable in the runtime_env of VllmGenerationWorker " 21 | "covers the vllm dependency. You may have to update nemo_rl/distributed/ray_actor_environment_registry.py. " 22 | "If you are working interactively, you can install by running `uv sync --extra vllm` anywhere in the repo." 23 | ) 24 | 25 | 26 | class VllmInternalWorkerExtension: 27 | def report_device_id(self) -> str: 28 | from nemo_rl.utils.nvml import get_device_uuid 29 | 30 | return get_device_uuid(self.device.index) 31 | 32 | def update_weights_from_ipc_handles(self, ipc_handles): 33 | """Update weights from IPC handles. 34 | 35 | Args: 36 | ipc_handles (dict): Dictionary mapping device UUIDs to parameter IPC handles. 37 | 38 | Returns: 39 | bool: True if weights were successfully updated. 40 | """ 41 | try: 42 | # Get handles for this device 43 | device_uuid = self.report_device_id() 44 | handles = ipc_handles[device_uuid] 45 | device_id = self.device.index 46 | weights = [] 47 | 48 | # Process each handle to get the tensor 49 | for name, handle in handles: 50 | func, args = handle 51 | list_args = list(args) 52 | # Update device ID to match the current device 53 | list_args[6] = device_id 54 | tensor = func(*list_args) 55 | weights.append((name, tensor)) 56 | 57 | # Load weights into the model 58 | self.model_runner.model.load_weights(weights=weights) 59 | torch.cuda.synchronize() 60 | return True 61 | except Exception as e: 62 | print( 63 | f"Error in VllmInternalWorkerExtension.update_weights_from_ipc_handles: {e}" 64 | ) 65 | return False 66 | -------------------------------------------------------------------------------- /nemo_rl/models/huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/huggingface/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/huggingface/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from enum import Enum, auto 16 | 17 | from transformers import AutoConfig 18 | 19 | 20 | class ModelFlag(Enum): 21 | """Enum that defines special flags for model-specific behaviors. 22 | 23 | This enum provides a way to identify models that require special handling or 24 | configuration in different parts of the NeMo RL codebase. 25 | 26 | Flags: 27 | SKIP_DTENSOR_TIED_WEIGHTS_CHECK: Models that should skip the tied weights check 28 | for the DTensor Policy even without setting the 29 | NRL_SKIP_TIED_WEIGHT_CHECK flag. 30 | VLLM_LOAD_FORMAT_AUTO: Models that should use the "auto" load format when initializing 31 | VLLM. 
32 | 33 | Each flag has a `matches` method that determines if the flag applies to a given model_name. 34 | """ 35 | 36 | SKIP_DTENSOR_TIED_WEIGHTS_CHECK = auto() 37 | VLLM_LOAD_FORMAT_AUTO = auto() 38 | 39 | def matches(self, model_name: str) -> bool: 40 | match self: 41 | case ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK: 42 | return is_gemma_model(model_name) 43 | case ModelFlag.VLLM_LOAD_FORMAT_AUTO: 44 | return is_gemma_model(model_name) 45 | case _: 46 | raise ValueError(f"Unknown ModelFlag: {self}") 47 | 48 | 49 | def is_gemma_model(model_name: str) -> bool: 50 | hf_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True) 51 | return hasattr(hf_config, "model_type") and hf_config.model_type in [ 52 | "gemma2", 53 | "gemma3", 54 | "gemma3_text", 55 | ] 56 | -------------------------------------------------------------------------------- /nemo_rl/models/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/models/megatron/__init__.py -------------------------------------------------------------------------------- /nemo_rl/models/megatron/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /nemo_rl/models/policy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
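For example (a sketch only; the checkpoint names are illustrative, and ModelFlag.matches fetches each model's config from the Hugging Face Hub, so gated models such as Gemma require authenticated access):

```python
# Sketch: querying model-specific behavior flags.
from nemo_rl.models.huggingface.common import ModelFlag

# Gemma models need the "auto" load format when vLLM is initialized.
if ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches("google/gemma-3-1b-it"):
    print("Use the 'auto' load format for this model.")

# Non-Gemma models fall through to the default behavior.
print(ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK.matches("Qwen/Qwen2.5-1.5B-Instruct"))
```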
14 | 15 | from typing import Any, NotRequired, Optional, TypedDict, Union 16 | 17 | from nemo_rl.models.generation.interfaces import GenerationConfig 18 | 19 | 20 | class DTensorConfig(TypedDict): 21 | enabled: bool 22 | cpu_offload: bool 23 | sequence_parallel: bool 24 | activation_checkpointing: bool 25 | tensor_parallel_size: int 26 | custom_parallel_plan: str 27 | 28 | 29 | class TokenizerConfig(TypedDict): 30 | name: str 31 | chat_template: str 32 | 33 | 34 | class PytorchOptimizerConfig(TypedDict): 35 | name: str 36 | kwargs: dict[str, Any] 37 | 38 | 39 | class SinglePytorchSchedulerConfig(TypedDict): 40 | name: str 41 | kwargs: dict[str, Any] 42 | 43 | 44 | SchedulerMilestones = dict[str, list[int]] 45 | 46 | 47 | class DynamicBatchingConfig(TypedDict): 48 | # dynamic_batching improves performance by ensuring logprob and training microbatches 49 | # have a sufficient number of tokens to maximize GPU utilization. Specifically, variable-length 50 | # responses are sorted by sequence length and bucketed into microbatches whose total 51 | # token count is approximately 'train_mb_tokens' and 'logprob_mb_tokens' for the 52 | # training and logprob stages, respectively. 53 | enabled: bool 54 | train_mb_tokens: int 55 | logprob_mb_tokens: int 56 | sequence_length_round: int 57 | 58 | 59 | class PolicyConfig(TypedDict): 60 | model_name: str 61 | tokenizer: TokenizerConfig 62 | train_global_batch_size: int 63 | train_micro_batch_size: int 64 | learning_rate: float 65 | logprob_batch_size: int 66 | generation: Optional[GenerationConfig] 67 | generation_batch_size: NotRequired[ 68 | int 69 | ] # used in static batched (framework) generation 70 | precision: str 71 | dtensor_cfg: DTensorConfig 72 | dynamic_batching: DynamicBatchingConfig 73 | make_sequence_length_divisible_by: int 74 | max_total_sequence_length: int 75 | max_grad_norm: Optional[Union[float, int]] 76 | fsdp_offload_enabled: bool 77 | activation_checkpointing_enabled: bool 78 | optimizer: NotRequired[PytorchOptimizerConfig] = None 79 | scheduler: NotRequired[list[SinglePytorchSchedulerConfig] | SchedulerMilestones] = ( 80 | None 81 | ) 82 | -------------------------------------------------------------------------------- /nemo_rl/package_info.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
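To illustrate how the dynamic batching fields above fit together, a hedged sketch with invented values (real settings live in the YAML recipes under examples/configs/, not in code):

```python
# Sketch only: example values for DynamicBatchingConfig.
from nemo_rl.models.policy import DynamicBatchingConfig

dynamic_batching: DynamicBatchingConfig = {
    "enabled": True,
    # Pack variable-length responses until a training microbatch holds roughly this many tokens.
    "train_mb_tokens": 8192,
    # Same idea for the logprob stage, which can usually fit more tokens per microbatch.
    "logprob_mb_tokens": 16384,
    # Round padded sequence lengths up to a multiple of this value.
    "sequence_length_round": 64,
}

print(dynamic_batching["train_mb_tokens"])
```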
14 | 15 | 16 | MAJOR = 0 17 | MINOR = 3 18 | PATCH = 0 19 | PRE_RELEASE = "rc0" 20 | 21 | # Use the following formatting: (major, minor, patch, pre-release) 22 | VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) 23 | 24 | __shortversion__ = ".".join(map(str, VERSION[:3])) 25 | __version__ = ".".join(map(str, VERSION[:3])) + "".join(VERSION[3:]) 26 | 27 | __package_name__ = "nemo_rl" 28 | __contact_names__ = "NVIDIA" 29 | __contact_emails__ = "nemo-toolkit@nvidia.com" 30 | __homepage__ = "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/" 31 | __repository_url__ = "https://github.com/NVIDIA/NeMo-RL" 32 | __download_url__ = "https://github.com/NVIDIA/NeMo-RL/releases" 33 | __description__ = "NeMo-RL - a toolkit for model alignment" 34 | __license__ = "Apache2" 35 | __keywords__ = "deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, language, reinforcement learning, RLHF, preference modeling, SteerLM, DPO" 36 | -------------------------------------------------------------------------------- /nemo_rl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/NeMo-RL/99ba9a130e72cbf87a3e20acf43bc01a47adc8ee/nemo_rl/utils/__init__.py -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | ## Launching Release Tests 4 | 5 | ```sh 6 | # Assuming in NeMo RL project root 7 | 8 | cd tools/ 9 | 10 | IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... 11 | 12 | # DRYRUN=1 to get a rough estimate of compute 13 | DRYRUN=1 IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... 14 | 15 | # DRYRUN=2 will create a codesnapshot with a fully hermetic example 16 | DRYRUN=2 IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ... 17 | 18 | # Run all (Caution: this will use a lot of compute; consider listing out the jobs) 19 | IS_RELEASE=1 CONTAINER=... ACCOUNT=... PARTITION=... ./launch ../../recipes/**/*.sh 20 | ``` 21 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/functional/dpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_dpo.py \ 22 | policy.model_name=Qwen/Qwen3-0.6B \ 23 | cluster.gpus_per_node=2 \ 24 | dpo.max_num_steps=3 \ 25 | dpo.val_batches=1 \ 26 | dpo.val_global_batch_size=8 \ 27 | policy.train_global_batch_size=8 \ 28 | logger.tensorboard_enabled=true \ 29 | logger.log_dir=$LOG_DIR \ 30 | logger.wandb_enabled=false \ 31 | checkpointing.enabled=false \ 32 | $@ \ 33 | 2>&1 | tee $RUN_LOG 34 | 35 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 36 | 37 | # TODO: threshold set higher since test is flaky 38 | # https://github.com/NVIDIA/NeMo-RL/issues/370 39 | uv run tests/check_metrics.py $JSON_METRICS \ 40 | 'data["train/loss"]["3"] < 0.8' 41 | 42 | -------------------------------------------------------------------------------- /tests/functional/eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_eval.py \ 22 | cluster.gpus_per_node=2 \ 23 | $@ \ 24 | 2>&1 | tee $RUN_LOG 25 | 26 | cat $RUN_LOG | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > $JSON_METRICS 27 | 28 | uv run tests/check_metrics.py $JSON_METRICS \ 29 | 'data["score"] == 0.1' \ 30 | -------------------------------------------------------------------------------- /tests/functional/grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_grpo_math.py \ 22 | policy.model_name=Qwen/Qwen3-0.6B \ 23 | grpo.num_prompts_per_step=2 \ 24 | grpo.num_generations_per_prompt=4 \ 25 | policy.train_global_batch_size=4 \ 26 | policy.train_micro_batch_size=1 \ 27 | cluster.gpus_per_node=2 \ 28 | grpo.max_num_steps=2 \ 29 | logger.tensorboard_enabled=true \ 30 | logger.log_dir=$LOG_DIR \ 31 | logger.wandb_enabled=false \ 32 | checkpointing.enabled=false \ 33 | $@ \ 34 | 2>&1 | tee $RUN_LOG 35 | 36 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 37 | 38 | uv run tests/check_metrics.py $JSON_METRICS \ 39 | 'max(data["train/token_mult_prob_error"]) < 1.05' \ 40 | 41 | -------------------------------------------------------------------------------- /tests/functional/grpo_multiturn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 4 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 5 | # Mark the current repo as safe, since wandb fetches metadata about the repo 6 | git config --global --add safe.directory $PROJECT_ROOT 7 | 8 | set -eou pipefail 9 | 10 | EXP_NAME=$(basename $0 .sh) 11 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 12 | LOG_DIR=$EXP_DIR/logs 13 | JSON_METRICS=$EXP_DIR/metrics.json 14 | RUN_LOG=$EXP_DIR/run.log 15 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 16 | 17 | rm -rf $EXP_DIR $LOG_DIR 18 | mkdir -p $EXP_DIR $LOG_DIR 19 | 20 | cd $PROJECT_ROOT 21 | uv run $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ 22 | policy.model_name=Qwen/Qwen3-0.6B \ 23 | cluster.gpus_per_node=2 \ 24 | grpo.max_rollout_turns=5 \ 25 | grpo.max_num_steps=3 \ 26 | grpo.num_prompts_per_step=2 \ 27 | grpo.num_generations_per_prompt=4 \ 28 | policy.max_total_sequence_length=1024 \ 29 | policy.train_global_batch_size=4 \ 30 | policy.train_micro_batch_size=1 \ 31 | policy.generation.top_p=0.99 \ 32 | policy.generation.top_k=8000 \ 33 | logger.tensorboard_enabled=true \ 34 | logger.log_dir=$LOG_DIR \ 35 | logger.wandb_enabled=false \ 36 | checkpointing.enabled=false \ 37 | $@ \ 38 | 2>&1 | tee $RUN_LOG 39 | 40 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 41 | 42 | uv run tests/check_metrics.py $JSON_METRICS \ 43 | 'max(data["train/token_mult_prob_error"]) < 1.1' \ 44 | 45 | -------------------------------------------------------------------------------- /tests/functional/sft.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # clean up checkpoint directory on exit 4 | trap "rm -rf /tmp/sft_checkpoints" EXIT 5 | 6 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 7 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
8 | # Mark the current repo as safe, since wandb fetches metadata about the repo 9 | git config --global --add safe.directory $PROJECT_ROOT 10 | 11 | set -eou pipefail 12 | 13 | EXP_NAME=$(basename $0 .sh) 14 | EXP_DIR=$SCRIPT_DIR/$EXP_NAME 15 | LOG_DIR=$EXP_DIR/logs 16 | JSON_METRICS=$EXP_DIR/metrics.json 17 | RUN_LOG=$EXP_DIR/run.log 18 | export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} 19 | 20 | rm -rf $EXP_DIR $LOG_DIR 21 | mkdir -p $EXP_DIR $LOG_DIR 22 | 23 | cd $PROJECT_ROOT 24 | uv run $PROJECT_ROOT/examples/run_sft.py \ 25 | policy.model_name=Qwen/Qwen3-0.6B \ 26 | cluster.gpus_per_node=2 \ 27 | sft.max_num_steps=3 \ 28 | sft.val_batches=1 \ 29 | sft.val_period=3 \ 30 | logger.tensorboard_enabled=true \ 31 | logger.log_dir=$LOG_DIR \ 32 | logger.wandb_enabled=false \ 33 | checkpointing.enabled=true \ 34 | checkpointing.save_period=3 \ 35 | checkpointing.checkpoint_dir=/tmp/sft_checkpoints \ 36 | $@ \ 37 | 2>&1 | tee $RUN_LOG 38 | 39 | uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS 40 | 41 | uv run tests/check_metrics.py $JSON_METRICS \ 42 | 'data["train/loss"]["3"] < 5.9' \ 43 | 44 | -------------------------------------------------------------------------------- /tests/functional/test_mcore_extra_installed_correctly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eoux pipefail 3 | 4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) 5 | cd $SCRIPT_DIR 6 | 7 | uv sync 8 | # Only the first call with --extra mcore is invoked with --reinstall, in case submodules were recently updated/downloaded 9 | uv run --reinstall --extra mcore --no-build-isolation python <<"EOF" 10 | import torch 11 | import transformer_engine.pytorch as te 12 | from transformer_engine.common import recipe 13 | 14 | # Set dimensions. 15 | in_features = 768 16 | out_features = 3072 17 | hidden_size = 2048 18 | 19 | # Initialize model and inputs. 20 | model = te.Linear(in_features, out_features, bias=True) 21 | inp = torch.randn(hidden_size, in_features, device="cuda") 22 | 23 | # TODO: Disabling FP8 testing since CI machines may not support FP8 24 | ## Create an FP8 recipe. Note: All input args are optional. 25 | #fp8_recipe = recipe.DelayedScaling(margin=0, fp8_format=recipe.Format.E4M3) 26 | # 27 | ## Enable autocasting for the forward pass 28 | #with te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe): 29 | # out = model(inp) 30 | 31 | out = model(inp) 32 | 33 | loss = out.sum() 34 | loss.backward() 35 | print("[TE hello world successful]") 36 | EOF 37 | 38 | uv run --extra mcore --no-build-isolation python <<"EOF" 39 | import is_megatron_installed 40 | import is_nemo_installed 41 | assert is_megatron_installed.INSTALLED, "Megatron is not installed. Please check if the submodule has been initialized. May need to run `git submodule update --init --recursive`" 42 | assert is_nemo_installed.INSTALLED, "NeMo is not installed. Please check if the submodule has been initialized.
May need to run `git submodule update --init --recursive`" 43 | 44 | # This must be the first import to get all of the megatron non-core packages added to the path 45 | import nemo_rl 46 | import megatron.core 47 | from megatron.training.utils import get_ltor_masks_and_position_ids 48 | from nemo.tron.init import initialize_megatron 49 | from nemo.tron.config import ( 50 | ConfigContainer, 51 | TrainingConfig, 52 | LoggerConfig, 53 | OptimizerConfig, 54 | SchedulerConfig, 55 | CheckpointConfig, 56 | DistributedDataParallelConfig, 57 | ) 58 | from nemo.tron.utils.common_utils import get_rank_safe 59 | from nemo.tron.config import TokenizerConfig 60 | from nemo.tron.model import get_model_from_config 61 | from nemo.tron.checkpointing import checkpoint_exists, load_checkpoint 62 | from nemo.tron.init import initialize_megatron, set_jit_fusion_options 63 | from nemo.tron.setup import _init_checkpointing_context, _update_model_config_funcs 64 | from nemo.tron.state import GlobalState 65 | from nemo.tron.optim import setup_optimizer 66 | from nemo.tron import fault_tolerance 67 | from nemo.tron.tokenizers.tokenizer import build_tokenizer 68 | from nemo.tron.utils.train_utils import ( 69 | calc_params_l2_norm, 70 | logical_and_across_model_parallel_group, 71 | reduce_max_stat_across_model_parallel_group, 72 | ) 73 | from nemo.tron.train import train_step 74 | from nemo.tron.setup import HAVE_FSDP2 75 | print("[Nemo/Mcore imports successful]") 76 | EOF 77 | 78 | # Sync just to return the environment to the original base state 79 | uv sync 80 | echo Success 81 | -------------------------------------------------------------------------------- /tests/run_functional_in_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 3 | PROJECT_ROOT=$(realpath $SCRIPT_DIR/..) 4 | 5 | set -eou pipefail 6 | 7 | # Ensure Docker is installed 8 | if ! command -v docker &> /dev/null; then 9 | echo "Error: Docker is not installed or not in PATH." 10 | exit 1 11 | fi 12 | 13 | # CONTAINER is expected to be set as an environment variable 14 | if [[ -z "${CONTAINER:-}" ]]; then 15 | echo "Error: CONTAINER environment variable is not set." 16 | echo "Usage: CONTAINER= $0