├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── dataset.yml
    │   ├── e2e_digit_completion.yml
    │   ├── e2e_gsm8k.yml
    │   ├── e2e_gsm8k_megatron.yml
    │   ├── e2e_lora.yml
    │   ├── e2e_sft.yml
    │   ├── model.yml
    │   ├── ray_test.yml
    │   ├── sandbox.yml
    │   ├── sanity.yml
    │   ├── scorecard.yml
    │   ├── vllm.yml
    │   └── yapf_format.yml
├── .gitignore
├── .readthedocs.yaml
├── .style.yapf
├── LICENSE
├── Notice.txt
├── Qwen2.5-Eval
    ├── README.md
    └── evaluation
    │   ├── LICENSE
    │   ├── data
    │       ├── aime24
    │       │   └── test.jsonl
    │       ├── aime24x8
    │       │   └── test.jsonl
    │       ├── aime25
    │       │   └── test.jsonl
    │       ├── aime25x8
    │       │   └── test.jsonl
    │       ├── amc23
    │       │   └── test.jsonl
    │       ├── amc23x8
    │       │   └── test.jsonl
    │       ├── aqua
    │       │   └── test.jsonl
    │       ├── asdiv
    │       │   └── test.jsonl
    │       ├── carp_en
    │       │   ├── demo.json
    │       │   └── test.jsonl
    │       ├── cmath
    │       │   └── test.jsonl
    │       ├── cn_middle_school
    │       │   └── test.jsonl
    │       ├── college_math
    │       │   └── test.jsonl
    │       ├── eval_rm_maj_example
    │       │   └── math_cot_100.jsonl
    │       ├── gaokao2023en
    │       │   └── test.jsonl
    │       ├── gaokao2024_I
    │       │   └── test.jsonl
    │       ├── gaokao2024_II
    │       │   └── test.jsonl
    │       ├── gaokao2024_mix
    │       │   └── test.jsonl
    │       ├── gaokao_math_cloze
    │       │   └── test.jsonl
    │       ├── gaokao_math_qa
    │       │   └── test.jsonl
    │       ├── gsm8k
    │       │   ├── test.jsonl
    │       │   └── train.jsonl
    │       ├── math
    │       │   ├── test.jsonl
    │       │   └── train.jsonl
    │       ├── math500
    │       │   └── test.jsonl
    │       ├── mawps
    │       │   ├── addsub.jsonl
    │       │   ├── multiarith.jsonl
    │       │   ├── singleeq.jsonl
    │       │   ├── singleop.jsonl
    │       │   └── test.jsonl
    │       ├── minerva_math
    │       │   ├── README.md
    │       │   └── test.jsonl
    │       ├── mmlu_stem
    │       │   └── test.jsonl
    │       ├── olympiadbench
    │       │   ├── test.json
    │       │   └── test.jsonl
    │       ├── phy1
    │       │   └── test.jsonl
    │       ├── sat_math
    │       │   └── test.jsonl
    │       ├── svamp
    │       │   └── test.jsonl
    │       └── tabmwp
    │       │   └── test.jsonl
    │   ├── data_loader.py
    │   ├── evaluate.py
    │   ├── examples.py
    │   ├── grader.py
    │   ├── latex2sympy
    │       ├── .coveragerc
    │       ├── .gitignore
    │       ├── LICENSE.txt
    │       ├── PS.g4
    │       ├── README.md
    │       ├── __init__.py
    │       ├── antlr-4.11.1-complete.jar
    │       ├── asciimath_printer.py
    │       ├── description.txt
    │       ├── dev-requirements.in
    │       ├── dev-requirements.txt
    │       ├── gen
    │       │   ├── PS.interp
    │       │   ├── PS.tokens
    │       │   ├── PSLexer.interp
    │       │   ├── PSLexer.py
    │       │   ├── PSLexer.tokens
    │       │   ├── PSListener.py
    │       │   ├── PSParser.py
    │       │   └── __init__.py
    │       ├── icon.png
    │       ├── latex2sympy2.py
    │       ├── requirements.in
    │       ├── requirements.txt
    │       ├── sandbox
    │       │   ├── linalg_equations.py
    │       │   ├── linalg_span.py
    │       │   ├── matrix.py
    │       │   ├── matrix_placeholders.py
    │       │   ├── sandbox.py
    │       │   ├── sandbox_equality.py
    │       │   ├── sectan.py
    │       │   └── vector.py
    │       ├── scripts
    │       │   ├── compile.sh
    │       │   ├── coverage-ci.sh
    │       │   ├── coverage.sh
    │       │   ├── pre-commit
    │       │   ├── pre-push
    │       │   ├── publish.sh
    │       │   ├── setup-hooks.sh
    │       │   ├── setup.sh
    │       │   └── test.sh
    │       ├── setup.cfg
    │       ├── setup.py
    │       └── tests
    │       │   ├── __init__.py
    │       │   ├── abs_test.py
    │       │   ├── all_bad_test.py
    │       │   ├── all_good_test.py
    │       │   ├── atom_expr_test.py
    │       │   ├── binomial_test.py
    │       │   ├── ceil_test.py
    │       │   ├── complex_test.py
    │       │   ├── context.py
    │       │   ├── exp_test.py
    │       │   ├── floor_test.py
    │       │   ├── gcd_test.py
    │       │   ├── greek_test.py
    │       │   ├── grouping_test.py
    │       │   ├── lcm_test.py
    │       │   ├── left_right_cdot_test.py
    │       │   ├── linalg_test.py
    │       │   ├── max_test.py
    │       │   ├── min_test.py
    │       │   ├── mod_test.py
    │       │   ├── overline_test.py
    │       │   ├── pi_test.py
    │       │   ├── trig_test.py
    │       │   └── variable_test.py
    │   ├── math_eval.py
    │   ├── math_utils.py
    │   ├── model_utils.py
    │   ├── parser.py
    │   ├── python_executor.py
    │   ├── requirements.txt
    │   ├── rm_maj_eval.py
    │   ├── sh
    │       ├── eval_all_math.sh
    │       └── eval_one_experiment_all_ckpts.sh
    │   ├── trajectory.py
    │   └── utils.py
├── README.md
├── data
    ├── acc_step_500.json
    ├── data_selection.py
    ├── data_selection.sh
    ├── deepscaler_dataset.py
    ├── test
    │   └── math500.parquet
    └── train
    │   └── one_shot_rlvr
    │       ├── dsr_sub.parquet
    │       ├── merge_pi1_pi13_r128.parquet
    │       ├── merge_pi1_pi2_pi13_pi1209_r128.parquet
    │       ├── pi1209_r128.parquet
    │       ├── pi13_r128.parquet
    │       ├── pi1_r128.parquet
    │       └── pi2_r128.parquet
├── docker
    ├── Dockerfile.ngc.vllm
    └── Dockerfile.vemlp.vllm.te
├── docs
    ├── Makefile
    ├── README.md
    ├── README_vllm0.7.md
    ├── _static
    │   └── logo.png
    ├── advance
    │   ├── dpo_extension.rst
    │   ├── fsdp_extension.rst
    │   ├── megatron_extension.rst
    │   └── placement.rst
    ├── conf.py
    ├── examples
    │   ├── config.rst
    │   ├── gsm8k_example.rst
    │   └── ppo_code_architecture.rst
    ├── experiment
    │   └── ppo.rst
    ├── faq
    │   └── faq.rst
    ├── hybrid_flow.rst
    ├── index.rst
    ├── perf
    │   └── perf_tuning.rst
    ├── preparation
    │   ├── prepare_data.rst
    │   └── reward_function.rst
    ├── requirements-docs.txt
    ├── start
    │   ├── install.rst
    │   └── quickstart.rst
    └── workers
    │   ├── fsdp_workers.rst
    │   ├── megatron_workers.rst
    │   └── ray_trainer.rst
├── examples
    ├── data_preprocess
    │   ├── aime_val_dataset.py
    │   ├── full_hh_rlhf.py
    │   ├── gsm8k.py
    │   ├── hellaswag.py
    │   └── math_dataset.py
    ├── generation
    │   └── run_deepseek_v2_lite_math.sh
    ├── grpo_trainer
    │   ├── run_deepseek7b_llm.sh
    │   ├── run_deepseek7b_llm_seq_balance.sh
    │   ├── run_qwen2-7b.sh
    │   └── run_qwen2-7b_seq_balance.sh
    ├── ppo_trainer
    │   ├── run_deepseek7b_llm.sh
    │   ├── run_deepseek7b_llm_sp2.sh
    │   ├── run_deepseek_full_hh_rlhf.sh
    │   ├── run_deepseek_math_gsm8k_megatron.sh
    │   ├── run_deepseek_megatron.sh
    │   ├── run_gemma.sh
    │   ├── run_qwen2-7b.sh
    │   ├── run_qwen2-7b_math_gsm8k_megatron.sh
    │   ├── run_qwen2-7b_rm.sh
    │   ├── run_qwen2-7b_rm_seq_balance.sh
    │   ├── run_qwen2-7b_seq_balance.sh
    │   ├── run_qwen2.5-32b.sh
    │   └── verl_getting_started.ipynb
    ├── ray
    │   └── tutorial.ipynb
    ├── remax_trainer
    │   ├── run_qwen2.5-3b_seq_balance.sh
    │   └── run_qwen2.5-7b_seq_balance.sh
    ├── sft
    │   └── gsm8k
    │   │   ├── run_deepseek_6b7.sh
    │   │   ├── run_gemma_2b.sh
    │   │   ├── run_gemma_7b.sh
    │   │   ├── run_qwen_05_peft.sh
    │   │   ├── run_qwen_05_sp2.sh
    │   │   └── run_qwen_05_sp2_liger.sh
    ├── slurm
    │   └── ray_on_slurm.slurm
    └── split_placement
    │   ├── README.md
    │   ├── config
    │       └── ppo_trainer_split.yaml
    │   ├── main_ppo_split.py
    │   ├── run_deepseek7b_llm.sh
    │   └── split_monkey_patch.py
├── patches
    └── megatron_v4.patch
├── pyproject.toml
├── requirements.txt
├── requirements_train.txt
├── scripts
    ├── format.sh
    ├── model_merger.py
    └── train
    │   ├── training_1.5b_dsr_sub.sh
    │   └── training_1.5b_pi1_r128.sh
├── setup.py
├── tests
    ├── __init__.py
    ├── checkpoint
    │   └── test_fsdp_ckpt.py
    ├── distro
    │   └── requirements.py
    ├── e2e
    │   ├── __init__.py
    │   ├── arithmetic_sequence
    │   │   ├── data
    │   │   │   ├── create_dataset.py
    │   │   │   ├── test.parquet
    │   │   │   └── train.parquet
    │   │   ├── model
    │   │   │   ├── config.json
    │   │   │   ├── create_model_tokenizer.py
    │   │   │   ├── generation_config.json
    │   │   │   ├── model.safetensors
    │   │   │   └── tokenizer_config.json
    │   │   └── rl
    │   │   │   ├── README.md
    │   │   │   └── main_trainer.py
    │   ├── check_results.py
    │   ├── envs
    │   │   ├── __init__.py
    │   │   └── digit_completion
    │   │   │   ├── __init__.py
    │   │   │   ├── task.py
    │   │   │   └── tokenizer.py
    │   ├── run_deepseek_megatron.sh
    │   ├── run_qwen_gsm8k_function_rm.sh
    │   ├── run_qwen_gsm8k_function_rm_grpo.sh
    │   ├── run_qwen_gsm8k_function_rm_no_rmpad.sh
    │   ├── run_qwen_gsm8k_function_rm_remax.sh
    │   ├── run_qwen_gsm8k_model_rm.sh
    │   ├── run_qwen_gsm8k_model_rm_liger_kernel.sh
    │   ├── run_qwen_gsm8k_model_rm_no_rmpad.sh
    │   ├── run_qwen_gsm8k_model_rm_seq_balance.sh
    │   ├── run_qwen_gsm8k_model_rm_ulysses.sh
    │   ├── run_qwen_megatron.sh
    │   ├── run_ray_trainer.sh
    │   └── run_ray_trainer_rmpad.sh
    ├── gpu_utility
    │   ├── test_memory_buffers.py
    │   ├── test_ops.py
    │   └── test_torch_functional.py
    ├── model
    │   ├── test_transformer.py
    │   └── test_transformers_ulysses.py
    ├── ray
    │   ├── check_worker_alive
    │   │   └── main.py
    │   ├── detached_worker
    │   │   ├── README.md
    │   │   ├── client.py
    │   │   ├── run.sh
    │   │   └── server.py
    │   ├── test_check_worker_alive.py
    │   ├── test_colocated_workers.py
    │   ├── test_data_transfer.py
    │   ├── test_driverfunc_to_worker.py
    │   ├── test_high_level_scheduling_api.py
    │   ├── test_ray_local_envs.py
    │   ├── test_rvdz.py
    │   ├── test_worker_group_basics.py
    │   └── test_worker_group_torch.py
    ├── rollout
    │   ├── run_fsdp_vllm.py
    │   ├── test_vllm_hf_loader.py
    │   └── test_vllm_spmd.py
    ├── sandbox
    │   └── test_sandbox.py
    ├── sanity
    │   ├── check_license.py
    │   └── test_import.py
    ├── sft
    │   ├── run_sft.sh
    │   ├── run_sft_qwen05_peft.sh
    │   ├── run_sft_qwen05_sp2_liger.sh
    │   ├── run_sft_sp_loss_match.sh
    │   └── test_sp_loss_match.py
    ├── utility
    │   └── test_tensor_dict_utilities.py
    └── verl
    │   └── utils
    │       └── dataset
    │           ├── test_rl_dataset.py
    │           ├── test_rm_dataset.py
    │           └── test_sft_dataset.py
└── verl
    ├── __init__.py
    ├── models
        ├── README.md
        ├── __init__.py
        ├── llama
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── llama_loader.py
        │   │       └── llama_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_llama_megatron.py
        ├── qwen2
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── qwen2_loader.py
        │   │       └── qwen2_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_qwen2_megatron.py
        ├── registry.py
        ├── transformers
        │   ├── __init__.py
        │   ├── llama.py
        │   ├── monkey_patch.py
        │   └── qwen2.py
        └── weight_loader_registry.py
    ├── protocol.py
    ├── single_controller
        ├── __init__.py
        ├── base
        │   ├── __init__.py
        │   ├── decorator.py
        │   ├── megatron
        │   │   ├── __init__.py
        │   │   ├── worker.py
        │   │   └── worker_group.py
        │   ├── register_center
        │   │   ├── __init__.py
        │   │   └── ray.py
        │   ├── worker.py
        │   └── worker_group.py
        └── ray
        │   ├── __init__.py
        │   ├── base.py
        │   └── megatron.py
    ├── third_party
        ├── __init__.py
        └── vllm
        │   ├── __init__.py
        │   ├── vllm_spmd
        │       ├── __init__.py
        │       └── dtensor_weight_loaders.py
        │   ├── vllm_v_0_3_1
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── tokenizer.py
        │       ├── weight_loaders.py
        │       └── worker.py
        │   ├── vllm_v_0_4_2
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   ├── vllm_v_0_5_4
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   └── vllm_v_0_6_3
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
    ├── trainer
        ├── __init__.py
        ├── config
        │   ├── evaluation.yaml
        │   ├── generation.yaml
        │   ├── ppo_megatron_trainer.yaml
        │   ├── ppo_trainer.yaml
        │   └── sft_trainer.yaml
        ├── fsdp_sft_trainer.py
        ├── main_eval.py
        ├── main_evaluation_all_checkpoints.py
        ├── main_generation.py
        ├── main_ppo.py
        ├── ppo
        │   ├── __init__.py
        │   ├── core_algos.py
        │   └── ray_trainer.py
        └── runtime_env.yaml
    ├── utils
        ├── __init__.py
        ├── checkpoint
        │   ├── __init__.py
        │   ├── checkpoint_manager.py
        │   └── fsdp_checkpoint_manager.py
        ├── config.py
        ├── dataset
        │   ├── README.md
        │   ├── __init__.py
        │   ├── rl_dataset.py
        │   ├── rm_dataset.py
        │   └── sft_dataset.py
        ├── debug
        │   ├── __init__.py
        │   ├── performance.py
        │   └── trajectory_tracker.py
        ├── distributed.py
        ├── flops_counter.py
        ├── fs.py
        ├── fsdp_utils.py
        ├── hdfs_io.py
        ├── import_utils.py
        ├── logger
        │   ├── __init__.py
        │   └── aggregate_logger.py
        ├── logging_utils.py
        ├── megatron
        │   ├── __init__.py
        │   ├── memory.py
        │   ├── optimizer.py
        │   ├── optimizer_config.py
        │   ├── pipeline_parallel.py
        │   ├── sequence_parallel.py
        │   └── tensor_parallel.py
        ├── megatron_utils.py
        ├── memory_buffer.py
        ├── model.py
        ├── py_functional.py
        ├── ray_utils.py
        ├── rendezvous
        │   ├── __init__.py
        │   └── ray_backend.py
        ├── reward_score
        │   ├── __init__.py
        │   ├── deepscaler.py
        │   ├── gsm8k.py
        │   ├── math.py
        │   ├── prime_code
        │   │   ├── __init__.py
        │   │   ├── testing_util.py
        │   │   └── utils.py
        │   ├── prime_math
        │   │   ├── __init__.py
        │   │   ├── grader.py
        │   │   └── math_normalize.py
        │   └── utils
        │   │   └── utils.py
        ├── seqlen_balancing.py
        ├── tokenizer.py
        ├── torch_dtypes.py
        ├── torch_functional.py
        ├── tracking.py
        └── ulysses.py
    ├── version
        └── version
    └── workers
        ├── __init__.py
        ├── actor
            ├── __init__.py
            ├── base.py
            ├── dp_actor.py
            └── megatron_actor.py
        ├── critic
            ├── __init__.py
            ├── base.py
            ├── dp_critic.py
            └── megatron_critic.py
        ├── fsdp_workers.py
        ├── megatron_workers.py
        ├── reward_manager
            ├── __init__.py
            ├── naive.py
            └── prime.py
        ├── reward_model
            ├── __init__.py
            ├── base.py
            └── megatron
            │   ├── __init__.py
            │   └── reward_model.py
        ├── rollout
            ├── __init__.py
            ├── base.py
            ├── hf_rollout.py
            ├── naive
            │   ├── __init__.py
            │   └── naive_rollout.py
            ├── tokenizer.py
            └── vllm_rollout
            │   ├── __init__.py
            │   ├── vllm_rollout.py
            │   └── vllm_rollout_spmd.py
        └── sharding_manager
            ├── __init__.py
            ├── base.py
            ├── fsdp_ulysses.py
            ├── fsdp_vllm.py
            └── megatron_vllm.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | ## Enable Dependabot to check the project's dependencies
2 | ## Dependabot will open pull requests to update dependencies automatically
3 | 
4 | version: 2
5 | updates:
6 |   - package-ecosystem: pip
7 |     directory: "/"
8 |     schedule:
9 |       interval: weekly


--------------------------------------------------------------------------------
/.github/workflows/dataset.yml:
--------------------------------------------------------------------------------
 1 | name: dataset
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/dataset.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/dataset.yml
18 | 
19 | # Declare read-only permission for repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   ray:
25 |     runs-on: [self-hosted, gpu]
26 |     steps:
27 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
28 |         with:
29 |             fetch-depth: 0
30 |       - name: Install the current repository
31 |         run: |
32 |           pip install -e .[test] --user
33 |       - name: Running dataset tests
34 |         run: |
35 |           [ ! -d "$HOME/verl-data" ] && git clone --depth 1 https://github.com/eric-haibin-lin/verl-data ~/verl-data
36 |           pytest -s -x tests/verl
37 |       - name: Running ray test using cupy (move it to L20 when dockerfile ready)
38 |         run: |
39 |           cd tests/ray
40 |           pytest -s -x test_rvdz.py


--------------------------------------------------------------------------------
/.github/workflows/e2e_digit_completion.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_digit_completion
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/e2e_digit_completion.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/e2e_digit_completion.yml
18 |       - "tests/e2e/*.sh"
19 | 
20 | # Declare read-only permission for repository contents.
21 | permissions: 
22 |   contents: read
23 | 
24 | jobs:
25 |   e2e_digit_completion:
26 |     runs-on: [self-hosted, l20-0]
27 |     env:
28 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
29 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
30 |       NO_PROXY: "localhost,127.0.0.1"
31 |       HF_HUB_ENABLE_HF_TRANSFER: 1
32 |     container:
33 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
34 |       options: --gpus all --shm-size=10g
35 |     steps:
36 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
37 |         with:
38 |             fetch-depth: 0
39 |       - name: Install the current repository
40 |         run: |
41 |           pip3 install hf_transfer
42 |           pip3 install -e .[test]
43 |       - name: Running digit completion e2e training tests on 8 L20 GPUs
44 |         run: |
45 |           ray stop --force
46 |           bash tests/e2e/run_ray_trainer.sh
47 | 


--------------------------------------------------------------------------------
/.github/workflows/e2e_gsm8k_megatron.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_gsm8k_megatron
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/e2e_gsm8k_megatron.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/e2e_gsm8k_megatron.yml
18 |       - "tests/e2e/*.sh"
19 | 
20 | # Declare read-only permission for repository contents.
21 | permissions: 
22 |   contents: read
23 | 
24 | jobs:
25 |   e2e_gsm8k_megatron:
26 |     runs-on: [self-hosted, l20-0]
27 |     env:
28 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
29 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
30 |       NO_PROXY: "localhost,127.0.0.1"
31 |       HF_HUB_ENABLE_HF_TRANSFER: 1
32 |     container:
33 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
34 |       options: --gpus all --shm-size=10g
35 |     steps:
36 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
37 |         with:
38 |             fetch-depth: 0
39 |       - name: Install the current repository
40 |         run: |
41 |           pip3 install hf_transfer
42 |           pip3 install -e .[test]
43 |       - name: Prepare gsm8k dataset
44 |         run: |
45 |           python3 examples/data_preprocess/gsm8k.py
46 |       - name: Running gsm8k e2e training tests on 8 L20 GPUs with Megatron (Deepseek)
47 |         run: |
48 |           ray stop --force
49 |           [ ! -d "$HOME/Megatron-LM" ] && git clone -b core_v0.4.0_verl https://github.com/eric-haibin-lin/Megatron-LM $HOME/Megatron-LM
50 |           export PYTHONPATH=$PYTHONPATH:$HOME/Megatron-LM
51 |           bash tests/e2e/run_deepseek_megatron.sh
52 |       - name: Running gsm8k e2e training tests on 8 L20 GPUs with Megatron (Qwen)
53 |         run: |
54 |           ray stop --force
55 |           export PYTHONPATH=$PYTHONPATH:$HOME/Megatron-LM
56 |           bash tests/e2e/run_qwen_megatron.sh


--------------------------------------------------------------------------------
/.github/workflows/e2e_lora.yml:
--------------------------------------------------------------------------------
 1 | name: e2e_lora
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/e2e_lora.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/e2e_lora.yml
18 |       - "tests/e2e/*.sh"
19 | 
20 | # Declare read-only permission for repository contents.
21 | permissions: 
22 |   contents: read
23 | 
24 | jobs:
25 |   e2e_lora:
26 |     runs-on: [self-hosted, l20-1]
27 |     env:
28 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
29 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
30 |       NO_PROXY: "localhost,127.0.0.1"
31 |       HF_HUB_ENABLE_HF_TRANSFER: 1
32 |     container:
33 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
34 |       options: --gpus all --shm-size=10g
35 |     steps:
36 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
37 |         with:
38 |             fetch-depth: 0
39 |       - name: Install the current repository
40 |         run: |
41 |           pip3 install hf_transfer peft
42 |           pip3 install -e .[test]
43 |       - name: Prepare gsm8k dataset
44 |         run: |
45 |           ray stop --force
46 |           python3 examples/data_preprocess/gsm8k.py
47 |       - name: Running gsm8k e2e training tests with LoRA
48 |         run: |
49 |           ray stop --force
50 |           bash tests/sft/run_sft_qwen05_peft.sh 8 $HOME/ckpts/
51 |           rm -rf $HOME/ckpts/*


--------------------------------------------------------------------------------
/.github/workflows/model.yml:
--------------------------------------------------------------------------------
 1 | name: model_rmpad
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/model.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/model.yml
18 | 
19 | # Declare read-only permission for repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   model_rmpad:
25 |     runs-on: [self-hosted, l20-1]
26 |     env:
27 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
28 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
29 |       NO_PROXY: "localhost,127.0.0.1"
30 |       HF_HUB_ENABLE_HF_TRANSFER: 1
31 |     container:
32 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
33 |       options: --gpus all --shm-size=10g
34 |     steps:
35 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
36 |         with:
37 |             fetch-depth: 0
38 |       - name: Install the current repository and upgrade to latest transformers
39 |         run: |
40 |           pip3 install -e .[test]
41 |           pip3 install --upgrade transformers
42 |       - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
43 |         run: |
44 |           pytest -s tests/model/test_transformer.py
45 |       - name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn
46 |         run: |
47 |           pip3 install --upgrade flash_attn --no-build-isolation
48 |           pytest -s tests/model/test_transformer.py
49 |       - name: Running FSDP checkpoint tests on 8 L20 GPUs
50 |         run: |
51 |           pip3 install hf_transfer
52 |           torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py
53 | 


--------------------------------------------------------------------------------
/.github/workflows/ray_test.yml:
--------------------------------------------------------------------------------
 1 | name: ray
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/ray_test.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/ray_test.yml
18 | 
19 | # Declare read-only permission for repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   ray:
25 |     runs-on: [self-hosted, l20-0]
26 |     env:
27 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
28 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
29 |       NO_PROXY: "localhost,127.0.0.1"
30 |       HF_HUB_ENABLE_HF_TRANSFER: 1
31 |     container:
32 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
33 |       options: --gpus all --shm-size=10g
34 |     steps:
35 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
36 |         with:
37 |             fetch-depth: 0
38 |       - name: Install the current repository
39 |         run: |
40 |           pip install hf_transfer
41 |           pip install -e .[test]
42 |           pip install --upgrade "ray>=2.40.0"
43 |       - name: Running ray tests that need 8 GPUs
44 |         run: |
45 |           cd tests/ray
46 |           pytest -s -x --ignore=test_check_worker_alive.py --ignore=test_rvdz.py .
47 | 


--------------------------------------------------------------------------------
/.github/workflows/sandbox.yml:
--------------------------------------------------------------------------------
 1 | name: sandbox
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/sandbox.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/sandbox.yml
18 | 
19 | # Declare read-only permission for repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   sandbox:
25 |     runs-on: [self-hosted, l20-0]
26 |     env:
27 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
28 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
29 |       NO_PROXY: "localhost,127.0.0.1"
30 |       HF_HUB_ENABLE_HF_TRANSFER: 1
31 |     container:
32 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
33 |       options: --gpus all --shm-size=10g
34 |     steps:
35 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
36 |         with:
37 |             fetch-depth: 0
38 |       - name: Install the current repository
39 |         run: |
40 |           pip3 install hf_transfer
41 |           pip3 install -e .[test,prime]
42 |           pip3 install vllm==0.5.4
43 |       - name: Running sandbox tests on 8 L20 GPUs
44 |         run: |
45 |           cd tests/sandbox
46 |           pytest -s -x .
47 | 


--------------------------------------------------------------------------------
/.github/workflows/sanity.yml:
--------------------------------------------------------------------------------
 1 | name: sanity
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/sanity.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/sanity.yml
18 | 
19 | # Grant this workflow read-only access to the repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   sanity:
25 |     runs-on: ubuntu-latest
26 |     strategy:
27 |       matrix:
28 |         python-version: ["3.10"]
29 |     steps:
30 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
31 |       - name: Set up Python ${{ matrix.python-version }}
32 |         uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
33 |         with:
34 |           python-version: ${{ matrix.python-version }}
35 |       - name: Install the current repository
36 |         run: |
37 |           pip install -e .[test]
38 |       - name: Run sanity test
39 |         run: |
40 |           pytest -s -x tests/sanity
41 |       - name: Run utility test
42 |         run: |
43 |           pytest -s -x tests/utility
44 |       - name: Run license test
45 |         run: |
46 |           python3 tests/sanity/check_license.py --directory .
47 |       - name: Run dependency test
48 |         run: |
49 |           pip install tomli
50 |           pytest -s -x tests/distro/requirements.py
51 | 


--------------------------------------------------------------------------------
/.github/workflows/vllm.yml:
--------------------------------------------------------------------------------
 1 | name: vllm
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/vllm.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/vllm.yml
18 | 
19 | # Grant this workflow read-only access to the repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   vllm:
25 |     runs-on: [self-hosted, l20-0]
26 |     env:
27 |       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
28 |       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
29 |       NO_PROXY: "localhost,127.0.0.1"
30 |       HF_HUB_ENABLE_HF_TRANSFER: 1
31 |     container:
32 |       image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
33 |       options: --gpus all --shm-size=10g
34 |     steps:
35 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
36 |         with:
37 |             fetch-depth: 0
38 |       - name: Install the current repository
39 |         run: |
40 |           pip3 install hf_transfer
41 |           pip3 install -e .[test]
42 |           pip3 install vllm==0.5.4
43 |       - name: Running vllm tests on 8 L20 GPUs
44 |         run: |
45 |           cd tests/rollout
46 |           torchrun --standalone --nnodes=1 --nproc_per_node=8 $(which pytest) -s test_vllm_hf_loader.py
47 |       - name: Test the latest vLLM
48 |         run: |
49 |           pip3 install --upgrade vllm
50 |           cd tests/rollout
51 |           torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_vllm_spmd.py
52 | 


--------------------------------------------------------------------------------
/.github/workflows/yapf_format.yml:
--------------------------------------------------------------------------------
 1 | name: yapf
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the main branch
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     paths:
10 |       - "**/*.py"
11 |       - .github/workflows/yapf_format.yml
12 |   pull_request:
13 |     branches:
14 |       - main
15 |     paths:
16 |       - "**/*.py"
17 |       - .github/workflows/yapf_format.yml
18 | 
19 | # Grant this workflow read-only access to the repository contents.
20 | permissions: 
21 |   contents: read
22 | 
23 | jobs:
24 |   yapf:
25 |     runs-on: ubuntu-latest
26 |     strategy:
27 |       matrix:
28 |         python-version: ["3.12"]
29 |     steps:
30 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
31 |       # - name: checkout
32 |       #   run: |
33 |       #     commits=${{ github.event.pull_request.commits }}
34 |       #     if [[ -n "$commits" ]]; then
35 |       #       # Prepare enough depth for diffs with main
36 |       #       git fetch --depth="$(( commits + 1 ))"
37 |       #     fi
38 |       - name: Set up Python ${{ matrix.python-version }}
39 |         uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
40 |         with:
41 |           python-version: ${{ matrix.python-version }}
42 |       - name: Install dependencies
43 |         run: |
44 |           python -m pip install --upgrade pip
45 |           pip install --upgrade yapf
46 |           pip install toml==0.10.2
47 |       - name: Running yapf
48 |         run: |
49 |           yapf -r -vv -d --style=./.style.yapf verl tests examples
50 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | **/*.pt
  2 | **/checkpoints
  3 | **/wget-log
  4 | **/_build/
  5 | **/*.ckpt
  6 | **/outputs
  7 | **/*.tar.gz
  8 | **/playground
  9 | **/wandb
 10 | /scores/
 11 | accuracy/*
 12 | !accuracy/acc_baseline/
 13 | !accuracy/acc_baseline/*
 14 | !checkpoints/*/*/eval/figures
 15 | 
 16 | # Byte-compiled / optimized / DLL files
 17 | __pycache__/
 18 | *.py[cod]
 19 | *$py.class
 20 | dataset/*
 21 | tensorflow/my_graph/*
 22 | .idea/
 23 | # C extensions
 24 | *.so
 25 | 
 26 | # Distribution / packaging
 27 | .Python
 28 | env/
 29 | build/
 30 | develop-eggs/
 31 | dist/
 32 | downloads/
 33 | eggs/
 34 | .eggs/
 35 | lib/
 36 | lib64/
 37 | parts/
 38 | sdist/
 39 | var/
 40 | *.egg-info/
 41 | .installed.cfg
 42 | *.egg
 43 | 
 44 | # PyInstaller
 45 | #  Usually these files are written by a python script from a template
 46 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 47 | *.manifest
 48 | *.spec
 49 | 
 50 | # Installer logs
 51 | pip-log.txt
 52 | pip-delete-this-directory.txt
 53 | 
 54 | # Unit test / coverage reports
 55 | htmlcov/
 56 | .tox/
 57 | .coverage
 58 | .coverage.*
 59 | .cache
 60 | nosetests.xml
 61 | coverage.xml
 62 | *,cover
 63 | .hypothesis/
 64 | 
 65 | # Translations
 66 | *.mo
 67 | *.pot
 68 | 
 69 | # Django stuff:
 70 | *.log
 71 | local_settings.py
 72 | 
 73 | # Flask stuff:
 74 | instance/
 75 | .webassets-cache
 76 | 
 77 | # Scrapy stuff:
 78 | .scrapy
 79 | 
 80 | # Sphinx documentation
 81 | docs/_build/
 82 | 
 83 | # PyBuilder
 84 | target/
 85 | 
 86 | # IPython Notebook
 87 | .ipynb_checkpoints
 88 | 
 89 | # pyenv
 90 | .python-version
 91 | 
 92 | # celery beat schedule file
 93 | celerybeat-schedule
 94 | 
 95 | # dotenv
 96 | .env
 97 | 
 98 | # virtualenv
 99 | venv/
100 | ENV/
101 | 
102 | # Spyder project settings
103 | .spyderproject
104 | 
105 | # Rope project settings
106 | .ropeproject
107 | 
108 | # vscode
109 | .vscode
110 | 
111 | # Mac
112 | .DS_Store
113 | 
114 | # output logs
115 | tests/e2e/toy_examples/deepspeed/synchronous/output.txt
116 | 
117 | # vim
118 | *.swp
119 | 
120 | # ckpt
121 | *.lock
122 | 
123 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | build:
 7 |   os: ubuntu-22.04
 8 |   tools:
 9 |     python: "3.8"
10 | 
11 | sphinx:
12 |   configuration: docs/conf.py
13 | 
14 | python:
15 |   install:
16 |     - requirements: docs/requirements-docs.txt


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | based_on_style = google
3 | column_limit = 120
4 | indent_width = 4
5 | split_arguments_when_comma_terminated: true


--------------------------------------------------------------------------------
/Notice.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Zhibin Gou
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/data/minerva_math/README.md:
--------------------------------------------------------------------------------
1 | MIT OpenCourseWare:
2 |     - Solving Quantitative Reasoning Problems with Language Models. https://openreview.net/forum?id=IFXTZERXdM7
3 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/data/phy1/test.jsonl:
--------------------------------------------------------------------------------
1 | {"problem":"The pressure \\( P \\) exerted by wind on a sail varies jointly as the area \\( A \\) of the sail and the cube of the wind's velocity \\( V \\). When the velocity is \\( 8 \\) miles per hour, the pressure on a sail of \\( 2 \\) square feet is \\( 4 \\) pounds. Find the wind velocity when the pressure on \\( 4 \\) square feet of sail is \\( 32 \\) pounds. Let's think step by step and output the final answer within \\boxed{}.", "solution":"", "answer":"12.8", "url":"na", "question":"The pressure \\( P \\) exerted by wind on a sail varies jointly as the area \\( A \\) of the sail and the cube of the wind's velocity \\( V \\). When the velocity is \\( 8 \\) miles per hour, the pressure on a sail of \\( 2 \\) square feet is \\( 4 \\) pounds. Find the wind velocity when the pressure on \\( 4 \\) square feet of sail is \\( 32 \\) pounds. Let's think step by step and output the final answer within \\boxed{}."}
2 | {"problem":"Given that circle $C$ passes through points $P(0,-4)$, $Q(2,0)$, and $R(3,-1)$.  \n$(1)$ Find the equation of circle $C$.  \n$(2)$ If the line $l: mx+y-1=0$ intersects circle $C$ at points $A$ and $B$, and $|AB|=4$, find the value of $m$. Let's think step by step and output the final answer within \\boxed{}.", "solution":"", "answer":"\\frac{4}{3}", "url":"", "question":"Given that circle $C$ passes through points $P(0,-4)$, $Q(2,0)$, and $R(3,-1)$.  \n$(1)$ Find the equation of circle $C$.  \n$(2)$ If the line $l: mx+y-1=0$ intersects circle $C$ at points $A$ and $B$, and $|AB|=4$, find the value of $m$. Let's think step by step and output the final answer within \\boxed{}."}


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/.coveragerc:
--------------------------------------------------------------------------------
 1 | # .coveragerc to control coverage.py
 2 | [run]
 3 | branch = True
 4 | include =
 5 |     latex2sympy.py
 6 | omit = 
 7 |     sandbox/*
 8 |     gen/*
 9 |     asciimath_printer.py
10 |     setup.py
11 |     __init__.py
12 | 
13 | [report]
14 | # Regexes for lines to exclude from consideration
15 | exclude_lines =
16 |     # Have to re-enable the standard pragma
17 |     pragma: no cover
18 | 
19 |     # Don't complain about missing debug-only code:
20 |     def __repr__
21 |     if self\.debug
22 | 
23 |     # Don't complain if tests don't hit defensive assertion code:
24 |     raise AssertionError
25 |     raise NotImplementedError
26 | 
27 |     # Don't complain if non-runnable code isn't run:
28 |     if 0:
29 |     if __name__ == .__main__.:
30 | 
31 | ignore_errors = True
32 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright 2016, latex2sympy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/__init__.py:
--------------------------------------------------------------------------------
1 | import latex2sympy


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/antlr-4.11.1-complete.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/Qwen2.5-Eval/evaluation/latex2sympy/antlr-4.11.1-complete.jar


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/asciimath_printer.py:
--------------------------------------------------------------------------------
 1 | from sympy.printing.str import StrPrinter
 2 | from sympy.core import S
 3 | 
 4 | class AsciiMathPrinter(StrPrinter):
 5 | 
 6 |     def _print_Limit(self, expr):
 7 |         e, z = expr.args
 8 | 
 9 |         return "lim_(%s -> %s) %s" % (self._print(z), self._print(z), self._print(e))
10 | 
11 |     def _print_Integral(self, expr):
12 |         e, lims = expr.args
13 |         if len(lims) > 1:
14 |             return "int_(%s)^(%s) %s d%s" % (self._print(lims[1]), self._print(lims[2]), self._print(e), self._print(lims[0]))
15 |         else:
16 |             return "int %s d%s" % (self._print(e), self._print(lims))
17 |     
18 |     def _print_Sum(self, expr):
19 |         e, lims = expr.args
20 |         return "sum_(%s = %s)^(%s) %s" % (self._print(lims[0]), self._print(lims[1]), self._print(lims[2]), self._print(e))
21 | 
22 |     def _print_Product(self, expr):
23 |         e, lims = expr.args
24 |         return "prod_(%s = %s)^(%s) %s" % (self._print(lims[0]), self._print(lims[1]), self._print(lims[2]), self._print(e))
25 | 
26 |     def _print_factorial(self, expr):
27 |         return "%s!" % self._print(expr.args[0])
28 | 
29 |     def _print_Derivative(self, expr):
30 |         e = expr.args[0]
31 |         wrt = expr.args[1]
32 |         return "d/d%s %s" % (self._print(wrt), self._print(e))
33 | 
34 |     def _print_Abs(self, expr):
35 |         return "|%s|" % self._print(expr.args[0])
36 | 
37 |     def _print_Equality(self, expr):
38 |         return "%s = %s" % (self._print(expr.args[0]), self._print(expr.args[1]))
39 | 
40 |     def _print_Pow(self, expr):
41 |         b = self._print(expr.base)
42 |         if expr.exp is S.Half:
43 |             return "sqrt(%s)" % b
44 | 
45 |         if -expr.exp is S.Half:
46 |             return "1/sqrt(%s)" % b
47 |         if expr.exp is -S.One:
48 |             return "1/%s" % b
49 | 
50 |         return "%s^(%s)" % (b, self._print(expr.exp)) 
51 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/dev-requirements.in:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | # Development
3 | pip-tools
4 | pytest
5 | pytest-cov
6 | pycodestyle
7 | autopep8
8 | -e .
9 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/dev-requirements.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is autogenerated by pip-compile with Python 3.10
 3 | # by the following command:
 4 | #
 5 | #    pip-compile dev-requirements.in
 6 | #
 7 |     # via -r dev-requirements.in
 8 | antlr4-python3-runtime==4.11.1
 9 |     # via
10 |     #   -r requirements.txt
11 |     #   latex2sympy2
12 | atomicwrites==1.3.0
13 |     # via pytest
14 | attrs==19.3.0
15 |     # via pytest
16 | autopep8==1.4.4
17 |     # via -r dev-requirements.in
18 | click==7.0
19 |     # via pip-tools
20 | coverage==4.5.4
21 |     # via pytest-cov
22 | more-itertools==7.2.0
23 |     # via pytest
24 | mpmath==1.3.0
25 |     # via
26 |     #   -r requirements.txt
27 |     #   sympy
28 | packaging==19.2
29 |     # via pytest
30 | pip-tools==4.2.0
31 |     # via -r dev-requirements.in
32 | pluggy==0.13.0
33 |     # via pytest
34 | py==1.8.0
35 |     # via pytest
36 | pycodestyle==2.5.0
37 |     # via
38 |     #   -r dev-requirements.in
39 |     #   autopep8
40 | pyparsing==2.4.4
41 |     # via packaging
42 | pytest==5.2.2
43 |     # via
44 |     #   -r dev-requirements.in
45 |     #   pytest-cov
46 | pytest-cov==2.8.1
47 |     # via -r dev-requirements.in
48 | six==1.13.0
49 |     # via
50 |     #   packaging
51 |     #   pip-tools
52 | sympy==1.12
53 |     # via
54 |     #   -r requirements.txt
55 |     #   latex2sympy2
56 | wcwidth==0.1.7
57 |     # via pytest
58 | 
59 | # THIS MUST BE MAINTAINED AS-IS
60 | -e .


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/gen/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/Qwen2.5-Eval/evaluation/latex2sympy/gen/__init__.py


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/Qwen2.5-Eval/evaluation/latex2sympy/icon.png


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/requirements.in:
--------------------------------------------------------------------------------
1 | sympy
2 | antlr4-python3-runtime
3 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/requirements.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is autogenerated by pip-compile with Python 3.10
 3 | # by the following command:
 4 | #
 5 | #    pip-compile requirements.in
 6 | #
 7 | antlr4-python3-runtime==4.11.1
 8 |     # via -r requirements.in
 9 | mpmath==1.3.0
10 |     # via sympy
11 | sympy==1.12
12 |     # via -r requirements.in
13 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/sandbox/linalg_equations.py:
--------------------------------------------------------------------------------
 1 | from latex2sympy import process_sympy
 2 | import sys
 3 | sys.path.append("..")
 4 | 
 5 | # latex = "2\\begin{pmatrix}1&1&1\\\\0&1&1\\\\0&0&1\\end{pmatrix}\\begin{pmatrix}1&1&1\\\\0&1&1\\\\0&0&1\\end{pmatrix}"
 6 | latex = "\\frac{a^{2} \\left(3 \\pi - 4 \\sin{\\left(\\pi \\right)} + \\frac{\\sin{\\left(2 \\pi \\right)}}{2}\\right)}{2}"
 7 | math = process_sympy(latex)
 8 | 
 9 | print(type(math))
10 | print("latex: %s to math: %s" % (latex, math))
11 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/sandbox/linalg_span.py:
--------------------------------------------------------------------------------
 1 | from latex2sympy import process_sympy
 2 | import sys
 3 | sys.path.append("..")
 4 | 
 5 | latex = "\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix}"
 6 | math = process_sympy(latex)
 7 | print("latex: %s to math: %s" % (latex, math))
 8 | 
 9 | latex = "\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix},\\begin{pmatrix}4\\\\3\\\\1\\end{pmatrix}"
10 | math = process_sympy(latex)
11 | print("latex: %s to math: %s" % (latex, math))
12 | 
13 | latex = "[\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix},\\begin{pmatrix}4\\\\3\\\\1\\end{pmatrix}]"
14 | math = process_sympy(latex)
15 | print("latex: %s to math: %s" % (latex, math))
16 | 
17 | latex = "\\left\\{\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix},\\begin{pmatrix}4\\\\3\\\\1\\end{pmatrix}\\right\\}"
18 | math = process_sympy(latex)
19 | print("latex: %s to math: %s" % (latex, math))
20 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/sandbox/matrix.py:
--------------------------------------------------------------------------------
 1 | from latex2sympy import process_sympy
 2 | from sympy import *
 3 | import sys
 4 | sys.path.append("..")
 5 | 
 6 | theta = Symbol('theta', real=True)
 7 | 
 8 | latex = "\\begin{matrix}1&2\\\\3&4\\end{matrix}"
 9 | math = process_sympy(latex)
10 | print("latex: %s to math: %s" % (latex, math))
11 | 
12 | latex = "\\begin{matrix}1&2\\\\3&4\\\\5&6\\end{matrix}"
13 | math = process_sympy(latex)
14 | print("latex: %s to math: %s" % (latex, math))
15 | 
16 | latex = "\\begin{matrix}1&2&3\\\\4&5&6\\\\7&8&9\\end{matrix}"
17 | math = process_sympy(latex)
18 | print("latex: %s to math: %s" % (latex, math))
19 | 
20 | latex = "\\begin{matrix}x^1&x^2&x^3\\\\y^1&y^2&y^3\\\\z^1&z^2&z^3\\end{matrix}"
21 | math = process_sympy(latex)
22 | print("latex: %s to math: %s" % (latex, math))
23 | 
24 | latex = "\\begin{matrix}x\\\\y\\end{matrix}"
25 | math = process_sympy(latex)
26 | print("latex: %s to math: %s" % (latex, math))
27 | 
28 | latex = "2\\cdot\\begin{matrix}x\\\\y\\end{matrix}"
29 | math = process_sympy(latex)
30 | print("latex: %s to math: %s" % (latex, math))
31 | 
32 | latex = "2\\cdot\\begin{matrix}x\\\\y\\end{matrix} + \\begin{matrix}2\\\\3\\end{matrix}"
33 | math = process_sympy(latex)
34 | print("latex: %s to math: %s" % (latex, math))
35 | 
36 | latex = "-2\\begin{matrix}1&2\\\\3&4\\end{matrix}"
37 | math = process_sympy(latex)
38 | print("latex: %s to math: %s" % (latex, math))
39 | 
40 | latex = "2\\cdot\\theta\\begin{matrix}x\\\\y\\end{matrix} + \\begin{matrix}2\\\\3\\end{matrix}"
41 | math = process_sympy(latex)
42 | print("latex: %s to math: %s" % (latex, math))
43 | 
44 | latex = "\\theta\\begin{matrix}1\\\\3\\end{matrix} - \\begin{matrix}-1\\\\2\\end{matrix}"
45 | math = process_sympy(latex)
46 | print("latex: %s to math: %s" % (latex, math))
47 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/sandbox/sandbox.py:
--------------------------------------------------------------------------------
 1 | from sympy import *
 2 | from latex2sympy import process_sympy
 3 | 
 4 | 
 5 | # latex = '\\variable{a}^{\\variable{b}}'
 6 | # variables = {'a': process_sympy('658.95998'), 'b': process_sympy('185083.8060')}
 7 | # c_ans_expr = process_sympy(latex, variables)
 8 | # print(c_ans_expr)
 9 | # print(srepr(c_ans_expr))
10 | # c_ans = c_ans_expr.doit(deep=False).evalf(chop=True)
11 | # print(c_ans)
12 | # print(srepr(c_ans))
13 | 
14 | 
15 | # numeric_responses = ['1', '1.0', '-1', '-1.0', '.5', '-.5', '3x10^3', '3E3', '3,000x10^{-3}', '0.5E-1', '\\frac{1}{3}', '(5\\times 3)^3', '\\sin(1)']
16 | # for latex in numeric_responses:
17 | #     parsed = process_sympy(latex)
18 | #     print('latex: ', latex)
19 | #     print('sympy: ', parsed)
20 | #     print('is_number: ', parsed.is_number)
21 | #     print('is_Number: ', parsed.is_Number)
22 | #     print('srepr: ', srepr(parsed))
23 | #     print('-----------------------------------------------------')
24 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/sandbox/sectan.py:
--------------------------------------------------------------------------------
 1 | from sympy import *
 2 | import sys
 3 | sys.path.append("..")
 4 | 
 5 | # # x^2\cdot \left(3\cdot \tan \left([!a!]\cdot x+[!c!]\right)+[!a!]\cdot x\left(\sec \left([!a!]\cdot x+[!c!]\right)\right)^2\right)
 6 | # latex1 = "x^2\\cdot \\left(3\\cdot \\tan \\left(2\\cdot x+5\\right)+2\\cdot x\\left(\\sec \\left(2\\cdot x+5\\right)\\right)^2\\right)"
 7 | # math1 = process_sympy(latex1)
 8 | # print("latex: %s to math: %s" %(latex1,math1))
 9 | #
10 | # latex2 = "x^2\\cdot \\left(3\\cdot \\tan \\left(2\\cdot x+5\\right)+2\\cdot x\\left(\\sec \\left(2\\cdot x+5\\right)^2\\right)\\right)"
11 | # math2 = process_sympy(latex2)
12 | # print("latex: %s to math: %s" %(latex2,math2))
13 | #
14 | # latex3 = "x^2\\cdot \\left(3\\cdot \\tan \\left(2\\cdot x+5\\right)+2\\cdot x\\left(1+\\tan \\left(2\\cdot x+5\\right)^2\\right)\\right)"
15 | # math3 = process_sympy(latex3)
16 | # print("latex: %s to math: %s" %(latex3,math3))
17 | #
18 | # print(simplify(math1 - math2))
19 | # print(simplify(math1 - math3))
20 | 
21 | #
22 | # latex1 = "\\sec^2(2\\cdot x+5)"
23 | # math1 = process_sympy(latex1)
24 | # print("latex: %s to math: %s" %(latex1,math1))
25 | #
26 | # latex2 = "1+\\tan^2(2\\cdot x+5)"
27 | # math2 = process_sympy(latex2)
28 | # print("latex: %s to math: %s" %(latex2,math2))
29 | # print(simplify(math1 - math2))
30 | 
31 | 
32 | x = Symbol('x', real=True)
33 | y = Symbol('y', real=True)
34 | 
35 | # BUG: 1 + tan^2(x+1) should be == sec^2(x+1) but isnt
36 | lhs = (1 + (tan(x + 1))**2)
37 | rhs = (sec(x + 1))**2
38 | eq = lhs - rhs
39 | print(simplify(lhs))
40 | print(simplify(rhs))
41 | print(simplify(eq))
42 | print(simplify(lhs) == simplify(rhs))
43 | 
44 | # 1 + tan^2(x) == sec^2(x) but isnt
45 | lhs = (1 + (tan(x))**2)
46 | rhs = (sec(x))**2
47 | eq = lhs - rhs
48 | print(simplify(lhs))
49 | print(simplify(rhs))
50 | print(simplify(eq))
51 | print(simplify(lhs) == simplify(rhs))
52 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/sandbox/vector.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sympy import *
 3 | import sys
 4 | sys.path.append("..")
 5 | 
 6 | # row column matrix = vector
 7 | v = [1, 2, 3]
 8 | 
 9 | # single column matrix = vector
10 | m = Matrix([1, 2, 3])
11 | print(m[:, 0])
12 | 
13 | # a three row and 2 column matrix
14 | m = Matrix([[1, 2], [3, 4], [5, 6]])
15 | print(m[:, 0])
16 | 
17 | # determinant of lin indp system != 0
18 | m = Matrix([[1, 1], [1, 2]])
19 | print(m.det())
20 | 
21 | # determinant of lin dep system = 0
22 | m = Matrix([[1, 1], [2, 2]])
23 | print(m.det())
24 | 
25 | # determinant of lin dep system = 0
26 | x = Symbol('x')
27 | y = Symbol('y')
28 | m = Matrix([[x, y], [x, y]])
29 | print(m.det())
30 | # Reduced Row-Echelon Form
31 | _, ind = m.rref()
32 | print(len(ind))
33 | 
34 | # determinant of lin dep system != 0
35 | m = Matrix([[x, y], [y, x]])
36 | print(m.det())
37 | # Reduced Row-Echelon Form
38 | _, ind = m.rref()
39 | print(len(ind))
40 | 
41 | # determinant of lin dep system != 0
42 | # Reduced Row-Echelon Form
43 | m = Matrix([[x, x, y], [y, y, y]])
44 | _, ind = m.rref()
45 | # Reduced Row-Echelon Form
46 | print(len(ind))
47 | 
48 | #==================#
49 | #===== Numpy ======#
50 | #==================#
51 | # http://kitchingroup.cheme.cmu.edu/blog/2013/03/01/Determining-linear-independence-of-a-set-of-vectors/
52 | # Lin Indp of set of numerical vectors
53 | TOLERANCE = 1e-14
54 | v1 = [6, 0, 3, 1, 4, 2]
55 | v2 = [0, -1, 2, 7, 0, 5]
56 | v3 = [12, 3, 0, -19, 8, -11]
57 | 
58 | A = np.row_stack([v1, v2, v3])
59 | 
60 | U, s, V = np.linalg.svd(A)
61 | print(s)
62 | print(np.sum(s > TOLERANCE))
63 | 
64 | v1 = [1, 1]
65 | v2 = [4, 4]
66 | 
67 | A = np.row_stack([v1, v2])
68 | U, s, V = np.linalg.svd(A)
69 | print(s)
70 | print(np.sum(s > TOLERANCE))
71 | 
72 | 
73 | latex = "\\begin{matrix}1&2\\\\3&4\\end{matrix}"
74 | # math = process_sympy(latex)
75 | print("latex: %s to math: %s" % (latex, 1))
76 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Regenerate the ANTLR parser sources in gen/ from the PS.g4 grammar.
 4 | 
 5 | # Get relative path of the root directory of the project
 6 | rdir=`git rev-parse --git-dir`
 7 | rel_path="$(dirname "$rdir")"
 8 | # Change to that path and run the file. The path is quoted so directories
 9 | # containing spaces work, and the script stops if the directory cannot be
10 | # entered instead of running ANTLR somewhere else.
11 | cd "$rel_path" || exit 1
12 | 
13 | java -jar antlr-4.11.1-complete.jar PS.g4 -o gen
14 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage-ci.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | pytest --doctest-modules --junitxml=junit/test-results.xml --cov-report=xml --cov-config=.coveragerc --cov=latex2sympy tests


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Run the tests under pytest with coverage enabled and write an HTML report.
 4 | 
 5 | # Get relative path of the root directory of the project
 6 | rdir=`git rev-parse --git-dir`
 7 | rel_path="$(dirname "$rdir")"
 8 | # Change to that path and run the file. Quoted so paths with spaces work;
 9 | # abort if the directory cannot be entered.
10 | cd "$rel_path" || exit 1
11 | 
12 | # Activate virtual environment
13 | # ("." is the POSIX way to read a file into the shell; "source" is not
14 | # available in every /bin/sh)
15 | echo "activating venv..."
16 | if test -f .env/bin/activate
17 | then . .env/bin/activate && echo "venv activate (bin)"
18 | elif test -f .env/Scripts/activate
19 | then . .env/Scripts/activate && echo "venv activated (Scripts)"
20 | else exit 1
21 | fi
22 | 
23 | # Run unit test coverage
24 | echo "starting coverage..."
25 | if pytest --doctest-modules --cov-report=html --cov-config=.coveragerc --cov=latex2sympy tests
26 | then echo "coverage finished"
27 | else exit 1
28 | fi
29 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/pre-commit:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Git pre-commit hook: auto-format the staged Python files with autopep8.
 4 | 
 5 | # Get relative path of the root directory of the project
 6 | rdir=`git rev-parse --git-dir`
 7 | rel_path="$(dirname "$rdir")"
 8 | 
 9 | # Change to that path; quoted for paths with spaces, abort if it fails
10 | cd "$rel_path" || exit 1
11 | 
12 | echo "pre-commit hook started..."
13 | 
14 | # Activate virtual environment.
15 | # "." is used instead of the bash-only "source" so the hook also works when
16 | # /bin/sh is a strict POSIX shell such as dash.
17 | echo "activating venv..."
18 | if test -f .env/bin/activate
19 | then . .env/bin/activate && echo "venv activated."
20 | elif test -f .env/Scripts/activate
21 | then . .env/Scripts/activate && echo "venv activated."
22 | else exit 1
23 | fi
24 | 
25 | # Collect the added/modified staged python files once.
26 | # The pattern is anchored ('\.py$'): an unanchored '.py' is a regex that matches
27 | # any character followed by "py" anywhere in the line, so it would also pick up
28 | # non-Python paths.
29 | staged_py=`git diff --name-status --cached | grep '\.py$' | awk 'match($1, "A|M"){print $2}'`
30 | 
31 | # Run auto formatting on all staged python files, then add those changes.
32 | # $staged_py is intentionally unquoted so each path becomes its own argument;
33 | # the -n test skips autopep8 entirely when nothing relevant is staged.
34 | echo "auto-formatting code..."
35 | if test -n "$staged_py" && autopep8 --in-place $staged_py && git add $staged_py
36 | then echo "code was auto-formatted."
37 | else echo "no code was auto-formatted."
38 | fi
39 | 
40 | # Formatting is best-effort: the hook never blocks the commit
41 | exit 0
42 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/pre-push:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Git pre-push hook: activates the venv; the test run itself is currently disabled.
 4 | 
 5 | # Get relative path of the root directory of the project
 6 | rdir=`git rev-parse --git-dir`
 7 | rel_path="$(dirname "$rdir")"
 8 | 
 9 | # Change to that path; quoted for paths with spaces, abort if it fails
10 | cd "$rel_path" || exit 1
11 | 
12 | echo "pre-push hook started..."
13 | 
14 | # Activate virtual environment.
15 | # "." is used instead of the bash-only "source" so the hook also works when
16 | # /bin/sh is a strict POSIX shell such as dash.
17 | echo "activating venv..."
18 | if test -f .env/bin/activate
19 | then . .env/bin/activate && echo "venv activated."
20 | elif test -f .env/Scripts/activate
21 | then . .env/Scripts/activate && echo "venv activated."
22 | else exit 1
23 | fi
24 | 
25 | # Run unit tests
26 | echo "starting tests..."
27 | # if pytest tests
28 | # then echo "tests finished."
29 | # else exit 1
30 | # fi
31 | 
32 | exit 0
33 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/publish.sh:
--------------------------------------------------------------------------------
1 | # Build a wheel from setup.py and upload the contents of dist/ with twine.
2 | # -f: do not fail or print an error when dist/ is empty or missing
3 | rm -f ./dist/*
4 | # Stop here if the build fails so that no upload is attempted
5 | python3 setup.py bdist_wheel || exit 1
6 | twine upload dist/*
7 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup-hooks.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | cp scripts/pre-push .git/hooks/  # install the pre-push hook; paths are relative to the current directory
3 | cp scripts/pre-commit .git/hooks/  # install the pre-commit hook; run from the directory that holds scripts/ and .git/


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Development setup: create and activate the venv, install the dev
 4 | # requirements, compile the parser and install the git hooks.
 5 | 
 6 | # Get relative path of the root directory of the project
 7 | rdir=`git rev-parse --git-dir`
 8 | rel_path="$(dirname "$rdir")"
 9 | # Change to that path; quoted for paths with spaces, abort if it fails
10 | cd "$rel_path" || exit 1
11 | 
12 | echo "creating venv..."
13 | if test -d .env
14 | then echo "venv exists"
15 | else python3 -m venv .env && echo "venv created"
16 | fi
17 | 
18 | echo ''
19 | # Activate virtual environment.
20 | # "." is used instead of "source": this script runs under /bin/sh, and
21 | # "source" is a bash extension that POSIX shells such as dash do not provide.
22 | echo "activating venv..."
23 | if test -f .env/bin/activate
24 | then . .env/bin/activate && echo "venv activate (bin)"
25 | elif test -f .env/Scripts/activate
26 | then . .env/Scripts/activate && echo "venv activated (Scripts)"
27 | else exit 1
28 | fi
29 | 
30 | echo ''
31 | echo "installing requirements..."
32 | if pip install -r dev-requirements.txt
33 | then echo "requirements installed"
34 | else exit 1
35 | fi
36 | 
37 | echo ''
38 | echo "compiling parser..."
39 | sh scripts/compile.sh
40 | echo "parser compiled"
41 | 
42 | echo ''
43 | echo "setup git hooks..."
44 | sh scripts/setup-hooks.sh
45 | echo "git hooks setup"
46 | 
47 | exit 0
48 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/scripts/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Activate the venv, recompile the parser, then run the unit tests.
 4 | 
 5 | # Get relative path of the root directory of the project
 6 | rdir=`git rev-parse --git-dir`
 7 | rel_path="$(dirname "$rdir")"
 8 | # Change to that path; quoted for paths with spaces, abort if it fails
 9 | cd "$rel_path" || exit 1
10 | 
11 | # Activate virtual environment.
12 | # "." is used instead of "source": this script runs under /bin/sh, and
13 | # "source" is a bash extension that POSIX shells such as dash do not provide.
14 | echo "activating venv..."
15 | if test -f .env/bin/activate
16 | then . .env/bin/activate && echo "venv activate (bin)"
17 | elif test -f .env/Scripts/activate
18 | then . .env/Scripts/activate && echo "venv activated (Scripts)"
19 | else exit 1
20 | fi
21 | 
22 | echo ''
23 | echo "compiling parser..."
24 | sh scripts/compile.sh
25 | echo "parser compiled"
26 | 
27 | echo ''
28 | # Run unit tests
29 | echo "starting tests..."
30 | if pytest tests
31 | then echo "tests finished"
32 | else exit 1
33 | fi
34 | 
35 | exit 0
36 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/setup.cfg:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | max-line-length = 120
3 | ignore = E501
4 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | from codecs import open
 3 | from os import path
 4 | here = path.abspath(path.dirname(__file__))
 5 | 
 6 | 
 7 | setup(
 8 |     name="latex2sympy2",
 9 |     version="1.9.0",
10 |     description='Convert latex to sympy with ANTLR and support Matrix, Linear Algebra and CAS functions.',
11 |     long_description_content_type='text/markdown',
12 |     long_description=open(path.join(here, "README.md"), encoding='utf-8').read(),
13 |     # The project's main homepage.
14 |     url='https://github.com/ZubinGou/latex2sympy',
15 |     # Author details
16 |     author='ZubinGou',
17 |     author_email='zebgou@gmail.com',
18 |     # Choose your license
19 |     license='MIT',
20 |     classifiers=[
21 |         'Development Status :: 4 - Beta',
22 |         'Intended Audience :: Developers',
23 |         'Intended Audience :: Education',
24 |         'Intended Audience :: Science/Research',
25 |         'License :: OSI Approved :: MIT License',
26 |         'Topic :: Education',
27 |         'Topic :: Scientific/Engineering :: Mathematics',
28 |         'Topic :: Software Development :: Compilers',
29 |         'Topic :: Text Processing :: Markup :: LaTeX',
30 |         'Topic :: Text Processing :: Markup :: Markdown',
31 |         'Programming Language :: Python :: 3',
32 |         'Programming Language :: Python :: 3.3',
33 |         'Programming Language :: Python :: 3.4',
34 |         'Programming Language :: Python :: 3.5',
35 |         'Programming Language :: Python :: 3.6',
36 |         'Programming Language :: Python :: 3.7',
37 |         'Programming Language :: Python :: 3.8',
38 |     ],
39 |     packages=find_packages(exclude=('tests')),
40 |     py_modules=['asciimath_printer', 'latex2sympy2'],
41 |     install_requires=[
42 |         'sympy>=1.4',
43 |         'antlr4-python3-runtime==4.11.1'
44 |     ],
45 | )
46 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/Qwen2.5-Eval/evaluation/latex2sympy/tests/__init__.py


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/abs_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal, get_simple_examples
 2 | import pytest
 3 | from sympy import Abs
 4 | 
 5 | examples = get_simple_examples(Abs)
 6 | 
 7 | delimiter_pairs = {
 8 |     '|': '|',
 9 |     '\\vert': '\\vert',
10 |     '\\lvert': '\\rvert'
11 | }
12 | 
13 | 
14 | @pytest.mark.parametrize('input, output, symbolically', examples)
15 | def test_abs(input, output, symbolically):
16 |     for left, right in delimiter_pairs.items():
17 |         assert_equal("{left}{input}{right}".format(left=left, right=right, input=input), output, symbolically=symbolically)
18 |         assert_equal("\\left{left}{input}\\right{right}".format(left=left, right=right, input=input), output, symbolically=symbolically)
19 |         assert_equal("\\mleft{left}{input}\\mright{right}".format(left=left, right=right, input=input), output, symbolically=symbolically)
20 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/binomial_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal, _Add, _Mul, _Pow
 2 | import pytest
 3 | from sympy import binomial, Symbol
 4 | 
 5 | x = Symbol('x', real=True)
 6 | y = Symbol('y', real=True)
 7 | theta = Symbol('theta', real=True)
 8 | gamma = Symbol('gamma', real=True)
 9 | 
10 | 
11 | def test_binomial_numeric():
12 |     assert_equal("\\binom{16}{2}", binomial(16, 2))
13 | 
14 | 
15 | def test_binomial_symbols():
16 |     assert_equal("\\binom{x}{y}", binomial(x, y))
17 | 
18 | 
19 | def test_binomial_greek_symbols():
20 |     assert_equal("\\binom{\\theta}{\\gamma}", binomial(theta, gamma))
21 | 
22 | 
23 | def test_binomial_expr():
24 |     assert_equal("\\binom{16+2}{\\frac{4}{2}}", binomial(_Add(16, 2), _Mul(4, _Pow(2, -1)), evaluate=False))
25 | 
26 | 
27 | def test_choose_numeric():
28 |     assert_equal("\\choose{16}{2}", binomial(16, 2))
29 | 
30 | 
31 | def test_choose_symbols():
32 |     assert_equal("\\choose{x}{y}", binomial(x, y))
33 | 
34 | 
35 | def test_choose_greek_symbols():
36 |     assert_equal("\\choose{\\theta}{\\gamma}", binomial(theta, gamma))
37 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/ceil_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal, get_simple_examples
 2 | import pytest
 3 | from sympy import ceiling
 4 | 
 5 | examples = get_simple_examples(ceiling)
 6 | 
 7 | 
 8 | @pytest.mark.parametrize('input, output, symbolically', examples)
 9 | def test_ceil_func(input, output, symbolically):
10 |     assert_equal("\\ceil({input})".format(input=input), output, symbolically=symbolically)
11 | 
12 | 
13 | @pytest.mark.parametrize('input, output, symbolically', examples)
14 | def test_ceil_operatorname(input, output, symbolically):
15 |     assert_equal("\\operatorname{{ceil}}({input})".format(input=input), output, symbolically=symbolically)
16 | 
17 | 
18 | @pytest.mark.parametrize('input, output, symbolically', examples)
19 | def test_ceil_cmd(input, output, symbolically):
20 |     assert_equal("\\lceil {input}\\rceil".format(input=input), output, symbolically=symbolically)
21 |     assert_equal("\\left\\lceil {input}\\right\\rceil".format(input=input), output, symbolically=symbolically)
22 |     assert_equal("\\mleft\\lceil {input}\\mright\\rceil".format(input=input), output, symbolically=symbolically)
23 | 
24 | 
25 | @pytest.mark.parametrize('input, output, symbolically', examples)
26 | def test_ceil_corners(input, output, symbolically):
27 |     assert_equal("\\ulcorner {input}\\urcorner".format(input=input), output, symbolically=symbolically)
28 |     assert_equal("\\left\\ulcorner {input}\\right\\urcorner".format(input=input), output, symbolically=symbolically)
29 |     assert_equal("\\mleft\\ulcorner {input}\\mright\\urcorner".format(input=input), output, symbolically=symbolically)
30 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/complex_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import Sum, I, Symbol, Integer
 4 | 
 5 | a = Symbol('a', real=True)
 6 | b = Symbol('b', real=True)
 7 | i = Symbol('i', real=True)
 8 | n = Symbol('n', real=True)
 9 | x = Symbol('x', real=True)
10 | 
11 | 
12 | def test_complex():
13 |     assert_equal("a+Ib", a + I * b)
14 | 
15 | 
16 | def test_complex_e():
17 |     assert_equal("e^{I\\pi}", Integer(-1))
18 | 
19 | 
20 | def test_complex_sum():
21 |     assert_equal("\\sum_{i=0}^{n} i \\cdot x", Sum(i * x, (i, 0, n)))
22 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/exp_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import exp, sin, Symbol, E
 4 | 
 5 | x = Symbol('x', real=True)
 6 | y = Symbol('y', real=True)
 7 | 
 8 | 
 9 | def test_exp_letter():
10 |     assert_equal("e", E)
11 |     assert_equal("e", exp(1))
12 | 
13 | 
14 | def test_exp_func():
15 |     assert_equal("\\exp(3)", exp(3))
16 | 
17 | 
18 | def test_exp_func_no_delim():
19 |     assert_equal("\\exp3", exp(3))
20 | 
21 | 
22 | def test_exp_command_symbol():
23 |     assert_equal("\\exponentialE", E)
24 |     assert_equal("\\exponentialE", exp(1))
25 | 
26 | 
27 | def test_exp_command_symbol_expression():
28 |     assert_equal("\\exponentialE^{3}", exp(3))
29 | 
30 | 
31 | def test_exp_command_symbol_multiplied():
32 |     '''
33 |     \\exponentialE is NOT a function, so using the following notation equates to multiplication
34 |     '''
35 |     assert_equal("\\exponentialE (3)", E * 3)
36 |     assert_equal("\\exponentialE \\left( 3\\right)", E * 3)
37 |     assert_equal("\\exponentialE \\times 3", E * 3)
38 | 
39 | 
40 | def test_exp_numeric():
41 |     assert_equal("e^3", exp(3))
42 | 
43 | 
44 | def test_exp_symbol():
45 |     assert_equal("e^x", exp(x))
46 | 
47 | 
48 | def test_exp_symbol_expr():
49 |     assert_equal("e^{x+y}", exp(x + y))
50 | 
51 | 
52 | def test_exp_symbol_expr_group():
53 |     assert_equal("e^{(x+y)}", exp(x + y))
54 | 
55 | 
56 | def test_exp_expr():
57 |     assert_equal("\\sin(x)*e^x", sin(x) * exp(x))
58 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/floor_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal, get_simple_examples
 2 | import pytest
 3 | from sympy import floor
 4 | 
 5 | examples = get_simple_examples(floor)
 6 | 
 7 | 
 8 | @pytest.mark.parametrize('input, output, symbolically', examples)
 9 | def test_floor_func(input, output, symbolically):
10 |     assert_equal("\\floor({input})".format(input=input), output, symbolically=symbolically)
11 | 
12 | 
13 | @pytest.mark.parametrize('input, output, symbolically', examples)
14 | def test_floor_operatorname(input, output, symbolically):
15 |     assert_equal("\\operatorname{{floor}}({input})".format(input=input), output, symbolically=symbolically)
16 | 
17 | 
18 | @pytest.mark.parametrize('input, output, symbolically', examples)
19 | def test_floor_cmd(input, output, symbolically):
20 |     assert_equal("\\lfloor {input}\\rfloor".format(input=input), output, symbolically=symbolically)
21 |     assert_equal("\\left\\lfloor {input}\\right\\rfloor".format(input=input), output, symbolically=symbolically)
22 |     assert_equal("\\mleft\\lfloor {input}\\mright\\rfloor".format(input=input), output, symbolically=symbolically)
23 | 
24 | 
25 | @pytest.mark.parametrize('input, output, symbolically', examples)
26 | def test_floor_corners(input, output, symbolically):
27 |     assert_equal("\\llcorner {input}\\lrcorner".format(input=input), output, symbolically=symbolically)
28 |     assert_equal("\\left\\llcorner {input}\\right\\lrcorner".format(input=input), output, symbolically=symbolically)
29 |     assert_equal("\\mleft\\llcorner {input}\\mright\\lrcorner".format(input=input), output, symbolically=symbolically)
30 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/greek_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import Symbol
 4 | 
 5 | epsilon_upper = Symbol('char"000190', real=True)
 6 | epsilon_lower = Symbol('epsilon', real=True)
 7 | varepsilon = Symbol('varepsilon', real=True)
 8 | 
 9 | 
10 | def test_greek_epsilon():
11 |     assert_equal("\\epsilon", epsilon_lower)
12 | 
13 | 
14 | def test_greek_epsilon_upper():
15 |     assert_equal('\\char"000190', epsilon_upper)
16 | 
17 | 
18 | def test_greek_varepsilon():
19 |     assert_equal('\\varepsilon', varepsilon)
20 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/left_right_cdot_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import sin, Symbol
 4 | 
 5 | x = Symbol('x', real=True)
 6 | 
 7 | 
 8 | def test_left_right_cdot():
 9 |     assert_equal("\\sin\\left(x\\right)\\cdot x", sin(x) * x)
10 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/linalg_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import MatMul, Matrix
 4 | 
 5 | 
 6 | def test_linalg_placeholder():
 7 |     assert_equal("\\begin{pmatrix}1&2\\\\3&4\\end{pmatrix}\\cdot\\variable{v}", MatMul(Matrix([[1, 2], [3, 4]]), Matrix([1, 2])), {'v': Matrix([1, 2])})
 8 | 
 9 | 
10 | def test_linalg_placeholder_multiple():
11 |     assert_equal("\\variable{M}\\cdot\\variable{v}", MatMul(Matrix([[1, 2], [3, 4]]), Matrix([1, 2])), {'M': Matrix([[1, 2], [3, 4]]), 'v': Matrix([1, 2])})
12 | 
13 | 
14 | def test_linalg_placeholder_multiple_mul():
15 |     assert_equal("\\begin{pmatrix}3&-1\\end{pmatrix}\\cdot\\variable{M}\\cdot\\variable{v}", MatMul(Matrix([[3, -1]]), Matrix([[1, 2], [3, 4]]), Matrix([1, 2])), {'M': Matrix([[1, 2], [3, 4]]), 'v': Matrix([1, 2])})
16 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/overline_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import sin, Symbol
 4 | 
 5 | x = Symbol('x', real=True)
 6 | 
 7 | 
 8 | def test_overline():
 9 |     assert_equal("\\frac{\\sin(x)}{\\overline{x}_n}", sin(x) / Symbol('xbar_n', real=True))
10 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/pi_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal, _Mul, _Pow
 2 | import pytest
 3 | from sympy import pi, Symbol, acos, cos
 4 | 
 5 | 
 6 | def test_pi_frac():
 7 |     assert_equal("\\frac{\\pi}{3}", _Mul(pi, _Pow(3, -1)))
 8 | 
 9 | 
10 | def test_pi_nested():
11 |     assert_equal("\\arccos{\\cos{\\frac{\\pi}{3}}}", acos(cos(_Mul(pi, _Pow(3, -1)), evaluate=False), evaluate=False))
12 | 
13 | 
14 | def test_pi_arccos():
15 |     assert_equal("\\arccos{-1}", pi, symbolically=True)
16 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/latex2sympy/tests/trig_test.py:
--------------------------------------------------------------------------------
 1 | from .context import assert_equal
 2 | import pytest
 3 | from sympy import asinh, Symbol
 4 | 
 5 | # x = Symbol('x', real=True);
 6 | 
 7 | # latex = "\\sinh(x)"
 8 | # math = process_sympy(latex)
 9 | # print("latex: %s to math: %s" %(latex,math))
10 | #
11 | # latex = "\\arcsinh(x)"
12 | # math = process_sympy(latex)
13 | # print("latex: %s to math: %s" %(latex,math))
14 | #
15 | # latex = "\\arsinh(x)"
16 | # math = process_sympy(latex)
17 | # print("latex: %s to math: %s" %(latex,math))
18 | 
19 | 
20 | def test_arcsinh():
21 |     assert_equal("\\operatorname{arcsinh}\\left(1\\right)", asinh(1, evaluate=False))
22 | 


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/requirements.txt:
--------------------------------------------------------------------------------
 1 | # common
 2 | vllm
 3 | tqdm
 4 | datasets
 5 | torch
 6 | transformers
 7 | python_dateutil
 8 | flash_attn
 9 | 
10 | # math_eval
11 | sympy==1.12
12 | antlr4-python3-runtime==4.11.1 # ! The version needs to be compatible with sympy.
13 | word2number
14 | Pebble
15 | timeout-decorator


--------------------------------------------------------------------------------
/Qwen2.5-Eval/evaluation/sh/eval_one_experiment_all_ckpts.sh:
--------------------------------------------------------------------------------
 1 | # rm -rf sh/eval_checkpoint_yiping.sh; vim sh/eval_checkpoint_yiping.sh
 2 | # Evaluate the saved checkpoints of one experiment, one global step at a time,
 3 | # by calling sh/eval_all_math.sh. Uses bash arrays, so run it with bash.
 4 | PROMPT_TYPE="qwen25-math-cot"
 5 | export CUDA_VISIBLE_DEVICES="0,1,2,3"
 6 | MAX_TOKENS="3072"
 7 | 
 8 | # CHECKPOINTS_DIR=... # replace with your own path for storing checkpoints
 9 | # Fail fast when it is still unset or empty: otherwise every model and output
10 | # path below would silently be rooted at "/".
11 | : "${CHECKPOINTS_DIR:?set CHECKPOINTS_DIR to the directory that stores your checkpoints}"
12 | 
13 | 
14 | 
15 | 
16 | ####### pi1 #######
17 | PROJECT_NAME="verl_few_shot"
18 | EXPERIMENT_NAME="Qwen2.5-Math-1.5B-pi1_r128"
19 | GLOBAL_STEP_LIST=($(seq 20 20 2000))
20 | 
21 | # Loop through each step in the list.
22 | # Arguments are quoted so a checkpoint path containing spaces stays one argument.
23 | for GLOBAL_STEP in "${GLOBAL_STEP_LIST[@]}"; do
24 |     echo "======== Evaluating checkpoint at global step: ${GLOBAL_STEP} ========"
25 |     MODEL_NAME_OR_PATH=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/global_step_${GLOBAL_STEP}/actor
26 |     OUTPUT_DIR=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/eval/global_step_${GLOBAL_STEP}
27 |     bash sh/eval_all_math.sh "$PROMPT_TYPE" "$MODEL_NAME_OR_PATH" "$MAX_TOKENS" "$OUTPUT_DIR"
28 | done
29 | 
30 | 
31 | ####### DSR-sub #######
32 | # PROJECT_NAME="verl_few_shot"
33 | # EXPERIMENT_NAME="Qwen2.5-Math-1.5B-dsr_sub"
34 | # GLOBAL_STEP_LIST=($(seq 20 20 2000))
35 | 
36 | # # # Loop through each step in the list
37 | # for GLOBAL_STEP in "${GLOBAL_STEP_LIST[@]}"; do
38 | #     echo "======== Evaluating checkpoint at global step: ${GLOBAL_STEP} ========"
39 | #     MODEL_NAME_OR_PATH=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/global_step_${GLOBAL_STEP}/actor
40 | #     OUTPUT_DIR=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/eval/global_step_${GLOBAL_STEP}
41 | #     bash sh/eval_all_math.sh $PROMPT_TYPE $MODEL_NAME_OR_PATH $MAX_TOKENS $OUTPUT_DIR
42 | # done
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/data/data_selection.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # get pi_{i} -> top_index = i-1
 3 | python data_selection.py \
 4 |     --index_json_path acc_step_500.json \
 5 |     --data_dir train/one_shot_rlvr \
 6 |     --parquet_file_name dsr_sub.parquet \
 7 |     --repeat_time 128 \
 8 |     --top_index 0 \
 9 |     --method std \
10 |     --top_n 0


--------------------------------------------------------------------------------
/data/test/math500.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/test/math500.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/dsr_sub.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/dsr_sub.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/merge_pi1_pi13_r128.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/merge_pi1_pi13_r128.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/merge_pi1_pi2_pi13_pi1209_r128.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/merge_pi1_pi2_pi13_pi1209_r128.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/pi1209_r128.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi1209_r128.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/pi13_r128.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi13_r128.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/pi1_r128.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi1_r128.parquet


--------------------------------------------------------------------------------
/data/train/one_shot_rlvr/pi2_r128.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi2_r128.parquet


--------------------------------------------------------------------------------
/docker/Dockerfile.ngc.vllm:
--------------------------------------------------------------------------------
 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:ngc-th2.4.0-cu124-vllm0.6.3-ray2.4-te1.7-v0.0.6" -f docker/Dockerfile.ngc.vllm . --builder cloud-verlai-verl-builder --progress=plain --push
 2 | FROM nvcr.io/nvidia/pytorch:24.05-py3
 3 | 
 4 | # uninstall nv-pytorch fork
 5 | RUN pip3 uninstall pytorch-quantization \
 6 |      pytorch-triton \
 7 |      torch \
 8 |      torch-tensorrt \
 9 |      torchvision \
10 |      xgboost transformer_engine flash_attn \
11 |      apex megatron-core -y
12 | 
13 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
14 | 
15 | # =============== Megatron dependencies (optional) =================
16 | # install apex, set MAX_JOBS to avoid OOMs
17 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
18 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
19 |     git+https://github.com/NVIDIA/apex
20 | # =============== End of Megatron dependencies (optional) =================
21 | 
22 | RUN pip3 install --no-cache-dir \
23 |     accelerate \
24 |     codetiming \
25 |     datasets \
26 |     dill \
27 |     hydra-core \
28 |     numpy \
29 |     'pandas' \
30 |     'peft' \
31 |     'pyarrow>=15.0.0' \
32 |     'pybind11' \
33 |     'pylatexenc' \
34 |     'ray>=2.10' \
35 |     'tensordict<0.6' \
36 |     'transformers' \
37 |     'vllm==0.6.3.post1' \
38 |     'wandb'
39 | 
40 | # full dependencies
41 | RUN pip3 install pytest yapf py-spy pyext liger-kernel
42 | 
43 | # =============== Megatron dependencies (optional) =================
44 | # install Transformer Engine, which requires FA 2.5.8. Do it in a separate step for docker cache
45 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation
46 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0
47 | # =============== End of Megatron dependencies (optional) =================
48 | 


--------------------------------------------------------------------------------
/docker/Dockerfile.vemlp.vllm.te:
--------------------------------------------------------------------------------
 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE .
 2 | 
 3 | # the one in docker.io is an alias for the one veturbo
 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124
 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base
 6 | 
 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed
 8 | # unset for now
 9 | RUN pip3 config unset global.index-url
10 | 
11 | # transformers 4.47.0 contains the following bug:
12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask'
13 | RUN pip3 install --no-cache-dir \
14 |     torch==2.4.0 \
15 |     accelerate \
16 |     codetiming \
17 |     dill \
18 |     hydra-core \
19 |     numpy \
20 |     pybind11 \
21 |     tensordict \
22 |     "transformers <= 4.46.0"
23 | 
24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation
25 | 
26 | # vllm depends on ray, and veRL does not support ray > 2.37
27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10
28 | 
29 | # install apex
30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
31 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
32 |     git+https://github.com/NVIDIA/apex
33 | 
34 | # install Transformer Engine
35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to relax version req
36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM
37 | # - cudnn is required by TransformerEngine
38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \
39 | #     pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0
40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation
41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7
42 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = verl
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # verl documents
 2 | 
 3 | ## Build the docs
 4 | 
 5 | ```bash
 6 | # Install dependencies.
 7 | pip install -r requirements-docs.txt
 8 | 
 9 | # Build the docs.
10 | make clean
11 | make html
12 | ```
13 | 
14 | ## Open the docs with your browser
15 | 
16 | ```bash
17 | python -m http.server -d _build/html/
18 | ```
19 | Launch your browser and open localhost:8000.


--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/docs/_static/logo.png


--------------------------------------------------------------------------------
/docs/advance/megatron_extension.rst:
--------------------------------------------------------------------------------
 1 | Add models with the Megatron-LM backend
 2 | =========================================
 3 | 
 4 | Model
 5 | -----------
 6 | 
 7 | The most challenging aspect of using the Megatron-LM backend is implementing
 8 | the models for training. Currently, we implement a Llama model that
 9 | supports data parallelism, tensor parallelism, pipeline parallelism (also
10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) on the Llama
11 | model, which can be found in `modeling_llama_megatron.py <https://github.com/volcengine/verl/blob/main/verl/models/llama/megatron/modeling_llama_megatron.py>`_.
12 | 
13 | To support other models, users are required to implement:
14 | 
15 | 1. A model similar to ``modeling_llama_megatron.py`` that satisfies the
16 |    parallelism requirements of Megatron-LM. Then register your model in
17 |    the `registry.py <https://github.com/volcengine/verl/blob/main/verl/models/registry.py>`_.
18 | 2. Checkpoint utils that can load a full checkpoint (e.g. a Hugging Face
19 |    checkpoint) into partitioned models at runtime. Then register
20 |    your loader to ``weight_loader_registry`` in `weight_loader_registry.py <https://github.com/volcengine/verl/blob/main/verl/models/weight_loader_registry.py>`_.
21 | 3. Weight loader that synchronizes the weights from Megatron to the rollout
22 |    (vLLM) model. Note that both the actor model and the rollout model are
23 |    partitioned during runtime. So, it's advisable to map the model name
24 |    in the actor model implementation. Otherwise, you may need an additional
25 |    name mapping and even weight transformation. The weight loader implementation
26 |    is in `megatron_weight_loaders.py <https://github.com/volcengine/verl/blob/main/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py>`_.


--------------------------------------------------------------------------------
/docs/advance/placement.rst:
--------------------------------------------------------------------------------
 1 | Ray API Design Tutorial
 2 | =======================================
 3 | 
 4 | We provide a tutorial for our Ray API design, including:
 5 | 
 6 | - Ray basic concepts
 7 | - Resource Pool and RayWorkerGroup
 8 | - Data Dispatch, Execution and Collection
 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool
10 | 
11 | See details in `tutorial.ipynb <https://github.com/volcengine/verl/blob/main/examples/ray/tutorial.ipynb>`_.


--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | # markdown support
2 | recommonmark
3 | # markdown table support
4 | sphinx-markdown-tables
5 | 
6 | # theme default rtd
7 | 
8 | # crate-docs-theme
9 | sphinx-rtd-theme


--------------------------------------------------------------------------------
/examples/generation/run_deepseek_v2_lite_math.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_generation \
 2 |     trainer.nnodes=1 \
 3 |     trainer.n_gpus_per_node=8 \
 4 |     data.path=~/data/rlhf/gsm8k/test.parquet \
 5 |     data.prompt_key=prompt \
 6 |     data.n_samples=1 \
 7 |     data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \
 8 |     model.path=deepseek-ai/deepseek-llm-7b-chat \
 9 |     +model.trust_remote_code=True \
10 |     rollout.temperature=1.0 \
11 |     rollout.top_k=50 \
12 |     rollout.top_p=0.7 \
13 |     rollout.prompt_length=2048 \
14 |     rollout.response_length=1024 \
15 |     rollout.tensor_model_parallel_size=2 \
16 |     rollout.gpu_memory_utilization=0.8
17 | # NOTE(review): the file name mentions DeepSeek-V2-Lite but model.path is deepseek-llm-7b-chat, and data.path is GSM8K while data.output_path is under math/ - confirm this is intended.


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # GRPO on GSM8K with deepseek-llm-7b-chat (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=grpo \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.val_batch_size=1312 \
 9 |     data.max_prompt_length=512 \
10 |     data.max_response_length=1024 \
11 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
12 |     actor_rollout_ref.actor.optim.lr=1e-6 \
13 |     actor_rollout_ref.model.use_remove_padding=True \
14 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
15 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \
16 |     actor_rollout_ref.actor.use_kl_loss=True \
17 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
18 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
19 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
26 |     actor_rollout_ref.rollout.n=5 \
27 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \
28 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
29 |     algorithm.kl_ctrl.kl_coef=0.001 \
30 |     trainer.critic_warmup=0 \
31 |     trainer.logger=['console'] \
32 |     trainer.project_name='verl_grpo_example_gsm8k' \
33 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
34 |     trainer.n_gpus_per_node=8 \
35 |     trainer.nnodes=1 \
36 |     trainer.save_freq=-1 \
37 |     trainer.test_freq=5 \
38 |     trainer.total_epochs=15 "$@"


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # GRPO on GSM8K with deepseek-llm-7b-chat using dynamic batch sizing (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | python3 -m verl.trainer.main_ppo \
 4 |     algorithm.adv_estimator=grpo \
 5 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 6 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 7 |     data.train_batch_size=1024 \
 8 |     data.val_batch_size=1312 \
 9 |     data.max_prompt_length=512 \
10 |     data.max_response_length=512 \
11 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
12 |     actor_rollout_ref.actor.optim.lr=1e-6 \
13 |     actor_rollout_ref.model.use_remove_padding=True \
14 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
15 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
16 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
17 |     actor_rollout_ref.actor.use_kl_loss=True \
18 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
19 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
20 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
21 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
22 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
26 |     actor_rollout_ref.rollout.n=5 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     algorithm.kl_ctrl.kl_coef=0.001 \
29 |     trainer.critic_warmup=0 \
30 |     trainer.logger=['console','wandb'] \
31 |     trainer.project_name='verl_grpo_example_gsm8k' \
32 |     trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \
33 |     trainer.n_gpus_per_node=8 \
34 |     trainer.nnodes=1 \
35 |     trainer.save_freq=-1 \
36 |     trainer.test_freq=5 \
37 |     trainer.total_epochs=15 "$@"


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_qwen2-7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # GRPO on GSM8K with Qwen2-7B-Instruct (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     algorithm.adv_estimator=grpo \
 7 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 8 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 9 |     data.train_batch_size=1024 \
10 |     data.val_batch_size=1312 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=1024 \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \
18 |     actor_rollout_ref.actor.use_kl_loss=True \
19 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
20 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
21 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
22 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
23 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
24 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
28 |     actor_rollout_ref.rollout.n=5 \
29 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \
30 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_grpo_example_gsm8k' \
35 |     trainer.experiment_name='qwen2_7b_function_rm' \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=5 \
40 |     trainer.total_epochs=15 "$@"


--------------------------------------------------------------------------------
/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # GRPO on GSM8K with Qwen2-7B-Instruct using dynamic batch sizing (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | python3 -m verl.trainer.main_ppo \
 6 |     algorithm.adv_estimator=grpo \
 7 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 8 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 9 |     data.train_batch_size=1024 \
10 |     data.val_batch_size=1312 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=1024 \
13 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.model.use_remove_padding=True \
16 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
17 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
18 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
19 |     actor_rollout_ref.actor.use_kl_loss=True \
20 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
21 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
22 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
23 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
24 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
28 |     actor_rollout_ref.rollout.n=5 \
29 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
30 |     algorithm.kl_ctrl.kl_coef=0.001 \
31 |     trainer.critic_warmup=0 \
32 |     trainer.logger=['console','wandb'] \
33 |     trainer.project_name='verl_grpo_example_gsm8k' \
34 |     trainer.experiment_name='qwen2_7b_function_rm_kl1e-3' \
35 |     +trainer.val_before_train=False \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=5 \
40 |     trainer.total_epochs=15 "$@"


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # PPO on GSM8K with deepseek-llm-7b-chat as actor and critic (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | python3 -m verl.trainer.main_ppo \
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=1024 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
11 |     actor_rollout_ref.actor.optim.lr=1e-6 \
12 |     actor_rollout_ref.model.use_remove_padding=True \
13 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
14 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
15 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
16 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
17 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
18 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
19 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
20 |     actor_rollout_ref.rollout.name=vllm \
21 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
22 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
23 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
24 |     critic.optim.lr=1e-5 \
25 |     critic.model.use_remove_padding=True \
26 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
27 |     critic.model.enable_gradient_checkpointing=True \
28 |     critic.ppo_micro_batch_size_per_gpu=32 \
29 |     critic.model.fsdp_config.param_offload=False \
30 |     critic.model.fsdp_config.optimizer_offload=False \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_example_gsm8k' \
35 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=1 \
40 |     trainer.total_epochs=15 "$@"
41 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # PPO with the ppo_megatron_trainer config on full_hh_rlhf; deepseek-llm-7b-chat is the actor, critic and reward model (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | train_files=$HOME/data/full_hh_rlhf/rl/train.parquet
 4 | test_files=$HOME/data/full_hh_rlhf/rl/train.parquet # no use
 5 | 
 6 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer' \
 7 |     data.train_files="$train_files" \
 8 |     data.val_files="$test_files" \
 9 |     data.train_batch_size=512 \
10 |     data.val_batch_size=128 \
11 |     data.max_prompt_length=128 \
12 |     data.max_response_length=128 \
13 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
17 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
18 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
19 |     actor_rollout_ref.rollout.name=vllm \
20 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
21 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
22 |     actor_rollout_ref.ref.param_offload=False \
23 |     critic.optim.lr=1e-5 \
24 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
25 |     critic.model.enable_gradient_checkpointing=False \
26 |     critic.ppo_micro_batch_size_per_gpu=4 \
27 |     reward_model.enable=True \
28 |     reward_model.megatron.tensor_model_parallel_size=4 \
29 |     reward_model.model.path=deepseek-ai/deepseek-llm-7b-chat \
30 |     reward_model.micro_batch_size_per_gpu=4 \
31 |     reward_model.param_offload=False \
32 |     algorithm.kl_ctrl.kl_coef=0.001 \
33 |     trainer.critic_warmup=0 \
34 |     trainer.logger=['console','wandb'] \
35 |     trainer.project_name='verl_megatron_full_hh_rlhf_examples' \
36 |     trainer.experiment_name='deepseek_llm_7b_model_rm' \
37 |     trainer.n_gpus_per_node=8 \
38 |     trainer.nnodes=1 \
39 |     trainer.save_freq=-1 \
40 |     trainer.test_freq=5 \
41 |     trainer.total_epochs=100 "$@"
42 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # PPO with the ppo_megatron_trainer config on GSM8K + MATH using deepseek-coder-6.7b-instruct (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 5 | math_train_path=$HOME/data/math/train.parquet
 6 | math_test_path=$HOME/data/math/test.parquet
 7 | 
 8 | train_files="['$gsm8k_train_path', '$math_train_path']"
 9 | test_files="['$gsm8k_test_path', '$math_test_path']"
10 | 
11 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer' \
12 |     data.train_files="$train_files" \
13 |     data.val_files="$test_files" \
14 |     data.train_batch_size=1024 \
15 |     data.val_batch_size=6312 \
16 |     data.max_prompt_length=1024 \
17 |     data.max_response_length=512 \
18 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
19 |     actor_rollout_ref.actor.optim.lr=1e-6 \
20 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
21 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
27 |     critic.optim.lr=1e-5 \
28 |     critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
29 |     critic.model.enable_gradient_checkpointing=False \
30 |     critic.ppo_micro_batch_size_per_gpu=4 \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_megatron_math_gsm8k_examples' \
35 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.test_freq=5 \
40 |     trainer.total_epochs=100 "$@"
41 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_gemma.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # PPO on GSM8K with gemma-2-2b-it as actor and critic (1 node x 2 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | python3 -m verl.trainer.main_ppo \
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=512 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=1024 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=google/gemma-2-2b-it \
11 |     actor_rollout_ref.actor.optim.lr=1e-6 \
12 |     actor_rollout_ref.model.use_remove_padding=False \
13 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
14 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
15 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
16 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
17 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
18 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
19 |     actor_rollout_ref.rollout.name=vllm \
20 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
21 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
22 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
23 |     critic.optim.lr=1e-5 \
24 |     critic.model.use_remove_padding=False \
25 |     critic.model.path=google/gemma-2-2b-it \
26 |     critic.model.enable_gradient_checkpointing=False \
27 |     critic.ppo_micro_batch_size_per_gpu=4 \
28 |     critic.model.fsdp_config.param_offload=False \
29 |     critic.model.fsdp_config.optimizer_offload=False \
30 |     algorithm.kl_ctrl.kl_coef=0.001 \
31 |     trainer.critic_warmup=0 \
32 |     trainer.logger=['console','wandb'] \
33 |     trainer.project_name='verl_example' \
34 |     trainer.experiment_name='gemma2b_function_rm' \
35 |     trainer.n_gpus_per_node=2 \
36 |     trainer.nnodes=1 \
37 |     trainer.save_freq=-1 \
38 |     trainer.test_freq=10 \
39 |     trainer.total_epochs=15 "$@"
40 | 


--------------------------------------------------------------------------------
/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # PPO with the ppo_megatron_trainer config on GSM8K + MATH using Qwen2-7B-Instruct (1 node x 8 GPUs). Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | export VLLM_ATTENTION_BACKEND=XFORMERS
 4 | 
 5 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet
 6 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet
 7 | math_train_path=$HOME/data/math/train.parquet
 8 | math_test_path=$HOME/data/math/test.parquet
 9 | 
10 | train_files="['$gsm8k_train_path', '$math_train_path']"
11 | test_files="['$gsm8k_test_path', '$math_test_path']"
12 | 
13 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer' \
14 |     data.train_files="$train_files" \
15 |     data.val_files="$test_files" \
16 |     data.train_batch_size=1024 \
17 |     data.val_batch_size=6312 \
18 |     data.max_prompt_length=1024 \
19 |     data.max_response_length=512 \
20 |     actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
21 |     actor_rollout_ref.actor.optim.lr=1e-6 \
22 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
23 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
24 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
25 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
26 |     actor_rollout_ref.rollout.name=vllm \
27 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
28 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
29 |     critic.optim.lr=1e-5 \
30 |     critic.model.path=Qwen/Qwen2-7B-Instruct \
31 |     critic.model.enable_gradient_checkpointing=False \
32 |     critic.ppo_micro_batch_size_per_gpu=4 \
33 |     algorithm.kl_ctrl.kl_coef=0.001 \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_megatron_math_gsm8k_examples' \
37 |     trainer.experiment_name='qwen2_7b_function_rm' \
38 |     trainer.n_gpus_per_node=8 \
39 |     trainer.nnodes=1 \
40 |     trainer.save_freq=-1 \
41 |     trainer.test_freq=5 \
42 |     trainer.total_epochs=100 "$@"
43 | 


--------------------------------------------------------------------------------
/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # ReMax on GSM8K with Qwen2.5-3B-Instruct using dynamic batch sizing (1 node x 8 GPUs). Validation uses the GSM8K test split. Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | export HF_DATASETS_OFFLINE=1
 4 | export TRANSFORMERS_OFFLINE=1
 5 | 
 6 | export VLLM_ATTENTION_BACKEND=XFORMERS
 7 | 
 8 | python3 -m verl.trainer.main_ppo \
 9 |     algorithm.adv_estimator=remax \
10 |     data.train_files=$HOME/data/gsm8k/train.parquet \
11 |     data.val_files=$HOME/data/gsm8k/test.parquet \
12 |     data.train_batch_size=512 \
13 |     data.val_batch_size=1312 \
14 |     data.max_prompt_length=512 \
15 |     data.max_response_length=1024 \
16 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct \
17 |     actor_rollout_ref.actor.optim.lr=1e-6 \
18 |     actor_rollout_ref.model.use_remove_padding=True \
19 |     actor_rollout_ref.actor.ppo_mini_batch_size=128 \
20 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
21 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=30000 \
22 |     actor_rollout_ref.actor.use_kl_loss=True \
23 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
24 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
25 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
26 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
27 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
31 |     actor_rollout_ref.rollout.n=4 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     algorithm.kl_ctrl.kl_coef=0.001 \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_remax_example_gsm8k' \
37 |     trainer.experiment_name='qwen2.5_3b_function_rm_kl1e-3' \
38 |     +trainer.val_before_train=False \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=5 \
43 |     trainer.total_epochs=5 "$@"


--------------------------------------------------------------------------------
/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # ReMax on GSM8K with Qwen2.5-7B-Instruct using dynamic batch sizing (1 node x 8 GPUs). Validation uses the GSM8K test split. Extra CLI args are appended via "$@" (quoted so each one is passed through unsplit).
 3 | export HF_DATASETS_OFFLINE=1
 4 | export TRANSFORMERS_OFFLINE=1
 5 | 
 6 | export VLLM_ATTENTION_BACKEND=XFORMERS
 7 | 
 8 | python3 -m verl.trainer.main_ppo \
 9 |     algorithm.adv_estimator=remax \
10 |     data.train_files=$HOME/data/gsm8k/train.parquet \
11 |     data.val_files=$HOME/data/gsm8k/test.parquet \
12 |     data.train_batch_size=1024 \
13 |     data.val_batch_size=1312 \
14 |     data.max_prompt_length=512 \
15 |     data.max_response_length=1024 \
16 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct \
17 |     actor_rollout_ref.actor.optim.lr=1e-6 \
18 |     actor_rollout_ref.model.use_remove_padding=True \
19 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
20 |     actor_rollout_ref.actor.use_dynamic_bsz=True \
21 |     actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \
22 |     actor_rollout_ref.actor.use_kl_loss=True \
23 |     actor_rollout_ref.actor.kl_loss_coef=0.001 \
24 |     actor_rollout_ref.actor.kl_loss_type=low_var_kl \
25 |     actor_rollout_ref.model.enable_gradient_checkpointing=True \
26 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
27 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
28 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
29 |     actor_rollout_ref.rollout.name=vllm \
30 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
31 |     actor_rollout_ref.rollout.n=4 \
32 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
33 |     algorithm.kl_ctrl.kl_coef=0.001 \
34 |     trainer.critic_warmup=0 \
35 |     trainer.logger=['console','wandb'] \
36 |     trainer.project_name='verl_remax_example_gsm8k' \
37 |     trainer.experiment_name='qwen2.5_7b_function_rm_kl1e-3' \
38 |     +trainer.val_before_train=False \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=5 \
43 |     trainer.total_epochs=10 "$@"


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_deepseek_6b7.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # SFT of deepseek-coder-6.7b-instruct on GSM8K via torchrun (single node). Requires <nproc_per_node> and <save_path>; remaining args are forwarded as overrides.
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_deepseek_6b7.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so $@ refers to the rest
12 | shift 2
13 | 
14 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
15 |      -m verl.trainer.fsdp_sft_trainer \
16 |     data.train_files=data/gsm8k/train.parquet \
17 |     data.val_files=data/gsm8k/test.parquet \
18 |     data.prompt_key=extra_info \
19 |     data.response_key=extra_info \
20 |     +data.prompt_dict_keys=['question'] \
21 |     +data.response_dict_keys=['answer'] \
22 |     data.micro_batch_size_per_gpu=4 \
23 |     model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
24 |     trainer.default_local_dir="$save_path" \
25 |     trainer.project_name=gsm8k-sft \
26 |     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
27 |     trainer.total_epochs=4 \
28 |     trainer.logger=['console','wandb'] \
29 |     trainer.default_hdfs_dir=null "$@"


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_2b.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | # SFT of gemma-2b-it on GSM8K via torchrun (single node). Requires <nproc_per_node> and <save_path>; remaining args are forwarded as overrides.
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so $@ refers to the rest
14 | shift 2
15 | 
16 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
17 |      -m verl.trainer.fsdp_sft_trainer \
18 |     data.train_files=$HOME/data/gsm8k/train.parquet \
19 |     data.val_files=$HOME/data/gsm8k/test.parquet \
20 |     data.prompt_key=extra_info \
21 |     data.response_key=extra_info \
22 |     +data.prompt_dict_keys=['question'] \
23 |     +data.response_dict_keys=['answer'] \
24 |     data.micro_batch_size_per_gpu=4 \
25 |     model.partial_pretrain=google/gemma-2b-it \
26 |     trainer.default_local_dir="$save_path" \
27 |     trainer.project_name=gsm8k-sft \
28 |     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
29 |     trainer.total_epochs=2 \
30 |     trainer.logger=['console','wandb'] \
31 |     trainer.default_hdfs_dir=null "$@"


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_gemma_7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | # SFT of gemma-1.1-7b-it on GSM8K via torchrun (single node). Requires <nproc_per_node> and <save_path>; remaining args are forwarded as overrides.
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_gemma_7b.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so $@ refers to the rest
12 | shift 2
13 | 
14 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
15 |      -m verl.trainer.fsdp_sft_trainer \
16 |     data.train_files=$HOME/data/gsm8k/train.parquet \
17 |     data.val_files=$HOME/data/gsm8k/test.parquet \
18 |     data.prompt_key=prompt \
19 |     data.response_key=answer \
20 |     data.micro_batch_size_per_gpu=4 \
21 |     model.partial_pretrain=google/gemma-1.1-7b-it \
22 |     trainer.default_local_dir="$save_path" \
23 |     trainer.project_name=gsm8k-sft \
24 |     trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
25 |     trainer.total_epochs=4 \
26 |     trainer.logger=['console','wandb'] \
27 |     trainer.default_hdfs_dir=null "$@"


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_qwen_05_peft.sh:
--------------------------------------------------------------------------------
 1 | # LoRA (PEFT) supervised fine-tuning of Qwen/Qwen2.5-0.5B-Instruct on GSM8K.
 2 | # Tested with 2 & 4 GPUs
 3 | 
 4 | set -x
 5 | 
 6 | if [ "$#" -lt 2 ]; then
 7 |     echo "Usage: run_qwen_05_peft.sh <nproc_per_node> <save_path> [other_configs...]"
 8 |     exit 1
 9 | fi
10 | 
11 | nproc_per_node=$1
12 | save_path=$2
13 | 
14 | # Shift the arguments so "$@" refers to the rest
15 | shift 2
16 | 
17 | # Expansions are quoted so paths or overrides containing spaces stay single arguments.
18 | # The LoRA settings stay after "$@", in the same order as before.
19 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
20 |      -m verl.trainer.fsdp_sft_trainer \
21 |     data.train_files=data/gsm8k/train.parquet \
22 |     data.val_files=data/gsm8k/test.parquet \
23 |     data.prompt_key=extra_info \
24 |     data.response_key=extra_info \
25 |     optim.lr=1e-4 \
26 |     +data.prompt_dict_keys=['question'] \
27 |     +data.response_dict_keys=['answer'] \
28 |     data.micro_batch_size_per_gpu=4 \
29 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
30 |     trainer.default_local_dir="$save_path" \
31 |     trainer.project_name=gsm8k-sft \
32 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \
33 |     trainer.logger=['console'] \
34 |     trainer.total_epochs=1 \
35 |     trainer.default_hdfs_dir=null "$@" \
36 |     model.lora_rank=32 \
37 |     model.lora_alpha=16 \
38 |     model.target_modules=all-linear
39 | 
40 |     # Or you can do this:
41 |     # model.target_modules=[q_proj,v_proj] \
42 | 


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_qwen_05_sp2.sh:
--------------------------------------------------------------------------------
 1 | # SFT of Qwen/Qwen2.5-0.5B-Instruct on GSM8K with ulysses_sequence_parallel_size=2
 2 | # and use_remove_padding=true (trainer.total_training_steps=1).
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so "$@" refers to the rest
14 | shift 2
15 | 
16 | # Expansions are quoted so paths or overrides containing spaces stay single arguments.
17 | # NOTE(review): this script sets data.micro_batch_size while the gemma/peft scripts
18 | # set data.micro_batch_size_per_gpu; confirm which key the trainer expects.
19 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
20 |      -m verl.trainer.fsdp_sft_trainer \
21 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
22 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
23 |     data.prompt_key=extra_info \
24 |     data.response_key=extra_info \
25 |     optim.lr=1e-4 \
26 |     +data.prompt_dict_keys=['question'] \
27 |     +data.response_dict_keys=['answer'] \
28 |     data.micro_batch_size=4 \
29 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
30 |     trainer.default_local_dir="$save_path" \
31 |     trainer.project_name=gsm8k-sft \
32 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \
33 |     trainer.logger=['console'] \
34 |     trainer.total_training_steps=1 \
35 |     trainer.default_hdfs_dir=null "$@" \
36 |     ulysses_sequence_parallel_size=2 \
37 |     use_remove_padding=true
38 | 


--------------------------------------------------------------------------------
/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh:
--------------------------------------------------------------------------------
 1 | # SFT of Qwen/Qwen2.5-0.5B-Instruct on GSM8K with model.use_liger=True,
 2 | # ulysses_sequence_parallel_size=2 and use_remove_padding=true.
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     # The usage text previously named run_qwen_05_sp2.sh, i.e. the wrong script.
 7 |     echo "Usage: run_qwen_05_sp2_liger.sh <nproc_per_node> <save_path> [other_configs...]"
 8 |     exit 1
 9 | fi
10 | 
11 | nproc_per_node=$1
12 | save_path=$2
13 | 
14 | # Shift the arguments so "$@" refers to the rest
15 | shift 2
16 | 
17 | # Expansions are quoted so paths or overrides containing spaces stay single arguments.
18 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
19 |      -m verl.trainer.fsdp_sft_trainer \
20 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
21 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
22 |     data.prompt_key=extra_info \
23 |     data.response_key=extra_info \
24 |     optim.lr=1e-4 \
25 |     +data.prompt_dict_keys=['question'] \
26 |     +data.response_dict_keys=['answer'] \
27 |     data.micro_batch_size=4 \
28 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
29 |     model.use_liger=True \
30 |     trainer.default_local_dir="$save_path" \
31 |     trainer.project_name=gsm8k-sft \
32 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \
33 |     trainer.logger=['console'] \
34 |     trainer.default_hdfs_dir=null "$@" \
35 |     ulysses_sequence_parallel_size=2 \
36 |     use_remove_padding=true
37 | 


--------------------------------------------------------------------------------
/examples/split_placement/run_deepseek7b_llm.sh:
--------------------------------------------------------------------------------
 1 | # PPO on GSM8K with deepseek-ai/deepseek-llm-7b-chat as actor and critic,
 2 | # launched through main_ppo_split.py (must be run from the directory containing it).
 3 | set -x
 4 | 
 5 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
 6 | python3 main_ppo_split.py \
 7 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
 8 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
 9 |     data.train_batch_size=1024 \
10 |     data.val_batch_size=1312 \
11 |     data.max_prompt_length=512 \
12 |     data.max_response_length=512 \
13 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \
14 |     actor_rollout_ref.actor.optim.lr=1e-6 \
15 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
16 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \
17 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
18 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
19 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
20 |     actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
21 |     actor_rollout_ref.rollout.name=vllm \
22 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
23 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=8 \
24 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
25 |     critic.optim.lr=1e-5 \
26 |     critic.model.path=deepseek-ai/deepseek-llm-7b-chat \
27 |     critic.model.enable_gradient_checkpointing=False \
28 |     critic.ppo_micro_batch_size_per_gpu=8 \
29 |     critic.model.fsdp_config.param_offload=False \
30 |     critic.model.fsdp_config.optimizer_offload=False \
31 |     algorithm.kl_ctrl.kl_coef=0.001 \
32 |     trainer.critic_warmup=0 \
33 |     trainer.logger=['console','wandb'] \
34 |     trainer.project_name='verl_example_gsm8k' \
35 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
36 |     trainer.n_gpus_per_node=8 \
37 |     trainer.nnodes=1 \
38 |     trainer.save_freq=-1 \
39 |     trainer.total_epochs=15 "$@"
40 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # requirements.txt records the full set of dependencies for development
 2 | accelerate
 3 | codetiming
 4 | datasets
 5 | dill
 6 | flash-attn
 7 | hydra-core
 8 | liger-kernel
 9 | numpy
10 | pandas
11 | peft
12 | pyarrow>=15.0.0
13 | pybind11
14 | pylatexenc
15 | ray
16 | tensordict<0.6
17 | transformers
18 | vllm==0.6.3.post1
19 | wandb
20 | 


--------------------------------------------------------------------------------
/scripts/format.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Re-format the Python sources in place with yapf, using the repository style file.
3 | # Upgrade yapf first so the newest available release does the formatting.
4 | pip3 install --upgrade yapf
5 | # -i: rewrite files in place, -r: recurse into directories, -vv: print file names.
6 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples
7 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.


--------------------------------------------------------------------------------
/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/create_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from tests.e2e.envs.digit_completion import DigitCompletion, generate_ground_truth_response
16 | from torch.utils import data
17 | import os
18 | 
19 | if __name__ == '__main__':
20 |     simple_task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9)
21 |     all_prompts = simple_task.get_all_prompts()
22 | 
23 |     # 21 * 6 * 4
24 |     train_data, test_data = data.random_split(all_prompts, lengths=[0.8, 0.2])
25 |     train_data = list(train_data)
26 |     test_data = list(test_data)
27 | 
28 |     train_data = [[{'role': 'user', 'content': str(item)}] \
29 |                      for item in train_data]
30 |     test_data = [[{'role': 'user', 'content': str(item)}] \
31 |                      for item in test_data]
32 | 
33 |     print(f'Size of train: {len(train_data)}, size of test: {len(test_data)}')
34 | 
35 |     train_data = {'prompt': train_data}
36 |     test_data = {'prompt': test_data}
37 | 
38 |     model_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)))
39 | 
40 |     import pandas as pd
41 | 
42 |     train_data_frame = pd.DataFrame(train_data)
43 |     test_data_frame = pd.DataFrame(test_data)
44 | 
45 |     train_data_frame.to_parquet(os.path.join(model_folder, 'train.parquet'))
46 |     test_data_frame.to_parquet(os.path.join(model_folder, 'test.parquet'))
47 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/tests/e2e/arithmetic_sequence/data/test.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/tests/e2e/arithmetic_sequence/data/train.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "LlamaForCausalLM"
 4 |   ],
 5 |   "attention_bias": false,
 6 |   "attention_dropout": 0.0,
 7 |   "bos_token_id": null,
 8 |   "eos_token_id": 1,
 9 |   "hidden_act": "silu",
10 |   "hidden_size": 128,
11 |   "initializer_range": 0.02,
12 |   "intermediate_size": 344,
13 |   "max_position_embeddings": 2048,
14 |   "mlp_bias": false,
15 |   "model_type": "llama",
16 |   "num_attention_heads": 4,
17 |   "num_hidden_layers": 4,
18 |   "num_key_value_heads": 4,
19 |   "pad_token_id": 2,
20 |   "pretraining_tp": 1,
21 |   "rms_norm_eps": 1e-06,
22 |   "rope_scaling": null,
23 |   "rope_theta": 10000.0,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.43.3",
27 |   "use_cache": true,
28 |   "vocab_size": 16
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/generation_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "_from_model_config": true,
3 |   "eos_token_id": 1,
4 |   "pad_token_id": 2,
5 |   "transformers_version": "4.43.3"
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/model.safetensors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/tests/e2e/arithmetic_sequence/model/model.safetensors


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/tokenizer_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "char_ords": [
 3 |         48,
 4 |         49,
 5 |         50,
 6 |         51,
 7 |         52,
 8 |         53,
 9 |         54,
10 |         55,
11 |         56,
12 |         57,
13 |         44,
14 |         58
15 |     ],
16 |     "model_max_length": 2048,
17 |     "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}"
18 | }


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/README.md:
--------------------------------------------------------------------------------
 1 | # Digit completion
 2 | 
 3 | This is an example of solving a digit completion problem. The problem is defined as below:
 4 | 
 5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers.
 6 | If the max number is reached, the next number is taken modulo the max number.
 7 | 
 8 | For example,
 9 | - prompt = [1, 2, 3]
10 | - N = 5
11 | - max_number = 6
12 | 
13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1].
14 | 
15 | # Environment definition
16 | 
17 | The core definition of the task is in tests/e2e/envs/digit_completion/task.py
18 | 
19 | It is highly recommended to take a look at it for better understanding.
20 | 
21 | 
22 | 
23 | # Run experiments
24 | 
25 | Users must specify the config path and the config name (and the model config path, relative to the current working directory).
26 | 
27 | ```bash
28 | # cd tests/e2e/arithmetic_sequence/rl
29 | 
30 | # Specify the config path and config name (current working dir)
31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron'
32 | 
33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using:
34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config
35 | 
36 | ```
37 | 
38 | 


--------------------------------------------------------------------------------
/tests/e2e/check_results.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import argparse
16 | 
17 | import numpy as np
18 | 
19 | 
20 | def extract_reward_from_line(line):
21 |     # TODO: this function needs error handling
22 |     try:
23 |         key_vals = line.split(' - ')
24 |         for key_val in key_vals:
25 |             key, val = key_val.split(':')
26 |             if key == 'critic/rewards/mean':
27 |                 reward = float(val)
28 |                 return reward
29 |         return -np.inf
30 |     except Exception:
31 |         return -np.inf
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     parser = argparse.ArgumentParser()
36 |     parser.add_argument('--output_file', required=True, type=str)
37 | 
38 |     args = parser.parse_args()
39 | 
40 |     with open(args.output_file, 'r') as f:
41 |         output = f.read().split('\n')
42 | 
43 |     best_reward = -np.inf
44 |     for line in output:
45 |         if line.startswith('step'):
46 |             reward = extract_reward_from_line(line)
47 |             if reward > best_reward:
48 |                 best_reward = reward
49 | 
50 |     print(f'Best reward is {best_reward}')
51 |     assert best_reward > 0.2, f'Best reward must be greater than 0.2. best_reward: {best_reward}'
52 |     print('Check passes')
53 | 


--------------------------------------------------------------------------------
/tests/e2e/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .digit_completion import DigitCompletion
16 | 
17 | __all__ = ['DigitCompletion']


--------------------------------------------------------------------------------
/tests/e2e/envs/digit_completion/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .task import DigitCompletion, generate_ground_truth_response
16 | from .tokenizer import CharTokenizer
17 | 
18 | from transformers import AutoTokenizer, LlamaConfig
19 | 
20 | # Import-time side effect: register CharTokenizer with AutoTokenizer for LlamaConfig.
21 | # exist_ok=True is passed so that a repeated registration is tolerated.
22 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True)
23 | 
24 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer']


--------------------------------------------------------------------------------
/tests/e2e/run_deepseek_megatron.sh:
--------------------------------------------------------------------------------
 1 | # Megatron PPO run on GSM8K with deepseek-ai/deepseek-coder-1.3b-instruct
 2 | # (trainer.total_training_steps=3).
 3 | set -x
 4 | 
 5 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml
 6 | 
 7 | huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct
 8 | 
 9 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
10 | python3 -m verl.trainer.main_ppo --config-path=config \
11 |     --config-name='ppo_megatron_trainer.yaml' \
12 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
13 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
14 |     data.train_batch_size=1024 \
15 |     data.val_batch_size=1312 \
16 |     data.max_prompt_length=512 \
17 |     data.max_response_length=512 \
18 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \
19 |     actor_rollout_ref.actor.optim.lr=2e-6 \
20 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
21 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
22 |     actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \
23 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
24 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
25 |     actor_rollout_ref.rollout.name=vllm \
26 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
27 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
28 |     actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \
29 |     critic.optim.lr=2e-5 \
30 |     critic.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \
31 |     critic.model.enable_gradient_checkpointing=False \
32 |     critic.ppo_micro_batch_size_per_gpu=4 \
33 |     critic.megatron.tensor_model_parallel_size=2 \
34 |     algorithm.kl_ctrl.kl_coef=0.001 \
35 |     trainer.critic_warmup=0 \
36 |     trainer.logger=['console'] \
37 |     trainer.project_name='verl_megatron_gsm8k_examples' \
38 |     trainer.experiment_name='deepseek_llm_1b3_function_rm' \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=1 \
43 |     trainer.total_epochs=15 \
44 |     trainer.total_training_steps=3 "$@"
45 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm.sh:
--------------------------------------------------------------------------------
 1 | # PPO run on GSM8K with Qwen/Qwen2.5-0.5B as actor and critic, use_remove_padding=True
 2 | # (trainer.total_training_steps=1, trainer.save_freq=1).
 3 | set -x
 4 | 
 5 | export VLLM_ATTENTION_BACKEND=XFORMERS
 6 | 
 7 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
 8 | python3 -m verl.trainer.main_ppo \
 9 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
10 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
11 |     data.train_batch_size=1024 \
12 |     data.val_batch_size=1312 \
13 |     data.max_prompt_length=512 \
14 |     data.max_response_length=512 \
15 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.model.use_remove_padding=True \
18 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
19 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     critic.optim.lr=1e-5 \
29 |     critic.model.use_remove_padding=True \
30 |     critic.model.path=Qwen/Qwen2.5-0.5B \
31 |     critic.model.enable_gradient_checkpointing=False \
32 |     critic.ppo_micro_batch_size_per_gpu=4 \
33 |     critic.model.fsdp_config.param_offload=False \
34 |     critic.model.fsdp_config.optimizer_offload=False \
35 |     algorithm.kl_ctrl.kl_coef=0.001 \
36 |     trainer.critic_warmup=0 \
37 |     trainer.logger=['console'] \
38 |     trainer.project_name='verl_example_gsm8k' \
39 |     trainer.experiment_name='qwen_e2e_ci_function_rm' \
40 |     trainer.n_gpus_per_node=8 \
41 |     trainer.nnodes=1 \
42 |     trainer.save_freq=1 \
43 |     trainer.default_local_dir="$HOME/ckpt/" \
44 |     trainer.total_training_steps=1 "$@"
45 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh:
--------------------------------------------------------------------------------
 1 | # Run of verl.trainer.main_ppo on GSM8K with Qwen/Qwen2.5-0.5B and
 2 | # algorithm.adv_estimator=grpo (trainer.total_training_steps=1).
 3 | set -x
 4 | 
 5 | export VLLM_ATTENTION_BACKEND=XFORMERS
 6 | 
 7 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
 8 | python3 -m verl.trainer.main_ppo \
 9 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
10 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
11 |     data.train_batch_size=1024 \
12 |     data.val_batch_size=1312 \
13 |     data.max_prompt_length=512 \
14 |     data.max_response_length=512 \
15 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.model.use_remove_padding=True \
18 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
19 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     algorithm.kl_ctrl.kl_coef=0.001 \
29 |     algorithm.adv_estimator=grpo \
30 |     trainer.critic_warmup=0 \
31 |     trainer.logger=['console'] \
32 |     trainer.project_name='verl_example_gsm8k' \
33 |     trainer.experiment_name='qwen_e2e_ci_function_rm' \
34 |     trainer.n_gpus_per_node=8 \
35 |     trainer.nnodes=1 \
36 |     trainer.save_freq=-1 \
37 |     trainer.total_training_steps=1 "$@"
38 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh:
--------------------------------------------------------------------------------
 1 | # PPO run on GSM8K with Qwen/Qwen2.5-0.5B as actor and critic, use_remove_padding=False
 2 | # for both models (trainer.total_training_steps=1).
 3 | set -x
 4 | 
 5 | export VLLM_ATTENTION_BACKEND=XFORMERS
 6 | 
 7 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
 8 | python3 -m verl.trainer.main_ppo \
 9 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
10 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
11 |     data.train_batch_size=1024 \
12 |     data.val_batch_size=1312 \
13 |     data.max_prompt_length=512 \
14 |     data.max_response_length=512 \
15 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.model.use_remove_padding=False \
18 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
19 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     critic.optim.lr=1e-5 \
29 |     critic.model.use_remove_padding=False \
30 |     critic.model.path=Qwen/Qwen2.5-0.5B \
31 |     critic.model.enable_gradient_checkpointing=False \
32 |     critic.ppo_micro_batch_size_per_gpu=4 \
33 |     critic.model.fsdp_config.param_offload=False \
34 |     critic.model.fsdp_config.optimizer_offload=False \
35 |     algorithm.kl_ctrl.kl_coef=0.001 \
36 |     trainer.critic_warmup=0 \
37 |     trainer.logger=['console'] \
38 |     +trainer.val_before_train=False \
39 |     trainer.project_name='verl_example_gsm8k' \
40 |     trainer.experiment_name='qwen_e2e_ci_function_rm' \
41 |     trainer.n_gpus_per_node=8 \
42 |     trainer.nnodes=1 \
43 |     trainer.save_freq=-1 \
44 |     trainer.total_training_steps=1 "$@"
45 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh:
--------------------------------------------------------------------------------
 1 | # Run of verl.trainer.main_ppo on GSM8K with Qwen/Qwen2.5-0.5B and
 2 | # algorithm.adv_estimator=remax (trainer.total_training_steps=1).
 3 | set -x
 4 | 
 5 | export VLLM_ATTENTION_BACKEND=XFORMERS
 6 | 
 7 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
 8 | python3 -m verl.trainer.main_ppo \
 9 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
10 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
11 |     data.train_batch_size=1024 \
12 |     data.val_batch_size=1312 \
13 |     data.max_prompt_length=512 \
14 |     data.max_response_length=512 \
15 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
16 |     actor_rollout_ref.actor.optim.lr=1e-6 \
17 |     actor_rollout_ref.model.use_remove_padding=True \
18 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
19 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
20 |     actor_rollout_ref.actor.fsdp_config.param_offload=False \
21 |     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
22 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \
23 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
24 |     actor_rollout_ref.rollout.name=vllm \
25 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
26 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
27 |     actor_rollout_ref.ref.fsdp_config.param_offload=True \
28 |     algorithm.kl_ctrl.kl_coef=0.001 \
29 |     algorithm.adv_estimator=remax \
30 |     trainer.critic_warmup=0 \
31 |     trainer.logger=['console'] \
32 |     trainer.project_name='verl_example_gsm8k' \
33 |     trainer.experiment_name='qwen_e2e_ci_function_rm' \
34 |     trainer.n_gpus_per_node=8 \
35 |     trainer.nnodes=1 \
36 |     trainer.save_freq=-1 \
37 |     trainer.total_training_steps=1 "$@"
38 | 


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_megatron.sh:
--------------------------------------------------------------------------------
 1 | # Megatron PPO run on GSM8K with Qwen/Qwen2.5-0.5B as actor and critic
 2 | # (trainer.total_training_steps=3).
 3 | set -x
 4 | 
 5 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml
 6 | 
 7 | huggingface-cli download Qwen/Qwen2.5-0.5B
 8 | 
 9 | # "$HOME" and "$@" are quoted so paths or overrides containing spaces stay single arguments.
10 | python3 -m verl.trainer.main_ppo --config-path=config \
11 |     --config-name='ppo_megatron_trainer.yaml' \
12 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
13 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
14 |     data.train_batch_size=1024 \
15 |     data.val_batch_size=1312 \
16 |     data.max_prompt_length=512 \
17 |     data.max_response_length=512 \
18 |     actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \
19 |     actor_rollout_ref.actor.optim.lr=2e-6 \
20 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
21 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
22 |     actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \
23 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
24 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
25 |     actor_rollout_ref.rollout.name=vllm \
26 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
27 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
28 |     actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \
29 |     critic.optim.lr=2e-5 \
30 |     critic.model.path=Qwen/Qwen2.5-0.5B \
31 |     critic.model.enable_gradient_checkpointing=False \
32 |     critic.ppo_micro_batch_size_per_gpu=4 \
33 |     critic.megatron.tensor_model_parallel_size=2 \
34 |     algorithm.kl_ctrl.kl_coef=0.001 \
35 |     trainer.critic_warmup=0 \
36 |     trainer.logger=['console'] \
37 |     trainer.project_name='verl_megatron_gsm8k_examples' \
38 |     trainer.experiment_name='qwen2_5_0b5_function_rm' \
39 |     trainer.n_gpus_per_node=8 \
40 |     trainer.nnodes=1 \
41 |     trainer.save_freq=-1 \
42 |     trainer.test_freq=1 \
43 |     trainer.total_epochs=15 \
44 |     trainer.total_training_steps=3 "$@"
45 | 


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Trains on the arithmetic-sequence task, tees the console output to a file and then
 4 | # lets tests/e2e/check_results.py inspect that file.
 5 | #
 6 | # pipefail is required: without it the status of `python3 ... | tee` is tee's status,
 7 | # so a crashed training run would not abort the script under `set -e`.
 8 | set -e -x -o pipefail
 9 | 
10 | OUTPUT_FILE="/tmp/output_ray_trainer.txt"
11 | 
12 | export PATH=$PATH:~/.local/bin
13 | 
14 | rm -rf "$OUTPUT_FILE"
15 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
16 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
17 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
18 |     data.train_batch_size=800 \
19 |     data.val_batch_size=200 \
20 |     data.max_prompt_length=16 \
21 |     data.max_response_length=32 \
22 |     data.return_raw_input_ids=True \
23 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
24 |     actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \
25 |     actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=200 \
26 |     actor_rollout_ref.actor.entropy_coeff=0 \
27 |     actor_rollout_ref.actor.optim.lr=1e-4 \
28 |     actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \
29 |     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=200 \
30 |     actor_rollout_ref.rollout.name=hf \
31 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
32 |     critic.ppo_micro_batch_size_per_gpu=200 \
33 |     critic.model.path=tests/e2e/arithmetic_sequence/model \
34 |     critic.optim.lr=1e-3 \
35 |     algorithm.kl_ctrl.kl_coef=0.005 \
36 |     trainer.total_epochs=200 \
37 |     trainer.experiment_name=arithmetic_sequences \
38 |     trainer.logger=['console'] \
39 |     trainer.n_gpus_per_node=1 \
40 |     trainer.test_freq=1 \
41 |     trainer.save_freq=110 | tee "$OUTPUT_FILE"
42 | 
43 | python3 tests/e2e/check_results.py --output_file="$OUTPUT_FILE"
44 | rm -rf "$OUTPUT_FILE"
45 | 


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer_rmpad.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Runs the arithmetic-sequence trainer for one epoch with a vLLM rollout and critic.model.use_remove_padding=True.
 3 | set -e -x
 4 | 
 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
 6 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
 7 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
 8 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
 9 |     actor_rollout_ref.rollout.name=vllm \
10 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
11 |     actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \
12 |     critic.model.path=Qwen/Qwen2.5-0.5B \
13 |     critic.model.use_remove_padding=True \
14 |     trainer.total_epochs=1


--------------------------------------------------------------------------------
/tests/gpu_utility/test_ops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def test_flash_attn_cross_entropy():
17 |     from verl.utils.torch_functional import logprobs_from_logits_naive
18 | 
19 |     from verl.utils.debug import log_gpu_memory_usage
20 | 
21 |     from flash_attn.ops.triton.cross_entropy import cross_entropy_loss
22 | 
23 |     import torch
24 |     from torch import nn
25 | 
26 |     log_gpu_memory_usage('At start')
27 | 
28 |     hidden_states = torch.randn(size=(2048, 5120), device='cuda', requires_grad=True, dtype=torch.bfloat16)
29 | 
30 |     linear = nn.Linear(in_features=5120, out_features=155136, bias=False, device='cuda', dtype=torch.bfloat16)
31 | 
32 |     logits = linear(hidden_states)
33 | 
34 |     # logits = logits.float()
35 |     labels = torch.randint(low=0, high=155136, size=(2048,), device='cuda')
36 | 
37 |     log_gpu_memory_usage('before computation')
38 |     # output = checkpoint.checkpoint(logprobs_from_logits, logits, labels, use_reentrant=True)
39 |     output = -cross_entropy_loss(logits, labels)[0]
40 |     # output = logprobs_from_logits(logits, labels)
41 |     log_gpu_memory_usage('After forward')
42 |     output.sum().backward()
43 |     log_gpu_memory_usage('After backward')
44 | 
45 |     groundtruth = logprobs_from_logits_naive(logits.float(), labels)
46 | 
47 |     torch.testing.assert_close(output, groundtruth)
48 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/README.md:
--------------------------------------------------------------------------------
 1 | # Detached Worker
 2 | ## How to run (Only on a single node)
 3 | - Start a local ray cluster: 
 4 | ```bash
 5 | ray start --head --port=6379
 6 | ```
 7 | - Run the server
 8 | ```bash
 9 | python3 server.py
10 | ```
11 | - In another terminal, run the client
12 | ```bash
13 | python3 client.py
14 | ```
15 | 


--------------------------------------------------------------------------------
/tests/ray/detached_worker/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ray start --head --port=6379  # start a local Ray head node listening on port 6379
3 | python3 server.py  # runs to completion before the client starts; presumably leaves detached workers behind - TODO confirm
4 | python3 client.py  # presumably talks to the workers created by server.py - TODO confirm
5 | ray stop --force  # tear the local Ray cluster down again


--------------------------------------------------------------------------------
/tests/ray/test_check_worker_alive.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import time
16 | import os
17 | import subprocess
18 | 
19 | 
20 | def test():
21 |     wait_time = 10
22 | 
23 |     my_env = os.environ.copy()
24 |     my_env["WAIT_TIME"] = str(wait_time)
25 | 
26 |     p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE)
27 | 
28 |     count = 0
29 |     while b"foo started" not in p.stdout.read():
30 |         time.sleep(1)
31 |         count += 1
32 |         if count > 40:
33 |             raise RuntimeError("timeout for start foo in check_worker_alive/main.py")
34 | 
35 |     print(
36 |         time.time(),
37 |         f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time")
38 |     time.sleep(wait_time * 1.5)
39 |     print(time.time(), f"start checking")
40 |     assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort"
41 |     assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code"
42 |     print(f"test passed")
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     test()
47 | 


--------------------------------------------------------------------------------
/tests/ray/test_ray_local_envs.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | e2e test verl.single_controller.ray
16 | """
17 | import os
18 | import ray
19 | 
20 | from verl.single_controller.ray.base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup
21 | from verl.single_controller.base.worker import Worker
22 | from verl.single_controller.base.decorator import register, Dispatch, collect_all_to_all, Execute
23 | 
24 | 
25 | @ray.remote
26 | class TestActor(Worker):
27 | 
28 |     def __init__(self) -> None:
29 |         super().__init__()
30 | 
31 |     def getenv(self, key):
32 |         val = os.getenv(key, f"{key} not set")
33 |         return val
34 | 
35 | 
36 | def test_basics():
37 |     ray.init()
38 | 
39 |     # create 4 workers, each hold a GPU
40 |     resource_pool = RayResourcePool([4], use_gpu=True)
41 |     class_with_args = RayClassWithInitArgs(cls=TestActor)
42 | 
43 |     worker_group = RayWorkerGroup(resource_pool=resource_pool,
44 |                                   ray_cls_with_init=class_with_args,
45 |                                   name_prefix="worker_group_basic")
46 | 
47 |     output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE")
48 |     assert output == ["4", "4", "4", "4"]
49 | 
50 |     output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK")
51 |     assert set(output) == set(["0", "1", "2", "3"])
52 | 
53 |     ray.shutdown()
54 | 
55 | 
56 | if __name__ == '__main__':
57 |     test_basics()
58 | 


--------------------------------------------------------------------------------
/tests/ray/test_rvdz.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
18 | @ray.remote
19 | class TestWorker:
20 | 
21 |     def __init__(self, rank, world_size, group_name):
22 |         self.rank = rank
23 |         self.world_size = world_size
24 |         self.group_name = group_name
25 |         self.communicator = None
26 | 
27 |     def init(self):
28 |         from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray
29 |         self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name)
30 | 
31 |     def test(self):
32 |         if self.communicator is None:
33 |             return None
34 |         return self.communicator.rank_id()
35 | 
36 | 
37 | def test_rvdz():
38 |     ray.init()
39 | 
40 |     group_name = "test_group"
41 |     world_size = 2
42 | 
43 |     workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)]
44 | 
45 |     ray.get([worker.init.remote() for worker in workers])
46 | 
47 |     ranks = ray.get([worker.test.remote() for worker in workers])
48 | 
49 |     assert ranks == [0, 1], f"expecting [0, 1], got {ranks}"
50 | 
51 |     ray.shutdown()
52 | 


--------------------------------------------------------------------------------
/tests/sanity/check_license.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
16 | license_head_bytedance_25 = "Copyright 2025 Bytedance Ltd. and/or its affiliates"
17 | # Add custom license headers below
18 | license_head_prime = "Copyright 2024 PRIME team and/or its affiliates"
19 | 
20 | license_headers = [license_head_bytedance, license_head_bytedance_25, license_head_prime]
21 | 
22 | from pathlib import Path
23 | from argparse import ArgumentParser
24 | 
25 | if __name__ == '__main__':
26 |     parser = ArgumentParser()
27 |     parser.add_argument('--directory', '-d', required=True, type=str)
28 |     args = parser.parse_args()
29 |     directory_in_str = args.directory
30 | 
31 |     pathlist = Path(directory_in_str).glob('**/*.py')
32 |     for path in pathlist:
33 |         # because path is object not string
34 |         path_in_str = str(path.absolute())
35 |         print(path_in_str)
36 |         with open(path_in_str, 'r', encoding='utf-8') as f:
37 |             file_content = f.read()
38 | 
39 |             has_license = False
40 |             for lh in license_headers:
41 |                 if lh in file_content:
42 |                     has_license = True
43 |                     break
44 |             assert has_license, f'file {path_in_str} does not contain license'
45 | 


--------------------------------------------------------------------------------
/tests/sanity/test_import.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | def test_import():
17 |     """Sanity check: the ``verl`` package imports and exposes ``__version__``."""
18 |     import verl
19 |     print(verl.__version__)
20 | 
21 | 
22 | def test_single_controller_import():
23 |     """Sanity check: ``verl.single_controller`` imports and exposes ``__version__``."""
24 |     import verl.single_controller
25 |     print(verl.single_controller.__version__)
26 | 


--------------------------------------------------------------------------------
/tests/sft/run_sft.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | # One-step SFT smoke test on GSM8K; the local checkpoint directory is removed afterwards.
 6 | # "$@" is quoted so extra overrides containing spaces or glob characters reach torchrun intact.
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=extra_info \
12 |     data.response_key=extra_info \
13 |     +data.prompt_dict_keys=['question'] \
14 |     +data.response_dict_keys=['answer'] \
15 |     data.micro_batch_size_per_gpu=32 \
16 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
17 |     trainer.default_local_dir=$HOME/ckpts/ \
18 |     trainer.project_name=qwen2.5-sft \
19 |     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
20 |     trainer.total_training_steps=1 \
21 |     trainer.logger=['console'] \
22 |     trainer.default_hdfs_dir=null "$@"
23 | 
24 | rm -rf "$HOME/ckpts/"


--------------------------------------------------------------------------------
/tests/sft/run_sft_qwen05_peft.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_sft_qwen05_peft.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so "$@" refers to the rest
14 | shift 2
15 | 
16 | # One-step LoRA SFT smoke test on GSM8K. Positional values and "$@" are quoted so paths or
17 | # overrides containing spaces or glob characters reach torchrun intact.
18 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
19 |      -m verl.trainer.fsdp_sft_trainer \
20 |     data.train_files=$HOME/data/gsm8k/train.parquet \
21 |     data.val_files=$HOME/data/gsm8k/test.parquet \
22 |     data.prompt_key=extra_info \
23 |     data.response_key=extra_info \
24 |     optim.lr=1e-4 \
25 |     +data.prompt_dict_keys=['question'] \
26 |     +data.response_dict_keys=['answer'] \
27 |     data.micro_batch_size_per_gpu=4 \
28 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
29 |     trainer.default_local_dir="$save_path" \
30 |     trainer.project_name=gsm8k-sft \
31 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \
32 |     trainer.logger=['console'] \
33 |     trainer.total_training_steps=1 \
34 |     trainer.default_hdfs_dir=null "$@" \
35 |     model.lora_rank=32 \
36 |     model.lora_alpha=16 \
37 |     model.target_modules=all-linear
38 | 
39 |     # Or you can do this:
40 |     # model.target_modules=[q_proj,v_proj] \
41 | 


--------------------------------------------------------------------------------
/tests/sft/run_sft_qwen05_sp2_liger.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | if [ "$#" -lt 2 ]; then
 4 |     echo "Usage: run_sft_qwen05_sp2_liger.sh <nproc_per_node> <save_path> [other_configs...]"
 5 |     exit 1
 6 | fi
 7 | 
 8 | nproc_per_node=$1
 9 | save_path=$2
10 | 
11 | # Shift the arguments so "$@" refers to the rest
12 | shift 2
13 | 
14 | # One-step SFT smoke test with model.use_liger=True and ulysses_sequence_parallel_size=2.
15 | # Positional values and "$@" are quoted so paths or overrides containing spaces or glob
16 | # characters reach torchrun intact.
17 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
18 |      -m verl.trainer.fsdp_sft_trainer \
19 |     data.train_files=$HOME/data/gsm8k/train.parquet \
20 |     data.val_files=$HOME/data/gsm8k/test.parquet \
21 |     data.prompt_key=extra_info \
22 |     data.response_key=extra_info \
23 |     optim.lr=1e-4 \
24 |     +data.prompt_dict_keys=['question'] \
25 |     +data.response_dict_keys=['answer'] \
26 |     data.micro_batch_size=4 \
27 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
28 |     model.use_liger=True \
29 |     trainer.default_local_dir="$save_path" \
30 |     trainer.project_name=gsm8k-sft \
31 |     trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \
32 |     trainer.logger=['console'] \
33 |     trainer.total_training_steps=1 \
34 |     trainer.default_hdfs_dir=null "$@" \
35 |     ulysses_sequence_parallel_size=2 \
36 |     use_remove_padding=true


--------------------------------------------------------------------------------
/tests/sft/run_sft_sp_loss_match.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | # Runs tests/sft/test_sp_loss_match.py under torchrun with ulysses_sequence_parallel_size=2;
 6 | # the local checkpoint directory is removed afterwards.
 7 | # "$@" is quoted so extra overrides containing spaces or glob characters reach torchrun intact.
 8 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \
 9 |     tests/sft/test_sp_loss_match.py \
10 |     data.train_files=$HOME/data/gsm8k/train.parquet \
11 |     data.val_files=$HOME/data/gsm8k/test.parquet \
12 |     data.prompt_key=extra_info \
13 |     data.response_key=extra_info \
14 |     +data.prompt_dict_keys=['question'] \
15 |     +data.response_dict_keys=['answer'] \
16 |     data.micro_batch_size=32 \
17 |     model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
18 |     ulysses_sequence_parallel_size=2 \
19 |     use_remove_padding=True \
20 |     trainer.default_local_dir=$HOME/ckpts/ \
21 |     trainer.project_name=qwen2.5-sft \
22 |     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
23 |     trainer.total_training_steps=1 \
24 |     trainer.logger=['console'] \
25 |     trainer.default_hdfs_dir=null "$@"
26 | 
27 | rm -rf "$HOME/ckpts/"
28 | 


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_rm_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | 
16 | from transformers import AutoTokenizer
17 | from verl.utils import hf_tokenizer
18 | from verl.utils.dataset.rm_dataset import RMDataset
19 | 
20 | 
21 | def get_rm_data():
22 |     # prepare test dataset
23 |     url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet"
24 |     local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/')
25 |     local_path = os.path.join(local_folder, 'test.parquet')
26 |     os.makedirs(local_folder, exist_ok=True)
27 |     return local_path
28 | 
29 | 
30 | def test_rm_dataset():
31 |     tokenizer = hf_tokenizer("facebook/opt-1.3b")
32 |     local_path = get_rm_data()
33 |     dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512)
34 |     data = dataset[0]['input_ids']
35 |     output = tokenizer.batch_decode(data)
36 |     assert len(output) > 1
37 |     assert type(output[0]) == str
38 | 


--------------------------------------------------------------------------------
/verl/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__)))
18 | 
19 | with open(os.path.join(version_folder, 'version/version')) as f:
20 |     __version__ = f.read().strip()
21 | 
22 | from .protocol import DataProto
23 | 
24 | from .utils.logging_utils import set_basic_config
25 | import logging
26 | 
27 | set_basic_config(level=logging.WARNING)
28 | 


--------------------------------------------------------------------------------
/verl/models/README.md:
--------------------------------------------------------------------------------
 1 | # Models
 2 | Common model zoos such as huggingface/transformers struggle when using PyTorch native model parallelism. Following the design principle of vLLM, we keep simple, parallelizable, highly-optimized model implementations with packed inputs in verl.
 3 | ## Adding a New Huggingface Model
 4 | ### Step 1: Copy the model file from HF to verl
 5 | - Add a new file under verl/models/hf
 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf
 7 | 
 8 | ### Step 2: Modify the model file to use packed inputs
 9 | - Remove all the code related to inference (kv cache)
10 | - Modify the inputs to include only
11 |     - input_ids (total_nnz,)
12 |     - cu_seqlens (batch_size + 1,)
13 |     - max_seqlen_in_batch: int
14 | - Note that this requires using flash attention with causal mask.
15 | 
16 | ### Step 2.5: Add tests
17 | - Add a test to compare this version and the huggingface version
18 | - Follow the existing test infrastructure and add the tests to tests/models/hf
19 | 
20 | ### Step 3: Add a function to apply tensor parallelism
21 | - Please follow
22 |     - https://pytorch.org/docs/stable/distributed.tensor.parallel.html
23 |     - https://pytorch.org/tutorials/intermediate/TP_tutorial.html
24 | - General comments
25 |     - Tensor Parallelism in native PyTorch is NOT auto-parallelism. You specify, through configs, how model parameters and inputs/outputs are sharded. These configs are then registered as hooks that reshard the inputs/outputs before/after the model forward pass.
26 | 
27 | ### Step 4: Add a function to apply data parallelism
28 | - Please use FSDP2 APIs
29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413
30 | 
31 | ### Step 5: Add a function to apply pipeline parallelism
32 | - Comes in Pytorch 2.4
33 | - Currently only in alpha in nightly version
34 | - Check torchtitan for more details
35 | 
36 | 


--------------------------------------------------------------------------------
/verl/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_llama_megatron import (
16 |     # original model with megatron
17 |     ParallelLlamaModel,
18 |     ParallelLlamaForCausalLM,
19 |     # rmpad with megatron
20 |     ParallelLlamaForCausalLMRmPad,
21 |     ParallelLlamaForValueRmPad,
22 |     # rmpad with megatron and pipeline parallelism
23 |     ParallelLlamaForCausalLMRmPadPP,
24 |     ParallelLlamaForValueRmPadPP)
25 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelLlamaAttention
16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad
17 | from .parallel_mlp import ParallelLlamaMLP
18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm
19 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | import torch
17 | from megatron.core import ModelParallelConfig
18 | from torch import nn
19 | from transformers import LlamaConfig
20 | 
21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
22 | from verl.utils.megatron import sequence_parallel as sp_utils
23 | 
24 | 
class ParallelLlamaRMSNorm(nn.Module):
    """RMSNorm layer evaluated with apex's ``fused_rms_norm_affine`` kernel.

    Holds one learnable ``weight`` initialised to ones whose shape is derived
    from ``config.hidden_size``.
    """

    def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig):
        """
        LlamaRMSNorm is equivalent to T5LayerNorm

        Args:
            config: model config. ``hidden_size`` gives the normalized shape
                (an integer, or a shape sequence accepted by ``torch.Size``)
                and ``rms_norm_eps`` the epsilon handed to the kernel.
            megatron_config: when ``sequence_parallel`` is truthy, the weight
                is passed to ``sp_utils.mark_parameter_as_sequence_parallel``.
        """
        super().__init__()
        # Bind the name unconditionally so that a non-integer hidden_size
        # (e.g. a shape tuple) reaches torch.Size instead of failing with
        # UnboundLocalError; only a bare integer needs wrapping in a tuple.
        normalized_shape = config.hidden_size
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = torch.Size(normalized_shape)
        self.weight = nn.Parameter(torch.ones(self.normalized_shape))
        self.variance_epsilon = config.rms_norm_eps

        if megatron_config.sequence_parallel:
            sp_utils.mark_parameter_as_sequence_parallel(self.weight)

    def forward(self, hidden_states):
        """Apply the fused RMSNorm to ``hidden_states`` using this layer's weight and epsilon."""
        return fused_rms_norm_affine(input=hidden_states,
                                     weight=self.weight,
                                     normalized_shape=self.normalized_shape,
                                     eps=self.variance_epsilon,
                                     memory_efficient=True)


--------------------------------------------------------------------------------
/verl/models/qwen2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_qwen2_megatron import (
16 |     # original model with megatron
17 |     ParallelQwen2Model,
18 |     ParallelQwen2ForCausalLM,
19 |     # rmpad with megatron
20 |     ParallelQwen2ForCausalLMRmPad,
21 |     ParallelQwen2ForValueRmPad,
22 |     # rmpad with megatron and pipeline parallelism
23 |     ParallelQwen2ForCausalLMRmPadPP,
24 |     ParallelQwen2ForValueRmPadPP)
25 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelQwen2Attention
16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad
17 | from .parallel_mlp import ParallelQwen2MLP
18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm
19 | 


--------------------------------------------------------------------------------
/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numbers
16 | import torch
17 | from megatron.core import ModelParallelConfig
18 | from torch import nn
19 | from transformers import Qwen2Config
20 | 
21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine
22 | from verl.utils.megatron import sequence_parallel as sp_utils
23 | 
24 | 
class ParallelQwen2RMSNorm(nn.Module):
    """RMSNorm layer evaluated with apex's ``fused_rms_norm_affine`` kernel.

    Holds one learnable ``weight`` initialised to ones whose shape is derived
    from ``config.hidden_size``.
    """

    def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig):
        """
        Qwen2RMSNorm is equivalent to T5LayerNorm

        Args:
            config: model config. ``hidden_size`` gives the normalized shape
                (an integer, or a shape sequence accepted by ``torch.Size``)
                and ``rms_norm_eps`` the epsilon handed to the kernel.
            megatron_config: when ``sequence_parallel`` is truthy, the weight
                is passed to ``sp_utils.mark_parameter_as_sequence_parallel``.
        """
        super().__init__()
        # Bind the name unconditionally so that a non-integer hidden_size
        # (e.g. a shape tuple) reaches torch.Size instead of failing with
        # UnboundLocalError; only a bare integer needs wrapping in a tuple.
        normalized_shape = config.hidden_size
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = torch.Size(normalized_shape)
        self.weight = nn.Parameter(torch.ones(self.normalized_shape))
        self.variance_epsilon = config.rms_norm_eps

        if megatron_config.sequence_parallel:
            sp_utils.mark_parameter_as_sequence_parallel(self.weight)

    def forward(self, hidden_states):
        """Apply the fused RMSNorm to ``hidden_states`` using this layer's weight and epsilon."""
        return fused_rms_norm_affine(input=hidden_states,
                                     weight=self.weight,
                                     normalized_shape=self.normalized_shape,
                                     eps=self.variance_epsilon,
                                     memory_efficient=True)


--------------------------------------------------------------------------------
/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/weight_loader_registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
def get_weight_loader(arch: str):
    """Look up the Megatron state-dict loader registered for ``arch``.

    Args:
        arch: architecture name, e.g. ``'LlamaForCausalLM'``.

    Returns:
        The loader function registered under ``arch``.

    Raises:
        ValueError: if no loader is registered for ``arch``.
    """
    # The loader modules are imported at call time, not at module import time.
    from verl.models.llama.megatron.checkpoint_utils.llama_loader import load_state_dict_to_megatron_llama
    from verl.models.qwen2.megatron.checkpoint_utils.qwen2_loader import load_state_dict_to_megatron_qwen2

    _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = {
        'LlamaForCausalLM': load_state_dict_to_megatron_llama,
        'Qwen2ForCausalLM': load_state_dict_to_megatron_qwen2,
    }

    # Registered values are functions, so None can only mean "not registered".
    loader = _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.get(arch)
    if loader is None:
        raise ValueError(f"Model architectures {arch} are not supported for now. "
                         f"Supported architectures: {_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}")
    return loader
28 | 


--------------------------------------------------------------------------------
/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
# Directory that contains this __init__.py.
version_folder = os.path.dirname(os.path.abspath(__file__))

# The version string is read from ../version/version relative to that
# directory, with surrounding whitespace stripped.
with open(os.path.join(version_folder, os.pardir, 'version/version')) as f:
    __version__ = f.read().strip()
21 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .worker import Worker
16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool
17 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from dataclasses import dataclass
17 | from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo
18 | 
19 | 
class MegatronWorker(Worker):
    """Worker that reports Megatron model-parallel sizes and ranks.

    Both query methods read from ``megatron.core.parallel_state``, which is
    imported at call time.
    """

    def __init__(self, cuda_visible_devices=None) -> None:
        super().__init__(cuda_visible_devices)

    def get_megatron_global_info(self):
        """Return a ``DistGlobalInfo`` with the tensor-, data- and pipeline-parallel world sizes."""
        from megatron.core import parallel_state as mpu

        return DistGlobalInfo(
            tp_size=mpu.get_tensor_model_parallel_world_size(),
            dp_size=mpu.get_data_parallel_world_size(),
            pp_size=mpu.get_pipeline_model_parallel_world_size(),
        )

    def get_megatron_rank_info(self):
        """Return a ``DistRankInfo`` with this process's tensor-, data- and pipeline-parallel ranks."""
        from megatron.core import parallel_state as mpu

        return DistRankInfo(
            tp_rank=mpu.get_tensor_model_parallel_rank(),
            dp_rank=mpu.get_data_parallel_rank(),
            pp_rank=mpu.get_pipeline_model_parallel_rank(),
        )


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/ray.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
@ray.remote
class WorkerGroupRegisterCenter:
    """Ray remote class that stores a ``rank_zero_info`` value and returns it on request."""

    def __init__(self, rank_zero_info):
        # Stored as given; this class never inspects or modifies the value.
        self.rank_zero_info = rank_zero_info

    def get_rank_zero_info(self):
        """Return the ``rank_zero_info`` value supplied at construction."""
        return self.rank_zero_info
26 | 
27 | 
def create_worker_group_register_center(name, info):
    """Create a ``WorkerGroupRegisterCenter`` registered under ``name`` and holding ``info``.

    Args:
        name: passed as ``name=`` to ``WorkerGroupRegisterCenter.options``.
        info: value forwarded to the register center's constructor.

    Returns:
        Whatever ``.remote(info)`` returns for the named register center.
    """
    named_center_cls = WorkerGroupRegisterCenter.options(name=name)
    return named_center_cls.remote(info)
30 | 


--------------------------------------------------------------------------------
/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls
16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo)


--------------------------------------------------------------------------------
/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_spmd/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader
15 | 
16 | from typing import Dict
17 | 
18 | import torch.nn as nn
19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype
20 | 
21 | 
def update_hf_weight_loader():
    """Do nothing except print a notice that no HF weight loader needs updating."""
    print("no hf weight loader need to be updated")
25 | 
26 | 
def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
    """Load ``actor_weights`` into ``vllm_model``, run post-load hooks, then move the model to CUDA.

    Args:
        actor_weights: mapping whose items are handed to
            ``vllm_model.load_weights``. When the model config ties word
            embeddings, the ``"lm_head.weight"`` entry is deleted from this
            mapping in place before loading.
        vllm_model: the model that receives the weights.
    """
    assert isinstance(actor_weights, Dict)

    # Loading runs with the default torch dtype set to the dtype of the
    # model's first parameter.
    model_dtype = next(vllm_model.parameters()).dtype
    with set_default_torch_dtype(model_dtype):  # TODO
        drop_lm_head = vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights
        if drop_lm_head:
            del actor_weights["lm_head.weight"]
        vllm_model.load_weights(actor_weights.items())

    for _name, submodule in vllm_model.named_modules():
        quant_method = getattr(submodule, "quant_method", None)
        if quant_method is not None:
            quant_method.process_weights_after_loading(submodule)
        # FIXME: Remove this after Mixtral is updated
        # to use quant_method.
        if hasattr(submodule, "process_weights_after_loading"):
            submodule.process_weights_after_loading()

    # The return value is not needed: rebinding the local name would have no
    # effect outside this function.
    vllm_model.cuda()
42 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright 2023 The vLLM team.
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py
15 | 
16 | from typing import Optional
17 | 
18 | from transformers import PreTrainedTokenizer
19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup
20 | from vllm.utils import LRUCache
21 | 
22 | 
class TokenizerGroup(TokenizerGroup):
    """A group of tokenizers that can be used for LoRA adapters."""

    def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int,
                 max_input_length: Optional[int]):
        """Store an already-built tokenizer and, if LoRA is enabled, a per-adapter tokenizer cache.

        Args:
            tokenizer: tokenizer instance kept as ``self.tokenizer``.
            enable_lora: whether to allocate the LoRA tokenizer cache.
            max_num_seqs: capacity of that cache.
            max_input_length: kept as ``self.max_input_length``.
        """
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length
        self.enable_lora = enable_lora
        if enable_lora:
            self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs)
        else:
            self.lora_tokenizers = None

    # FIXME(sgm): for simplicity, we assign the special token here
    @property
    def pad_token_id(self):
        """Pad token id of the wrapped tokenizer."""
        return self.tokenizer.pad_token_id

    @property
    def eos_token_id(self):
        """End-of-sequence token id of the wrapped tokenizer."""
        return self.tokenizer.eos_token_id
41 | 


--------------------------------------------------------------------------------
/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet
3 |   prompt_key: prompt
4 |   response_key: responses
5 |   data_source_key: data_source
6 |   reward_model_key: reward_model


--------------------------------------------------------------------------------
/verl/trainer/config/generation.yaml:
--------------------------------------------------------------------------------
 1 | trainer:
 2 |   nnodes: 1
 3 |   n_gpus_per_node: 8
 4 | 
 5 | data:
 6 |   path: ~/data/rlhf/math/test.parquet
 7 |   prompt_key: prompt
 8 |   n_samples: 5
 9 |   output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
10 |   batch_size: 128
11 | 
12 | model:
13 |   path: ~/models/Qwen2-7B-Instruct
14 |   external_lib: null
15 | rollout:
16 |   name: vllm
17 |   temperature: 1.0
18 |   top_k: 50 # 0 for hf rollout, -1 for vllm rollout
19 |   top_p: 0.7
20 |   prompt_length: 1536
21 |   response_length: 512
22 |   # for vllm rollout
23 |   dtype: bfloat16 # should align with FSDP
24 |   gpu_memory_utilization: 0.5
25 |   ignore_eos: False
26 |   enforce_eager: True
27 |   free_cache_engine: True
28 |   load_format: dummy_dtensor
29 |   tensor_model_parallel_size: 1
30 |   max_num_batched_tokens: 8192
31 |   max_num_seqs: 1024
32 |   log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
33 |   log_prob_micro_batch_size_per_gpu: 8
34 |   # for hf rollout
35 |   do_sample: True


--------------------------------------------------------------------------------
/verl/trainer/config/sft_trainer.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   train_batch_size: 256
 3 |   micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu
 4 |   micro_batch_size_per_gpu: 4  # this is also val batch size
 5 |   train_files: ~/data/gsm8k/train.parquet
 6 |   val_files: ~/data/gsm8k/test.parquet
 7 |   prompt_key: question
 8 |   response_key: answer
 9 |   max_length: 1024
10 |   truncation: error
11 |   balance_dp_token: False
12 |   chat_template: null
13 | model:
14 |   partial_pretrain: ~/models/gemma-1.1-7b-it
15 |   fsdp_config:
16 |     wrap_policy:
17 |       min_num_params: 0
18 |     cpu_offload: False
19 |     offload_params: False
20 |   external_lib: null
21 |   enable_gradient_checkpointing: False
22 |   trust_remote_code: False
23 |   lora_rank: 0  # Set to positive value to enable LoRA (e.g., 32)
24 |   lora_alpha: 16  # LoRA scaling factor
25 |   target_modules: all-linear  # Target modules for LoRA adaptation
26 |   use_liger: False
27 | optim:
28 |   lr: 1e-5
29 |   betas: [0.9, 0.95]
30 |   weight_decay: 0.01
31 |   warmup_steps_ratio: 0.1
32 |   clip_grad: 1.0
33 | ulysses_sequence_parallel_size: 1
34 | use_remove_padding: False
35 | trainer:
36 |   default_local_dir: /tmp/sft_model
37 |   default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here
38 |   resume_path: null
39 |   project_name: gsm8k-sft
40 |   experiment_name: test
41 |   total_epochs: 4
42 |   total_training_steps: null
43 |   logger: ['console']
44 |   seed: 1
45 | 
46 | 


--------------------------------------------------------------------------------
/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
1 | working_dir: ./
2 | excludes: ["/.git/"]
3 | env_vars:
4 |   TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
5 |   VLLM_ATTENTION_BACKEND: "XFORMERS"


--------------------------------------------------------------------------------
/verl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from . import tokenizer
16 | from .tokenizer import *
17 | 
18 | __all__ = tokenizer.__all__


--------------------------------------------------------------------------------
/verl/utils/checkpoint/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.


--------------------------------------------------------------------------------
/verl/utils/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | from omegaconf import DictConfig
18 | 
19 | 
def update_dict_with_config(dictionary: Dict, config: DictConfig):
    """Refresh ``dictionary`` in place from same-named attributes of ``config``.

    Only keys that already exist in ``dictionary`` are looked at. A key is
    overwritten when ``hasattr(config, key)`` is true and left untouched
    otherwise; no keys are added or removed.

    Args:
        dictionary: Mapping whose existing entries may be overwritten.
        config: Object queried with ``hasattr``/``getattr`` for every key.
    """
    # Iterate over a snapshot of the keys; values are reassigned while looping.
    for name in tuple(dictionary):
        if not hasattr(config, name):
            continue
        dictionary[name] = getattr(config, name)
24 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
 1 | # Dataset Format
 2 | ## RLHF dataset
 3 | We combine all the data sources into a single parquet file. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output its answer in a particular format so that we can extract the answer.
 4 | 
 5 | Math problems
 6 | ```json
 7 | {
 8 |     "data_source": "openai/gsm8k",
 9 |     "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}],
10 |     "ability": "math",
11 |     "reward_model": {
12 |         "style": "rule",
13 |         "ground_truth": ["72"]
14 |     }
15 | }
16 | ```
17 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .rl_dataset import RLHFDataset
16 | from .rm_dataset import RMDataset
17 | from .sft_dataset import SFTDataset
18 | 


--------------------------------------------------------------------------------
/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .performance import log_gpu_memory_usage


--------------------------------------------------------------------------------
/verl/utils/debug/performance.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | import torch.distributed as dist
17 | import logging
18 | 
19 | 
def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0):
    """Report the current CUDA memory counters, prefixed by ``head``.

    The report is emitted when ``torch.distributed`` is not initialized, when
    ``rank`` is ``None``, or when the calling process's rank equals ``rank``.
    In every other case the call does nothing.

    Args:
        head: Text placed at the start of the message.
        logger: Destination logger; when ``None`` the message is printed.
        level: Logging level used with ``logger``.
        rank: Rank allowed to report, or ``None`` to report on every rank.
    """
    # Skip only when we are in a distributed run and this is a different rank.
    if dist.is_initialized() and rank is not None and dist.get_rank() != rank:
        return

    bytes_per_gib = 1024**3
    allocated_gb = torch.cuda.memory_allocated() / bytes_per_gib
    reserved_gb = torch.cuda.memory_reserved() / bytes_per_gib
    text = f'{head}, memory allocated (GB): {allocated_gb}, memory reserved (GB): {reserved_gb}'

    if logger is not None:
        logger.log(msg=text, level=level)
        return
    print(text)
31 | 


--------------------------------------------------------------------------------
/verl/utils/distributed.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for distributed training."""
15 | import os
16 | 
17 | 
def initialize_global_process_group(timeout_second=36000):
    """Initialize the default NCCL process group and select this rank's GPU.

    Args:
        timeout_second: Timeout, in seconds, handed to ``init_process_group``.

    Returns:
        ``(local_rank, rank, world_size)`` parsed as integers from the
        ``LOCAL_RANK``, ``RANK`` and ``WORLD_SIZE`` environment variables.
    """
    from datetime import timedelta
    import torch.distributed

    torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second))

    # The environment is read only after the group has been created.
    env = os.environ
    local_rank, rank, world_size = (int(env[name]) for name in ("LOCAL_RANK", "RANK", "WORLD_SIZE"))

    if torch.distributed.is_initialized():
        torch.cuda.set_device(local_rank)
    return local_rank, rank, world_size
29 | 


--------------------------------------------------------------------------------
/verl/utils/import_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Utilities to check if packages are available.
16 | We assume package availability won't change during runtime.
17 | """
18 | 
19 | from functools import cache
20 | from typing import List
21 | 
22 | 
@cache
def is_megatron_core_available():
    """Return ``True`` when ``megatron.core.parallel_state`` can be imported.

    The answer is cached after the first call, so the import is attempted once.
    """
    try:
        from megatron.core import parallel_state as _parallel_state  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
30 | 
31 | 
@cache
def is_vllm_available():
    """Return ``True`` when the ``vllm`` package can be imported.

    The answer is cached after the first call, so the import is attempted once.
    """
    try:
        import vllm as _vllm  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
39 | 
40 | 
def import_external_libs(external_libs=None):
    """Import one or more modules by dotted name for their import-time effects.

    Args:
        external_libs: ``None`` (nothing is imported), a single module name,
            or an iterable of module names (list, tuple, or another
            non-string sequence type).

    Raises:
        ImportError: propagated from ``importlib.import_module`` when a
            module cannot be imported.
    """
    if external_libs is None:
        return
    # Only a bare string is a single module name. The previous
    # ``isinstance(..., List)`` check wrapped every non-list value, so a tuple
    # of names was handed to ``import_module`` as one object and failed.
    if isinstance(external_libs, str):
        external_libs = [external_libs]
    import importlib
    for external_lib in external_libs:
        importlib.import_module(external_lib)
49 | 


--------------------------------------------------------------------------------
/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/logger/aggregate_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | A Ray logger will receive logging info from different processes.
16 | """
17 | import numbers
18 | from typing import Dict
19 | 
20 | 
def concat_dict_to_str(dict: Dict, step):
    """Render the numeric entries of ``dict`` as a single log line.

    The line starts with ``step:<step>``; every value that is a
    ``numbers.Number`` follows as ``<key>:<value>`` with three decimals.
    Non-numeric values are skipped. Fields are joined with ``' - '``.
    """
    numeric_fields = [f'{k}:{v:.3f}' for k, v in dict.items() if isinstance(v, numbers.Number)]
    return ' - '.join([f'step:{step}', *numeric_fields])
28 | 
29 | 
class LocalLogger:
    """Minimal logger that can echo metric dictionaries to stdout.

    ``remote_logger`` and ``enable_wandb`` are accepted by the constructor but
    are not used by this class.
    """

    def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False):
        self.print_to_console = print_to_console
        if not print_to_console:
            return
        print('Using LocalLogger is deprecated. The constructor API will change ')

    def flush(self):
        """Do nothing; there is no buffered output to flush."""
        return None

    def log(self, data, step):
        """Print ``data`` formatted for ``step`` when console output is enabled."""
        if not self.print_to_console:
            return
        line = concat_dict_to_str(data, step=step)
        print(line, flush=True)


--------------------------------------------------------------------------------
/verl/utils/logging_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | 
17 | 
def set_basic_config(level):
    """Set the global logging format and level through ``logging.basicConfig``.

    Per the original note, this is meant to be called when ``verl`` is imported.

    Args:
        level: Logging level passed to ``logging.basicConfig``.
    """
    log_format = '%(levelname)s:%(asctime)s:%(message)s'
    logging.basicConfig(level=level, format=log_format)
23 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/memory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | 
class MemoryBuffer:
    """Flat, zero-initialized tensor on the current CUDA device that hands out views.

    Attributes are ``numel`` (usable element count), ``numel_padded``
    (allocated element count), ``dtype`` and ``data`` (the 1-D storage).
    """

    def __init__(self, numel, numel_padded, dtype):
        self.numel = numel
        self.numel_padded = numel_padded
        self.dtype = dtype
        device = torch.cuda.current_device()
        self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=device, requires_grad=False)

    def zero(self):
        """Fill the whole buffer with zeros in place."""
        self.data.zero_()

    def get(self, shape, start_index):
        """Return a view of ``shape`` into the 1-D data beginning at ``start_index``.

        The requested range must end at or before ``numel``.
        """
        stop_index = start_index + shape.numel()
        assert stop_index <= self.numel, 'requested tensor is out of the buffer range.'
        return self.data[start_index:stop_index].view(shape)
42 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/sequence_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import torch
17 | import torch.nn.functional as F
18 | from megatron.core import parallel_state as mpu
19 | 
20 | 
def mark_parameter_as_sequence_parallel(parameter):
    """Tag ``parameter`` by setting its ``sequence_parallel`` attribute to ``True``."""
    parameter.sequence_parallel = True
23 | 
24 | 
def is_sequence_parallel_param(param):
    """Return ``param.sequence_parallel`` if that attribute exists, else ``False``."""
    return getattr(param, 'sequence_parallel', False)
27 | 
28 | 
def pad_to_sequence_parallel(unpad_tokens: torch.Tensor, sp_world_size: int = None):
    """Zero-pad dim 0 of the tokens so its length is a multiple of the sp world size.

    Args:
        unpad_tokens: (total_nnz, ...). Tokens after removing padding. When
            padding is actually required only 1-D and 2-D tensors are
            supported.
        sp_world_size: Size to align ``total_nnz`` to. When ``None`` (the
            default) it is read from
            ``mpu.get_tensor_model_parallel_world_size()``.

    Returns:
        ``unpad_tokens`` itself when ``total_nnz`` is already a multiple of
        the world size, otherwise a new tensor with zeros appended along
        dim 0.

    Raises:
        NotImplementedError: padding is required and the tensor is neither
            1-D nor 2-D.
    """
    total_nnz = unpad_tokens.shape[0]
    if sp_world_size is None:
        sp_world_size = mpu.get_tensor_model_parallel_world_size()

    # Distance to the next multiple of sp_world_size (0 when already aligned).
    pad_size = (-total_nnz) % sp_world_size
    if pad_size == 0:
        return unpad_tokens

    if unpad_tokens.ndim == 1:
        return F.pad(unpad_tokens, (0, pad_size))
    if unpad_tokens.ndim == 2:
        # F.pad lists (before, after) pairs starting from the last dim, so the
        # second pair pads dim 0.
        return F.pad(unpad_tokens, (0, 0, 0, pad_size))
    # ``ndim`` is an attribute; calling it raised TypeError instead of this error.
    raise NotImplementedError(f'Padding dim {unpad_tokens.ndim} is not supported')
55 | 


--------------------------------------------------------------------------------
/verl/utils/py_functional.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contain small python utility functions
16 | """
17 | 
18 | from typing import Dict
19 | from types import SimpleNamespace
20 | 
21 | 
def union_two_dict(dict1: Dict, dict2: Dict):
    """Merge ``dict2`` into ``dict1`` in place and return ``dict1``.

    A key present in both dictionaries must map to equal values (compared
    with ``==``); otherwise an ``AssertionError`` is raised at that key.

    Args:
        dict1: Dictionary that receives the entries and is returned.
        dict2: Dictionary whose entries are copied over.

    Returns:
        ``dict1`` after the merge.
    """
    for key in dict2:
        incoming = dict2[key]
        assert key not in dict1 or incoming == dict1[key], \
            f'{key} in meta_dict1 and meta_dict2 are not the same object'
        dict1[key] = incoming
    return dict1
39 | 
40 | 
def append_to_dict(data: Dict, new_data: Dict):
    """Append each value of ``new_data`` to the list stored under its key in ``data``.

    Keys missing from ``data`` are first created with an empty list.
    ``data`` is modified in place and nothing is returned.
    """
    for key, val in new_data.items():
        data.setdefault(key, []).append(val)
46 | 
47 | 
class NestedNamespace(SimpleNamespace):
    """``SimpleNamespace`` built from a dictionary, converting nested dicts recursively.

    Keyword arguments are applied first by ``SimpleNamespace``; the entries of
    ``dictionary`` are then set as attributes, with every ``dict`` value
    wrapped in its own ``NestedNamespace``.
    """

    def __init__(self, dictionary, **kwargs):
        super().__init__(**kwargs)
        for name, item in dictionary.items():
            attr = NestedNamespace(item) if isinstance(item, dict) else item
            setattr(self, name, attr)
57 | 


--------------------------------------------------------------------------------
/verl/utils/ray_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contains commonly used utilities for ray
16 | """
17 | 
18 | import ray
19 | 
20 | import concurrent.futures
21 | 
22 | 
def parallel_put(data_list, max_workers=None):
    """Store every item of ``data_list`` with ``ray.put`` using a thread pool.

    Args:
        data_list: Sized, ordered collection of objects to put.
        max_workers: Number of worker threads. Defaults to
            ``min(len(data_list), 16)``.

    Returns:
        A list holding the value returned by ``ray.put`` for each item, in
        the same order as ``data_list``. An empty input yields an empty list.
    """
    # The default worker count would be 0 for an empty input, which
    # ThreadPoolExecutor rejects with ValueError; there is nothing to do anyway.
    if len(data_list) == 0:
        return []

    if max_workers is None:
        max_workers = min(len(data_list), 16)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Executor.map yields results in input order, so no index bookkeeping
        # is needed to restore the original ordering.
        return list(executor.map(ray.put, data_list))
44 | 


--------------------------------------------------------------------------------
/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.2
2 | 


--------------------------------------------------------------------------------
/verl/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/critic/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for a critic
16 | """
17 | from abc import ABC, abstractmethod
18 | 
19 | import torch
20 | 
21 | from verl import DataProto
22 | 
23 | __all__ = ['BasePPOCritic']
24 | 
25 | 
class BasePPOCritic(ABC):
    """Abstract interface for a PPO critic.

    Concrete critics must implement ``compute_values`` and ``update_critic``.
    The constructor only stores ``config`` on the instance.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

    @abstractmethod
    def compute_values(self, data: DataProto) -> torch.Tensor:
        """Compute values"""

    @abstractmethod
    def update_critic(self, data: DataProto):
        """Update the critic"""
41 | 


--------------------------------------------------------------------------------
/verl/workers/reward_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 PRIME team and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive import NaiveRewardManager
16 | from .prime import PrimeRewardManager


--------------------------------------------------------------------------------
/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | The base class for reward model
16 | """
17 | 
18 | from abc import ABC, abstractmethod
19 | 
20 | from verl import DataProto
21 | 
22 | 
23 | class BasePPORewardModel(ABC):
24 | 
25 |     def __init__(self, config):
26 |         self.config = config
27 | 
28 |     @abstractmethod
29 |     def compute_reward(self, data: DataProto) -> DataProto:
30 |         """Computing reward given input_ids. The transformers should output a tensor with shape
31 |            [batch_size, sequence_length], and the value at [EOS] mask should be gathered.
32 | 
33 |         Args:
34 |             data: must contain keys "input_ids", "attention_mask" and "position_ids".
35 |                 - input_ids: [batch_size, sequence_length]
36 |                 - attention_mask: [batch_size, sequence_length]
37 |                 - position_ids: [batch_size, sequence_length]
38 | 
39 |         Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward.
40 |             Other position should have zero reward. Note that this may change in the future if we use
41 |             dense reward. So, we leave the interface for general case.
42 |             - reward: [batch_size, sequence_length].
43 | 
44 |         """
45 |         pass
46 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .naive import NaiveRollout
17 | from .hf_rollout import HFRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from typing import Iterable, Union
17 | 
18 | from verl import DataProto
19 | 
20 | __all__ = ['BaseRollout']
21 | 
22 | 
23 | class BaseRollout(ABC):
24 | 
25 |     def __init__(self):
26 |         """
27 | 
28 |         Args:
29 |             dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader
30 |             should handle when the training stops.
31 |         """
32 |         super().__init__()
33 | 
34 |     @abstractmethod
35 |     def generate_sequences(self, prompts: DataProto) -> DataProto:
36 |         """Generate sequences"""
37 |         pass
38 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from importlib.metadata import version, PackageNotFoundError
16 | 
17 | 
18 | def get_version(pkg):
19 |     try:
20 |         return version(pkg)
21 |     except PackageNotFoundError:
22 |         return None
23 | 
24 | 
25 | package_name = 'vllm'
26 | package_version = get_version(package_name)
27 | 
28 | if package_version <= '0.6.3':
29 |     vllm_mode = 'customized'
30 |     from .vllm_rollout import vLLMRollout
31 | else:
32 |     vllm_mode = 'spmd'
33 |     from .vllm_rollout_spmd import vLLMRollout
34 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available
16 | 
17 | from .base import BaseShardingManager
18 | from .fsdp_ulysses import FSDPUlyssesShardingManager
19 | 
20 | AllGatherPPModel = None
21 | 
22 | if is_megatron_core_available() and is_vllm_available():
23 |     from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
24 | elif AllGatherPPModel is not None:
25 |     pass
26 | else:
27 |     AllGatherPPModel = None
28 |     MegatronVLLMShardingManager = None
29 | 
30 | if is_vllm_available():
31 |     from .fsdp_vllm import FSDPVLLMShardingManager
32 | else:
33 |     FSDPVLLMShardingManager = None
34 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sharding manager to implement HybridEngine
16 | """
17 | 
18 | from verl import DataProto
19 | 
20 | 
21 | class BaseShardingManager:
22 | 
23 |     def __enter__(self):
24 |         pass
25 | 
26 |     def __exit__(self, exc_type, exc_value, traceback):
27 |         pass
28 | 
29 |     def preprocess_data(self, data: DataProto) -> DataProto:
30 |         return data
31 | 
32 |     def postprocess_data(self, data: DataProto) -> DataProto:
33 |         return data
34 | 


--------------------------------------------------------------------------------