├── .github ├── dependabot.yml └── workflows │ ├── dataset.yml │ ├── e2e_digit_completion.yml │ ├── e2e_gsm8k.yml │ ├── e2e_gsm8k_megatron.yml │ ├── e2e_lora.yml │ ├── e2e_sft.yml │ ├── model.yml │ ├── ray_test.yml │ ├── sandbox.yml │ ├── sanity.yml │ ├── scorecard.yml │ ├── vllm.yml │ └── yapf_format.yml ├── .gitignore ├── .readthedocs.yaml ├── .style.yapf ├── LICENSE ├── Notice.txt ├── Qwen2.5-Eval ├── README.md └── evaluation │ ├── LICENSE │ ├── data │ ├── aime24 │ │ └── test.jsonl │ ├── aime24x8 │ │ └── test.jsonl │ ├── aime25 │ │ └── test.jsonl │ ├── aime25x8 │ │ └── test.jsonl │ ├── amc23 │ │ └── test.jsonl │ ├── amc23x8 │ │ └── test.jsonl │ ├── aqua │ │ └── test.jsonl │ ├── asdiv │ │ └── test.jsonl │ ├── carp_en │ │ ├── demo.json │ │ └── test.jsonl │ ├── cmath │ │ └── test.jsonl │ ├── cn_middle_school │ │ └── test.jsonl │ ├── college_math │ │ └── test.jsonl │ ├── eval_rm_maj_example │ │ └── math_cot_100.jsonl │ ├── gaokao2023en │ │ └── test.jsonl │ ├── gaokao2024_I │ │ └── test.jsonl │ ├── gaokao2024_II │ │ └── test.jsonl │ ├── gaokao2024_mix │ │ └── test.jsonl │ ├── gaokao_math_cloze │ │ └── test.jsonl │ ├── gaokao_math_qa │ │ └── test.jsonl │ ├── gsm8k │ │ ├── test.jsonl │ │ └── train.jsonl │ ├── math │ │ ├── test.jsonl │ │ └── train.jsonl │ ├── math500 │ │ └── test.jsonl │ ├── mawps │ │ ├── addsub.jsonl │ │ ├── multiarith.jsonl │ │ ├── singleeq.jsonl │ │ ├── singleop.jsonl │ │ └── test.jsonl │ ├── minerva_math │ │ ├── README.md │ │ └── test.jsonl │ ├── mmlu_stem │ │ └── test.jsonl │ ├── olympiadbench │ │ ├── test.json │ │ └── test.jsonl │ ├── phy1 │ │ └── test.jsonl │ ├── sat_math │ │ └── test.jsonl │ ├── svamp │ │ └── test.jsonl │ └── tabmwp │ │ └── test.jsonl │ ├── data_loader.py │ ├── evaluate.py │ ├── examples.py │ ├── grader.py │ ├── latex2sympy │ ├── .coveragerc │ ├── .gitignore │ ├── LICENSE.txt │ ├── PS.g4 │ ├── README.md │ ├── __init__.py │ ├── antlr-4.11.1-complete.jar │ ├── asciimath_printer.py │ ├── description.txt │ ├── dev-requirements.in │ 
├── dev-requirements.txt │ ├── gen │ │ ├── PS.interp │ │ ├── PS.tokens │ │ ├── PSLexer.interp │ │ ├── PSLexer.py │ │ ├── PSLexer.tokens │ │ ├── PSListener.py │ │ ├── PSParser.py │ │ └── __init__.py │ ├── icon.png │ ├── latex2sympy2.py │ ├── requirements.in │ ├── requirements.txt │ ├── sandbox │ │ ├── linalg_equations.py │ │ ├── linalg_span.py │ │ ├── matrix.py │ │ ├── matrix_placeholders.py │ │ ├── sandbox.py │ │ ├── sandbox_equality.py │ │ ├── sectan.py │ │ └── vector.py │ ├── scripts │ │ ├── compile.sh │ │ ├── coverage-ci.sh │ │ ├── coverage.sh │ │ ├── pre-commit │ │ ├── pre-push │ │ ├── publish.sh │ │ ├── setup-hooks.sh │ │ ├── setup.sh │ │ └── test.sh │ ├── setup.cfg │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ ├── abs_test.py │ │ ├── all_bad_test.py │ │ ├── all_good_test.py │ │ ├── atom_expr_test.py │ │ ├── binomial_test.py │ │ ├── ceil_test.py │ │ ├── complex_test.py │ │ ├── context.py │ │ ├── exp_test.py │ │ ├── floor_test.py │ │ ├── gcd_test.py │ │ ├── greek_test.py │ │ ├── grouping_test.py │ │ ├── lcm_test.py │ │ ├── left_right_cdot_test.py │ │ ├── linalg_test.py │ │ ├── max_test.py │ │ ├── min_test.py │ │ ├── mod_test.py │ │ ├── overline_test.py │ │ ├── pi_test.py │ │ ├── trig_test.py │ │ └── variable_test.py │ ├── math_eval.py │ ├── math_utils.py │ ├── model_utils.py │ ├── parser.py │ ├── python_executor.py │ ├── requirements.txt │ ├── rm_maj_eval.py │ ├── sh │ ├── eval_all_math.sh │ └── eval_one_experiment_all_ckpts.sh │ ├── trajectory.py │ └── utils.py ├── README.md ├── data ├── acc_step_500.json ├── data_selection.py ├── data_selection.sh ├── deepscaler_dataset.py ├── test │ └── math500.parquet └── train │ └── one_shot_rlvr │ ├── dsr_sub.parquet │ ├── merge_pi1_pi13_r128.parquet │ ├── merge_pi1_pi2_pi13_pi1209_r128.parquet │ ├── pi1209_r128.parquet │ ├── pi13_r128.parquet │ ├── pi1_r128.parquet │ └── pi2_r128.parquet ├── docker ├── Dockerfile.ngc.vllm └── Dockerfile.vemlp.vllm.te ├── docs ├── Makefile ├── README.md ├── README_vllm0.7.md ├── 
_static │ └── logo.png ├── advance │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ └── placement.rst ├── conf.py ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ └── ppo_code_architecture.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ └── perf_tuning.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── start │ ├── install.rst │ └── quickstart.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ └── ray_trainer.rst ├── examples ├── data_preprocess │ ├── aime_val_dataset.py │ ├── full_hh_rlhf.py │ ├── gsm8k.py │ ├── hellaswag.py │ └── math_dataset.py ├── generation │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ └── run_qwen2-7b_seq_balance.sh ├── ppo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh ├── sft │ └── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh ├── slurm │ └── ray_on_slurm.slurm └── split_placement │ ├── README.md │ ├── config │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── patches └── megatron_v4.patch ├── pyproject.toml ├── requirements.txt ├── requirements_train.txt ├── scripts ├── format.sh ├── model_merger.py └── train │ ├── 
training_1.5b_dsr_sub.sh │ └── training_1.5b_pi1_r128.sh ├── setup.py ├── tests ├── __init__.py ├── checkpoint │ └── test_fsdp_ckpt.py ├── distro │ └── requirements.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ ├── create_dataset.py │ │ │ ├── test.parquet │ │ │ └── train.parquet │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── run_deepseek_megatron.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_qwen_megatron.sh │ ├── run_ray_trainer.sh │ └── run_ray_trainer_rmpad.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── rollout │ ├── run_fsdp_vllm.py │ ├── test_vllm_hf_loader.py │ └── test_vllm_spmd.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ ├── run_sft.sh │ ├── run_sft_qwen05_peft.sh │ ├── 
run_sft_qwen05_sp2_liger.sh │ ├── run_sft_sp_loss_match.sh │ └── test_sp_loss_match.py ├── utility │ └── test_tensor_dict_utilities.py └── verl │ └── utils │ └── dataset │ ├── test_rl_dataset.py │ ├── test_rm_dataset.py │ └── test_sft_dataset.py └── verl ├── __init__.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── qwen2 │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── qwen2_loader.py │ │ └── qwen2_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_qwen2_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ └── qwen2.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ ├── base.py │ └── megatron.py ├── third_party ├── __init__.py └── vllm │ ├── __init__.py │ ├── vllm_spmd │ ├── __init__.py │ └── dtensor_weight_loaders.py │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── 
llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_evaluation_all_checkpoints.py ├── main_generation.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_manager.py │ └── fsdp_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── performance.py │ └── trajectory_tracker.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── optimizer_config.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ 
├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── deepscaler.py │ ├── gsm8k.py │ ├── math.py │ ├── prime_code │ │ ├── __init__.py │ │ ├── testing_util.py │ │ └── utils.py │ ├── prime_math │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py │ └── utils │ │ └── utils.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py └── ulysses.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_manager ├── __init__.py ├── naive.py └── prime.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ ├── vllm_rollout.py │ └── vllm_rollout_spmd.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | ## Enabled the dependabot to check the dependencies of the project 2 | ## Dependabot will open pull requests to update dependencies automatically 3 | 4 | version: 2 5 | updates: 6 | - package-ecosystem: pip 7 | directory: "/" 8 | schedule: 9 | interval: weekly -------------------------------------------------------------------------------- /.github/workflows/dataset.yml: -------------------------------------------------------------------------------- 1 | name: dataset 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/dataset.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - 
"**/*.py" 17 | - .github/workflows/dataset.yml 18 | 19 | # Declare permissions just read content. 20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | ray: 25 | runs-on: [self-hosted, gpu] 26 | steps: 27 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 28 | with: 29 | fetch-depth: 0 30 | - name: Install the current repository 31 | run: | 32 | pip install -e .[test] --user 33 | - name: Running dataset tests 34 | run: | 35 | [ ! -d "$HOME/verl-data" ] && git clone --depth 1 https://github.com/eric-haibin-lin/verl-data ~/verl-data 36 | pytest -s -x tests/verl 37 | - name: Running ray test using cupy (move it to L20 when dockerfile ready) 38 | run: | 39 | cd tests/ray 40 | pytest -s -x test_rvdz.py -------------------------------------------------------------------------------- /.github/workflows/e2e_digit_completion.yml: -------------------------------------------------------------------------------- 1 | name: e2e_digit_completion 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/e2e_digit_completion.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/e2e_digit_completion.yml 18 | - "tests/e2e/*.sh" 19 | 20 | # Declare permissions just read content. 
21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | e2e_digit_completion: 26 | runs-on: [self-hosted, l20-0] 27 | env: 28 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 29 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 30 | NO_PROXY: "localhost,127.0.0.1" 31 | HF_HUB_ENABLE_HF_TRANSFER: 1 32 | container: 33 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 34 | options: --gpus all --shm-size=10g 35 | steps: 36 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 37 | with: 38 | fetch-depth: 0 39 | - name: Install the current repository 40 | run: | 41 | pip3 install hf_transfer 42 | pip3 install -e .[test] 43 | - name: Running digit completion e2e training tests on 8 L20 GPUs 44 | run: | 45 | ray stop --force 46 | bash tests/e2e/run_ray_trainer.sh 47 | -------------------------------------------------------------------------------- /.github/workflows/e2e_gsm8k_megatron.yml: -------------------------------------------------------------------------------- 1 | name: e2e_gsm8k_megatron 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/e2e_gsm8k_megatron.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/e2e_gsm8k_megatron.yml 18 | - "tests/e2e/*.sh" 19 | 20 | # Declare permissions just read content. 
21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | e2e_gsm8k_megatron: 26 | runs-on: [self-hosted, l20-0] 27 | env: 28 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 29 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 30 | NO_PROXY: "localhost,127.0.0.1" 31 | HF_HUB_ENABLE_HF_TRANSFER: 1 32 | container: 33 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 34 | options: --gpus all --shm-size=10g 35 | steps: 36 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 37 | with: 38 | fetch-depth: 0 39 | - name: Install the current repository 40 | run: | 41 | pip3 install hf_transfer 42 | pip3 install -e .[test] 43 | - name: Prepare gsm8k dataset 44 | run: | 45 | python3 examples/data_preprocess/gsm8k.py 46 | - name: Running gsm8k e2e training tests on 8 L20 GPUs with Megatron (Deepseek) 47 | run: | 48 | ray stop --force 49 | [ ! -d "$HOME/Megatron-LM" ] && git clone -b core_v0.4.0_verl https://github.com/eric-haibin-lin/Megatron-LM $HOME/Megatron-LM 50 | export PYTHONPATH=$PYTHONPATH:$HOME/Megatron-LM 51 | bash tests/e2e/run_deepseek_megatron.sh 52 | - name: Running gsm8k e2e training tests on 8 L20 GPUs with Megatron (Qwen) 53 | run: | 54 | ray stop --force 55 | export PYTHONPATH=$PYTHONPATH:$HOME/Megatron-LM 56 | bash tests/e2e/run_qwen_megatron.sh -------------------------------------------------------------------------------- /.github/workflows/e2e_lora.yml: -------------------------------------------------------------------------------- 1 | name: e2e_lora 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/e2e_lora.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/e2e_lora.yml 18 | - "tests/e2e/*.sh" 19 | 20 | # Declare permissions just read content. 
21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | e2e_lora: 26 | runs-on: [self-hosted, l20-1] 27 | env: 28 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 29 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 30 | NO_PROXY: "localhost,127.0.0.1" 31 | HF_HUB_ENABLE_HF_TRANSFER: 1 32 | container: 33 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 34 | options: --gpus all --shm-size=10g 35 | steps: 36 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 37 | with: 38 | fetch-depth: 0 39 | - name: Install the current repository 40 | run: | 41 | pip3 install hf_transfer peft 42 | pip3 install -e .[test] 43 | - name: Prepare gsm8k dataset 44 | run: | 45 | ray stop --force 46 | python3 examples/data_preprocess/gsm8k.py 47 | - name: Running gsm8k e2e training tests with LoRA 48 | run: | 49 | ray stop --force 50 | bash tests/sft/run_sft_qwen05_peft.sh 8 $HOME/ckpts/ 51 | rm -rf $HOME/ckpts/* -------------------------------------------------------------------------------- /.github/workflows/model.yml: -------------------------------------------------------------------------------- 1 | name: model_rmpad 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/model.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/model.yml 18 | 19 | # Declare permissions just read content. 
20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | model_rmpad: 25 | runs-on: [self-hosted, l20-1] 26 | env: 27 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 28 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 29 | NO_PROXY: "localhost,127.0.0.1" 30 | HF_HUB_ENABLE_HF_TRANSFER: 1 31 | container: 32 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 33 | options: --gpus all --shm-size=10g 34 | steps: 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 36 | with: 37 | fetch-depth: 0 38 | - name: Install the current repository and upgrade to latest transformers/flash_attn 39 | run: | 40 | pip3 install -e .[test] 41 | pip3 install --upgrade transformers 42 | - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8 43 | run: | 44 | pytest -s tests/model/test_transformer.py 45 | - name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn 46 | run: | 47 | pip3 install --upgrade flash_attn --no-build-isolation 48 | pytest -s tests/model/test_transformer.py 49 | - name: Running FSDP checkpoint tests on 8 L20 GPUs 50 | run: | 51 | pip3 install hf_transfer 52 | torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py 53 | -------------------------------------------------------------------------------- /.github/workflows/ray_test.yml: -------------------------------------------------------------------------------- 1 | name: ray 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/ray_test.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/ray_test.yml 18 | 19 | # Declare permissions just read content. 
20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | ray: 25 | runs-on: [self-hosted, l20-0] 26 | env: 27 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 28 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 29 | NO_PROXY: "localhost,127.0.0.1" 30 | HF_HUB_ENABLE_HF_TRANSFER: 1 31 | container: 32 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 33 | options: --gpus all --shm-size=10g 34 | steps: 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 36 | with: 37 | fetch-depth: 0 38 | - name: Install the current repository 39 | run: | 40 | pip install hf_transfer 41 | pip install -e .[test] 42 | pip install --upgrade "ray>=2.40.0" 43 | - name: Running ray tests that need 8 GPUs 44 | run: | 45 | cd tests/ray 46 | pytest -s -x --ignore=test_check_worker_alive.py --ignore=test_rvdz.py . 47 | -------------------------------------------------------------------------------- /.github/workflows/sandbox.yml: -------------------------------------------------------------------------------- 1 | name: sandbox 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/sandbox.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/sandbox.yml 18 | 19 | # Declare permissions just read content. 
20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | sandbox: 25 | runs-on: [self-hosted, l20-0] 26 | env: 27 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 28 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 29 | NO_PROXY: "localhost,127.0.0.1" 30 | HF_HUB_ENABLE_HF_TRANSFER: 1 31 | container: 32 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 33 | options: --gpus all --shm-size=10g 34 | steps: 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 36 | with: 37 | fetch-depth: 0 38 | - name: Install the current repository 39 | run: | 40 | pip3 install hf_transfer 41 | pip3 install -e .[test,prime] 42 | pip3 install vllm==0.5.4 43 | - name: Running sandbox tests on 8 L20 GPUs 44 | run: | 45 | cd tests/sandbox 46 | pytest -s -x . 47 | -------------------------------------------------------------------------------- /.github/workflows/sanity.yml: -------------------------------------------------------------------------------- 1 | name: sanity 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/sanity.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/sanity.yml 18 | 19 | # Declare permissions just read content. 
20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | sanity: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | python-version: ["3.10"] 29 | steps: 30 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | - name: Install the current repository 36 | run: | 37 | pip install -e .[test] 38 | - name: Run sanity test 39 | run: | 40 | pytest -s -x tests/sanity 41 | - name: Run utility test 42 | run: | 43 | pytest -s -x tests/utility 44 | - name: Run license test 45 | run: | 46 | python3 tests/sanity/check_license.py --directory . 47 | - name: Run dependency test 48 | run: | 49 | pip install tomli 50 | pytest -s -x tests/distro/requirements.py 51 | -------------------------------------------------------------------------------- /.github/workflows/vllm.yml: -------------------------------------------------------------------------------- 1 | name: vllm 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/vllm.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/vllm.yml 18 | 19 | # Declare permissions just read content. 
20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | vllm: 25 | runs-on: [self-hosted, l20-0] 26 | env: 27 | HTTP_PROXY: ${{ secrets.PROXY_HTTP }} 28 | HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} 29 | NO_PROXY: "localhost,127.0.0.1" 30 | HF_HUB_ENABLE_HF_TRANSFER: 1 31 | container: 32 | image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 33 | options: --gpus all --shm-size=10g 34 | steps: 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 36 | with: 37 | fetch-depth: 0 38 | - name: Install the current repository 39 | run: | 40 | pip3 install hf_transfer 41 | pip3 install -e .[test] 42 | pip3 install vllm==0.5.4 43 | - name: Running vllm tests on 8 L20 GPUs 44 | run: | 45 | cd tests/rollout 46 | torchrun --standalone --nnodes=1 --nproc_per_node=8 $(which pytest) -s test_vllm_hf_loader.py 47 | - name: Test the latest vLLM 48 | run: | 49 | pip3 install --upgrade vllm 50 | cd tests/rollout 51 | torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_vllm_spmd.py 52 | -------------------------------------------------------------------------------- /.github/workflows/yapf_format.yml: -------------------------------------------------------------------------------- 1 | name: yapf 2 | 3 | on: 4 | # Trigger the workflow on push or pull request, 5 | # but only for the main branch 6 | push: 7 | branches: 8 | - main 9 | paths: 10 | - "**/*.py" 11 | - .github/workflows/yapf_format.yml 12 | pull_request: 13 | branches: 14 | - main 15 | paths: 16 | - "**/*.py" 17 | - .github/workflows/yapf_format.yml 18 | 19 | # Declare permissions just read content. 
20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | yapf: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | python-version: ["3.12"] 29 | steps: 30 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 31 | # - name: checkout 32 | # run: | 33 | # commits=${{ github.event.pull_request.commits }} 34 | # if [[ -n "$commits" ]]; then 35 | # # Prepare enough depth for diffs with main 36 | # git fetch --depth="$(( commits + 1 ))" 37 | # fi 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | - name: Install dependencies 43 | run: | 44 | python -m pip install --upgrade pip 45 | pip install --upgrade yapf 46 | pip install toml==0.10.2 47 | - name: Running yapf 48 | run: | 49 | yapf -r -vv -d --style=./.style.yapf verl tests examples 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.pt 2 | **/checkpoints 3 | **/wget-log 4 | **/_build/ 5 | **/*.ckpt 6 | **/outputs 7 | **/*.tar.gz 8 | **/playground 9 | **/wandb 10 | /scores/ 11 | accuracy/* 12 | !accuracy/acc_baseline/ 13 | !accuracy/acc_baseline/* 14 | !checkpoints/*/*/eval/figures 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | dataset/* 21 | tensorflow/my_graph/* 22 | .idea/ 23 | # C extensions 24 | *.so 25 | 26 | # Distribution / packaging 27 | .Python 28 | env/ 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | .eggs/ 35 | lib/ 36 | lib64/ 37 | parts/ 38 | sdist/ 39 | var/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *,cover 63 | .hypothesis/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # IPython Notebook 87 | .ipynb_checkpoints 88 | 89 | # pyenv 90 | .python-version 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # dotenv 96 | .env 97 | 98 | # virtualenv 99 | venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # vscode 109 | .vscode 110 | 111 | # Mac 112 | .DS_Store 113 | 114 | # output logs 115 | tests/e2e/toy_examples/deepspeed/synchronous/output.txt 116 | 117 | # vim 118 | *.swp 119 | 120 | # ckpt 121 | *.lock 122 | 123 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.8" 10 | 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | python: 15 | install: 16 | - requirements: docs/requirements-docs.txt -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true 
-------------------------------------------------------------------------------- /Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Zhibin Gou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/data/minerva_math/README.md: -------------------------------------------------------------------------------- 1 | MIT OpenCourseWare: 2 | - Solving Quantitative Reasoning Problems with Language Models. 
https://openreview.net/forum?id=IFXTZERXdM7 3 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/data/phy1/test.jsonl: -------------------------------------------------------------------------------- 1 | {"problem":"The pressure \\( P \\) exerted by wind on a sail varies jointly as the area \\( A \\) of the sail and the cube of the wind's velocity \\( V \\). When the velocity is \\( 8 \\) miles per hour, the pressure on a sail of \\( 2 \\) square feet is \\( 4 \\) pounds. Find the wind velocity when the pressure on \\( 4 \\) square feet of sail is \\( 32 \\) pounds. Let's think step by step and output the final answer within \\boxed{}.", "solution":"", "answer":"12.8", "url":"na", "question":"The pressure \\( P \\) exerted by wind on a sail varies jointly as the area \\( A \\) of the sail and the cube of the wind's velocity \\( V \\). When the velocity is \\( 8 \\) miles per hour, the pressure on a sail of \\( 2 \\) square feet is \\( 4 \\) pounds. Find the wind velocity when the pressure on \\( 4 \\) square feet of sail is \\( 32 \\) pounds. Let's think step by step and output the final answer within \\boxed{}."} 2 | {"problem":"Given that circle $C$ passes through points $P(0,-4)$, $Q(2,0)$, and $R(3,-1)$. \n$(1)$ Find the equation of circle $C$. \n$(2)$ If the line $l: mx+y-1=0$ intersects circle $C$ at points $A$ and $B$, and $|AB|=4$, find the value of $m$. Let's think step by step and output the final answer within \\boxed{}.", "solution":"", "answer":"\\frac{4}{3}", "url":"", "question":"Given that circle $C$ passes through points $P(0,-4)$, $Q(2,0)$, and $R(3,-1)$. \n$(1)$ Find the equation of circle $C$. \n$(2)$ If the line $l: mx+y-1=0$ intersects circle $C$ at points $A$ and $B$, and $|AB|=4$, find the value of $m$. 
Let's think step by step and output the final answer within \\boxed{}."} -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | include = 5 | latex2sympy.py 6 | omit = 7 | sandbox/* 8 | gen/* 9 | asciimath_printer.py 10 | setup.py 11 | __init__.py 12 | 13 | [report] 14 | # Regexes for lines to exclude from consideration 15 | exclude_lines = 16 | # Have to re-enable the standard pragma 17 | pragma: no cover 18 | 19 | # Don't complain about missing debug-only code: 20 | def __repr__ 21 | if self\.debug 22 | 23 | # Don't complain if tests don't hit defensive assertion code: 24 | raise AssertionError 25 | raise NotImplementedError 26 | 27 | # Don't complain if non-runnable code isn't run: 28 | if 0: 29 | if __name__ == .__main__.: 30 | 31 | ignore_errors = True 32 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright 2016, latex2sympy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
"""AsciiMath-flavoured string printer built on SymPy's ``StrPrinter``."""
from sympy.core import S
from sympy.printing.precedence import PRECEDENCE, precedence
from sympy.printing.str import StrPrinter


class AsciiMathPrinter(StrPrinter):
    """Render selected SymPy nodes using AsciiMath notation.

    Only the node types overridden below are customised; every other
    expression falls back to the inherited ``StrPrinter`` behaviour.
    """

    def _print_Limit(self, expr):
        """Print a limit as ``lim_(z -> z0) e``.

        The approach direction stored in the expression is not rendered.
        """
        # Limit.args is (e, z, z0, dir); the original two-name unpacking
        # raised ValueError and the target point z0 was never printed.
        e, z, z0 = expr.args[:3]
        return "lim_(%s -> %s) %s" % (self._print(z), self._print(z0), self._print(e))

    def _print_Integral(self, expr):
        """Print an integral as ``int_(a)^(b) f dx``.

        Bounds are emitted only for the parts a limit actually carries:
        ``(x, a, b)`` -> ``int_(a)^(b)``, ``(x, b)`` -> ``int^(b)``,
        ``(x,)`` -> ``int``.  Multiple limits produce one integral sign per
        limit, outermost first, followed by the differentials innermost first.
        """
        integrand = expr.args[0]
        limits = expr.args[1:]

        signs = []
        # The first limit is integrated first (innermost), so the signs are
        # written in reverse order.
        for lim in reversed(limits):
            if len(lim) == 3:
                signs.append("int_(%s)^(%s)" % (self._print(lim[1]), self._print(lim[2])))
            elif len(lim) == 2:
                signs.append("int^(%s)" % self._print(lim[1]))
            else:
                signs.append("int")

        differentials = ["d%s" % self._print(lim[0]) for lim in limits]
        return "%s %s %s" % (" ".join(signs), self._print(integrand), " ".join(differentials))

    def _big_operator(self, name, expr):
        """Shared formatter for ``Sum``/``Product``: ``name_(i = a)^(b) body``.

        One operator is written per limit, outermost (last limit) first.
        """
        heads = [
            "%s_(%s = %s)^(%s)" % (name, self._print(index), self._print(lower), self._print(upper))
            for index, lower, upper in reversed(expr.args[1:])
        ]
        return "%s %s" % (" ".join(heads), self._print(expr.args[0]))

    def _print_Sum(self, expr):
        """Print a summation as ``sum_(i = a)^(b) body``."""
        return self._big_operator("sum", expr)

    def _print_Product(self, expr):
        """Print a product as ``prod_(i = a)^(b) body``."""
        return self._big_operator("prod", expr)

    def _print_factorial(self, expr):
        """Print ``factorial(arg)`` as ``arg!``, parenthesising compound arguments."""
        # Without parentheses factorial(x + 1) would read as "x + 1!".
        return "%s!" % self.parenthesize(expr.args[0], PRECEDENCE["Func"])

    def _print_Derivative(self, expr):
        """Print a derivative as ``d/dx f`` (or ``d^(n)/dx^(n) f`` for order n).

        Several differentiation variables yield one operator per variable,
        with the variable applied last written first.
        """
        operators = []
        # variable_count holds (variable, order) pairs; printing the raw pair
        # produced output such as "d/d(x, 1) f".
        for var, count in reversed(expr.variable_count):
            name = self._print(var)
            if count == 1:
                operators.append("d/d%s" % name)
            else:
                order = self._print(count)
                operators.append("d^(%s)/d%s^(%s)" % (order, name, order))
        return "%s %s" % (" ".join(operators), self._print(expr.expr))

    def _print_Abs(self, expr):
        """Print an absolute value as ``|arg|``."""
        return "|%s|" % self._print(expr.args[0])

    def _print_Equality(self, expr):
        """Print an equality as ``lhs = rhs``."""
        return "%s = %s" % (self._print(expr.args[0]), self._print(expr.args[1]))

    def _print_Pow(self, expr):
        """Print a power, special-casing square roots and reciprocals.

        ``b**(1/2)`` -> ``sqrt(b)``, ``b**(-1/2)`` -> ``1/sqrt(b)``,
        ``b**-1`` -> ``1/b``, anything else -> ``b^(exp)``.
        """
        # Inside sqrt(...) the base is already delimited, so no extra
        # parentheses are needed there.
        if expr.exp is S.Half:
            return "sqrt(%s)" % self._print(expr.base)
        if -expr.exp is S.Half:
            return "1/sqrt(%s)" % self._print(expr.base)

        # Elsewhere a compound base must be wrapped, otherwise (x + 1)**2
        # would print as "x + 1^(2)".
        base = self.parenthesize(expr.base, precedence(expr))
        if expr.exp is -S.One:
            return "1/%s" % base
        return "%s^(%s)" % (base, self._print(expr.exp))
9 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile dev-requirements.in 6 | # 7 | # via -r dev-requirements.in 8 | antlr4-python3-runtime==4.11.1 9 | # via 10 | # -r requirements.txt 11 | # latex2sympy2 12 | atomicwrites==1.3.0 13 | # via pytest 14 | attrs==19.3.0 15 | # via pytest 16 | autopep8==1.4.4 17 | # via -r dev-requirements.in 18 | click==7.0 19 | # via pip-tools 20 | coverage==4.5.4 21 | # via pytest-cov 22 | more-itertools==7.2.0 23 | # via pytest 24 | mpmath==1.3.0 25 | # via 26 | # -r requirements.txt 27 | # sympy 28 | packaging==19.2 29 | # via pytest 30 | pip-tools==4.2.0 31 | # via -r dev-requirements.in 32 | pluggy==0.13.0 33 | # via pytest 34 | py==1.8.0 35 | # via pytest 36 | pycodestyle==2.5.0 37 | # via 38 | # -r dev-requirements.in 39 | # autopep8 40 | pyparsing==2.4.4 41 | # via packaging 42 | pytest==5.2.2 43 | # via 44 | # -r dev-requirements.in 45 | # pytest-cov 46 | pytest-cov==2.8.1 47 | # via -r dev-requirements.in 48 | six==1.13.0 49 | # via 50 | # packaging 51 | # pip-tools 52 | sympy==1.12 53 | # via 54 | # -r requirements.txt 55 | # latex2sympy2 56 | wcwidth==0.1.7 57 | # via pytest 58 | 59 | # THIS MUST BE MAINTAINED AS-IS 60 | -e . 
-------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/Qwen2.5-Eval/evaluation/latex2sympy/gen/__init__.py -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/Qwen2.5-Eval/evaluation/latex2sympy/icon.png -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/requirements.in: -------------------------------------------------------------------------------- 1 | sympy 2 | antlr4-python3-runtime 3 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | antlr4-python3-runtime==4.11.1 8 | # via -r requirements.in 9 | mpmath==1.3.0 10 | # via sympy 11 | sympy==1.12 12 | # via -r requirements.in 13 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/sandbox/linalg_equations.py: -------------------------------------------------------------------------------- 1 | from latex2sympy import process_sympy 2 | import sys 3 | sys.path.append("..") 4 | 5 | # latex = "2\\begin{pmatrix}1&1&1\\\\0&1&1\\\\0&0&1\\end{pmatrix}\\begin{pmatrix}1&1&1\\\\0&1&1\\\\0&0&1\\end{pmatrix}" 6 | latex = "\\frac{a^{2} \\left(3 \\pi - 4 \\sin{\\left(\\pi 
\\right)} + \\frac{\\sin{\\left(2 \\pi \\right)}}{2}\\right)}{2}" 7 | math = process_sympy(latex) 8 | 9 | print(type(math)) 10 | print("latex: %s to math: %s" % (latex, math)) 11 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/sandbox/linalg_span.py: -------------------------------------------------------------------------------- 1 | from latex2sympy import process_sympy 2 | import sys 3 | sys.path.append("..") 4 | 5 | latex = "\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix}" 6 | math = process_sympy(latex) 7 | print("latex: %s to math: %s" % (latex, math)) 8 | 9 | latex = "\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix},\\begin{pmatrix}4\\\\3\\\\1\\end{pmatrix}" 10 | math = process_sympy(latex) 11 | print("latex: %s to math: %s" % (latex, math)) 12 | 13 | latex = "[\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix},\\begin{pmatrix}4\\\\3\\\\1\\end{pmatrix}]" 14 | math = process_sympy(latex) 15 | print("latex: %s to math: %s" % (latex, math)) 16 | 17 | latex = "\\left\\{\\begin{pmatrix}1\\\\2\\\\3\\end{pmatrix},\\begin{pmatrix}4\\\\3\\\\1\\end{pmatrix}\\right\\}" 18 | math = process_sympy(latex) 19 | print("latex: %s to math: %s" % (latex, math)) 20 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/sandbox/matrix.py: -------------------------------------------------------------------------------- 1 | from latex2sympy import process_sympy 2 | from sympy import * 3 | import sys 4 | sys.path.append("..") 5 | 6 | theta = Symbol('theta', real=True) 7 | 8 | latex = "\\begin{matrix}1&2\\\\3&4\\end{matrix}" 9 | math = process_sympy(latex) 10 | print("latex: %s to math: %s" % (latex, math)) 11 | 12 | latex = "\\begin{matrix}1&2\\\\3&4\\\\5&6\\end{matrix}" 13 | math = process_sympy(latex) 14 | print("latex: %s to math: %s" % (latex, math)) 15 | 16 | latex = "\\begin{matrix}1&2&3\\\\4&5&6\\\\7&8&9\\end{matrix}" 17 | math = process_sympy(latex) 18 | 
print("latex: %s to math: %s" % (latex, math)) 19 | 20 | latex = "\\begin{matrix}x^1&x^2&x^3\\\\y^1&y^2&y^3\\\\z^1&z^2&z^3\\end{matrix}" 21 | math = process_sympy(latex) 22 | print("latex: %s to math: %s" % (latex, math)) 23 | 24 | latex = "\\begin{matrix}x\\\\y\\end{matrix}" 25 | math = process_sympy(latex) 26 | print("latex: %s to math: %s" % (latex, math)) 27 | 28 | latex = "2\\cdot\\begin{matrix}x\\\\y\\end{matrix}" 29 | math = process_sympy(latex) 30 | print("latex: %s to math: %s" % (latex, math)) 31 | 32 | latex = "2\\cdot\\begin{matrix}x\\\\y\\end{matrix} + \\begin{matrix}2\\\\3\\end{matrix}" 33 | math = process_sympy(latex) 34 | print("latex: %s to math: %s" % (latex, math)) 35 | 36 | latex = "-2\\begin{matrix}1&2\\\\3&4\\end{matrix}" 37 | math = process_sympy(latex) 38 | print("latex: %s to math: %s" % (latex, math)) 39 | 40 | latex = "2\\cdot\\theta\\begin{matrix}x\\\\y\\end{matrix} + \\begin{matrix}2\\\\3\\end{matrix}" 41 | math = process_sympy(latex) 42 | print("latex: %s to math: %s" % (latex, math)) 43 | 44 | latex = "\\theta\\begin{matrix}1\\\\3\\end{matrix} - \\begin{matrix}-1\\\\2\\end{matrix}" 45 | math = process_sympy(latex) 46 | print("latex: %s to math: %s" % (latex, math)) 47 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/sandbox/sandbox.py: -------------------------------------------------------------------------------- 1 | from sympy import * 2 | from latex2sympy import process_sympy 3 | 4 | 5 | # latex = '\\variable{a}^{\\variable{b}}' 6 | # variables = {'a': process_sympy('658.95998'), 'b': process_sympy('185083.8060')} 7 | # c_ans_expr = process_sympy(latex, variables) 8 | # print(c_ans_expr) 9 | # print(srepr(c_ans_expr)) 10 | # c_ans = c_ans_expr.doit(deep=False).evalf(chop=True) 11 | # print(c_ans) 12 | # print(srepr(c_ans)) 13 | 14 | 15 | # numeric_responses = ['1', '1.0', '-1', '-1.0', '.5', '-.5', '3x10^3', '3E3', '3,000x10^{-3}', '0.5E-1', '\\frac{1}{3}', 
'(5\\times 3)^3', '\\sin(1)'] 16 | # for latex in numeric_responses: 17 | # parsed = process_sympy(latex) 18 | # print('latex: ', latex) 19 | # print('sympy: ', parsed) 20 | # print('is_number: ', parsed.is_number) 21 | # print('is_Number: ', parsed.is_Number) 22 | # print('srepr: ', srepr(parsed)) 23 | # print('-----------------------------------------------------') 24 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/sandbox/sectan.py: -------------------------------------------------------------------------------- 1 | from sympy import * 2 | import sys 3 | sys.path.append("..") 4 | 5 | # # x^2\cdot \left(3\cdot \tan \left([!a!]\cdot x+[!c!]\right)+[!a!]\cdot x\left(\sec \left([!a!]\cdot x+[!c!]\right)\right)^2\right) 6 | # latex1 = "x^2\\cdot \\left(3\\cdot \\tan \\left(2\\cdot x+5\\right)+2\\cdot x\\left(\\sec \\left(2\\cdot x+5\\right)\\right)^2\\right)" 7 | # math1 = process_sympy(latex1) 8 | # print("latex: %s to math: %s" %(latex1,math1)) 9 | # 10 | # latex2 = "x^2\\cdot \\left(3\\cdot \\tan \\left(2\\cdot x+5\\right)+2\\cdot x\\left(\\sec \\left(2\\cdot x+5\\right)^2\\right)\\right)" 11 | # math2 = process_sympy(latex2) 12 | # print("latex: %s to math: %s" %(latex2,math2)) 13 | # 14 | # latex3 = "x^2\\cdot \\left(3\\cdot \\tan \\left(2\\cdot x+5\\right)+2\\cdot x\\left(1+\\tan \\left(2\\cdot x+5\\right)^2\\right)\\right)" 15 | # math3 = process_sympy(latex3) 16 | # print("latex: %s to math: %s" %(latex3,math3)) 17 | # 18 | # print(simplify(math1 - math2)) 19 | # print(simplify(math1 - math3)) 20 | 21 | # 22 | # latex1 = "\\sec^2(2\\cdot x+5)" 23 | # math1 = process_sympy(latex1) 24 | # print("latex: %s to math: %s" %(latex1,math1)) 25 | # 26 | # latex2 = "1+\\tan^2(2\\cdot x+5)" 27 | # math2 = process_sympy(latex2) 28 | # print("latex: %s to math: %s" %(latex2,math2)) 29 | # print(simplify(math1 - math2)) 30 | 31 | 32 | x = Symbol('x', real=True) 33 | y = Symbol('y', real=True) 34 | 35 | 
# BUG: 1 + tan^2(x+1) should be == sec^2(x+1) but isn't 36 | lhs = (1 + (tan(x + 1))**2) 37 | rhs = (sec(x + 1))**2 38 | eq = lhs - rhs 39 | print(simplify(lhs)) 40 | print(simplify(rhs)) 41 | print(simplify(eq)) 42 | print(simplify(lhs) == simplify(rhs)) 43 | 44 | # 1 + tan^2(x) == sec^2(x) but isn't 45 | lhs = (1 + (tan(x))**2) 46 | rhs = (sec(x))**2 47 | eq = lhs - rhs 48 | print(simplify(lhs)) 49 | print(simplify(rhs)) 50 | print(simplify(eq)) 51 | print(simplify(lhs) == simplify(rhs)) 52 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/sandbox/vector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sympy import * 3 | import sys 4 | sys.path.append("..") 5 | 6 | # single row matrix = vector 7 | v = [1, 2, 3] 8 | 9 | # single column matrix = vector 10 | m = Matrix([1, 2, 3]) 11 | print(m[:, 0]) 12 | 13 | # a three row and 2 column matrix 14 | m = Matrix([[1, 2], [3, 4], [5, 6]]) 15 | print(m[:, 0]) 16 | 17 | # determinant of lin indp system != 0 18 | m = Matrix([[1, 1], [1, 2]]) 19 | print(m.det()) 20 | 21 | # determinant of lin dep system = 0 22 | m = Matrix([[1, 1], [2, 2]]) 23 | print(m.det()) 24 | 25 | # determinant of lin dep system = 0 26 | x = Symbol('x') 27 | y = Symbol('y') 28 | m = Matrix([[x, y], [x, y]]) 29 | print(m.det()) 30 | # Reduced Row-Echelon Form 31 | _, ind = m.rref() 32 | print(len(ind)) 33 | 34 | # determinant of lin indp system != 0 35 | m = Matrix([[x, y], [y, x]]) 36 | print(m.det()) 37 | # Reduced Row-Echelon Form 38 | _, ind = m.rref() 39 | print(len(ind)) 40 | 41 | # non-square (2x3) system: no determinant, count rref pivots instead 42 | # Reduced Row-Echelon Form 43 | m = Matrix([[x, x, y], [y, y, y]]) 44 | _, ind = m.rref() 45 | # Reduced Row-Echelon Form 46 | print(len(ind)) 47 | 48 | #==================# 49 | #===== Numpy ======# 50 | #==================# 51 | #
http://kitchingroup.cheme.cmu.edu/blog/2013/03/01/Determining-linear-independence-of-a-set-of-vectors/ 52 | # Lin Indp of set of numerical vectors 53 | TOLERANCE = 1e-14 54 | v1 = [6, 0, 3, 1, 4, 2] 55 | v2 = [0, -1, 2, 7, 0, 5] 56 | v3 = [12, 3, 0, -19, 8, -11] 57 | 58 | A = np.row_stack([v1, v2, v3]) 59 | 60 | U, s, V = np.linalg.svd(A) 61 | print(s) 62 | print(np.sum(s > TOLERANCE)) 63 | 64 | v1 = [1, 1] 65 | v2 = [4, 4] 66 | 67 | A = np.row_stack([v1, v2]) 68 | U, s, V = np.linalg.svd(A) 69 | print(s) 70 | print(np.sum(s > TOLERANCE)) 71 | 72 | 73 | latex = "\\begin{matrix}1&2\\\\3&4\\end{matrix}" 74 | # math = process_sympy(latex) 75 | print("latex: %s to math: %s" % (latex, 1)) 76 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get relative path of the root directory of the project 4 | rdir=`git rev-parse --git-dir` 5 | rel_path="$(dirname "$rdir")" 6 | # Change to that path and run the file 7 | cd $rel_path 8 | 9 | java -jar antlr-4.11.1-complete.jar PS.g4 -o gen 10 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage-ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pytest --doctest-modules --junitxml=junit/test-results.xml --cov-report=xml --cov-config=.coveragerc --cov=latex2sympy tests -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get relative path of the root directory of the project 4 | rdir=`git rev-parse --git-dir` 5 | rel_path="$(dirname "$rdir")" 6 | # Change to that path and run the file 7 | cd $rel_path 8 | 9 | # 
Activate virtual environment 10 | echo "activating venv..." 11 | if test -f .env/bin/activate 12 | then source .env/bin/activate && echo "venv activate (bin)" 13 | elif test -f .env/Scripts/activate 14 | then source .env/Scripts/activate && echo "venv activated (Scripts)" 15 | else exit 1 16 | fi 17 | 18 | # Run unit test coverage 19 | echo "starting coverage..." 20 | if pytest --doctest-modules --cov-report=html --cov-config=.coveragerc --cov=latex2sympy tests 21 | then echo "coverage finished" 22 | else exit 1 23 | fi 24 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get relative path of the root directory of the project 4 | rdir=`git rev-parse --git-dir` 5 | rel_path="$(dirname "$rdir")" 6 | 7 | # Change to that path and run the file 8 | cd $rel_path 9 | 10 | echo "pre-commit hook started..." 11 | 12 | # Activate virtual environment 13 | echo "activating venv..." 14 | if test -f .env/bin/activate 15 | then source .env/bin/activate && echo "venv activated." 16 | elif test -f .env/Scripts/activate 17 | then source .env/Scripts/activate && echo "venv activated." 18 | else exit 1 19 | fi 20 | 21 | # Run auto formatting on all staged python files, then add those changes 22 | echo "auto-formatting code..." 23 | if autopep8 --in-place `git diff --name-status --cached | grep '.py' | awk 'match($1, "A|M"){print $2}'` && git add `git diff --name-status --cached | grep '.py' | awk 'match($1, "A|M"){print $2}'` 24 | then echo "code was auto-formatted." 25 | else echo "no code was auto-formatted." 
26 | fi 27 | 28 | exit 0 29 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get relative path of the root directory of the project 4 | rdir=`git rev-parse --git-dir` 5 | rel_path="$(dirname "$rdir")" 6 | 7 | # Change to that path and run the file 8 | cd $rel_path 9 | 10 | echo "pre-push hook started..." 11 | 12 | # Activate virtual environment 13 | echo "activating venv..." 14 | if test -f .env/bin/activate 15 | then source .env/bin/activate && echo "venv activated." 16 | elif test -f .env/Scripts/activate 17 | then source .env/Scripts/activate && echo "venv activated." 18 | else exit 1 19 | fi 20 | 21 | # Run unit tests 22 | echo "starting tests..." 23 | # if pytest tests 24 | # then echo "tests finished." 25 | # else exit 1 26 | # fi 27 | 28 | exit 0 29 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/publish.sh: -------------------------------------------------------------------------------- 1 | rm ./dist/* 2 | python3 setup.py bdist_wheel 3 | twine upload dist/* 4 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup-hooks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cp scripts/pre-push .git/hooks/ 3 | cp scripts/pre-commit .git/hooks/ -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get relative path of the root directory of the project 4 | rdir=`git rev-parse --git-dir` 5 | rel_path="$(dirname "$rdir")" 6 | # Change to that path and run the file 7 | cd $rel_path 8 | 
9 | echo "creating venv..." 10 | if test -d .env 11 | then echo "venv exists" 12 | else python3 -m venv .env && echo "venv created" 13 | fi 14 | 15 | echo '' 16 | # Activate virtual environment 17 | echo "activating venv..." 18 | if test -f .env/bin/activate 19 | then source .env/bin/activate && echo "venv activate (bin)" 20 | elif test -f .env/Scripts/activate 21 | then source .env/Scripts/activate && echo "venv activated (Scripts)" 22 | else exit 1 23 | fi 24 | 25 | echo '' 26 | echo "installing requirements..." 27 | if pip install -r dev-requirements.txt 28 | then echo "requirements installed" 29 | else exit 1 30 | fi 31 | 32 | echo '' 33 | echo "compiling parser..." 34 | sh scripts/compile.sh 35 | echo "parser compiled" 36 | 37 | echo '' 38 | echo "setup git hooks..." 39 | sh scripts/setup-hooks.sh 40 | echo "git hooks setup" 41 | 42 | exit 0 43 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get relative path of the root directory of the project 4 | rdir=`git rev-parse --git-dir` 5 | rel_path="$(dirname "$rdir")" 6 | # Change to that path and run the file 7 | cd $rel_path 8 | 9 | # Activate virtual environment 10 | echo "activating venv..." 11 | if test -f .env/bin/activate 12 | then source .env/bin/activate && echo "venv activate (bin)" 13 | elif test -f .env/Scripts/activate 14 | then source .env/Scripts/activate && echo "venv activated (Scripts)" 15 | else exit 1 16 | fi 17 | 18 | echo '' 19 | echo "compiling parser..." 20 | sh scripts/compile.sh 21 | echo "parser compiled" 22 | 23 | echo '' 24 | # Run unit tests 25 | echo "starting tests..." 
"""Packaging script for the latex2sympy2 distribution."""
from setuptools import setup, find_packages
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

# Read the README up front inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(path.join(here, "README.md"), encoding='utf-8') as readme_file:
    long_description = readme_file.read()


setup(
    name="latex2sympy2",
    version="1.9.0",
    description='Convert latex to sympy with ANTLR and support Matrix, Linear Algebra and CAS functions.',
    long_description_content_type='text/markdown',
    long_description=long_description,
    # The project's main homepage.
    url='https://github.com/ZubinGou/latex2sympy',
    # Author details
    author='ZubinGou',
    author_email='zebgou@gmail.com',
    # Choose your license
    license='MIT',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: MIT License',
        'Topic :: Education',
        'Topic :: Scientific/Engineering :: Mathematics',
        'Topic :: Software Development :: Compilers',
        'Topic :: Text Processing :: Markup :: LaTeX',
        'Topic :: Text Processing :: Markup :: Markdown',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
    ],
    # ('tests') is just the string 'tests'; find_packages unpacks it into
    # single-character patterns, so the tests package was never excluded.
    # A real tuple (plus the sub-package glob) does what was intended.
    packages=find_packages(exclude=('tests', 'tests.*')),
    py_modules=['asciimath_printer', 'latex2sympy2'],
    install_requires=[
        'sympy>=1.4',
        'antlr4-python3-runtime==4.11.1'
    ],
)
'\\lvert': '\\rvert' 11 | } 12 | 13 | 14 | @pytest.mark.parametrize('input, output, symbolically', examples) 15 | def test_abs(input, output, symbolically): 16 | for left, right in delimiter_pairs.items(): 17 | assert_equal("{left}{input}{right}".format(left=left, right=right, input=input), output, symbolically=symbolically) 18 | assert_equal("\\left{left}{input}\\right{right}".format(left=left, right=right, input=input), output, symbolically=symbolically) 19 | assert_equal("\\mleft{left}{input}\\mright{right}".format(left=left, right=right, input=input), output, symbolically=symbolically) 20 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/binomial_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal, _Add, _Mul, _Pow 2 | import pytest 3 | from sympy import binomial, Symbol 4 | 5 | x = Symbol('x', real=True) 6 | y = Symbol('y', real=True) 7 | theta = Symbol('theta', real=True) 8 | gamma = Symbol('gamma', real=True) 9 | 10 | 11 | def test_binomial_numeric(): 12 | assert_equal("\\binom{16}{2}", binomial(16, 2)) 13 | 14 | 15 | def test_binomial_symbols(): 16 | assert_equal("\\binom{x}{y}", binomial(x, y)) 17 | 18 | 19 | def test_binomial_greek_symbols(): 20 | assert_equal("\\binom{\\theta}{\\gamma}", binomial(theta, gamma)) 21 | 22 | 23 | def test_binomial_expr(): 24 | assert_equal("\\binom{16+2}{\\frac{4}{2}}", binomial(_Add(16, 2), _Mul(4, _Pow(2, -1)), evaluate=False)) 25 | 26 | 27 | def test_choose_numeric(): 28 | assert_equal("\\choose{16}{2}", binomial(16, 2)) 29 | 30 | 31 | def test_choose_symbols(): 32 | assert_equal("\\choose{x}{y}", binomial(x, y)) 33 | 34 | 35 | def test_choose_greek_symbols(): 36 | assert_equal("\\choose{\\theta}{\\gamma}", binomial(theta, gamma)) 37 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/ceil_test.py: 
-------------------------------------------------------------------------------- 1 | from .context import assert_equal, get_simple_examples 2 | import pytest 3 | from sympy import ceiling 4 | 5 | examples = get_simple_examples(ceiling) 6 | 7 | 8 | @pytest.mark.parametrize('input, output, symbolically', examples) 9 | def test_ceil_func(input, output, symbolically): 10 | assert_equal("\\ceil({input})".format(input=input), output, symbolically=symbolically) 11 | 12 | 13 | @pytest.mark.parametrize('input, output, symbolically', examples) 14 | def test_ceil_operatorname(input, output, symbolically): 15 | assert_equal("\\operatorname{{ceil}}({input})".format(input=input), output, symbolically=symbolically) 16 | 17 | 18 | @pytest.mark.parametrize('input, output, symbolically', examples) 19 | def test_ceil_cmd(input, output, symbolically): 20 | assert_equal("\\lceil {input}\\rceil".format(input=input), output, symbolically=symbolically) 21 | assert_equal("\\left\\lceil {input}\\right\\rceil".format(input=input), output, symbolically=symbolically) 22 | assert_equal("\\mleft\\lceil {input}\\mright\\rceil".format(input=input), output, symbolically=symbolically) 23 | 24 | 25 | @pytest.mark.parametrize('input, output, symbolically', examples) 26 | def test_ceil_corners(input, output, symbolically): 27 | assert_equal("\\ulcorner {input}\\urcorner".format(input=input), output, symbolically=symbolically) 28 | assert_equal("\\left\\ulcorner {input}\\right\\urcorner".format(input=input), output, symbolically=symbolically) 29 | assert_equal("\\mleft\\ulcorner {input}\\mright\\urcorner".format(input=input), output, symbolically=symbolically) 30 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/complex_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import Sum, I, Symbol, Integer 4 | 5 | a = 
Symbol('a', real=True) 6 | b = Symbol('b', real=True) 7 | i = Symbol('i', real=True) 8 | n = Symbol('n', real=True) 9 | x = Symbol('x', real=True) 10 | 11 | 12 | def test_complex(): 13 | assert_equal("a+Ib", a + I * b) 14 | 15 | 16 | def test_complex_e(): 17 | assert_equal("e^{I\\pi}", Integer(-1)) 18 | 19 | 20 | def test_complex_sum(): 21 | assert_equal("\\sum_{i=0}^{n} i \\cdot x", Sum(i * x, (i, 0, n))) 22 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/exp_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import exp, sin, Symbol, E 4 | 5 | x = Symbol('x', real=True) 6 | y = Symbol('y', real=True) 7 | 8 | 9 | def test_exp_letter(): 10 | assert_equal("e", E) 11 | assert_equal("e", exp(1)) 12 | 13 | 14 | def test_exp_func(): 15 | assert_equal("\\exp(3)", exp(3)) 16 | 17 | 18 | def test_exp_func_no_delim(): 19 | assert_equal("\\exp3", exp(3)) 20 | 21 | 22 | def test_exp_command_symbol(): 23 | assert_equal("\\exponentialE", E) 24 | assert_equal("\\exponentialE", exp(1)) 25 | 26 | 27 | def test_exp_command_symbol_expression(): 28 | assert_equal("\\exponentialE^{3}", exp(3)) 29 | 30 | 31 | def test_exp_command_symbol_multiplied(): 32 | ''' 33 | \\exponentialE is NOT a function, so using the following notation equates to multiplication 34 | ''' 35 | assert_equal("\\exponentialE (3)", E * 3) 36 | assert_equal("\\exponentialE \\left( 3\\right)", E * 3) 37 | assert_equal("\\exponentialE \\times 3", E * 3) 38 | 39 | 40 | def test_exp_numeric(): 41 | assert_equal("e^3", exp(3)) 42 | 43 | 44 | def test_exp_symbol(): 45 | assert_equal("e^x", exp(x)) 46 | 47 | 48 | def test_exp_symbol_expr(): 49 | assert_equal("e^{x+y}", exp(x + y)) 50 | 51 | 52 | def test_exp_symbol_expr_group(): 53 | assert_equal("e^{(x+y)}", exp(x + y)) 54 | 55 | 56 | def test_exp_expr(): 57 | assert_equal("\\sin(x)*e^x", 
sin(x) * exp(x)) 58 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/floor_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal, get_simple_examples 2 | import pytest 3 | from sympy import floor 4 | 5 | examples = get_simple_examples(floor) 6 | 7 | 8 | @pytest.mark.parametrize('input, output, symbolically', examples) 9 | def test_floor_func(input, output, symbolically): 10 | assert_equal("\\floor({input})".format(input=input), output, symbolically=symbolically) 11 | 12 | 13 | @pytest.mark.parametrize('input, output, symbolically', examples) 14 | def test_floor_operatorname(input, output, symbolically): 15 | assert_equal("\\operatorname{{floor}}({input})".format(input=input), output, symbolically=symbolically) 16 | 17 | 18 | @pytest.mark.parametrize('input, output, symbolically', examples) 19 | def test_floor_cmd(input, output, symbolically): 20 | assert_equal("\\lfloor {input}\\rfloor".format(input=input), output, symbolically=symbolically) 21 | assert_equal("\\left\\lfloor {input}\\right\\rfloor".format(input=input), output, symbolically=symbolically) 22 | assert_equal("\\mleft\\lfloor {input}\\mright\\rfloor".format(input=input), output, symbolically=symbolically) 23 | 24 | 25 | @pytest.mark.parametrize('input, output, symbolically', examples) 26 | def test_floor_corners(input, output, symbolically): 27 | assert_equal("\\llcorner {input}\\lrcorner".format(input=input), output, symbolically=symbolically) 28 | assert_equal("\\left\\llcorner {input}\\right\\lrcorner".format(input=input), output, symbolically=symbolically) 29 | assert_equal("\\mleft\\llcorner {input}\\mright\\lrcorner".format(input=input), output, symbolically=symbolically) 30 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/greek_test.py: 
-------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import Symbol 4 | 5 | epsilon_upper = Symbol('char"000190', real=True) 6 | epsilon_lower = Symbol('epsilon', real=True) 7 | varepsilon = Symbol('varepsilon', real=True) 8 | 9 | 10 | def test_greek_epsilon(): 11 | assert_equal("\\epsilon", epsilon_lower) 12 | 13 | 14 | def test_greek_epsilon_upper(): 15 | assert_equal('\\char"000190', epsilon_upper) 16 | 17 | 18 | def test_greek_varepsilon(): 19 | assert_equal('\\varepsilon', varepsilon) 20 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/left_right_cdot_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import sin, Symbol 4 | 5 | x = Symbol('x', real=True) 6 | 7 | 8 | def test_left_right_cdot(): 9 | assert_equal("\\sin\\left(x\\right)\\cdot x", sin(x) * x) 10 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/linalg_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import MatMul, Matrix 4 | 5 | 6 | def test_linalg_placeholder(): 7 | assert_equal("\\begin{pmatrix}1&2\\\\3&4\\end{pmatrix}\\cdot\\variable{v}", MatMul(Matrix([[1, 2], [3, 4]]), Matrix([1, 2])), {'v': Matrix([1, 2])}) 8 | 9 | 10 | def test_linalg_placeholder_multiple(): 11 | assert_equal("\\variable{M}\\cdot\\variable{v}", MatMul(Matrix([[1, 2], [3, 4]]), Matrix([1, 2])), {'M': Matrix([[1, 2], [3, 4]]), 'v': Matrix([1, 2])}) 12 | 13 | 14 | def test_linalg_placeholder_multiple_mul(): 15 | assert_equal("\\begin{pmatrix}3&-1\\end{pmatrix}\\cdot\\variable{M}\\cdot\\variable{v}", MatMul(Matrix([[3, -1]]), Matrix([[1, 2], [3, 4]]), Matrix([1, 
2])), {'M': Matrix([[1, 2], [3, 4]]), 'v': Matrix([1, 2])}) 16 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/overline_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import sin, Symbol 4 | 5 | x = Symbol('x', real=True) 6 | 7 | 8 | def test_overline(): 9 | assert_equal("\\frac{\\sin(x)}{\\overline{x}_n}", sin(x) / Symbol('xbar_n', real=True)) 10 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/pi_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal, _Mul, _Pow 2 | import pytest 3 | from sympy import pi, Symbol, acos, cos 4 | 5 | 6 | def test_pi_frac(): 7 | assert_equal("\\frac{\\pi}{3}", _Mul(pi, _Pow(3, -1))) 8 | 9 | 10 | def test_pi_nested(): 11 | assert_equal("\\arccos{\\cos{\\frac{\\pi}{3}}}", acos(cos(_Mul(pi, _Pow(3, -1)), evaluate=False), evaluate=False)) 12 | 13 | 14 | def test_pi_arccos(): 15 | assert_equal("\\arccos{-1}", pi, symbolically=True) 16 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/latex2sympy/tests/trig_test.py: -------------------------------------------------------------------------------- 1 | from .context import assert_equal 2 | import pytest 3 | from sympy import asinh, Symbol 4 | 5 | # x = Symbol('x', real=True); 6 | 7 | # latex = "\\sinh(x)" 8 | # math = process_sympy(latex) 9 | # print("latex: %s to math: %s" %(latex,math)) 10 | # 11 | # latex = "\\arcsinh(x)" 12 | # math = process_sympy(latex) 13 | # print("latex: %s to math: %s" %(latex,math)) 14 | # 15 | # latex = "\\arsinh(x)" 16 | # math = process_sympy(latex) 17 | # print("latex: %s to math: %s" %(latex,math)) 18 | 19 | 20 | def test_arcsinh(): 21 | 
assert_equal("\\operatorname{arcsinh}\\left(1\\right)", asinh(1, evaluate=False)) 22 | -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/requirements.txt: -------------------------------------------------------------------------------- 1 | # common 2 | vllm 3 | tqdm 4 | datasets 5 | torch 6 | transformers 7 | python_dateutil 8 | flash_attn 9 | 10 | # math_eval 11 | sympy==1.12 12 | antlr4-python3-runtime==4.11.1 # ! The version needs to be compatible with sympy. 13 | word2number 14 | Pebble 15 | timeout-decorator -------------------------------------------------------------------------------- /Qwen2.5-Eval/evaluation/sh/eval_one_experiment_all_ckpts.sh: -------------------------------------------------------------------------------- 1 | # rm -rf sh/eval_checkpoint_yiping.sh; vim sh/eval_checkpoint_yiping.sh 2 | PROMPT_TYPE="qwen25-math-cot" 3 | export CUDA_VISIBLE_DEVICES="0,1,2,3" 4 | MAX_TOKENS="3072" 5 | 6 | # CHECKPOINTS_DIR=... 
# replace with your own path for storing checkpoints 7 | 8 | 9 | 10 | 11 | ####### pi1 ####### 12 | PROJECT_NAME="verl_few_shot" 13 | EXPERIMENT_NAME="Qwen2.5-Math-1.5B-pi1_r128" 14 | GLOBAL_STEP_LIST=($(seq 20 20 2000)) 15 | 16 | # # Loop through each step in the list 17 | for GLOBAL_STEP in "${GLOBAL_STEP_LIST[@]}"; do 18 | echo "======== Evaluating checkpoint at global step: ${GLOBAL_STEP} ========" 19 | MODEL_NAME_OR_PATH=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/global_step_${GLOBAL_STEP}/actor 20 | OUTPUT_DIR=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/eval/global_step_${GLOBAL_STEP} 21 | bash sh/eval_all_math.sh $PROMPT_TYPE $MODEL_NAME_OR_PATH $MAX_TOKENS $OUTPUT_DIR 22 | done 23 | 24 | 25 | ####### DSR-sub ####### 26 | # PROJECT_NAME="verl_few_shot" 27 | # EXPERIMENT_NAME="Qwen2.5-Math-1.5B-dsr_sub" 28 | # GLOBAL_STEP_LIST=($(seq 20 20 2000)) 29 | 30 | # # # Loop through each step in the list 31 | # for GLOBAL_STEP in "${GLOBAL_STEP_LIST[@]}"; do 32 | # echo "======== Evaluating checkpoint at global step: ${GLOBAL_STEP} ========" 33 | # MODEL_NAME_OR_PATH=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/global_step_${GLOBAL_STEP}/actor 34 | # OUTPUT_DIR=${CHECKPOINTS_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}/eval/global_step_${GLOBAL_STEP} 35 | # bash sh/eval_all_math.sh $PROMPT_TYPE $MODEL_NAME_OR_PATH $MAX_TOKENS $OUTPUT_DIR 36 | # done 37 | 38 | 39 | -------------------------------------------------------------------------------- /data/data_selection.sh: -------------------------------------------------------------------------------- 1 | 2 | # get pi_{i} -> top_index = i-1 3 | python data_selection.py \ 4 | --index_json_path acc_step_500.json \ 5 | --data_dir train/one_shot_rlvr \ 6 | --parquet_file_name dsr_sub.parquet \ 7 | --repeat_time 128 \ 8 | --top_index 0 \ 9 | --method std \ 10 | --top_n 0 -------------------------------------------------------------------------------- /data/test/math500.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/test/math500.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/dsr_sub.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/dsr_sub.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/merge_pi1_pi13_r128.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/merge_pi1_pi13_r128.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/merge_pi1_pi2_pi13_pi1209_r128.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/merge_pi1_pi2_pi13_pi1209_r128.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/pi1209_r128.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi1209_r128.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/pi13_r128.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi13_r128.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/pi1_r128.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi1_r128.parquet -------------------------------------------------------------------------------- /data/train/one_shot_rlvr/pi2_r128.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/data/train/one_shot_rlvr/pi2_r128.parquet -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:ngc-th2.4.0-cu124-vllm0.6.3-ray2.4-te1.7-v0.0.6" -f docker/Dockerfile.ngc.vllm . 
--builder cloud-verlai-verl-builder --progress=plain --push 2 | FROM nvcr.io/nvidia/pytorch:24.05-py3 3 | 4 | # uninstall nv-pytorch fork 5 | RUN pip3 uninstall pytorch-quantization \ 6 | pytorch-triton \ 7 | torch \ 8 | torch-tensorrt \ 9 | torchvision \ 10 | xgboost transformer_engine flash_attn \ 11 | apex megatron-core -y 12 | 13 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124 14 | 15 | # =============== Megatron dependencies (optional) ================= 16 | # install apex, set MAX_JOBS to avoid OOMs 17 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 18 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 19 | git+https://github.com/NVIDIA/apex 20 | # =============== End of Megatron dependencies (optional) ================= 21 | 22 | RUN pip3 install --no-cache-dir \ 23 | accelerate \ 24 | codetiming \ 25 | datasets \ 26 | dill \ 27 | hydra-core \ 28 | numpy \ 29 | 'pandas' \ 30 | 'peft' \ 31 | 'pyarrow>=15.0.0' \ 32 | 'pybind11' \ 33 | 'pylatexenc' \ 34 | 'ray>=2.10' \ 35 | 'tensordict<0.6' \ 36 | 'transformers' \ 37 | 'vllm==0.6.3.post1' \ 38 | 'wandb' 39 | 40 | # full dependencies 41 | RUN pip3 install pytest yapf py-spy pyext liger-kernel 42 | 43 | # =============== Megatron dependencies (optional) ================= 44 | # install Transformer Engine, which requires FA 2.5.8. 
Do it in a separate step for docker cache 45 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation 46 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 47 | # =============== End of Megatron dependencies (optional) ================= 48 | -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- 1 | # docker buildx build --platform linux/x86_64 -t "verlai/verl:$TAG" -f docker/$FILE . 2 | 3 | # the one in docker.io is an alias for the one veturbo 4 | # FROM vemlp-cn-beijing.cr.volces.com/veturbo/pytorch:2.4-cu124 5 | FROM docker.io/haibinlin/verl:v0.0.5-th2.4.0-cu124-base 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN pip3 config unset global.index-url 10 | 11 | # transformers 4.47.0 contains the following bug: 12 | # AttributeError: 'Gemma2Attention' object has no attribute '_flash_attn_uses_top_left_mask' 13 | RUN pip3 install --no-cache-dir \ 14 | torch==2.4.0 \ 15 | accelerate \ 16 | codetiming \ 17 | dill \ 18 | hydra-core \ 19 | numpy \ 20 | pybind11 \ 21 | tensordict \ 22 | "transformers <= 4.46.0" 23 | 24 | RUN pip3 install --no-cache-dir flash-attn==2.7.0.post2 --no-build-isolation 25 | 26 | # vllm depends on ray, and veRL does not support ray > 2.37 27 | RUN pip3 install --no-cache-dir vllm==0.6.3 ray==2.10 28 | 29 | # install apex 30 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 31 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 32 | git+https://github.com/NVIDIA/apex 33 | 34 | # install Transformer Engine 35 | # - flash-attn pinned to 2.5.3 by TransformerEngine, switch to eric-haibin-lin/TransformerEngine.git@v1.7.0 to 
relax version req 36 | # - install with: MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 to avoid OOM 37 | # - cudnn is required by TransformerEngine 38 | # RUN CUDNN_PATH=/opt/conda/lib/python3.11/site-packages/nvidia/cudnn \ 39 | # pip3 install git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0 40 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install flash-attn==2.5.3 --no-cache-dir --no-build-isolation 41 | RUN MAX_JOBS=1 NINJA_FLAGS="-j1" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 42 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and open localhost:8000. 
-------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- 1 | Add models with the Megatron-LM backend 2 | ========================================= 3 | 4 | Model 5 | ----------- 6 | 7 | The most challenging aspect of using the Megatron-LM backend is implementing 8 | the models for training. Currently, we implement a Llama model that 9 | supports data parallelism, tensor parallelism, pipeline parallelism (also 10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) on the Llama 11 | model, which can be found in `modeling_llama_megatron.py `_. 12 | 13 | To support other models, users are required to implement: 14 | 15 | 1. Implement a model similar to ``modeling_llama_megatron.py`` that satisfies the 16 | parallelism requirements of Megatron-LM. Then register your model in 17 | the `registry.py `_. 18 | 2. Checkpoint utils that can load a full checkpoint (e.g. huggingface 19 | checkpoint) to partitioned models during the runtime. Then register 20 | your loader to ``weight_loader_registry`` in `weight_loader_registry.py `_. 21 | 3. Weight loader that synchronizes the weights from Megatron to rollout 22 | (vLLM) model. Note that both the actor model and rollout model are 23 | partitioned during runtime. So, it's advisable to map the model name 24 | in actor model implementation. Otherwise, you may need an additional 25 | name mapping and even weight transformation. The weight loader implementation 26 | is in `megatron_weight_loaders.py `_. 
-------------------------------------------------------------------------------- /docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_generation \ 2 | trainer.nnodes=1 \ 3 | trainer.n_gpus_per_node=8 \ 4 | data.path=~/data/rlhf/gsm8k/test.parquet \ 5 | data.prompt_key=prompt \ 6 | data.n_samples=1 \ 7 | data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \ 8 | model.path=deepseek-ai/deepseek-llm-7b-chat \ 9 | +model.trust_remote_code=True \ 10 | rollout.temperature=1.0 \ 11 | rollout.top_k=50 \ 12 | rollout.top_p=0.7 \ 13 | rollout.prompt_length=2048 \ 14 | rollout.response_length=1024 \ 15 | rollout.tensor_model_parallel_size=2 \ 16 | rollout.gpu_memory_utilization=0.8 17 | -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 
-m verl.trainer.main_ppo \ 4 | algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.val_batch_size=1312 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=1024 \ 11 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \ 16 | actor_rollout_ref.actor.use_kl_loss=True \ 17 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 18 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 19 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 20 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 21 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 22 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 26 | actor_rollout_ref.rollout.n=5 \ 27 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \ 28 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 29 | algorithm.kl_ctrl.kl_coef=0.001 \ 30 | trainer.critic_warmup=0 \ 31 | trainer.logger=['console'] \ 32 | trainer.project_name='verl_grpo_example_gsm8k' \ 33 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 34 | trainer.n_gpus_per_node=8 \ 35 | trainer.nnodes=1 \ 36 | trainer.save_freq=-1 \ 37 | trainer.test_freq=5 \ 38 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | 
algorithm.adv_estimator=grpo \ 5 | data.train_files=$HOME/data/gsm8k/train.parquet \ 6 | data.val_files=$HOME/data/gsm8k/test.parquet \ 7 | data.train_batch_size=1024 \ 8 | data.val_batch_size=1312 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 16 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 17 | actor_rollout_ref.actor.use_kl_loss=True \ 18 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 19 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 20 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 21 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 22 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 26 | actor_rollout_ref.rollout.n=5 \ 27 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 28 | algorithm.kl_ctrl.kl_coef=0.001 \ 29 | trainer.critic_warmup=0 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.project_name='verl_grpo_example_gsm8k' \ 32 | trainer.experiment_name='deepseek_llm_7b_function_rm_seq_packing' \ 33 | trainer.n_gpus_per_node=8 \ 34 | trainer.nnodes=1 \ 35 | trainer.save_freq=-1 \ 36 | trainer.test_freq=5 \ 37 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | 
data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.val_batch_size=1312 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=1024 \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=80 \ 18 | actor_rollout_ref.actor.use_kl_loss=True \ 19 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 20 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 21 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 22 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 23 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=160 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=160 \ 30 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_grpo_example_gsm8k' \ 35 | trainer.experiment_name='qwen2_7b_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | algorithm.adv_estimator=grpo \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | 
data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.train_batch_size=1024 \ 10 | data.val_batch_size=1312 \ 11 | data.max_prompt_length=512 \ 12 | data.max_response_length=1024 \ 13 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.model.use_remove_padding=True \ 16 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 17 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 18 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 19 | actor_rollout_ref.actor.use_kl_loss=True \ 20 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 21 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 22 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 23 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 24 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \ 28 | actor_rollout_ref.rollout.n=5 \ 29 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_grpo_example_gsm8k' \ 34 | trainer.experiment_name='qwen2_7b_function_rm_kl1e-3' \ 35 | +trainer.val_before_train=False \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=15 $@ -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=1024 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=512 \ 9 | 
data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 11 | actor_rollout_ref.actor.optim.lr=1e-6 \ 12 | actor_rollout_ref.model.use_remove_padding=True \ 13 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 14 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \ 15 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 16 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 17 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | critic.optim.lr=1e-5 \ 25 | critic.model.use_remove_padding=True \ 26 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 27 | critic.model.enable_gradient_checkpointing=True \ 28 | critic.ppo_micro_batch_size_per_gpu=32 \ 29 | critic.model.fsdp_config.param_offload=False \ 30 | critic.model.fsdp_config.optimizer_offload=False \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_example_gsm8k' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=1 \ 40 | trainer.total_epochs=15 $@ 41 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | train_files=$HOME/data/full_hh_rlhf/rl/train.parquet 4 | test_files=$HOME/data/full_hh_rlhf/rl/train.parquet # no use 5 | 6 | python3 -m verl.trainer.main_ppo --config-path=./config 
--config-name='ppo_megatron_trainer'\ 7 | data.train_files="$train_files" \ 8 | data.val_files="$test_files" \ 9 | data.train_batch_size=512 \ 10 | data.val_batch_size=128 \ 11 | data.max_prompt_length=128 \ 12 | data.max_response_length=128 \ 13 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 14 | actor_rollout_ref.actor.optim.lr=1e-6 \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 19 | actor_rollout_ref.rollout.name=vllm \ 20 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 21 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 22 | actor_rollout_ref.ref.param_offload=False \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 25 | critic.model.enable_gradient_checkpointing=False \ 26 | critic.ppo_micro_batch_size_per_gpu=4 \ 27 | reward_model.enable=True \ 28 | reward_model.megatron.tensor_model_parallel_size=4 \ 29 | reward_model.model.path=deepseek-ai/deepseek-llm-7b-chat \ 30 | reward_model.micro_batch_size_per_gpu=4 \ 31 | reward_model.param_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console','wandb'] \ 35 | trainer.project_name='verl_megatron_full_hh_rlhf_examples' \ 36 | trainer.experiment_name='deepseek_llm_7b_model_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=-1 \ 40 | trainer.test_freq=5 \ 41 | trainer.total_epochs=100 $@ 42 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 4 | gsm8k_test_path=$HOME/data/gsm8k/test.parquet 5 | 
math_train_path=$HOME/data/math/train.parquet 6 | math_test_path=$HOME/data/math/test.parquet 7 | 8 | train_files="['$gsm8k_train_path', '$math_train_path']" 9 | test_files="['$gsm8k_test_path', '$math_test_path']" 10 | 11 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 12 | data.train_files="$train_files" \ 13 | data.val_files="$test_files" \ 14 | data.train_batch_size=1024 \ 15 | data.val_batch_size=6312 \ 16 | data.max_prompt_length=1024 \ 17 | data.max_response_length=512 \ 18 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 19 | actor_rollout_ref.actor.optim.lr=1e-6 \ 20 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 21 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 22 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 23 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 24 | actor_rollout_ref.rollout.name=vllm \ 25 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 26 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 27 | critic.optim.lr=1e-5 \ 28 | critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 29 | critic.model.enable_gradient_checkpointing=False \ 30 | critic.ppo_micro_batch_size_per_gpu=4 \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console','wandb'] \ 34 | trainer.project_name='verl_megatron_math_gsm8k_examples' \ 35 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=5 \ 40 | trainer.total_epochs=100 $@ 41 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo \ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | 
data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=512 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=1024 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=google/gemma-2-2b-it \ 11 | actor_rollout_ref.actor.optim.lr=1e-6 \ 12 | actor_rollout_ref.model.use_remove_padding=False \ 13 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 14 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 15 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 16 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 17 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 18 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 19 | actor_rollout_ref.rollout.name=vllm \ 20 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 21 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 22 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 23 | critic.optim.lr=1e-5 \ 24 | critic.model.use_remove_padding=False \ 25 | critic.model.path=google/gemma-2-2b-it \ 26 | critic.model.enable_gradient_checkpointing=False \ 27 | critic.ppo_micro_batch_size_per_gpu=4 \ 28 | critic.model.fsdp_config.param_offload=False \ 29 | critic.model.fsdp_config.optimizer_offload=False \ 30 | algorithm.kl_ctrl.kl_coef=0.001 \ 31 | trainer.critic_warmup=0 \ 32 | trainer.logger=['console','wandb'] \ 33 | trainer.project_name='verl_example' \ 34 | trainer.experiment_name='gemma2b_function_rm' \ 35 | trainer.n_gpus_per_node=2 \ 36 | trainer.nnodes=1 \ 37 | trainer.save_freq=-1 \ 38 | trainer.test_freq=10 \ 39 | trainer.total_epochs=15 $@ 40 | -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | gsm8k_train_path=$HOME/data/gsm8k/train.parquet 6 | 
gsm8k_test_path=$HOME/data/gsm8k/test.parquet 7 | math_train_path=$HOME/data/math/train.parquet 8 | math_test_path=$HOME/data/math/test.parquet 9 | 10 | train_files="['$gsm8k_train_path', '$math_train_path']" 11 | test_files="['$gsm8k_test_path', '$math_test_path']" 12 | 13 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 14 | data.train_files="$train_files" \ 15 | data.val_files="$test_files" \ 16 | data.train_batch_size=1024 \ 17 | data.val_batch_size=6312 \ 18 | data.max_prompt_length=1024 \ 19 | data.max_response_length=512 \ 20 | actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ 21 | actor_rollout_ref.actor.optim.lr=1e-6 \ 22 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 23 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 24 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ 25 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 26 | actor_rollout_ref.rollout.name=vllm \ 27 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 28 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \ 29 | critic.optim.lr=1e-5 \ 30 | critic.model.path=Qwen/Qwen2-7B-Instruct \ 31 | critic.model.enable_gradient_checkpointing=False \ 32 | critic.ppo_micro_batch_size_per_gpu=4 \ 33 | algorithm.kl_ctrl.kl_coef=0.001 \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_megatron_math_gsm8k_examples' \ 37 | trainer.experiment_name='qwen2_7b_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.test_freq=5 \ 42 | trainer.total_epochs=100 $@ 43 | -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export HF_DATASETS_OFFLINE=1 4 | export TRANSFORMERS_OFFLINE=1 5 | 6 | export 
VLLM_ATTENTION_BACKEND=XFORMERS 7 | 8 | python3 -m verl.trainer.main_ppo \ 9 | algorithm.adv_estimator=remax \ 10 | data.train_files=$HOME/data/gsm8k/train.parquet \ 11 | data.val_files=$HOME/data/gsm8k/train.parquet \ 12 | data.train_batch_size=512 \ 13 | data.val_batch_size=1312 \ 14 | data.max_prompt_length=512 \ 15 | data.max_response_length=1024 \ 16 | actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct \ 17 | actor_rollout_ref.actor.optim.lr=1e-6 \ 18 | actor_rollout_ref.model.use_remove_padding=True \ 19 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 20 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 21 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=30000 \ 22 | actor_rollout_ref.actor.use_kl_loss=True \ 23 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 24 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 25 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 26 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 27 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ 31 | actor_rollout_ref.rollout.n=4 \ 32 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 33 | algorithm.kl_ctrl.kl_coef=0.001 \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_remax_example_gsm8k' \ 37 | trainer.experiment_name='qwen2.5_3b_function_rm_kl1e-3' \ 38 | +trainer.val_before_train=False \ 39 | trainer.n_gpus_per_node=8 \ 40 | trainer.nnodes=1 \ 41 | trainer.save_freq=-1 \ 42 | trainer.test_freq=5 \ 43 | trainer.total_epochs=5 $@ -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export HF_DATASETS_OFFLINE=1 4 | export 
TRANSFORMERS_OFFLINE=1 5 | 6 | export VLLM_ATTENTION_BACKEND=XFORMERS 7 | 8 | python3 -m verl.trainer.main_ppo \ 9 | algorithm.adv_estimator=remax \ 10 | data.train_files=$HOME/data/gsm8k/train.parquet \ 11 | data.val_files=$HOME/data/gsm8k/train.parquet \ 12 | data.train_batch_size=1024 \ 13 | data.val_batch_size=1312 \ 14 | data.max_prompt_length=512 \ 15 | data.max_response_length=1024 \ 16 | actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct \ 17 | actor_rollout_ref.actor.optim.lr=1e-6 \ 18 | actor_rollout_ref.model.use_remove_padding=True \ 19 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 20 | actor_rollout_ref.actor.use_dynamic_bsz=True \ 21 | actor_rollout_ref.actor.ppo_max_token_len_per_gpu=24000 \ 22 | actor_rollout_ref.actor.use_kl_loss=True \ 23 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 24 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 25 | actor_rollout_ref.model.enable_gradient_checkpointing=True \ 26 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 27 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 28 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 29 | actor_rollout_ref.rollout.name=vllm \ 30 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ 31 | actor_rollout_ref.rollout.n=4 \ 32 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 33 | algorithm.kl_ctrl.kl_coef=0.001 \ 34 | trainer.critic_warmup=0 \ 35 | trainer.logger=['console','wandb'] \ 36 | trainer.project_name='verl_remax_example_gsm8k' \ 37 | trainer.experiment_name='qwen2.5_7b_function_rm_kl1e-3' \ 38 | +trainer.val_before_train=False \ 39 | trainer.n_gpus_per_node=8 \ 40 | trainer.nnodes=1 \ 41 | trainer.save_freq=-1 \ 42 | trainer.test_freq=5 \ 43 | trainer.total_epochs=10 $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | 
echo "Usage: run_deepseek_6b7.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=data/gsm8k/train.parquet \ 17 | data.val_files=data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | +data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | +data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | 
trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=data/gsm8k/train.parquet \ 19 | data.val_files=data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | 
+data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: 
-------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 main_ppo_split.py \ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=1024 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ 11 | actor_rollout_ref.actor.optim.lr=1e-6 \ 12 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 13 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ 14 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 15 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 16 | 
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 17 | actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ 18 | actor_rollout_ref.rollout.name=vllm \ 19 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 20 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=8 \ 21 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 22 | critic.optim.lr=1e-5 \ 23 | critic.model.path=deepseek-ai/deepseek-llm-7b-chat \ 24 | critic.model.enable_gradient_checkpointing=False \ 25 | critic.ppo_micro_batch_size_per_gpu=8 \ 26 | critic.model.fsdp_config.param_offload=False \ 27 | critic.model.fsdp_config.optimizer_offload=False \ 28 | algorithm.kl_ctrl.kl_coef=0.001 \ 29 | trainer.critic_warmup=0 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.project_name='verl_example_gsm8k' \ 32 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 33 | trainer.n_gpus_per_node=8 \ 34 | trainer.nnodes=1 \ 35 | trainer.save_freq=-1 \ 36 | trainer.total_epochs=15 $@ 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | peft 12 | pyarrow>=15.0.0 13 | pybind11 14 | pylatexenc 15 | ray 16 | tensordict<0.6 17 | transformers 18 | vllm==0.6.3.post1 19 | wandb 20 | -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples 4 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from tests.e2e.envs.digit_completion import DigitCompletion, generate_ground_truth_response
from torch.utils import data
import os

if __name__ == '__main__':
    # Build the task and enumerate every prompt it exposes via get_all_prompts().
    task = DigitCompletion(max_number=9, max_diff=9, max_num_in_response=9)
    prompts = task.get_all_prompts()

    # NOTE(review): the original carried the bare remark "21 * 6 * 4" at this
    # point; it does not obviously match the parameters above -- confirm or drop.
    # 80% / 20% random split; no generator or seed is passed here.
    train_split, test_split = data.random_split(prompts, lengths=[0.8, 0.2])

    # Wrap each prompt as a single-turn chat: [{'role': 'user', 'content': ...}].
    chat_rows = {}
    for split_name, subset in (('train', train_split), ('test', test_split)):
        chat_rows[split_name] = [[{'role': 'user', 'content': str(item)}] for item in list(subset)]

    print(f"Size of train: {len(chat_rows['train'])}, size of test: {len(chat_rows['test'])}")

    # The parquet files are written next to this script.
    out_dir = os.path.dirname(os.path.abspath(__file__))

    import pandas as pd

    # Build both frames first, then write them (train before test).
    frames = {split_name: pd.DataFrame({'prompt': rows}) for split_name, rows in chat_rows.items()}
    for split_name, frame in frames.items():
        frame.to_parquet(os.path.join(out_dir, f'{split_name}.parquet'))

# --------------------------------------------------------------------------------
# /tests/e2e/arithmetic_sequence/data/test.parquet:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/tests/e2e/arithmetic_sequence/data/test.parquet -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/tests/e2e/arithmetic_sequence/data/train.parquet -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/model.safetensors: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwang61/One-Shot-RLVR/b9d2e9a1cc32682b8ec77922fe4a236d75a7e9b0/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as follows: 4 | 5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number is taken modulo the max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core of the task is defined in tests/e2e/envs/digit_completion/task.py. 18 | 19 | Reading it is highly recommended for a better understanding of the task.
20 | 21 | 22 | 23 | # Run experiments 24 | 25 | Users must specify the config path and config name (and the model config path relative to the current working directory). 26 | 27 | ```bash 28 | # cd tests/e2e/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of the model config is 'config/model_config'. To change it, edit ray_megatron.yaml or pass an override: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /tests/e2e/check_results.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
import argparse

import numpy as np


def extract_reward_from_line(line):
    """Return the ``critic/rewards/mean`` value logged on one output line.

    The line is treated as a list of ``key:value`` fields separated by
    ``' - '``. Fields that are not the reward field are skipped, whatever
    their shape, so a field with no colon or with several colons no longer
    hides a valid reward that appears later on the same line.

    Args:
        line: One line of text from the training log.

    Returns:
        The reward as a float, or ``-np.inf`` when the reward field is
        missing or its value cannot be parsed as a number.
    """
    for field in line.split(' - '):
        # Split on the first colon only; `sep` is empty when there is none.
        key, sep, val = field.partition(':')
        if not sep or key.strip() != 'critic/rewards/mean':
            continue
        try:
            return float(val)
        except ValueError:
            # The reward field is present but malformed.
            return -np.inf
    return -np.inf


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_file', required=True, type=str)

    args = parser.parse_args()

    # Track the best reward over every line that starts with 'step'.
    best_reward = -np.inf
    with open(args.output_file, 'r') as f:
        for line in f:
            if line.startswith('step'):
                reward = extract_reward_from_line(line.rstrip('\n'))
                if reward > best_reward:
                    best_reward = reward

    print(f'Best reward is {best_reward}')
    # Raise explicitly instead of using `assert`, so the check still runs
    # under `python -O`; the exception type and message are unchanged.
    if not best_reward > 0.2:
        raise AssertionError(f'Best reward must be greater than 0.2. best_reward: {best_reward}')
    print('Check passes')

# --------------------------------------------------------------------------------
# /tests/e2e/envs/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ['DigitCompletion'] -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /tests/e2e/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct 6 | 7 | python3 -m verl.trainer.main_ppo --config-path=config \ 8 | --config-name='ppo_megatron_trainer.yaml'\ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.train_batch_size=1024 \ 12 | data.val_batch_size=1312 \ 13 | data.max_prompt_length=512 \ 14 | 
data.max_response_length=512 \ 15 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ 16 | actor_rollout_ref.actor.optim.lr=2e-6 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 19 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 21 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 22 | actor_rollout_ref.rollout.name=vllm \ 23 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 24 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 25 | actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \ 26 | critic.optim.lr=2e-5 \ 27 | critic.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ 28 | critic.model.enable_gradient_checkpointing=False \ 29 | critic.ppo_micro_batch_size_per_gpu=4 \ 30 | critic.megatron.tensor_model_parallel_size=2 \ 31 | algorithm.kl_ctrl.kl_coef=0.001 \ 32 | trainer.critic_warmup=0 \ 33 | trainer.logger=['console'] \ 34 | trainer.project_name='verl_megatron_gsm8k_examples' \ 35 | trainer.experiment_name='deepseek_llm_1b3_function_rm' \ 36 | trainer.n_gpus_per_node=8 \ 37 | trainer.nnodes=1 \ 38 | trainer.save_freq=-1 \ 39 | trainer.test_freq=1 \ 40 | trainer.total_epochs=15 \ 41 | trainer.total_training_steps=3 $@ 42 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 13 | 
actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=True \ 27 | critic.model.path=Qwen/Qwen2.5-0.5B \ 28 | critic.model.enable_gradient_checkpointing=False \ 29 | critic.ppo_micro_batch_size_per_gpu=4 \ 30 | critic.model.fsdp_config.param_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | trainer.project_name='verl_example_gsm8k' \ 36 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 37 | trainer.n_gpus_per_node=8 \ 38 | trainer.nnodes=1 \ 39 | trainer.save_freq=1 \ 40 | trainer.default_local_dir=$HOME/ckpt/ \ 41 | trainer.total_training_steps=1 $@ 42 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | 
actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 25 | algorithm.kl_ctrl.kl_coef=0.001 \ 26 | algorithm.adv_estimator=grpo \ 27 | trainer.critic_warmup=0 \ 28 | trainer.logger=['console'] \ 29 | trainer.project_name='verl_example_gsm8k' \ 30 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 31 | trainer.n_gpus_per_node=8 \ 32 | trainer.nnodes=1 \ 33 | trainer.save_freq=-1 \ 34 | trainer.total_training_steps=1 $@ 35 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=False \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | 
actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 25 | critic.optim.lr=1e-5 \ 26 | critic.model.use_remove_padding=False \ 27 | critic.model.path=Qwen/Qwen2.5-0.5B \ 28 | critic.model.enable_gradient_checkpointing=False \ 29 | critic.ppo_micro_batch_size_per_gpu=4 \ 30 | critic.model.fsdp_config.param_offload=False \ 31 | critic.model.fsdp_config.optimizer_offload=False \ 32 | algorithm.kl_ctrl.kl_coef=0.001 \ 33 | trainer.critic_warmup=0 \ 34 | trainer.logger=['console'] \ 35 | +trainer.val_before_train=False \ 36 | trainer.project_name='verl_example_gsm8k' \ 37 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 38 | trainer.n_gpus_per_node=8 \ 39 | trainer.nnodes=1 \ 40 | trainer.save_freq=-1 \ 41 | trainer.total_training_steps=1 $@ 42 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.val_batch_size=1312 \ 10 | data.max_prompt_length=512 \ 11 | data.max_response_length=512 \ 12 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 13 | actor_rollout_ref.actor.optim.lr=1e-6 \ 14 | actor_rollout_ref.model.use_remove_padding=True \ 15 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 16 | 
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 17 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 18 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 19 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 20 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 21 | actor_rollout_ref.rollout.name=vllm \ 22 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 23 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 24 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 25 | algorithm.kl_ctrl.kl_coef=0.001 \ 26 | algorithm.adv_estimator=remax \ 27 | trainer.critic_warmup=0 \ 28 | trainer.logger=['console'] \ 29 | trainer.project_name='verl_example_gsm8k' \ 30 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 31 | trainer.n_gpus_per_node=8 \ 32 | trainer.nnodes=1 \ 33 | trainer.save_freq=-1 \ 34 | trainer.total_training_steps=1 $@ 35 | -------------------------------------------------------------------------------- /tests/e2e/run_qwen_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | # the config file used: verl/trainer/main_ppo/config/ppo_megatron_trainer.yaml 4 | 5 | huggingface-cli download Qwen/Qwen2.5-0.5B 6 | 7 | python3 -m verl.trainer.main_ppo --config-path=config \ 8 | --config-name='ppo_megatron_trainer.yaml'\ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.train_batch_size=1024 \ 12 | data.val_batch_size=1312 \ 13 | data.max_prompt_length=512 \ 14 | data.max_response_length=512 \ 15 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 16 | actor_rollout_ref.actor.optim.lr=2e-6 \ 17 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 18 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 19 | actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \ 21 | 
#!/usr/bin/env bash

# End-to-end check: train on the arithmetic-sequence task, capture the console
# log, then verify the best reward with tests/e2e/check_results.py.

set -e -x
# Without pipefail the pipeline's status is tee's, so a crashing trainer
# would not trip `set -e`.
set -o pipefail

OUTPUT_FILE="/tmp/output_ray_trainer.txt"

export PATH=$PATH:~/.local/bin

rm -rf "$OUTPUT_FILE"
python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
    data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
    data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
    data.train_batch_size=800 \
    data.val_batch_size=200 \
    data.max_prompt_length=16 \
    data.max_response_length=32 \
    data.return_raw_input_ids=True \
    actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
    actor_rollout_ref.model.external_lib=tests.e2e.envs.digit_completion \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=200 \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.actor.optim.lr=1e-4 \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=200 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=200 \
    actor_rollout_ref.rollout.name=hf \
    actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
    critic.ppo_micro_batch_size_per_gpu=200 \
    critic.model.path=tests/e2e/arithmetic_sequence/model \
    critic.optim.lr=1e-3 \
    algorithm.kl_ctrl.kl_coef=0.005 \
    trainer.total_epochs=200 \
    trainer.experiment_name=arithmetic_sequences \
    trainer.logger=['console'] \
    trainer.n_gpus_per_node=1 \
    trainer.test_freq=1 \
    trainer.save_freq=110 | tee "$OUTPUT_FILE"

python3 tests/e2e/check_results.py --output_file="$OUTPUT_FILE"
rm -rf "$OUTPUT_FILE"
def test_flash_attn_cross_entropy():
    """Compare flash-attn's Triton cross-entropy with the naive log-prob reference.

    Builds bf16 logits on the GPU from a random linear projection, takes the
    negated per-token loss from ``cross_entropy_loss`` as log-probabilities,
    runs a backward pass, and asserts the result is close to
    ``logprobs_from_logits_naive`` on the float32 logits. GPU memory usage is
    logged at each stage.
    """
    from verl.utils.torch_functional import logprobs_from_logits_naive

    from verl.utils.debug import log_gpu_memory_usage

    from flash_attn.ops.triton.cross_entropy import cross_entropy_loss

    import torch
    from torch import nn

    num_tokens = 2048
    hidden_size = 5120
    vocab_size = 155136

    log_gpu_memory_usage('At start')

    hidden_states = torch.randn(size=(num_tokens, hidden_size),
                                device='cuda',
                                requires_grad=True,
                                dtype=torch.bfloat16)

    lm_head = nn.Linear(in_features=hidden_size,
                        out_features=vocab_size,
                        bias=False,
                        device='cuda',
                        dtype=torch.bfloat16)

    logits = lm_head(hidden_states)

    labels = torch.randint(low=0, high=vocab_size, size=(num_tokens,), device='cuda')

    log_gpu_memory_usage('before computation')
    # The first element of the returned tuple is used as the per-token loss;
    # negating it gives the log-probability of each label.
    logprobs = -cross_entropy_loss(logits, labels)[0]
    log_gpu_memory_usage('After forward')
    logprobs.sum().backward()
    log_gpu_memory_usage('After backward')

    expected = logprobs_from_logits_naive(logits.float(), labels)

    torch.testing.assert_close(logprobs, expected)
```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - In another terminal, run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
def test():
    """Check that check_worker_alive/main.py aborts with a non-zero exit code.

    Launches the script with ``WAIT_TIME`` set, waits until it prints
    ``foo started``, sleeps 1.5x the wait time, then asserts the process has
    already exited with a non-zero return code.

    Raises:
        RuntimeError: if ``foo started`` is not seen before the child closes
            stdout or within roughly 40 seconds.
    """
    wait_time = 10

    my_env = os.environ.copy()
    my_env["WAIT_TIME"] = str(wait_time)

    p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE)

    try:
        # read() with no size blocks until EOF, so the original polling loop
        # only advanced after the child had exited. Reading line by line
        # returns as soon as the marker is printed.
        # NOTE(review): readline() still blocks while the child is silent, so
        # the deadline is only checked between output lines.
        deadline = time.time() + 40
        started = False
        for line in iter(p.stdout.readline, b""):
            if b"foo started" in line:
                started = True
                break
            if time.time() > deadline:
                break
        if not started:
            raise RuntimeError("timeout for start foo in check_worker_alive/main.py")

        print(
            time.time(),
            f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time")
        time.sleep(wait_time * 1.5)
        print(time.time(), "start checking")
        assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort"
        assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code"
        print("test passed")
    finally:
        # Do not leak the child process or its pipe when the test fails.
        if p.poll() is None:
            p.kill()
        p.stdout.close()
        p.wait()


if __name__ == "__main__":
    test()
@ray.remote
class TestActor(Worker):
    """Ray actor that reports environment variables from its worker process."""

    def __init__(self) -> None:
        super().__init__()

    def getenv(self, key):
        """Return the value of environment variable *key*, or ``"<key> not set"``."""
        val = os.getenv(key, f"{key} not set")
        return val


def test_basics():
    """Check the local world size and local ranks exposed to a 4-worker group."""
    ray.init()
    try:
        # create 4 workers, each hold a GPU
        resource_pool = RayResourcePool([4], use_gpu=True)
        class_with_args = RayClassWithInitArgs(cls=TestActor)

        worker_group = RayWorkerGroup(resource_pool=resource_pool,
                                      ray_cls_with_init=class_with_args,
                                      name_prefix="worker_group_basic")

        output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_WORLD_SIZE")
        assert output == ["4", "4", "4", "4"]

        output = worker_group.execute_all_sync("getenv", key="RAY_LOCAL_RANK")
        assert set(output) == {"0", "1", "2", "3"}
    finally:
        # Shut Ray down even when an assertion fails, so a failure here does
        # not leave an initialised Ray runtime behind for later tests.
        ray.shutdown()


if __name__ == '__main__':
    test_basics()
@ray.remote
class TestWorker:
    """Ray actor that joins a communicator group and reports its rank id."""

    def __init__(self, rank, world_size, group_name):
        self.rank = rank
        self.world_size = world_size
        self.group_name = group_name
        # Created lazily by init(); None until then.
        self.communicator = None

    def init(self):
        """Create the communicator for this worker's rank within the group."""
        from verl.utils.rendezvous.ray_backend import create_nccl_communicator_in_ray
        self.communicator = create_nccl_communicator_in_ray(self.rank, self.world_size, self.group_name)

    def test(self):
        """Return the communicator's rank id, or None if init() was not called."""
        if self.communicator is None:
            return None
        return self.communicator.rank_id()


def test_rvdz():
    """Check that each worker's communicator reports its own rank."""
    ray.init()
    try:
        group_name = "test_group"
        world_size = 2

        workers = [TestWorker.options(num_gpus=1).remote(rank, world_size, group_name) for rank in range(world_size)]

        ray.get([worker.init.remote() for worker in workers])

        ranks = ray.get([worker.test.remote() for worker in workers])

        # Derive the expectation from world_size instead of hard-coding it.
        expected = list(range(world_size))
        assert ranks == expected, f"expecting {expected}, got {ranks}"
    finally:
        # Shut Ray down even when the test fails part-way through.
        ray.shutdown()
from argparse import ArgumentParser
from pathlib import Path

license_head_bytedance = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
license_head_bytedance_25 = "Copyright 2025 Bytedance Ltd. and/or its affiliates"
# Add custom license headers below
license_head_prime = "Copyright 2024 PRIME team and/or its affiliates"

license_headers = [license_head_bytedance, license_head_bytedance_25, license_head_prime]


def has_license_header(file_content, headers=None):
    """Return True if *file_content* contains any accepted license header.

    Args:
        file_content: Full text of a source file.
        headers: Accepted header strings; defaults to ``license_headers``.
    """
    headers = license_headers if headers is None else headers
    return any(header in file_content for header in headers)


def find_unlicensed_files(directory, headers=None, verbose=False):
    """Return absolute paths of ``*.py`` files under *directory* lacking a header.

    Args:
        directory: Root directory, searched recursively.
        headers: Accepted header strings; defaults to ``license_headers``.
        verbose: When True, print every path as it is checked.

    Returns:
        A sorted list of absolute path strings for files with no accepted
        license header.
    """
    missing = []
    for path in sorted(Path(directory).glob('**/*.py')):
        path_in_str = str(path.absolute())
        if verbose:
            print(path_in_str)
        with open(path_in_str, 'r', encoding='utf-8') as f:
            file_content = f.read()
        if not has_license_header(file_content, headers):
            missing.append(path_in_str)
    return missing


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--directory', '-d', required=True, type=str)
    args = parser.parse_args()

    missing_files = find_unlicensed_files(args.directory, verbose=True)
    # Report every offending file in one run. Raising explicitly keeps the
    # check active under `python -O`, which strips assert statements.
    if missing_files:
        raise AssertionError('\n'.join(f'file {p} does not contain license' for p in missing_files))
def test_import():
    """Smoke test: ``verl`` imports and exposes ``__version__``."""
    import verl
    version = verl.__version__
    print(version)


def test_single_controller_import():
    """Smoke test: ``verl.single_controller`` imports and exposes ``__version__``."""
    import verl.single_controller
    version = verl.single_controller.__version__
    print(version)
# Tested with 2 & 4 GPUs

set -x

# The first two positional arguments are required; everything after them is
# forwarded to the trainer as extra config overrides.
if [ "$#" -lt 2 ]; then
    echo "Usage: run_sft_qwen05_peft.sh <nproc_per_node> <save_path> [other_configs...]"
    exit 1
fi

nproc_per_node=$1
save_path=$2

# Shift the arguments so $@ refers to the rest
shift 2

# "$@" is quoted so forwarded overrides containing spaces stay single arguments.
torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
    -m verl.trainer.fsdp_sft_trainer \
    data.train_files=$HOME/data/gsm8k/train.parquet \
    data.val_files=$HOME/data/gsm8k/test.parquet \
    data.prompt_key=extra_info \
    data.response_key=extra_info \
    optim.lr=1e-4 \
    +data.prompt_dict_keys=['question'] \
    +data.response_dict_keys=['answer'] \
    data.micro_batch_size_per_gpu=4 \
    model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \
    trainer.default_local_dir="$save_path" \
    trainer.project_name=gsm8k-sft \
    trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \
    trainer.logger=['console'] \
    trainer.total_training_steps=1 \
    trainer.default_hdfs_dir=null "$@" \
    model.lora_rank=32 \
    model.lora_alpha=16 \
    model.target_modules=all-linear

# Or you can do this:
# model.target_modules=[q_proj,v_proj] \
+data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /tests/verl/utils/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def get_rm_data():
    """Return the local path at which the reward-model test parquet is expected.

    The parent folder ``~/verl-data/full_hh_rlhf/rm/`` is created when missing.
    Nothing is downloaded here: the file itself must already be in place
    before the returned path is read.

    NOTE(review): the previous version kept an unused URL,
    https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet,
    which is presumably where the file is published -- confirm how the test
    environment fetches it.

    Returns:
        The path ``<home>/verl-data/full_hh_rlhf/rm/test.parquet``.
    """
    local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/')
    os.makedirs(local_folder, exist_ok=True)
    return os.path.join(local_folder, 'test.parquet')


def test_rm_dataset():
    """Smoke-test ``RMDataset``: decode the first item's ``input_ids``.

    Builds the dataset from the parquet returned by ``get_rm_data`` with the
    ``facebook/opt-1.3b`` tokenizer, then checks that batch-decoding the first
    item's ``input_ids`` gives more than one entry and that entries are strings.
    """
    tokenizer = hf_tokenizer("facebook/opt-1.3b")
    local_path = get_rm_data()
    dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512)
    data = dataset[0]['input_ids']
    output = tokenizer.batch_decode(data)
    assert len(output) > 1
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(output[0], str)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common model zoos such as huggingface/transformers struggle when used with PyTorch-native model parallelism. Following the design principle of vLLM, we keep a simple, parallelizable, highly-optimized model implementation with packed inputs in verl. 3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (total_nnz + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask. 
15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version with the huggingface version 18 | - Follow the existing test infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native Pytorch is NOT auto-parallelism. It works by specifying, through configs, how model parameters and inputs/outputs are resharded. These configs are then registered as hooks to perform input/output resharding before/after model forward. 26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in Pytorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class ParallelLlamaRMSNorm(nn.Module):
    """RMSNorm layer for the Megatron Llama model, evaluated with Apex's fused kernel."""

    def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig):
        """
        LlamaRMSNorm is equivalent to T5LayerNorm

        Args:
            config: model config; ``hidden_size`` gives the normalized shape and
                ``rms_norm_eps`` the epsilon passed to the fused kernel.
            megatron_config: parallel config; when ``sequence_parallel`` is
                truthy the weight is marked as a sequence-parallel parameter.
        """
        super().__init__()
        # Start from the configured value so that a non-integer hidden_size
        # (already a shape sequence) is used as-is rather than leaving
        # ``normalized_shape`` unbound.
        normalized_shape = config.hidden_size
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = torch.Size(normalized_shape)
        # The scale is initialised to ones.
        self.weight = nn.Parameter(torch.ones(self.normalized_shape))
        self.variance_epsilon = config.rms_norm_eps

        if megatron_config.sequence_parallel:
            sp_utils.mark_parameter_as_sequence_parallel(self.weight)

    def forward(self, hidden_states):
        """Return ``fused_rms_norm_affine`` applied to ``hidden_states`` with this layer's weight, shape and epsilon."""
        return fused_rms_norm_affine(input=hidden_states,
                                     weight=self.weight,
                                     normalized_shape=self.normalized_shape,
                                     eps=self.variance_epsilon,
                                     memory_efficient=True)
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | # original model with megatron 17 | ParallelQwen2Model, 18 | ParallelQwen2ForCausalLM, 19 | # rmpad with megatron 20 | ParallelQwen2ForCausalLMRmPad, 21 | ParallelQwen2ForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelQwen2ForCausalLMRmPadPP, 24 | ParallelQwen2ForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
class ParallelQwen2RMSNorm(nn.Module):
    """RMSNorm layer for the Megatron Qwen2 model, evaluated with Apex's fused kernel."""

    def __init__(self, config: Qwen2Config, megatron_config: ModelParallelConfig):
        """
        Qwen2RMSNorm is equivalent to T5LayerNorm

        Args:
            config: model config; ``hidden_size`` gives the normalized shape and
                ``rms_norm_eps`` the epsilon passed to the fused kernel.
            megatron_config: parallel config; when ``sequence_parallel`` is
                truthy the weight is marked as a sequence-parallel parameter.
        """
        super().__init__()
        # Start from the configured value so that a non-integer hidden_size
        # (already a shape sequence) is used as-is rather than leaving
        # ``normalized_shape`` unbound.
        normalized_shape = config.hidden_size
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = torch.Size(normalized_shape)
        # The scale is initialised to ones.
        self.weight = nn.Parameter(torch.ones(self.normalized_shape))
        self.variance_epsilon = config.rms_norm_eps

        if megatron_config.sequence_parallel:
            sp_utils.mark_parameter_as_sequence_parallel(self.weight)

    def forward(self, hidden_states):
        """Return ``fused_rms_norm_affine`` applied to ``hidden_states`` with this layer's weight, shape and epsilon."""
        return fused_rms_norm_affine(input=hidden_states,
                                     weight=self.weight,
                                     normalized_shape=self.normalized_shape,
                                     eps=self.variance_epsilon,
                                     memory_efficient=True)
Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_weight_loader(arch: str):
    """Return the Megatron weight-loading function registered for ``arch``.

    The architecture name is validated before anything is imported, and only
    the loader module for the requested architecture is imported. An
    unsupported name therefore always fails with ``ValueError`` and never with
    an import error from an unrelated loader.

    Args:
        arch: architecture name, e.g. ``'LlamaForCausalLM'``.

    Returns:
        The loader function registered for ``arch``.

    Raises:
        ValueError: if ``arch`` is not one of the supported architectures.
    """
    from importlib import import_module

    # architecture name -> (module path, loader function name)
    registry = {
        'LlamaForCausalLM': ('verl.models.llama.megatron.checkpoint_utils.llama_loader',
                             'load_state_dict_to_megatron_llama'),
        'Qwen2ForCausalLM': ('verl.models.qwen2.megatron.checkpoint_utils.qwen2_loader',
                             'load_state_dict_to_megatron_qwen2'),
    }

    if arch not in registry:
        # list(...) keeps the message readable instead of printing "dict_keys([...])".
        raise ValueError(f"Model architectures {arch} are not supported for now. "
                         f"Supported architectures: {list(registry)}")

    module_path, loader_name = registry[arch]
    return getattr(import_module(module_path), loader_name)
# Directory that contains this module.
version_folder = os.path.dirname(os.path.abspath(__file__))

# The version string is read from the ``version/version`` file one directory
# above this package.
_version_file = os.path.join(version_folder, os.pardir, 'version/version')
with open(_version_file) as f:
    __version__ = f.read().strip()
class MegatronWorker(Worker):
    """Worker that reports Megatron parallel-group sizes and this process's ranks."""

    def __init__(self, cuda_visible_devices=None) -> None:
        super().__init__(cuda_visible_devices)

    def get_megatron_global_info(self):
        """Return a ``DistGlobalInfo`` with the tensor, data and pipeline parallel world sizes."""
        # Megatron is imported inside the method rather than at module import time.
        from megatron.core import parallel_state as mpu
        return DistGlobalInfo(
            tp_size=mpu.get_tensor_model_parallel_world_size(),
            dp_size=mpu.get_data_parallel_world_size(),
            pp_size=mpu.get_pipeline_model_parallel_world_size(),
        )

    def get_megatron_rank_info(self):
        """Return a ``DistRankInfo`` with this process's tensor, data and pipeline parallel ranks."""
        # Megatron is imported inside the method rather than at module import time.
        from megatron.core import parallel_state as mpu
        return DistRankInfo(
            tp_rank=mpu.get_tensor_model_parallel_rank(),
            dp_rank=mpu.get_data_parallel_rank(),
            pp_rank=mpu.get_pipeline_model_parallel_rank(),
        )
-------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
@ray.remote
class WorkerGroupRegisterCenter:
    """Ray actor that stores the rank-zero info it was created with and returns it on request."""

    def __init__(self, rank_zero_info):
        # Stored unchanged; handed back by get_rank_zero_info().
        self.rank_zero_info = rank_zero_info

    def get_rank_zero_info(self):
        """Return the info object given at construction."""
        return self.rank_zero_info


def create_worker_group_register_center(name, info):
    """Start a ``WorkerGroupRegisterCenter`` actor named ``name`` that holds ``info``.

    Returns:
        The actor handle produced by ``.remote(info)``.
    """
    named_actor_cls = WorkerGroupRegisterCenter.options(name=name)
    return named_actor_cls.remote(info)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_spmd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def update_hf_weight_loader():
    """Placeholder hook: there is nothing to patch, so it only prints a notice."""
    print("no hf weight loader need to be updated")
    return


def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module):
    """Copy ``actor_weights`` into ``vllm_model`` and run post-load hooks.

    Args:
        actor_weights: mapping from parameter name to weight. It is mutated:
            ``"lm_head.weight"`` is removed when the model config reports
            tied word embeddings.
        vllm_model: module exposing ``config``, ``load_weights`` and
            ``named_modules``.

    Returns:
        None. The model is updated in place and moved with ``.cuda()``.
    """
    assert isinstance(actor_weights, Dict)
    model_dtype = next(vllm_model.parameters()).dtype
    with set_default_torch_dtype(model_dtype):  # TODO
        embeddings_tied = vllm_model.config.tie_word_embeddings
        if embeddings_tied and "lm_head.weight" in actor_weights:
            # With tied embeddings the head weight is dropped before loading.
            del actor_weights["lm_head.weight"]
        vllm_model.load_weights(actor_weights.items())
    for _name, submodule in vllm_model.named_modules():
        method = getattr(submodule, "quant_method", None)
        if method is not None:
            method.process_weights_after_loading(submodule)
        # FIXME: Remove this after Mixtral is updated
        # to use quant_method.
        if hasattr(submodule, "process_weights_after_loading"):
            submodule.process_weights_after_loading()
    vllm_model = vllm_model.cuda()
class TokenizerGroup(TokenizerGroup):
    """A group of tokenizers that can be used for LoRA adapters.

    Wraps an already-built tokenizer. The parent constructor is not invoked;
    the attributes are assigned directly from the arguments.
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int,
                 max_input_length: Optional[int]):
        self.tokenizer = tokenizer
        self.enable_lora = enable_lora
        self.max_input_length = max_input_length
        # The per-adapter tokenizer cache only exists when LoRA is enabled.
        if enable_lora:
            self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs)
        else:
            self.lora_tokenizers = None

    # FIXME(sgm): for simplicity, we assign the special token here
    @property
    def pad_token_id(self):
        """Padding token id of the wrapped tokenizer."""
        wrapped = self.tokenizer
        return wrapped.pad_token_id

    @property
    def eos_token_id(self):
        """End-of-sequence token id of the wrapped tokenizer."""
        wrapped = self.tokenizer
        return wrapped.eos_token_id
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | temperature: 1.0 18 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 19 | top_p: 0.7 20 | prompt_length: 1536 21 | response_length: 512 22 | # for vllm rollout 23 | dtype: bfloat16 # should align with FSDP 24 | gpu_memory_utilization: 0.5 25 | ignore_eos: False 26 | enforce_eager: True 27 | free_cache_engine: True 28 | load_format: dummy_dtensor 29 | tensor_model_parallel_size: 1 30 | max_num_batched_tokens: 8192 31 | max_num_seqs: 1024 32 | 
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 33 | log_prob_micro_batch_size_per_gpu: 8 34 | # for hf rollout 35 | do_sample: True -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | prompt_key: question 8 | response_key: answer 9 | max_length: 1024 10 | truncation: error 11 | balance_dp_token: False 12 | chat_template: null 13 | model: 14 | partial_pretrain: ~/models/gemma-1.1-7b-it 15 | fsdp_config: 16 | wrap_policy: 17 | min_num_params: 0 18 | cpu_offload: False 19 | offload_params: False 20 | external_lib: null 21 | enable_gradient_checkpointing: False 22 | trust_remote_code: False 23 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 24 | lora_alpha: 16 # LoRA scaling factor 25 | target_modules: all-linear # Target modules for LoRA adaptation 26 | use_liger: False 27 | optim: 28 | lr: 1e-5 29 | betas: [0.9, 0.95] 30 | weight_decay: 0.01 31 | warmup_steps_ratio: 0.1 32 | clip_grad: 1.0 33 | ulysses_sequence_parallel_size: 1 34 | use_remove_padding: False 35 | trainer: 36 | default_local_dir: /tmp/sft_model 37 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 38 | resume_path: null 39 | project_name: gsm8k-sft 40 | experiment_name: test 41 | total_epochs: 4 42 | total_training_steps: null 43 | logger: ['console'] 44 | seed: 1 45 | 46 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 
Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . 
import tokenizer 16 | from .tokenizer import * 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def update_dict_with_config(dictionary: Dict, config: DictConfig):
    """Overwrite entries of ``dictionary`` with same-named attributes of ``config``.

    Only keys already present in ``dictionary`` are considered. A key keeps
    its current value when ``config`` has no attribute of that name.

    Args:
        dictionary: mapping updated in place.
        config: object whose attributes supply the replacement values.

    Returns:
        None.
    """
    absent = object()  # sentinel: distinguishes "no such attribute" from any real value
    for name in dictionary:
        override = getattr(config, name, absent)
        if override is not absent:
            dictionary[name] = override
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0):
    """Report the CUDA memory currently allocated and reserved, in GB.

    Args:
        head: text placed at the start of the message.
        logger: logger that receives the message; when None it is printed.
        level: logging level used with ``logger``.
        rank: when a process group is initialized, only the process with this
            rank reports. ``None`` makes every process report.
    """
    # Stay silent on processes that are not the selected rank.
    if dist.is_initialized() and rank is not None and dist.get_rank() != rank:
        return

    bytes_per_gb = 1024**3
    memory_allocated = torch.cuda.memory_allocated() / bytes_per_gb
    memory_reserved = torch.cuda.memory_reserved() / bytes_per_gb
    message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}'

    if logger is not None:
        logger.log(msg=message, level=level)
        return
    print(message)
def initialize_global_process_group(timeout_second=36000):
    """Initialize the default NCCL process group and select this process's GPU.

    Args:
        timeout_second: process-group timeout, in seconds.

    Returns:
        Tuple ``(local_rank, rank, world_size)`` read from the ``LOCAL_RANK``,
        ``RANK`` and ``WORLD_SIZE`` environment variables.
    """
    import datetime

    import torch
    import torch.distributed

    group_timeout = datetime.timedelta(seconds=timeout_second)
    torch.distributed.init_process_group('nccl', timeout=group_timeout)

    # Read after the group is initialized, in the same order as the returned tuple.
    local_rank, rank, world_size = (int(os.environ[name]) for name in ("LOCAL_RANK", "RANK", "WORLD_SIZE"))

    if torch.distributed.is_initialized():
        torch.cuda.set_device(local_rank)
    return local_rank, rank, world_size
@cache
def is_megatron_core_available():
    """Return True when ``megatron.core.parallel_state`` can be imported.

    The result is cached, so the import is attempted at most once.
    """
    try:
        from megatron.core import parallel_state as mpu  # noqa: F401  (imported only to probe availability)
        return True
    except ImportError:
        return False


@cache
def is_vllm_available():
    """Return True when ``vllm`` can be imported.

    The result is cached, so the import is attempted at most once.
    """
    try:
        import vllm  # noqa: F401  (imported only to probe availability)
        return True
    except ImportError:
        return False


def import_external_libs(external_libs=None):
    """Import the given module(s) for their import-time side effects.

    Args:
        external_libs: ``None`` (do nothing), a single dotted module name, or
            an iterable of module names (list, tuple, ...).

    Raises:
        ImportError: propagated from ``importlib.import_module`` when a module
            cannot be imported.
    """
    if external_libs is None:
        return
    # Only a lone name is wrapped. Any other iterable is walked as-is, so a
    # tuple of names is no longer handed whole to import_module.
    if isinstance(external_libs, str):
        external_libs = [external_libs]
    import importlib
    for external_lib in external_libs:
        importlib.import_module(external_lib)
def concat_dict_to_str(dict: Dict, step):
    """Render the numeric entries of ``dict`` as ``'step:S - key:value - ...'``.

    Values that are not ``numbers.Number`` instances are skipped; the rest are
    formatted with three decimals, in the mapping's iteration order.
    """
    numeric_parts = [f'{name}:{value:.3f}' for name, value in dict.items() if isinstance(value, numbers.Number)]
    return ' - '.join([f'step:{step}', *numeric_parts])


class LocalLogger:
    """Minimal logger that optionally prints metrics to stdout.

    ``remote_logger`` and ``enable_wandb`` are accepted but not used.
    """

    def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False):
        self.print_to_console = print_to_console
        if not print_to_console:
            return
        print('Using LocalLogger is deprecated. The constructor API will change ')

    def flush(self):
        """No-op."""
        pass

    def log(self, data, step):
        """Print the numeric entries of ``data`` for ``step`` when console output is enabled."""
        if not self.print_to_console:
            return
        print(concat_dict_to_str(data, step=step), flush=True)
def set_basic_config(level):
    """
    Configure the global logging format and level through ``logging.basicConfig``.
    It will be called when import verl.

    Args:
        level: logging level passed to ``logging.basicConfig``.
    """
    log_format = '%(levelname)s:%(asctime)s:%(message)s'
    logging.basicConfig(level=level, format=log_format)
class MemoryBuffer:
    """Flat zero-initialized tensor on the current CUDA device, handed out as shaped views."""

    def __init__(self, numel, numel_padded, dtype):
        self.numel = numel
        self.numel_padded = numel_padded
        self.dtype = dtype
        # The backing storage has the padded size; `get` checks requests against `numel`.
        self.data = torch.zeros(numel_padded, dtype=dtype, device=torch.cuda.current_device(), requires_grad=False)

    def zero(self):
        """Reset the buffer to zero."""
        self.data.zero_()

    def get(self, shape, start_index):
        """Return a view of the 1-D data with the given `shape`, beginning at
        `start_index`. Fails an assertion when the view would end past `numel`."""
        end_index = start_index + shape.numel()
        assert end_index <= self.numel, 'requested tensor is out of the buffer range.'
        return self.data[start_index:end_index].view(shape)
def mark_parameter_as_sequence_parallel(parameter):
    """Tag ``parameter`` by setting its ``sequence_parallel`` attribute to True."""
    setattr(parameter, 'sequence_parallel', True)


def is_sequence_parallel_param(param):
    """Return the ``sequence_parallel`` attribute of ``param``, or False when it is absent."""
    return hasattr(param, 'sequence_parallel') and param.sequence_parallel


def pad_to_sequence_parallel(unpad_tokens: torch.Tensor):
    """pad the tokens such that the total length is a multiple of sp world size

    Args:
        unpad_tokens: (total_nnz, ...). Tokens after removing padding

    Returns:
        The input unchanged when its first dimension is already a multiple of
        the tensor-model-parallel world size; otherwise a tensor padded with
        zeros at the end of the first dimension.

    Raises:
        NotImplementedError: padding is needed and the tensor is neither 1-D nor 2-D.
    """
    total_nnz = unpad_tokens.shape[0]
    sp_world_size = mpu.get_tensor_model_parallel_world_size()

    # Rows missing to reach the next multiple of the world size (0 when aligned).
    pad_size = (-total_nnz) % sp_world_size

    if pad_size > 0:
        if unpad_tokens.ndim == 1:
            unpad_tokens = F.pad(unpad_tokens, (0, pad_size))
        elif unpad_tokens.ndim == 2:
            # F.pad lists the last dimension first, so the trailing pair pads dim 0.
            unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size))
        else:
            # `ndim` is an attribute; calling it raised TypeError and hid this error.
            raise NotImplementedError(f'Padding dim {unpad_tokens.ndim} is not supported')

    return unpad_tokens
def union_two_dict(dict1: Dict, dict2: Dict):
    """Merge ``dict2`` into ``dict1`` in place and return ``dict1``.

    A key present in both must map to equal values (compared with ``==``);
    otherwise an assertion fails.

    Args:
        dict1: mapping that receives the entries.
        dict2: mapping whose entries are copied.

    Returns:
        ``dict1``, after the merge.
    """
    for name in dict2:
        incoming = dict2[name]
        if name in dict1:
            assert incoming == dict1[name], f'{name} in meta_dict1 and meta_dict2 are not the same object'
        dict1[name] = incoming

    return dict1


def append_to_dict(data: Dict, new_data: Dict):
    """Append each value of ``new_data`` to the list stored under the same key in ``data``.

    Keys missing from ``data`` start with an empty list.
    """
    for name, item in new_data.items():
        data.setdefault(name, []).append(item)


class NestedNamespace(SimpleNamespace):
    """``SimpleNamespace`` built from a dictionary, converting nested dicts recursively."""

    def __init__(self, dictionary, **kwargs):
        super().__init__(**kwargs)
        for name, item in dictionary.items():
            # Only direct dict values are wrapped; every other value is stored as given.
            setattr(self, name, NestedNamespace(item) if isinstance(item, dict) else item)
def parallel_put(data_list, max_workers=None):
    """Call ``ray.put`` on every item of ``data_list`` from a thread pool.

    Args:
        data_list: sized collection of objects to put.
        max_workers: thread-pool size; defaults to ``min(len(data_list), 16)``.

    Returns:
        List with the result of ``ray.put`` for each item, in the same order
        as ``data_list``. Empty when ``data_list`` is empty.
    """
    # An empty input would otherwise yield max_workers == 0, which
    # ThreadPoolExecutor rejects with ValueError.
    if len(data_list) == 0:
        return []

    if max_workers is None:
        max_workers = min(len(data_list), 16)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Executor.map yields results in input order, so no index bookkeeping is needed.
        return list(executor.map(ray.put, data_list))
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2 2 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
class BasePPOCritic(ABC):
    """Abstract interface of a PPO critic.

    Subclasses must implement :meth:`compute_values` and :meth:`update_critic`.
    """

    def __init__(self, config):
        super().__init__()
        # The config object is stored unchanged on the instance.
        self.config = config

    @abstractmethod
    def compute_values(self, data: DataProto) -> torch.Tensor:
        """Compute values"""

    @abstractmethod
    def update_critic(self, data: DataProto):
        """Update the critic"""
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class BasePPORewardModel(ABC):
    """Abstract interface of a reward model; subclasses implement :meth:`compute_reward`."""

    def __init__(self, config):
        # The config object is stored unchanged on the instance.
        self.config = config

    @abstractmethod
    def compute_reward(self, data: DataProto) -> DataProto:
        """Compute the reward for the given input_ids.

        The underlying transformer should output a tensor of shape
        [batch_size, sequence_length], and the value at the [EOS] mask
        position should be gathered.

        Args:
            data: must contain the keys "input_ids", "attention_mask" and "position_ids".
                - input_ids: [batch_size, sequence_length]
                - attention_mask: [batch_size, sequence_length]
                - position_ids: [batch_size, sequence_length]

        Returns:
            A data pass protocol containing "reward". Only the [EOS] position
            carries the reward; every other position should have zero reward.
            This may change in the future if dense rewards are used, so the
            interface is kept general.
                - reward: [batch_size, sequence_length].
        """
14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class BaseRollout(ABC):
    """Abstract interface of a rollout; subclasses implement :meth:`generate_sequences`."""

    def __init__(self):
        """Initialize the base class. Takes no arguments."""
        super().__init__()

    @abstractmethod
    def generate_sequences(self, prompts: DataProto) -> DataProto:
        """Generate sequences"""
def get_version(pkg):
    """Return the installed version string of ``pkg``, or ``None`` if it is not installed."""
    try:
        return version(pkg)
    except PackageNotFoundError:
        return None


def _release_tuple(version_str):
    """Return the leading numeric release components of ``version_str`` as ints.

    Parsing stops at the first component that is not purely numeric, keeping its
    leading digits if it has any: ``'0.10.1.post1'`` -> ``(0, 10, 1)`` and
    ``'0.6.4rc1'`` -> ``(0, 6, 4)``.
    """
    release = []
    for piece in version_str.split('.'):
        digits = piece[:len(piece) - len(piece.lstrip('0123456789'))]
        if not digits:
            break
        release.append(int(digits))
        if digits != piece:
            # A suffix such as 'rc1' or '+cu118' ends the release segment.
            break
    return tuple(release)


package_name = 'vllm'
package_version = get_version(package_name)

if package_version is None:
    # Without this guard the comparison below fails with an opaque TypeError.
    raise ImportError(f"The '{package_name}' package is required by vllm_rollout but is not installed.")

# Compare release numbers numerically: a plain string comparison would order
# '0.10.0' before '0.6.3'. Suffixed builds of 0.6.3 itself (e.g. '0.6.3.post1')
# are still treated as newer than '0.6.3', exactly as the string comparison did.
if _release_tuple(package_version) < (0, 6, 3) or package_version == '0.6.3':
    vllm_mode = 'customized'
    from .vllm_rollout import vLLMRollout
else:
    vllm_mode = 'spmd'
    from .vllm_rollout_spmd import vLLMRollout
# The Megatron + vLLM classes are imported only when both backends are available;
# otherwise their names are bound to None.
if is_megatron_core_available() and is_vllm_available():
    from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
else:
    AllGatherPPModel = None
    MegatronVLLMShardingManager = None

# The FSDP + vLLM sharding manager only requires vLLM.
if is_vllm_available():
    from .fsdp_vllm import FSDPVLLMShardingManager
else:
    FSDPVLLMShardingManager = None
class BaseShardingManager:
    """No-op sharding manager.

    Entering and leaving the context does nothing, and both data hooks return
    their input unchanged.
    """

    def __enter__(self):
        """Enter the context; nothing is set up and ``None`` is returned."""
        return None

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the context; returns ``None``, so exceptions are not suppressed."""
        return None

    def preprocess_data(self, data: DataProto) -> DataProto:
        """Return ``data`` unchanged."""
        return data

    def postprocess_data(self, data: DataProto) -> DataProto:
        """Return ``data`` unchanged."""
        return data