├── .gitmodules ├── AgentGym-RL ├── .gitignore ├── .readthedocs.yaml ├── .style.yapf ├── Notice.txt ├── VERL_LICENSE ├── VERL_README.md ├── docs │ ├── Makefile │ ├── README.md │ ├── README_vllm0.7.md │ ├── _static │ │ └── logo.png │ ├── advance │ │ ├── dpo_extension.rst │ │ ├── fsdp_extension.rst │ │ ├── megatron_extension.rst │ │ └── placement.rst │ ├── conf.py │ ├── examples │ │ ├── config.rst │ │ ├── gsm8k_example.rst │ │ └── ppo_code_architecture.rst │ ├── experiment │ │ └── ppo.rst │ ├── faq │ │ └── faq.rst │ ├── hybrid_flow.rst │ ├── index.rst │ ├── perf │ │ └── perf_tuning.rst │ ├── preparation │ │ ├── prepare_data.rst │ │ └── reward_function.rst │ ├── requirements-docs.txt │ ├── start │ │ ├── install.rst │ │ └── quickstart.rst │ └── workers │ │ ├── fsdp_workers.rst │ │ ├── megatron_workers.rst │ │ └── ray_trainer.rst ├── pyproject.toml ├── requirements.txt ├── scripts │ ├── format.sh │ ├── model_merger.py │ ├── multiple_model_merger.py │ └── single_model_merger.py ├── setup.py └── verl │ ├── __init__.py │ ├── agent_trainer │ ├── __init__.py │ ├── config │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── ppo_trainer.yaml │ │ └── sft_trainer.yaml │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── core_algos.py │ │ └── ray_trainer.py │ └── runtime_env.yaml │ ├── models │ ├── README.md │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ └── qwen2.py │ └── weight_loader_registry.py │ ├── protocol.py │ ├── single_controller │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── decorator.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron.py │ ├── third_party │ ├── __init__.py │ └── vllm │ │ ├── __init__.py │ │ ├── vllm_spmd │ │ ├── __init__.py │ │ └── dtensor_weight_loaders.py │ │ ├── vllm_v_0_3_1 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── tokenizer.py │ │ ├── weight_loaders.py │ │ └── worker.py │ │ ├── vllm_v_0_4_2 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ ├── vllm_v_0_5_4 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ └── vllm_v_0_6_3 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ 
├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ ├── utils │ ├── __init__.py │ ├── agent_dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── rl_dataset.py │ │ └── sft_dataset.py │ ├── agentgym │ │ └── client.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── checkpoint_manager.py │ │ └── fsdp_checkpoint_manager.py │ ├── config.py │ ├── debug │ │ ├── __init__.py │ │ ├── performance.py │ │ └── trajectory_tracker.py │ ├── distributed.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── optimizer_config.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── model.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── prime_code │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ └── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ └── ulysses.py │ ├── version │ └── version │ └── workers │ ├── __init__.py │ ├── agent_actor │ ├── __init__.py │ ├── base.py │ └── dp_actor.py │ ├── agent_critic │ ├── __init__.py │ ├── base.py │ └── dp_critic.py │ ├── agent_fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── naive.py │ └── prime.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── rollout │ ├── __init__.py │ ├── agent_vllm_rollout │ │ ├── __init__.py │ │ └── vllm_rollout.py │ ├── base.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ └── naive_rollout.py │ ├── schemas.py │ └── tokenizer.py │ └── sharding_manager │ ├── __init__.py │ ├── base.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ └── megatron_vllm.py ├── LICENSE ├── README.md ├── assets ├── AgentGym-RL-main.png ├── ScalingInter-RL-Method.png ├── bytedance.jpg ├── env.jpg ├── fudannlp_logo.png ├── main_greedy_performance.jpg ├── main_performance.jpg ├── pseudo.jpg ├── searchqa_performance.jpg ├── shanghai_innovation_institute_logo.png └── webarena_performance.png └── examples ├── eval ├── babyai_eval.sh ├── sciworld_eval.sh ├── searchqa_eval.sh ├── textcraft_eval.sh └── webarena_eval.sh └── train ├── AgentGym-RL ├── babyai_train.sh ├── sciworld_train.sh ├── searchqa_train.sh ├── textcraft_train.sh └── webarena_train.sh └── ScalingInter-RL ├── babyai_train.sh ├── sciworld_train.sh ├── searchqa_train.sh ├── textcraft_train.sh └── webarena_train.sh /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "AgentGym"] 2 | path = AgentGym 3 | url = https://github.com/WooooDyy/AgentGym 4 | -------------------------------------------------------------------------------- /AgentGym-RL/.gitignore: -------------------------------------------------------------------------------- 1 | **/*.pt 2 | **/checkpoints 3 | **/wget-log 4 | **/_build/ 5 | **/*.ckpt 6 | **/outputs 7 | **/*.tar.gz 8 | **/playground 9 | **/wandb 10 | 11 | # 
Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | dataset/* 16 | tensorflow/my_graph/* 17 | .idea/ 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # IPython Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # vscode 104 | .vscode 105 | 106 | # Mac 107 | .DS_Store 108 | 109 | # output logs 110 | tests/e2e/toy_examples/deepspeed/synchronous/output.txt 111 | 112 | # vim 113 | *.swp 114 | 115 | # ckpt 116 | *.lock -------------------------------------------------------------------------------- /AgentGym-RL/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.8" 10 | 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | python: 15 | install: 16 | - requirements: docs/requirements-docs.txt -------------------------------------------------------------------------------- /AgentGym-RL/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true -------------------------------------------------------------------------------- /AgentGym-RL/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /AgentGym-RL/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /AgentGym-RL/docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and open localhost:8000. -------------------------------------------------------------------------------- /AgentGym-RL/docs/README_vllm0.7.md: -------------------------------------------------------------------------------- 1 | # Readme for verl(vllm>=0.7) version 2 | 3 | ## Installation 4 | 5 | Note: This version of veRL supports **FSDP** for training and **vLLM** for rollout. (Megatron-LM is not supported yet.) 6 | 7 | ``` 8 | # Create the conda environment 9 | conda create -n verl python==3.10 10 | conda activate verl 11 | 12 | # Install verl 13 | git clone https://github.com/volcengine/verl.git 14 | cd verl 15 | pip3 install -e . 16 | 17 | # Install vLLM>=0.7 18 | # (Option1) pip3 install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly 19 | # (Option2) pip3 install "vllm>=0.7.0" 20 | 21 | # Install flash-attn 22 | pip3 install flash-attn --no-build-isolation 23 | 24 | ``` 25 | 26 | Note that if you are installing stable versions of vLLM (Option2), you need to make some tiny patches manually on vllm (/path/to/site-packages/vllm after installation) after the above steps: 27 | 28 | - vllm/distributed/parallel_state.py: Remove the assertion below: 29 | 30 | ``` 31 | if (world_size 32 | != tensor_model_parallel_size * pipeline_model_parallel_size): 33 | raise RuntimeError( 34 | f"world_size ({world_size}) is not equal to " 35 | f"tensor_model_parallel_size ({tensor_model_parallel_size}) x " 36 | f"pipeline_model_parallel_size ({pipeline_model_parallel_size})") 37 | 38 | ``` 39 | 40 | - vllm/executor/uniproc_executor.py: change `local_rank = rank` to `local_rank = int(os.environ["LOCAL_RANK"])` 41 | - vllm/model_executor/model_loader/weight_utils.py: remove the `torch.cuda.empty_cache()` in `pt_weights_iterator` 42 | 43 | These modifications have already been merged into the main branch of vLLM. Thus nightly vLLM or building vLLM from source do not need these patches. 44 | 45 | ## Features 46 | 47 | ### Use cuda graph 48 | 49 | After installation, examples using FSDP as training backends can be used. By default, the `enforce_eager` is set to True, which disables the cuda graph. To enjoy cuda graphs and the sleep mode of vLLM>=0.7, add the following lines to the bash script: 50 | 51 | ``` 52 | actor_rollout_ref.rollout.enforce_eager=False \ 53 | actor_rollout_ref.rollout.free_cache_engine=False \ 54 | 55 | ``` 56 | 57 | For a typical job like examples/ppo_trainer/run_qwen2-7b_seq_balance.sh, the rollout generation time is 115 seconds with vLLM0.6.3, while it is 85 seconds with vLLM0.7.0. By enabling the cudagraph, the generation duration is further reduced to 62 seconds. 58 | 59 | **Note:** Currently, if the `n` is greater than 1 in `SamplingParams` in vLLM>=0.7, there is a potential performance issue on the stability of rollout generation time (Some iterations would see generation time bursts). 
We are working with the vLLM team to check this issue. 60 | 61 | ### Other features in vLLM 62 | 63 | 1. **num_scheduler_step>1:** not supported yet (weight loading has not been aligned with `MultiStepModelRunner`) 64 | 2. **Prefix caching:** not supported yet (vLLM sleep mode does not support prefix caching) 65 | 3. **Chunked prefill:** supported -------------------------------------------------------------------------------- /AgentGym-RL/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/AgentGym-RL/docs/_static/logo.png -------------------------------------------------------------------------------- /AgentGym-RL/docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- 1 | Add models with the Megatron-LM backend 2 | ========================================= 3 | 4 | Model 5 | ----------- 6 | 7 | The most challenging aspect of using the Megatron-LM backend is implementing 8 | the models for training. Currently, we implement the Llama model, which 9 | supports data parallelism, tensor parallelism, pipeline parallelism (also 10 | vPP) and sequence parallelism. We also implement remove padding (sequence packing) for the Llama 11 | model, which can be found in `modeling_llama_megatron.py `_. 12 | 13 | To support other models, users are required to implement: 14 | 15 | 1. Implement a model similar to ``modeling_llama_megatron.py`` that satisfies the 16 | parallelism requirements of Megatron-LM. Then register your model in 17 | the `registry.py `_. 18 | 2. Checkpoint utils that can load a full checkpoint (e.g. a huggingface 19 | checkpoint) into partitioned models at runtime. Then register 20 | your loader to ``weight_loader_registry`` in `weight_loader_registry.py `_. 21 | 3. A weight loader that synchronizes the weights from the Megatron model to the rollout 22 | (vLLM) model. Note that both the actor model and rollout model are 23 | partitioned during runtime. So, it's advisable to map the model names 24 | in the actor model implementation. Otherwise, you may need an additional 25 | name mapping and even weight transformation. The weight loader implementation 26 | is in `megatron_weight_loaders.py `_. -------------------------------------------------------------------------------- /AgentGym-RL/docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /AgentGym-RL/docs/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Configuration file for the Sphinx documentation builder. 16 | # 17 | # This file only contains a selection of the most common options. For a full 18 | # list see the documentation: 19 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 20 | 21 | # -- Path setup -------------------------------------------------------------- 22 | 23 | # If extensions (or modules to document with autodoc) are in another directory, 24 | # add these directories to sys.path here. If the directory is relative to the 25 | # documentation root, use os.path.abspath to make it absolute, like shown here. 26 | # 27 | # import os 28 | # import sys 29 | # sys.path.insert(0, os.path.abspath('.')) 30 | 31 | 32 | # -- Project information ----------------------------------------------------- 33 | 34 | project = u'verl' 35 | # pylint: disable=W0622 36 | copyright = u'2024 ByteDance Seed Foundation MLSys Team' 37 | author = u'Guangming Sheng, Chi Zhang, Yanghua Peng, Haibin Lin' 38 | 39 | 40 | # -- General configuration --------------------------------------------------- 41 | # The master toctree document. 42 | master_doc = 'index' 43 | 44 | # Add any Sphinx extension module names here, as strings. They can be 45 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 46 | # ones. 47 | extensions = ['recommonmark', 48 | 'sphinx.ext.autosectionlabel', 49 | ] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | source_suffix = ['.rst', 'rest', '.md'] 54 | 55 | # Add any paths that contain templates here, relative to this directory. 56 | templates_path = ['_templates'] 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | # 61 | # This is also used if you do content translation via gettext catalogs. 62 | # Usually you set "language" from the command line for these cases. 63 | language = u'en' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | # This pattern also affects html_static_path and html_extra_path. 68 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 69 | 70 | 71 | # -- Options for HTML output ------------------------------------------------- 72 | 73 | # The theme to use for HTML and HTML Help pages. See the documentation for 74 | # a list of builtin themes. 75 | # 76 | html_theme = 'sphinx_rtd_theme' 77 | 78 | # Add any paths that contain custom static files (such as style sheets) here, 79 | # relative to this directory. They are copied after the builtin static files, 80 | # so a file named "default.css" will overwrite the builtin "default.css". 
81 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /AgentGym-RL/docs/faq/faq.rst: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ==================================== 3 | 4 | Ray related 5 | ------------ 6 | 7 | How to add breakpoint for debugging with distributed Ray? 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | Please checkout the official debugging guide from Ray: https://docs.ray.io/en/latest/ray-observability/ray-distributed-debugger.html 11 | 12 | 13 | Distributed training 14 | ------------------------ 15 | 16 | How to run multi-node post-training with Ray? 17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | You can start a ray cluster and submit a ray job, following the official guide from Ray: https://docs.ray.io/en/latest/ray-core/starting-ray.html 20 | 21 | If your cluster is managed by Slurm, please refer to the guide for deploying Ray on Slurm: https://docs.ray.io/en/latest/cluster/vms/user-guides/community/slurm.html 22 | -------------------------------------------------------------------------------- /AgentGym-RL/docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to verl's documentation! 2 | ================================================ 3 | 4 | .. _hf_arxiv: https://arxiv.org/pdf/2409.19256 5 | 6 | verl is a flexible, efficient and production-ready RL training framework designed for large language models (LLMs) post-training. It is an open source implementation of the `HybridFlow `_ paper. 7 | 8 | verl is flexible and easy to use with: 9 | 10 | - **Easy extension of diverse RL algorithms**: The Hybrid programming model combines the strengths of single-controller and multi-controller paradigms to enable flexible representation and efficient execution of complex Post-Training dataflows. Allowing users to build RL dataflows in a few lines of code. 11 | 12 | - **Seamless integration of existing LLM infra with modular APIs**: Decouples computation and data dependencies, enabling seamless integration with existing LLM frameworks, such as PyTorch FSDP, Megatron-LM and vLLM. Moreover, users can easily extend to other LLM training and inference frameworks. 13 | 14 | - **Flexible device mapping and parallelism**: Supports various placement of models onto different sets of GPUs for efficient resource utilization and scalability across different cluster sizes. 15 | 16 | - Readily integration with popular HuggingFace models 17 | 18 | 19 | verl is fast with: 20 | 21 | - **State-of-the-art throughput**: By seamlessly integrating existing SOTA LLM training and inference frameworks, verl achieves high generation and training throughput. 22 | 23 | - **Efficient actor model resharding with 3D-HybridEngine**: Eliminates memory redundancy and significantly reduces communication overhead during transitions between training and generation phases. 24 | 25 | -------------------------------------------- 26 | 27 | .. _Contents: 28 | 29 | .. toctree:: 30 | :maxdepth: 5 31 | :caption: Quickstart 32 | 33 | start/install 34 | start/quickstart 35 | 36 | .. toctree:: 37 | :maxdepth: 4 38 | :caption: Programming guide 39 | 40 | hybrid_flow 41 | 42 | .. 
toctree:: 43 | :maxdepth: 5 44 | :caption: Data Preparation 45 | 46 | preparation/prepare_data 47 | preparation/reward_function 48 | 49 | .. toctree:: 50 | :maxdepth: 5 51 | :caption: Configurations 52 | 53 | examples/config 54 | 55 | .. toctree:: 56 | :maxdepth: 2 57 | :caption: PPO Example 58 | 59 | examples/ppo_code_architecture 60 | examples/gsm8k_example 61 | 62 | .. toctree:: 63 | :maxdepth: 1 64 | :caption: PPO Trainer and Workers 65 | 66 | workers/ray_trainer 67 | workers/fsdp_workers 68 | workers/megatron_workers 69 | 70 | .. toctree:: 71 | :maxdepth: 1 72 | :caption: Performance Tuning Guide 73 | 74 | perf/perf_tuning 75 | 76 | .. toctree:: 77 | :maxdepth: 1 78 | :caption: Experimental Results 79 | 80 | experiment/ppo 81 | 82 | .. toctree:: 83 | :maxdepth: 1 84 | :caption: Advanced Usage and Extension 85 | 86 | advance/placement 87 | advance/dpo_extension 88 | advance/fsdp_extension 89 | advance/megatron_extension 90 | 91 | .. toctree:: 92 | :maxdepth: 1 93 | :caption: FAQ 94 | 95 | faq/faq 96 | 97 | Contribution 98 | ------------- 99 | 100 | verl is free software; you can redistribute it and/or modify it under the terms 101 | of the Apache License 2.0. We welcome contributions. 102 | Join us on `GitHub `_, `Slack `_ and `Wechat `_ for discussions. 103 | 104 | Code formatting 105 | ^^^^^^^^^^^^^^^^^^^^^^^^ 106 | We use yapf (Google style) to enforce strict code formatting when reviewing MRs. Run yapf at the top level of verl repo: 107 | 108 | .. code-block:: bash 109 | 110 | pip3 install yapf 111 | yapf -ir -vv --style ./.style.yapf verl examples tests 112 | -------------------------------------------------------------------------------- /AgentGym-RL/docs/preparation/reward_function.rst: -------------------------------------------------------------------------------- 1 | Implement Reward Function for Dataset 2 | ====================================== 3 | 4 | For each dataset, we need to implement a reward function or utilize a reward model to compute the rewards for the generated responses. 5 | We already pre-implemented some reward functions in `reward_score directory `_. 6 | 7 | Currently, we support reward functions for GSM8k and MATH datasets. For RLHF datasets (e.g., 8 | full_hh_rlhf) and Code Generation (e.g., APPS), we utilize a reward model 9 | and SandBox (will be open-sourced soon) for evaluation, respectively. 10 | 11 | RewardManager 12 | ------------- 13 | 14 | In the entrypoint of the PPO Post-Training script `main_ppo.py `_, 15 | we implement a ``RewardManager`` that utilizes pre-implemented reward functions to compute the scores for each response. 16 | 17 | In the ``RewardManager``, we implemented a ``__call__`` function to 18 | compute the score for each response. 19 | All the reward functions are executed by ``compute_score_fn`` (a minimal dispatch sketch is shown after the list below). 20 | The input is a ``DataProto``, which includes: 21 | 22 | - ``input_ids``, ``attention_mask``: ``input_ids`` and ``attention_mask`` after applying 23 | chat_template, including prompt and response 24 | - ``responses``: response tokens 25 | - ``ground_truth``: The ground truth string of the current prompt. 26 | Stored in ``non_tensor_batch`` in the ``DataProto``, which should be 27 | preprocessed in the parquet files. 28 | - ``data_source``: The dataset name of the current prompt. Stored in 29 | ``non_tensor_batch`` in the ``DataProto``, which should be 30 | preprocessed in the parquet files.
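
The snippet below is a minimal, illustrative sketch of how ``compute_score_fn`` can dispatch on ``data_source``. The helper name ``_select_rm_score_fn``, the dataset identifiers, and the final call are assumptions made for illustration; they are not necessarily the exact names used in ``main_ppo.py``.

.. code-block:: python

    from verl.utils.reward_score import gsm8k, math

    def _select_rm_score_fn(data_source):
        # Illustrative dispatch: pick a pre-implemented scorer by dataset name.
        if data_source == 'openai/gsm8k':
            return gsm8k.compute_score
        elif data_source == 'lighteval/MATH':
            return math.compute_score
        raise NotImplementedError(f'No reward function implemented for {data_source}')

    # Inside ``RewardManager.__call__`` (roughly): decode the response tokens,
    # then score the decoded string against the ground truth.
    # score = _select_rm_score_fn(data_source)(solution_str, ground_truth)
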
31 | 32 | After detokenizing the responses, the response string and the ground 33 | truth string are passed to ``compute_score_fn`` to compute the 34 | score for each response. 35 | 36 | Reward Functions 37 | ---------------- 38 | We already pre-implemented some reward functions in `reward_score directory `_. 39 | 40 | - In the `GSM8k example `_, we 41 | force the response to output the final answer after four ####, then 42 | use string matching to compare with the ground truth. If completely 43 | correct, score 1 point; if the format is correct, score 0.1 points; if 44 | the format is incorrect, score 0 points. 45 | - In the `MATH example `_, we follow 46 | the implementation in `lm-evaluation-harness repository `_. 47 | -------------------------------------------------------------------------------- /AgentGym-RL/docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme -------------------------------------------------------------------------------- /AgentGym-RL/pyproject.toml: -------------------------------------------------------------------------------- 1 | # ------------------------------- 2 | # build-system 3 | # ------------------------------- 4 | [build-system] 5 | requires = [ 6 | "setuptools>=61.0", 7 | "wheel" 8 | ] 9 | build-backend = "setuptools.build_meta" 10 | 11 | # ------------------------------- 12 | # project (PEP 621 metadata) 13 | # ------------------------------- 14 | [project] 15 | name = "verl" 16 | # We'll mark the version as "dynamic" because it's read from the file "verl/version/version" 17 | # (PEP 621 calls this "dynamic version"). 18 | # The actual version is specified in the [tool.setuptools.dynamic] section below. 19 | dynamic = ["version"] 20 | 21 | description = "verl: Volcano Engine Reinforcement Learning for LLM" 22 | license = {file = "LICENSE"} # or "Apache-2.0", if you prefer an SPDX identifier 23 | readme = {file = "README.md", content-type = "text/markdown"} 24 | requires-python = ">=3.8" 25 | 26 | authors = [ 27 | { name = "Bytedance - Seed - MLSys", email = "zhangchi.usc1992@bytedance.com" }, 28 | { name = "Bytedance - Seed - MLSys", email = "gmsheng@connect.hku.hk" }, 29 | ] 30 | 31 | # Dependencies corresponding to install_requires in setup.py 32 | dependencies = [ 33 | "accelerate", 34 | "codetiming", 35 | "datasets", 36 | "dill", 37 | "hydra-core", 38 | "numpy", 39 | "pandas", 40 | "peft", 41 | "pyarrow>=15.0.0", 42 | "pybind11", 43 | "pylatexenc", 44 | "ray>=2.10", 45 | "tensordict<0.6", 46 | "transformers", 47 | "vllm<=0.6.3", 48 | 'wandb', 49 | 'torch==2.4.0', 50 | ] 51 | 52 | # Optional dependencies (extras_require in setup.py) 53 | [project.optional-dependencies] 54 | test = [ 55 | "pytest", "yapf", "py-spy", 56 | ] 57 | prime = ["pyext"] 58 | gpu = ["liger-kernel", "flash-attn"] 59 | 60 | # URLs 61 | [project.urls] 62 | Homepage = "https://github.com/volcengine/verl" 63 | 64 | # ------------------------------- 65 | # tool.setuptools - Additional config 66 | # ------------------------------- 67 | [tool.setuptools] 68 | # True means `setuptools` will attempt to include all relevant files in package_data automatically. 69 | # This corresponds to `include_package_data=True` in setup.py.
70 | include-package-data = true 71 | 72 | # We read the version from a file in 'verl/version/version' 73 | [tool.setuptools.dynamic] 74 | version = {file = "verl/version/version"} 75 | 76 | # If you need to mimic `package_dir={'': '.'}`: 77 | [tool.setuptools.package-dir] 78 | "" = "." 79 | 80 | # If you need to include specific non-Python data (like YAML files or version file): 81 | # This is the rough equivalent of package_data={'': ['version/*'], 'verl': ['trainer/config/*.yaml']} 82 | [tool.setuptools.package-data] 83 | verl = [ 84 | "version/*", 85 | "trainer/config/*.yaml" 86 | ] 87 | -------------------------------------------------------------------------------- /AgentGym-RL/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | peft 12 | pyarrow>=15.0.0 13 | pybind11 14 | pylatexenc 15 | ray 16 | tensordict<0.6 17 | transformers 18 | vllm<=0.6.3 19 | wandb 20 | -------------------------------------------------------------------------------- /AgentGym-RL/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests single_controller examples 4 | -------------------------------------------------------------------------------- /AgentGym-RL/scripts/single_model_merger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | import os 17 | import torch 18 | import argparse 19 | from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForTokenClassification 20 | 21 | import shutil 22 | 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--local_dir', required=True, type = str, help="The path for your saved model") 27 | parser.add_argument('--save_dir', default=None, type = str, help="The path to save your model") 28 | parser.add_argument("--hf_upload_path", default=False, type = str, help="The path of the huggingface repo to upload") 29 | args = parser.parse_args() 30 | 31 | assert not args.local_dir.endswith("huggingface"), "The local_dir should not end with huggingface" 32 | local_dir = args.local_dir 33 | save_dir = args.save_dir 34 | 35 | # copy rank zero to find the shape of (dp, fsdp) 36 | rank = 0 37 | world_size = 0 38 | for filename in os.listdir(local_dir): 39 | match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename) 40 | if match: 41 | world_size = match.group(1) 42 | break 43 | assert world_size, "No model file with the proper format" 44 | 45 | state_dict = torch.load(os.path.join(local_dir, f'model_world_size_{world_size}_rank_{rank}.pt'), map_location='cpu') 46 | 47 | print('Writing to local disk') 48 | if save_dir and os.path.abspath(save_dir) != os.path.abspath(local_dir): 49 | hf_path = os.path.join(save_dir, 'huggingface') 50 | shutil.copytree(os.path.join(local_dir, 'huggingface'), hf_path) 51 | else: 52 | hf_path = os.path.join(local_dir, 'huggingface') 53 | config = AutoConfig.from_pretrained(hf_path) 54 | 55 | if 'ForTokenClassification' in config.architectures[0]: 56 | auto_model = AutoModelForTokenClassification 57 | elif 'ForCausalLM' in config.architectures[0]: 58 | auto_model = AutoModelForCausalLM 59 | else: 60 | raise NotImplementedError(f'Unknown architecture {config["architectures"]}') 61 | 62 | with torch.device('meta'): 63 | model = auto_model.from_config(config, torch_dtype=torch.bfloat16) 64 | model.to_empty(device='cpu') 65 | 66 | print(f'Saving model to {hf_path}') 67 | model.save_pretrained(hf_path, state_dict=state_dict) 68 | del state_dict 69 | del model 70 | if args.hf_upload_path: 71 | # Push to hugging face 72 | from huggingface_hub import HfApi 73 | api = HfApi() 74 | api.create_repo(repo_id=args.hf_upload_path, private=False, exist_ok=True) 75 | api.upload_folder( 76 | folder_path=hf_path, 77 | repo_id=args.hf_upload_path, 78 | repo_type="model" 79 | ) 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /AgentGym-RL/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # setup.py is the fallback installation script when pyproject.toml does not work 16 | from setuptools import setup, find_packages 17 | import os 18 | 19 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 20 | 21 | with open(os.path.join(version_folder, 'verl/version/version')) as f: 22 | __version__ = f.read().strip() 23 | 24 | install_requires = [ 25 | 'accelerate', 26 | 'codetiming', 27 | 'datasets', 28 | 'dill', 29 | 'hydra-core', 30 | 'numpy', 31 | 'pandas', 32 | 'peft', 33 | 'pyarrow>=15.0.0', 34 | 'pybind11', 35 | 'pylatexenc', 36 | 'ray>=2.10', 37 | 'tensordict<0.6', 38 | 'transformers', 39 | 'vllm<=0.6.3', 40 | 'wandb', 41 | ] 42 | 43 | TEST_REQUIRES = ['pytest', 'yapf', 'py-spy'] 44 | PRIME_REQUIRES = ['pyext'] 45 | GPU_REQUIRES = ['liger-kernel', 'flash-attn'] 46 | 47 | extras_require = { 48 | 'test': TEST_REQUIRES, 49 | 'prime': PRIME_REQUIRES, 50 | 'gpu': GPU_REQUIRES, 51 | } 52 | 53 | from pathlib import Path 54 | this_directory = Path(__file__).parent 55 | long_description = (this_directory / "VERL_README.md").read_text() 56 | 57 | setup( 58 | name='verl', 59 | version=__version__, 60 | package_dir={'': '.'}, 61 | packages=find_packages(where='.'), 62 | url='https://github.com/volcengine/verl', 63 | license='Apache 2.0', 64 | author='Bytedance - Seed - MLSys', 65 | author_email='zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk', 66 | description='verl: Volcano Engine Reinforcement Learning for LLM', 67 | install_requires=install_requires, 68 | extras_require=extras_require, 69 | package_data={'': ['version/*'], 70 | 'verl': ['trainer/config/*.yaml'],}, 71 | include_package_data=True, 72 | long_description=long_description, 73 | long_description_content_type='text/markdown' 74 | ) -------------------------------------------------------------------------------- /AgentGym-RL/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: null 3 | reward_model_key: rewards -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: null 7 | max_prompt_length: 1024 8 | max_response_length: 15360 9 | prompt_key: item_id 10 | n_samples: 1 11 | output_path: null 12 | batch_size: 16 13 | 14 | agentgym: 15 | task_name: textcraft 16 | env_addr: 'http://localhost:5000' 17 | max_retries: 10 18 | max_rounds: 10 19 | timeout: 300 20 | 21 | model: 22 | path: null 23 | external_lib: null 24 | rollout: 25 | name: vllm 26 | temperature: 1.0 27 | top_k: -1 # 0 for hf rollout, -1 for vllm rollout 28 | top_p: 1 29 | prompt_length: ${data.max_prompt_length} # not use for opensource 30 | response_length: ${data.max_response_length} 31 | # for vllm rollout 32 | dtype: bfloat16 # should align with FSDP 33 | gpu_memory_utilization: 0.5 34 | ignore_eos: False 35 | enforce_eager: True 36 | free_cache_engine: True 37 | load_format: dummy_dtensor 38 | tensor_model_parallel_size: 2 39 | max_num_batched_tokens: 8192 40 | max_model_len: 32768 41 | max_num_seqs: 1024 42 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 43 | log_prob_micro_batch_size_per_gpu: 1 44 | log_prob_use_dynamic_bsz: ${actor.use_dynamic_bsz} 45 | log_prob_max_token_len_per_gpu: ${actor.ppo_max_token_len_per_gpu} 46 | max_tokens: 1024 47 | disable_log_stats: True 48 | enable_chunked_prefill: True # may get higher throughput when set to True. When activated, Please increase max_num_batched_tokens or decrease max_model_len. 49 | # for hf rollout 50 | do_sample: True 51 | # number of responses (i.e. num sample times) 52 | n: 1 # > 1 for grpo 53 | send_interval: 1 54 | rollout_log_dir: null 55 | 56 | actor: 57 | strategy: fsdp # This is for backward-compatibility 58 | ppo_mini_batch_size: 16 59 | ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu 60 | ppo_micro_batch_size_per_gpu: 1 61 | use_dynamic_bsz: False 62 | ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length} 63 | grad_clip: 1.0 64 | clip_ratio: 0.2 65 | entropy_coeff: 0.001 66 | use_kl_loss: null # True for GRPO 67 | kl_loss_coef: 0.001 # for grpo 68 | kl_loss_type: low_var_kl # for grpo 69 | ppo_epochs: 1 70 | shuffle: False 71 | ulysses_sequence_parallel_size: 1 # sp size 72 | optim: 73 | lr: 1e-6 74 | lr_warmup_steps_ratio: 0. 
# the total steps will be injected during runtime 75 | min_lr_ratio: null # only useful for warmup with cosine 76 | warmup_style: constant # select from constant/cosine 77 | total_training_steps: -1 # must be override by program 78 | fsdp_config: 79 | wrap_policy: 80 | # transformer_layer_cls_to_wrap: None 81 | min_num_params: 0 82 | param_offload: False 83 | grad_offload: False 84 | optimizer_offload: False 85 | fsdp_size: -1 -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 16 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 2 # this is also val batch size 5 | train_files: null 6 | prompt_key: conversations 7 | max_length: 4096 8 | truncation: right 9 | balance_dp_token: False 10 | chat_template: null 11 | model: 12 | partial_pretrain: null 13 | fsdp_config: 14 | wrap_policy: 15 | min_num_params: 0 16 | cpu_offload: False 17 | offload_params: False 18 | external_lib: null 19 | enable_gradient_checkpointing: False 20 | trust_remote_code: False 21 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 22 | lora_alpha: 16 # LoRA scaling factor 23 | target_modules: all-linear # Target modules for LoRA adaptation 24 | use_liger: False 25 | optim: 26 | lr: 1e-4 27 | betas: [0.9, 0.95] 28 | weight_decay: 0.01 29 | warmup_steps_ratio: 0.1 30 | clip_grad: 1.0 31 | ulysses_sequence_parallel_size: 1 32 | use_remove_padding: False 33 | trainer: 34 | default_local_dir: null 35 | resume_path: null 36 | project_name: verl_agent_sft 37 | experiment_name: verl_agent_sft 38 | total_epochs: 1 39 | total_training_steps: null 40 | logger: ['console'] 41 | seed: 1 42 | storage_mode: local 43 | 44 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/main_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Offline evaluate the performance of a generated file using reward model and ground truth verifier. 16 | The input is a parquet file that contains N generated sequences and (optional) the ground truth. 
17 | 18 | """ 19 | 20 | import hydra 21 | from verl.utils.fs import copy_local_path_from_hdfs 22 | import pandas as pd 23 | import numpy as np 24 | 25 | 26 | @hydra.main(config_path='config', config_name='evaluation', version_base=None) 27 | def main(config): 28 | local_path = copy_local_path_from_hdfs(config.data.path) 29 | dataset = pd.read_json(local_path) 30 | reward_model_data = dataset[config.data.reward_model_key] 31 | 32 | passes = 0 33 | avgs = 0 34 | 35 | total = len(dataset) 36 | 37 | for i in range(total): 38 | passes += np.max(reward_model_data[i]) 39 | avgs += np.mean(reward_model_data[i]) 40 | 41 | print(f'pass@{len(reward_model_data[0])}: {passes / total}') 42 | print(f'avg@{len(reward_model_data[0])}: {avgs / total}') 43 | 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/main_ppo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Note that we don't combine the main with ray_trainer as ray_trainer is used by other main. 16 | """ 17 | from verl.agent_trainer.ppo.ray_trainer import RayPPOTrainer 18 | 19 | import ray 20 | import hydra 21 | 22 | 23 | @hydra.main(config_path='config', config_name='ppo_trainer', version_base=None) 24 | def main(config): 25 | run_ppo(config) 26 | 27 | 28 | def run_ppo(config): 29 | if not ray.is_initialized(): 30 | # this is for local ray cluster 31 | ray.init(runtime_env={'env_vars': {'TOKENIZERS_PARALLELISM': 'true', 'NCCL_DEBUG': 'WARN'}}) 32 | 33 | ray.get(main_task.remote(config)) 34 | 35 | 36 | @ray.remote(num_cpus=1) # please make sure main_task is not scheduled on head 37 | def main_task(config): 38 | from verl.utils.fs import copy_local_path_from_hdfs 39 | # print initial config 40 | from pprint import pprint 41 | from omegaconf import OmegaConf 42 | pprint(OmegaConf.to_container(config, resolve=True)) # resolve=True will eval symbol values 43 | OmegaConf.resolve(config) 44 | 45 | # download the checkpoint from hdfs 46 | local_path = copy_local_path_from_hdfs(config.actor_rollout_ref.model.path) 47 | 48 | # instantiate tokenizer 49 | from verl.utils import hf_tokenizer 50 | tokenizer = hf_tokenizer(local_path) 51 | 52 | # define worker classes 53 | if config.actor_rollout_ref.actor.strategy == 'fsdp': 54 | assert config.actor_rollout_ref.actor.strategy == config.critic.strategy 55 | from verl.workers.agent_fsdp_workers import ActorRolloutRefWorker, CriticWorker 56 | from verl.single_controller.ray import RayWorkerGroup 57 | ray_worker_group_cls = RayWorkerGroup 58 | 59 | else: 60 | raise NotImplementedError 61 | 62 | from verl.agent_trainer.ppo.ray_trainer import ResourcePoolManager, Role 63 | 64 | role_worker_mapping = { 65 | Role.ActorRollout: ray.remote(ActorRolloutRefWorker), 66 | Role.Critic: ray.remote(CriticWorker), 67 | 
Role.RefPolicy: ray.remote(ActorRolloutRefWorker) 68 | } 69 | 70 | global_pool_id = 'global_pool' 71 | resource_pool_spec = { 72 | global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes, 73 | } 74 | mapping = { 75 | Role.ActorRollout: global_pool_id, 76 | Role.Critic: global_pool_id, 77 | Role.RefPolicy: global_pool_id, 78 | } 79 | 80 | resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=mapping) 81 | 82 | trainer = RayPPOTrainer(config=config, 83 | tokenizer=tokenizer, 84 | role_worker_mapping=role_worker_mapping, 85 | resource_pool_manager=resource_pool_manager, 86 | ray_worker_group_cls=ray_worker_group_cls) 87 | trainer.init_workers() 88 | trainer.fit() 89 | 90 | 91 | if __name__ == '__main__': 92 | # import socket 93 | # print(socket.gethostbyname(socket.gethostname())) 94 | # socket.sethostname("localhost") 95 | # print(socket.gethostbyname(socket.gethostname())) 96 | main() 97 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/agent_trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | Common model zoos such as huggingface/transformers struggle when using PyTorch native model parallelism. Following the design principle of vLLM, we keep the model implementations in verl simple, parallelizable, and highly optimized, with packed inputs. 3 | ## Adding a New Huggingface Model 4 | ### Step 1: Copy the model file from HF to verl 5 | - Add a new file under verl/models/hf 6 | - Copy ONLY the model file from huggingface/transformers/models to verl/models/hf 7 | 8 | ### Step 2: Modify the model file to use packed inputs 9 | - Remove all the code related to inference (kv cache) 10 | - Modify the inputs to include only 11 | - input_ids (total_nnz,) 12 | - cu_seqlens (batch_size + 1,) 13 | - max_seqlen_in_batch: int 14 | - Note that this requires using flash attention with causal mask (a minimal packing sketch is shown below).
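
Below is a minimal, illustrative sketch of how the packed inputs described above can be built from a padded batch. The helper `pack_inputs` and the final model call are hypothetical and only demonstrate the intended shapes; they are not part of verl's actual API.

```python
# Hypothetical helper (illustration only): convert a padded (batch, seqlen) batch
# into the packed inputs described in Step 2.
import torch

def pack_inputs(input_ids: torch.Tensor, attention_mask: torch.Tensor):
    """input_ids / attention_mask: (batch, seqlen) -> packed inputs for a remove-padding model."""
    seqlens = attention_mask.sum(dim=-1, dtype=torch.int32)      # (batch,)
    cu_seqlens = torch.zeros(seqlens.numel() + 1, dtype=torch.int32)
    cu_seqlens[1:] = torch.cumsum(seqlens, dim=0)                # (batch_size + 1,)
    packed_input_ids = input_ids[attention_mask.bool()]          # (total_nnz,)
    return packed_input_ids, cu_seqlens, int(seqlens.max())

# Usage (shapes only; the model signature below is assumed, not verl's exact one):
# packed_ids, cu_seqlens, max_seqlen = pack_inputs(input_ids, attention_mask)
# logits = model(packed_ids, cu_seqlens=cu_seqlens, max_seqlen_in_batch=max_seqlen)
```
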
15 | 16 | ### Step 2.5: Add tests 17 | - Add a test to compare this version and the huggingface version 18 | - Following the infrastructure and add tests to tests/models/hf 19 | 20 | ### Step 3: Add a function to apply tensor parallelism 21 | - Please follow 22 | - https://pytorch.org/docs/stable/distributed.tensor.parallel.html 23 | - https://pytorch.org/tutorials/intermediate/TP_tutorial.html 24 | - General comments 25 | - Tensor Parallelism in native Pytorch is NOT auto-parallelism. The way it works is to specify how model parameters and input/output reshards using configs. These configs are then registered as hooks to perform input/output resharding before/after model forward. 26 | 27 | ### Step 4: Add a function to apply data parallelism 28 | - Please use FSDP2 APIs 29 | - See demo here https://github.com/pytorch/torchtitan/blob/main/torchtitan/parallelisms/parallelize_llama.py#L413 30 | 31 | ### Step 5: Add a function to apply pipeline parallelism 32 | - Comes in Pytorch 2.4 33 | - Currently only in alpha in nightly version 34 | - Check torchtitan for more details 35 | 36 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/linear.py 15 | 16 | from typing import Optional, Tuple 17 | 18 | from megatron.core import tensor_parallel 19 | 20 | 21 | class QKVParallelLinear(tensor_parallel.ColumnParallelLinear): 22 | 23 | def __init__(self, 24 | input_size, 25 | num_heads, 26 | num_key_value_heads, 27 | head_dim, 28 | *, 29 | bias=True, 30 | gather_output=True, 31 | skip_bias_add=False, 32 | **kwargs): 33 | # Keep input parameters, and already restrict the head numbers 34 | self.input_size = input_size 35 | self.q_output_size = num_heads * head_dim 36 | self.kv_output_size = num_key_value_heads * head_dim 37 | self.head_dim = head_dim 38 | self.gather_output = gather_output 39 | self.skip_bias_add = skip_bias_add 40 | 41 | input_size = self.input_size 42 | output_size = (num_heads + 2 * num_key_value_heads) * self.head_dim 43 | 44 | super().__init__(input_size=input_size, 45 | output_size=output_size, 46 | bias=bias, 47 | gather_output=gather_output, 48 | skip_bias_add=skip_bias_add, 49 | **kwargs) 50 | 51 | 52 | class MergedColumnParallelLinear(tensor_parallel.ColumnParallelLinear): 53 | 54 | def __init__(self, 55 | input_size, 56 | gate_ouput_size, 57 | up_output_size, 58 | *, 59 | bias=True, 60 | gather_output=True, 61 | skip_bias_add=False, 62 | **kwargs): 63 | # Keep input parameters, and already restrict the head numbers 64 | self.input_size = input_size 65 | self.output_size = gate_ouput_size + up_output_size 66 | self.gather_output = gather_output 67 | self.skip_bias_add = skip_bias_add 68 | 69 | super().__init__(input_size=self.input_size, 70 | output_size=self.output_size, 71 | bias=bias, 72 | gather_output=gather_output, 73 | skip_bias_add=skip_bias_add, 74 | **kwargs) 75 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. 3 | # 4 | # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX 5 | # and OPT implementations in this library. It has been modified from its 6 | # original forms to accommodate minor architectural differences compared 7 | # to GPT-NeoX and OPT used by the Meta AI team that trained the model. 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 
20 | 21 | from megatron.core import parallel_state as mpu 22 | from megatron.core import tensor_parallel 23 | from megatron.core import ModelParallelConfig 24 | from torch import nn 25 | from transformers.activations import ACT2FN 26 | from verl.models.llama.megatron.layers.parallel_linear import MergedColumnParallelLinear 27 | 28 | from verl.utils.megatron import tensor_parallel as tp_utils 29 | 30 | 31 | class ParallelLlamaMLP(nn.Module): 32 | 33 | def __init__(self, config, megatron_config: ModelParallelConfig = None) -> None: 34 | super().__init__() 35 | self.config = config 36 | self.hidden_size = config.hidden_size 37 | self.intermediate_size = config.intermediate_size 38 | # The weight is only [hidden_size, intermediate_size // model_parallel_world_size] 39 | 40 | column_kwargs = tp_utils.get_default_kwargs_for_column_parallel_linear() 41 | row_kwargs = tp_utils.get_default_kwargs_for_row_parallel_linear() 42 | 43 | if megatron_config is not None: 44 | assert column_kwargs.get('config', False), 'must have ModelParallelConfig' 45 | assert row_kwargs.get('config', False), 'must have ModelParallelConfig' 46 | tp_utils.update_kwargs_with_config(row_kwargs, megatron_config) 47 | tp_utils.update_kwargs_with_config(column_kwargs, megatron_config) 48 | 49 | tp_size = mpu.get_tensor_model_parallel_world_size() 50 | 51 | self.gate_up_proj = MergedColumnParallelLinear( 52 | input_size=self.hidden_size, 53 | gate_ouput_size=self.intermediate_size, 54 | up_output_size=self.intermediate_size, 55 | bias=False, 56 | gather_output=False, 57 | skip_bias_add=False, 58 | **column_kwargs, 59 | ) 60 | self.gate_size = self.intermediate_size // tp_size 61 | 62 | self.down_proj = tensor_parallel.RowParallelLinear(input_size=self.intermediate_size, 63 | output_size=self.hidden_size, 64 | bias=False, 65 | input_is_parallel=True, 66 | skip_bias_add=False, 67 | **row_kwargs) 68 | 69 | self.act_fn = ACT2FN[config.hidden_act] 70 | 71 | def forward(self, x): 72 | gate_up = self.gate_up_proj(x)[0] 73 | gate, up = gate_up.split(self.gate_size, dim=-1) 74 | return self.down_proj(self.act_fn(gate) * up)[0] 75 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import numbers 16 | import torch 17 | from megatron.core import ModelParallelConfig 18 | from torch import nn 19 | from transformers import LlamaConfig 20 | 21 | from apex.normalization.fused_layer_norm import fused_rms_norm_affine 22 | from verl.utils.megatron import sequence_parallel as sp_utils 23 | 24 | 25 | class ParallelLlamaRMSNorm(nn.Module): 26 | 27 | def __init__(self, config: LlamaConfig, megatron_config: ModelParallelConfig): 28 | """ 29 | LlamaRMSNorm is equivalent to T5LayerNorm 30 | """ 31 | super().__init__() 32 | if isinstance(config.hidden_size, numbers.Integral): 33 | normalized_shape = (config.hidden_size,) 34 | self.normalized_shape = torch.Size(normalized_shape) 35 | self.weight = nn.Parameter(torch.ones(self.normalized_shape)) 36 | self.variance_epsilon = config.rms_norm_eps 37 | 38 | if megatron_config.sequence_parallel: 39 | sp_utils.mark_parameter_as_sequence_parallel(self.weight) 40 | 41 | def forward(self, hidden_states): 42 | return fused_rms_norm_affine(input=hidden_states, 43 | weight=self.weight, 44 | normalized_shape=self.normalized_shape, 45 | eps=self.variance_epsilon, 46 | memory_efficient=True) -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib 16 | from typing import List, Optional, Type 17 | 18 | import torch.nn as nn 19 | 20 | # Supported models using HF Rmpad 21 | # TODO(sgm): HF may supported more than listed here, we should add more after testing 22 | from transformers import LlamaConfig, MistralConfig, GemmaConfig, Qwen2Config 23 | 24 | _REOVEPAD_MODELS = {'llama': LlamaConfig, 'mistral': MistralConfig, 'gemma': GemmaConfig, 'qwen2': Qwen2Config} 25 | 26 | 27 | def check_model_support_rmpad(model_type: str): 28 | assert isinstance(model_type, str) 29 | if not model_type in _REOVEPAD_MODELS.keys(): 30 | raise ValueError(f"Model architecture {model_type} is not supported for now. " 31 | f"RMPad supported architectures: {_REOVEPAD_MODELS.keys()}." 32 | f"Please set `use_remove_padding=False` in the model config.") 33 | 34 | 35 | # Supported models in Megatron-LM 36 | # Architecture -> (module, class). 
37 | _MODELS = { 38 | "LlamaForCausalLM": 39 | ("llama", ("ParallelLlamaForCausalLMRmPadPP", "ParallelLlamaForValueRmPadPP", "ParallelLlamaForCausalLMRmPad")), 40 | "MistralForCausalLM": ("mistral", ("ParallelMistralForCausalLMRmPadPP", "ParallelMistralForValueRmPadPP", 41 | "ParallelMistralForCausalLMRmPad")) 42 | } 43 | 44 | 45 | # return model class 46 | class ModelRegistry: 47 | 48 | @staticmethod 49 | def load_model_cls(model_arch: str, value=False) -> Optional[Type[nn.Module]]: 50 | if model_arch not in _MODELS: 51 | return None 52 | 53 | megatron = "megatron" 54 | 55 | module_name, model_cls_name = _MODELS[model_arch] 56 | if not value: # actor/ref 57 | model_cls_name = model_cls_name[0] 58 | elif value: # critic/rm 59 | model_cls_name = model_cls_name[1] 60 | 61 | module = importlib.import_module(f"verl.models.{module_name}.{megatron}.modeling_{module_name}_megatron") 62 | return getattr(module, model_cls_name, None) 63 | 64 | @staticmethod 65 | def get_supported_archs() -> List[str]: 66 | return list(_MODELS.keys()) 67 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
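A usage sketch for the model registry above. It assumes the verl package and its Megatron-LM/Apex dependencies are importable; the architecture string is the one a HuggingFace config reports in its `architectures` field.

```python
# Hedged usage sketch for verl.models.registry.ModelRegistry (needs Megatron-LM and Apex installed).
from verl.models.registry import ModelRegistry

print(ModelRegistry.get_supported_archs())  # ['LlamaForCausalLM', 'MistralForCausalLM']
actor_cls = ModelRegistry.load_model_cls("LlamaForCausalLM", value=False)  # causal-LM head
critic_cls = ModelRegistry.load_model_cls("LlamaForCausalLM", value=True)  # value head
print(actor_cls.__name__, critic_cls.__name__)
```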
14 | """ 15 | Apply monkey-patch function to models 16 | """ 17 | 18 | #### Open Source Models 19 | #### transformers version < 4.48 20 | 21 | 22 | def apply_monkey_patch_to_llama(): 23 | from transformers.models.llama.modeling_llama import LlamaFlashAttention2 24 | from verl.models.transformers.llama import llama_flash_attn_forward 25 | LlamaFlashAttention2.forward = llama_flash_attn_forward 26 | 27 | 28 | def apply_monkey_patch_to_qwen2(): 29 | from transformers.models.qwen2.modeling_qwen2 import Qwen2FlashAttention2 30 | from verl.models.transformers.qwen2 import qwen2_flash_attn_forward 31 | Qwen2FlashAttention2.forward = qwen2_flash_attn_forward 32 | 33 | 34 | _PATCH_NAME_TO_FUNC = { 35 | 'llama': apply_monkey_patch_to_llama, 36 | 'qwen2': apply_monkey_patch_to_qwen2, 37 | } 38 | 39 | from transformers import PretrainedConfig 40 | 41 | 42 | def apply_monkey_patch(config: PretrainedConfig, verbose=True): 43 | if not is_transformers_version_in_range("4.45.0", "4.47.1"): 44 | raise AssertionError("The installed `transformers` version doesn't support ulysses patch. " 45 | "Please install a version between 4.45.0 and 4.47.1 to use this ulysses feature.") 46 | success_apply_monkey_patch = False 47 | if config.model_type in _PATCH_NAME_TO_FUNC: 48 | _PATCH_NAME_TO_FUNC[config.model_type]() 49 | success_apply_monkey_patch = True 50 | 51 | if success_apply_monkey_patch and verbose: 52 | print(f'Applying monkey patch to model {config.model_type}') 53 | elif not success_apply_monkey_patch: 54 | raise NotImplementedError(f'Ulysses for model {config.model_type} is not implemented, \ 55 | please set `ulysses_sequence_parallel_size=1`') 56 | 57 | return success_apply_monkey_patch 58 | 59 | 60 | from functools import lru_cache 61 | from packaging import version 62 | import importlib.metadata 63 | 64 | 65 | @lru_cache() 66 | def is_transformers_version_in_range(min_version: str, max_version: str) -> bool: 67 | try: 68 | # Get the installed version of the transformers library 69 | transformers_version = importlib.metadata.version("transformers") 70 | except importlib.metadata.PackageNotFoundError: 71 | raise ModuleNotFoundError("The `transformers` package is not installed.") 72 | 73 | # Check if the version is within the specified range 74 | return version.parse(min_version) <= version.parse(transformers_version) <= version.parse(max_version) 75 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def get_weight_loader(arch: str): 17 | from verl.models.llama.megatron.checkpoint_utils.llama_loader import load_state_dict_to_megatron_llama 18 | _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY = {'LlamaForCausalLM': load_state_dict_to_megatron_llama} 19 | 20 | if arch in _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY: 21 | return _MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY[arch] 22 | raise ValueError(f"Model architectures {arch} are not supported for now. " 23 | f"Supported architectures: {_MODEL_WEIGHT_MEGATRON_LOADER_REGISTRY.keys()}") 24 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(os.path.join(version_folder, os.pardir), 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | from dataclasses import dataclass 17 | from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo 18 | 19 | 20 | class MegatronWorker(Worker): 21 | 22 | def __init__(self, cuda_visible_devices=None) -> None: 23 | super().__init__(cuda_visible_devices) 24 | 25 | def get_megatron_global_info(self): 26 | from megatron.core import parallel_state as mpu 27 | tp_size = mpu.get_tensor_model_parallel_world_size() 28 | dp_size = mpu.get_data_parallel_world_size() 29 | pp_size = mpu.get_pipeline_model_parallel_world_size() 30 | info = DistGlobalInfo(tp_size=tp_size, dp_size=dp_size, pp_size=pp_size) 31 | return info 32 | 33 | def get_megatron_rank_info(self): 34 | from megatron.core import parallel_state as mpu 35 | tp_rank = mpu.get_tensor_model_parallel_rank() 36 | dp_rank = mpu.get_data_parallel_rank() 37 | pp_rank = mpu.get_pipeline_model_parallel_rank() 38 | info = DistRankInfo(tp_rank=tp_rank, dp_rank=dp_rank, pp_rank=pp_rank) 39 | return info -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import Dict 16 | 17 | from .worker import DistRankInfo, DistGlobalInfo 18 | from verl.single_controller.base import ResourcePool, WorkerGroup 19 | 20 | 21 | class MegatronWorkerGroup(WorkerGroup): 22 | 23 | def __init__(self, resource_pool: ResourcePool, **kwargs): 24 | super().__init__(resource_pool=resource_pool, **kwargs) 25 | self._megatron_rank_info = None 26 | self._megatron_global_info: DistGlobalInfo = None 27 | 28 | def init_megatron(self, default_megatron_kwargs: Dict = None): 29 | raise NotImplementedError(f"MegatronWorkerGroup.init_megatron should be overwritten") 30 | 31 | def get_megatron_rank_info(self, rank: int) -> DistRankInfo: 32 | assert 0 <= rank < self.world_size, f'rank must be from [0, world_size), Got {rank}' 33 | return self._megatron_rank_info[rank] 34 | 35 | @property 36 | def tp_size(self): 37 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 38 | return self._megatron_global_info.tp_size 39 | 40 | @property 41 | def dp_size(self): 42 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 43 | return self._megatron_global_info.dp_size 44 | 45 | @property 46 | def pp_size(self): 47 | assert self._megatron_global_info is not None, "MegatronWorkerGroup._megatron_global_info must be initialized" 48 | return self._megatron_global_info.pp_size 49 | 50 | def get_megatron_global_info(self): 51 | return self._megatron_global_info 52 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | 21 | def __init__(self, rank_zero_info): 22 | self.rank_zero_info = rank_zero_info 23 | 24 | def get_rank_zero_info(self): 25 | return self.rank_zero_info 26 | 27 | 28 | def create_worker_group_register_center(name, info): 29 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 30 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls 16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo) -------------------------------------------------------------------------------- /AgentGym-RL/verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict, Optional 16 | 17 | import ray 18 | 19 | from .base import RayWorkerGroup, RayResourcePool, RayClassWithInitArgs 20 | from verl.single_controller.base.megatron.worker import DistRankInfo, DistGlobalInfo 21 | from verl.single_controller.base.megatron.worker_group import MegatronWorkerGroup 22 | 23 | 24 | # NOTE(sgm): for open-source megatron-core 25 | class NVMegatronRayWorkerGroup(RayWorkerGroup, MegatronWorkerGroup): 26 | """ 27 | MegatronWorkerGroup will query each worker of its megatron rank info and store it inside the WorkerGroup 28 | so that the dispatcher can use it to dispatch data. 
29 | """ 30 | 31 | def __init__(self, resource_pool: RayResourcePool, ray_cls_with_init: RayClassWithInitArgs, **kwargs): 32 | super().__init__(resource_pool=resource_pool, ray_cls_with_init=ray_cls_with_init, **kwargs) 33 | self._megatron_rank_info: DistRankInfo = self.execute_all_sync(method_name='get_megatron_rank_info') 34 | self._megatron_global_info: DistGlobalInfo = ray.get( 35 | self.execute_rank_zero_async(method_name='get_megatron_global_info')) 36 | 37 | 38 | class MegatronRayWorkerGroup(RayWorkerGroup, MegatronWorkerGroup): 39 | """ 40 | MegatronWorkerGroup will query each worker of its megatron rank info and store it inside the WorkerGroup 41 | so that the dispatcher can use it to dispatch data. 42 | """ 43 | 44 | def __init__(self, 45 | resource_pool: RayResourcePool, 46 | ray_cls_with_init: RayClassWithInitArgs, 47 | default_megatron_kwargs: Dict = None, 48 | **kwargs): 49 | super().__init__(resource_pool=resource_pool, 50 | ray_cls_with_init=ray_cls_with_init, 51 | default_megatron_kwargs=default_megatron_kwargs, 52 | **kwargs) 53 | self.init_megatron(default_megatron_kwargs=default_megatron_kwargs) 54 | self._megatron_rank_info: DistRankInfo = self.execute_all_sync(method_name='get_megatron_rank_info') 55 | self._megatron_global_info: DistGlobalInfo = ray.get( 56 | self.execute_rank_zero_async(method_name='get_megatron_global_info')) 57 | 58 | def init_megatron(self, default_megatron_kwargs: Optional[Dict] = None): 59 | # after super, we will call init of each worker 60 | if not self._is_init_with_detached_workers: 61 | # only init_megatron if the WorkerGroup is created from scratch 62 | self.execute_all_sync(method_name='init_megatron', default_megatron_kwargs=default_megatron_kwargs) 63 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from importlib.metadata import version, PackageNotFoundError 16 | from packaging import version as vs 17 | 18 | 19 | def get_version(pkg): 20 | try: 21 | return version(pkg) 22 | except PackageNotFoundError: 23 | return None 24 | 25 | 26 | package_name = 'vllm' 27 | package_version = get_version(package_name) 28 | vllm_version = None 29 | 30 | if package_version == '0.3.1': 31 | vllm_version = '0.3.1' 32 | from .vllm_v_0_3_1.llm import LLM 33 | from .vllm_v_0_3_1.llm import LLMEngine 34 | from .vllm_v_0_3_1 import parallel_state 35 | elif package_version == '0.4.2': 36 | vllm_version = '0.4.2' 37 | from .vllm_v_0_4_2.llm import LLM 38 | from .vllm_v_0_4_2.llm import LLMEngine 39 | from .vllm_v_0_4_2 import parallel_state 40 | elif package_version == '0.5.4': 41 | vllm_version = '0.5.4' 42 | from .vllm_v_0_5_4.llm import LLM 43 | from .vllm_v_0_5_4.llm import LLMEngine 44 | from .vllm_v_0_5_4 import parallel_state 45 | elif package_version == '0.6.3': 46 | vllm_version = '0.6.3' 47 | from .vllm_v_0_6_3.llm import LLM 48 | from .vllm_v_0_6_3.llm import LLMEngine 49 | from .vllm_v_0_6_3 import parallel_state 50 | elif vs.parse(package_version) >= vs.parse('0.6.6.post2.dev252+g8027a724'): 51 | # From 0.6.6.post2 on, vllm supports SPMD inference 52 | # See https://github.com/vllm-project/vllm/pull/12071 53 | 54 | from vllm import LLM 55 | from vllm.distributed import parallel_state 56 | from .vllm_spmd.dtensor_weight_loaders import load_dtensor_weights 57 | else: 58 | raise ValueError( 59 | f'vllm version {package_version} not supported. Currently supported versions are 0.3.1, 0.4.2, 0.5.4, 0.6.3 and 0.7.0+' 60 | ) 61 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_spmd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import List, Optional, Tuple, Union 17 | 18 | from transformers import (AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast) 19 | 20 | from vllm.lora.request import LoRARequest 21 | from vllm.utils import make_async, LRUCache 22 | from vllm.transformers_utils.tokenizers import * 23 | 24 | 25 | class TokenizerGroup: 26 | """A group of tokenizers that can be used for LoRA adapters.""" 27 | 28 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, 29 | max_input_length: Optional[int]): 30 | self.enable_lora = enable_lora 31 | self.max_input_length = max_input_length 32 | self.tokenizer = tokenizer 33 | if enable_lora: 34 | self.lora_tokenizers = LRUCache(capacity=max_num_seqs) 35 | else: 36 | self.lora_tokenizers = None 37 | 38 | def encode(self, 39 | prompt: str, 40 | request_id: Optional[str] = None, 41 | lora_request: Optional[LoRARequest] = None) -> List[int]: 42 | tokenizer = self.get_lora_tokenizer(lora_request) 43 | return tokenizer.encode(prompt) 44 | 45 | async def encode_async(self, 46 | prompt: str, 47 | request_id: Optional[str] = None, 48 | lora_request: Optional[LoRARequest] = None) -> List[int]: 49 | tokenizer = await self.get_lora_tokenizer_async(lora_request) 50 | return tokenizer.encode(prompt) 51 | 52 | def get_lora_tokenizer(self, lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": 53 | if not lora_request or not self.enable_lora: 54 | return self.tokenizer 55 | if lora_request.lora_int_id not in self.lora_tokenizers: 56 | # TODO(sgm): the lora tokenizer is also passed, but may be different 57 | tokenizer = self.tokenizer 58 | # tokenizer = (get_lora_tokenizer( 59 | # lora_request, **self.tokenizer_config) or self.tokenizer) 60 | self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) 61 | return tokenizer 62 | else: 63 | return self.lora_tokenizers.get(lora_request.lora_int_id) 64 | 65 | # FIXME(sgm): for simplicity, we assign the special token here 66 | @property 67 | def pad_token_id(self): 68 | return self.tokenizer.pad_token_id 69 | 70 | @property 71 | def eos_token_id(self): 72 | return self.tokenizer.eos_token_id 73 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import List, Optional, Tuple, Union 17 | 18 | from transformers import (AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast) 19 | 20 | from vllm.lora.request import LoRARequest 21 | from vllm.utils import make_async, LRUCache 22 | from vllm.transformers_utils.tokenizers import * 23 | 24 | 25 | class TokenizerGroup: 26 | """A group of tokenizers that can be used for LoRA adapters.""" 27 | 28 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, 29 | max_input_length: Optional[int]): 30 | self.enable_lora = enable_lora 31 | self.max_input_length = max_input_length 32 | self.tokenizer = tokenizer 33 | self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None 34 | 35 | def ping(self) -> bool: 36 | """Check if the tokenizer group is alive.""" 37 | return True 38 | 39 | def get_max_input_len(self, lora_request: Optional[LoRARequest] = None) -> Optional[int]: 40 | """Get the maximum input length for the LoRA request.""" 41 | return self.max_input_length 42 | 43 | def encode(self, 44 | prompt: str, 45 | request_id: Optional[str] = None, 46 | lora_request: Optional[LoRARequest] = None) -> List[int]: 47 | tokenizer = self.get_lora_tokenizer(lora_request) 48 | return tokenizer.encode(prompt) 49 | 50 | async def encode_async(self, 51 | prompt: str, 52 | request_id: Optional[str] = None, 53 | lora_request: Optional[LoRARequest] = None) -> List[int]: 54 | tokenizer = await self.get_lora_tokenizer_async(lora_request) 55 | return tokenizer.encode(prompt) 56 | 57 | def get_lora_tokenizer(self, lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": 58 | if not lora_request or not self.enable_lora: 59 | return self.tokenizer 60 | if lora_request.lora_int_id not in self.lora_tokenizers: 61 | # TODO(sgm): the lora tokenizer is also passed, but may be different 62 | 
tokenizer = self.tokenizer 63 | # tokenizer = (get_lora_tokenizer( 64 | # lora_request, **self.tokenizer_config) or self.tokenizer) 65 | self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) 66 | return tokenizer 67 | else: 68 | return self.lora_tokenizers.get(lora_request.lora_int_id) 69 | 70 | # FIXME(sgm): for simplicity, we assign the special token here 71 | @property 72 | def pad_token_id(self): 73 | return self.tokenizer.pad_token_id 74 | 75 | @property 76 | def eos_token_id(self): 77 | return self.tokenizer.eos_token_id 78 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models 15 | 16 | from typing import Dict, Union, Optional, Iterable, Tuple 17 | 18 | import torch 19 | import torch.nn as nn 20 | 21 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 22 | from vllm.model_executor.model_loader.weight_utils import default_weight_loader 23 | 24 | 25 | def update_hf_weight_loader(): 26 | print('no hf weight loader need to be updated') 27 | return 28 | 29 | 30 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 31 | assert isinstance(actor_weights, Dict) 32 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 33 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys(): 34 | del actor_weights["lm_head.weight"] 35 | vllm_model.load_weights(actor_weights.items()) 36 | for _, module in vllm_model.named_modules(): 37 | quant_method = getattr(module, "quant_method", None) 38 | if quant_method is not None: 39 | quant_method.process_weights_after_loading(module) 40 | # FIXME: Remove this after Mixtral is updated 41 | # to use quant_method. 
42 | if hasattr(module, "process_weights_after_loading"): 43 | module.process_weights_after_loading() 44 | vllm_model = vllm_model.cuda() 45 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import List, Optional, Tuple, Union 17 | 18 | from transformers import (AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast) 19 | 20 | from vllm.lora.request import LoRARequest 21 | from vllm.utils import make_async, LRUCache 22 | from vllm.transformers_utils.tokenizers import * 23 | 24 | 25 | class TokenizerGroup: 26 | """A group of tokenizers that can be used for LoRA adapters.""" 27 | 28 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, 29 | max_input_length: Optional[int]): 30 | self.enable_lora = enable_lora 31 | self.max_input_length = max_input_length 32 | self.tokenizer = tokenizer 33 | self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None 34 | 35 | def ping(self) -> bool: 36 | """Check if the tokenizer group is alive.""" 37 | return True 38 | 39 | def get_max_input_len(self, lora_request: Optional[LoRARequest] = None) -> Optional[int]: 40 | """Get the maximum input length for the LoRA request.""" 41 | return self.max_input_length 42 | 43 | def encode(self, 44 | prompt: str, 45 | request_id: Optional[str] = None, 46 | lora_request: Optional[LoRARequest] = None) -> List[int]: 47 | tokenizer = self.get_lora_tokenizer(lora_request) 48 | return tokenizer.encode(prompt) 49 | 50 | async def encode_async(self, 51 | prompt: str, 52 | request_id: Optional[str] = None, 53 | lora_request: Optional[LoRARequest] = None) -> List[int]: 54 | tokenizer = await self.get_lora_tokenizer_async(lora_request) 55 | return tokenizer.encode(prompt) 56 | 57 | def get_lora_tokenizer(self, lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": 58 | if not lora_request or not self.enable_lora: 59 | return self.tokenizer 60 | if lora_request.lora_int_id not in self.lora_tokenizers: 61 | # TODO(sgm): the lora tokenizer is also passed, but may be different 62 | tokenizer = self.tokenizer 63 | # tokenizer = (get_lora_tokenizer( 64 | # lora_request, **self.tokenizer_config) or self.tokenizer) 65 | self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) 66 | return tokenizer 67 | else: 68 | return self.lora_tokenizers.get(lora_request.lora_int_id) 69 | 70 | # FIXME(sgm): for simplicity, we assign the special token here 71 | @property 72 | def pad_token_id(self): 73 | return self.tokenizer.pad_token_id 74 | 75 | @property 76 | def 
eos_token_id(self): 77 | return self.tokenizer.eos_token_id 78 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/engine/arg_utils.py 15 | 16 | import os 17 | from dataclasses import dataclass 18 | 19 | from transformers import PretrainedConfig 20 | from vllm.config import EngineConfig 21 | from vllm.engine.arg_utils import EngineArgs 22 | 23 | from .config import LoadConfig, ModelConfig 24 | 25 | 26 | @dataclass 27 | class EngineArgs(EngineArgs): 28 | model_hf_config: PretrainedConfig = None # for verl 29 | 30 | def __post_init__(self): 31 | pass 32 | 33 | def create_model_config(self) -> ModelConfig: 34 | return ModelConfig( 35 | hf_config=self.model_hf_config, 36 | tokenizer_mode=self.tokenizer_mode, 37 | trust_remote_code=self.trust_remote_code, 38 | dtype=self.dtype, 39 | seed=self.seed, 40 | revision=self.revision, 41 | code_revision=self.code_revision, 42 | rope_scaling=self.rope_scaling, 43 | rope_theta=self.rope_theta, 44 | tokenizer_revision=self.tokenizer_revision, 45 | max_model_len=self.max_model_len, 46 | quantization=self.quantization, 47 | quantization_param_path=self.quantization_param_path, 48 | enforce_eager=self.enforce_eager, 49 | max_context_len_to_capture=self.max_context_len_to_capture, 50 | max_seq_len_to_capture=self.max_seq_len_to_capture, 51 | max_logprobs=self.max_logprobs, 52 | disable_sliding_window=self.disable_sliding_window, 53 | skip_tokenizer_init=self.skip_tokenizer_init, 54 | served_model_name=self.served_model_name, 55 | limit_mm_per_prompt=self.limit_mm_per_prompt, 56 | use_async_output_proc=not self.disable_async_output_proc, 57 | override_neuron_config=self.override_neuron_config, 58 | config_format=self.config_format, 59 | mm_processor_kwargs=self.mm_processor_kwargs, 60 | ) 61 | 62 | def create_load_config(self) -> LoadConfig: 63 | return LoadConfig( 64 | load_format=self.load_format, 65 | download_dir=self.download_dir, 66 | model_loader_extra_config=self.model_loader_extra_config, 67 | ignore_patterns=self.ignore_patterns, 68 | ) 69 | 70 | def create_engine_config(self) -> EngineConfig: 71 | engine_config = super().create_engine_config() 72 | 73 | # NOTE[VERL]: Use the world_size set by torchrun 74 | world_size = int(os.getenv("WORLD_SIZE", "-1")) 75 | assert world_size != -1, "The world_size is set to -1, not initialized by TORCHRUN" 76 | engine_config.parallel_config.world_size = world_size 77 | 78 | return engine_config 79 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # Adapted from https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/model_loader 15 | 16 | from typing import Dict 17 | 18 | import torch.nn as nn 19 | from vllm.model_executor.model_loader.utils import set_default_torch_dtype 20 | 21 | 22 | def update_hf_weight_loader(): 23 | print("no hf weight loader need to be updated") 24 | return 25 | 26 | 27 | def load_hf_weights(actor_weights: Dict, vllm_model: nn.Module): 28 | assert isinstance(actor_weights, Dict) 29 | with set_default_torch_dtype(next(vllm_model.parameters()).dtype): # TODO 30 | if vllm_model.config.tie_word_embeddings and "lm_head.weight" in actor_weights.keys(): 31 | del actor_weights["lm_head.weight"] 32 | vllm_model.load_weights(actor_weights.items()) 33 | for _, module in vllm_model.named_modules(): 34 | quant_method = getattr(module, "quant_method", None) 35 | if quant_method is not None: 36 | quant_method.process_weights_after_loading(module) 37 | # FIXME: Remove this after Mixtral is updated 38 | # to use quant_method. 39 | if hasattr(module, "process_weights_after_loading"): 40 | module.process_weights_after_loading() 41 | vllm_model = vllm_model.cuda() 42 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright 2023 The vLLM team. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/transformers_utils/tokenizer_group/tokenizer_group.py 15 | 16 | from typing import Optional 17 | 18 | from transformers import PreTrainedTokenizer 19 | from vllm.transformers_utils.tokenizer_group import TokenizerGroup 20 | from vllm.utils import LRUCache 21 | 22 | 23 | class TokenizerGroup(TokenizerGroup): 24 | """A group of tokenizers that can be used for LoRA adapters.""" 25 | 26 | def __init__(self, tokenizer: PreTrainedTokenizer, enable_lora: bool, max_num_seqs: int, 27 | max_input_length: Optional[int]): 28 | self.enable_lora = enable_lora 29 | self.max_input_length = max_input_length 30 | self.tokenizer = tokenizer 31 | self.lora_tokenizers = LRUCache[PreTrainedTokenizer](capacity=max_num_seqs) if enable_lora else None 32 | 33 | # FIXME(sgm): for simplicity, we assign the special token here 34 | @property 35 | def pad_token_id(self): 36 | return self.tokenizer.pad_token_id 37 | 38 | @property 39 | def eos_token_id(self): 40 | return self.tokenizer.eos_token_id 41 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
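A usage sketch for the vLLM 0.6.3 `TokenizerGroup` wrapper above; it assumes vLLM 0.6.3 and transformers are installed, and the `gpt2` tokenizer is only an example.

```python
# Hedged usage sketch: verl's TokenizerGroup wraps a single HF tokenizer (LoRA handling disabled here).
from transformers import AutoTokenizer

from verl.third_party.vllm.vllm_v_0_6_3.tokenizer import TokenizerGroup

hf_tok = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer
group = TokenizerGroup(tokenizer=hf_tok, enable_lora=False, max_num_seqs=8, max_input_length=None)
print(group.encode("hello world"))             # token ids from the wrapped tokenizer
print(group.pad_token_id, group.eos_token_id)  # gpt2 has no pad token, so the first value is None
```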
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import * 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/agent_dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We organize the prompt directly in the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answer in a particular format so that we can extract it. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | } 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/agent_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
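The README above describes the row schema that `RLHFDataset` expects. The following is a minimal construction sketch, not code from this repository: it assumes `pandas` with a parquet engine (e.g. `pyarrow`) is installed, and the output filename is only an example.

```python
# Build one RLHF-style record matching the README schema and save it to parquet.
import pandas as pd

row = {
    "data_source": "openai/gsm8k",
    "prompt": [{
        "role": "user",
        "content": "Natalia sold clips to 48 of her friends in April, and then she "
                   "sold half as many clips in May. How many clips did Natalia sell "
                   "altogether in April and May? Let's think step by step and output "
                   'the final answer after "####"',
    }],
    "ability": "math",
    "reward_model": {"style": "rule", "ground_truth": ["72"]},
}

df = pd.DataFrame([row])
df.to_parquet("gsm8k_train.parquet")  # all data sources end up in one parquet file
print(pd.read_parquet("gsm8k_train.parquet").iloc[0]["data_source"])  # openai/gsm8k
```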
14 | 15 | from .rl_dataset import RLHFDataset 16 | from .sft_dataset import SFTDataset -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/agentgym/client.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import os 3 | import time 4 | from agentenv.envs import ( 5 | AcademiaEnvClient, 6 | AlfWorldEnvClient, 7 | BabyAIEnvClient, 8 | MazeEnvClient, 9 | MovieEnvClient, 10 | SciworldEnvClient, 11 | SheetEnvClient, 12 | SqlGymEnvClient, 13 | TextCraftEnvClient, 14 | TodoEnvClient, 15 | WeatherEnvClient, 16 | WebarenaEnvClient, 17 | WebshopEnvClient, 18 | WordleEnvClient, 19 | SearchQAEnvClient, 20 | ) 21 | 22 | def init_env_client(args): 23 | # task_name - task dict 24 | envclient_classes = { 25 | "webshop": WebshopEnvClient, 26 | "alfworld": AlfWorldEnvClient, 27 | "babyai": BabyAIEnvClient, 28 | "sciworld": SciworldEnvClient, 29 | "textcraft": TextCraftEnvClient, 30 | "webarena": WebarenaEnvClient, 31 | "sqlgym": SqlGymEnvClient, 32 | "maze": MazeEnvClient, 33 | "wordle": WordleEnvClient, 34 | "weather": WeatherEnvClient, 35 | "todo": TodoEnvClient, 36 | "movie": MovieEnvClient, 37 | "sheet": SheetEnvClient, 38 | "academia": AcademiaEnvClient, 39 | "searchqa": SearchQAEnvClient, 40 | } 41 | # select task according to the name 42 | envclient_class = envclient_classes.get(args.task_name.lower(), None) 43 | if envclient_class is None: 44 | raise ValueError(f"Unsupported task name: {args.task_name}") 45 | retry = 0 46 | while True: 47 | try: 48 | env_client = envclient_class(env_server_base=args.env_addr, data_len=1, timeout=2400) 49 | break 50 | except Exception as e: 51 | retry += 1 52 | print(f"Failed to connect to env server, retrying...({retry}/{args.max_retries})") 53 | if retry > args.max_retries: 54 | raise e 55 | time.sleep(5) 56 | return env_client -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | import torch.distributed as dist 17 | import logging 18 | 19 | 20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0): 21 | if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank): 22 | memory_allocated = torch.cuda.memory_allocated() / 1024**3 23 | memory_reserved = torch.cuda.memory_reserved() / 1024**3 24 | 25 | message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}' 26 | 27 | if logger is None: 28 | print(message) 29 | else: 30 | logger.log(msg=message, level=level) 31 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Trajectory tracker can be inserted into code to save the intermediate results. 16 | The results will be dump to hdfs for offline comparison. 17 | Each process will have a client that first move all the tensors to CPU 18 | """ 19 | 20 | from verl.utils.hdfs_io import makedirs, copy 21 | import torch 22 | import os 23 | import ray 24 | import io 25 | import tempfile 26 | 27 | from collections import deque 28 | 29 | remote_copy = ray.remote(copy) 30 | 31 | 32 | @ray.remote 33 | def save_to_hdfs(data: io.BytesIO, name, hdfs_dir, verbose): 34 | filename = name + '.pth' 35 | with tempfile.TemporaryDirectory() as tmpdirname: 36 | local_filepath = os.path.join(tmpdirname, filename) 37 | with open(local_filepath, 'wb') as f: 38 | f.write(data.getbuffer()) 39 | # upload to hdfs 40 | 41 | if verbose: 42 | print(f'Saving {local_filepath} to {hdfs_dir}') 43 | try: 44 | copy(local_filepath, hdfs_dir) 45 | except Exception as e: 46 | print(e) 47 | 48 | 49 | @ray.remote 50 | class TrajectoryTracker(): 51 | 52 | def __init__(self, hdfs_dir, verbose) -> None: 53 | self.hdfs_dir = hdfs_dir 54 | makedirs(hdfs_dir) 55 | self.verbose = verbose 56 | 57 | self.handle = deque() 58 | 59 | def dump(self, data: io.BytesIO, name): 60 | # get a temp file and write to it 61 | self.handle.append(save_to_hdfs.remote(data, name, self.hdfs_dir, self.verbose)) 62 | 63 | def wait_for_hdfs(self): 64 | while len(self.handle) != 0: 65 | future = self.handle.popleft() 66 | ray.get(future) 67 | 68 | 69 | def dump_data(data, name): 70 | enable = os.getenv('VERL_ENABLE_TRACKER', '0') == '1' 71 | if not enable: 72 | return 73 | buffer = io.BytesIO() 74 | torch.save(data, buffer) 75 | tracker = get_trajectory_tracker() 76 | ray.get(tracker.dump.remote(buffer, name)) 77 | 78 | 79 | def get_trajectory_tracker(): 80 | hdfs_dir = os.getenv('VERL_TRACKER_HDFS_DIR', default=None) 81 | verbose = os.getenv('VERL_TRACKER_VERBOSE', default='0') == '1' 82 | assert hdfs_dir is not None 83 | tracker = TrajectoryTracker.options(name="global_tracker", get_if_exists=True, 84 | lifetime="detached").remote(hdfs_dir, verbose) 85 | return tracker 86 | 87 | 88 | if __name__ == '__main__': 89 | # testing 90 | os.environ['VERL_ENABLE_TRACKER'] = '1' 91 | os.environ['VERL_TRACKER_HDFS_DIR'] = '~/debug/test' 92 | 93 | @ray.remote 94 | def process(iter): 95 | data = {'obs': torch.randn(10, 20)} 96 | dump_data(data, f'process_{iter}_obs') 97 | 98 | ray.init() 99 | 100 | output_lst = [] 101 | 102 | for i in range(10): 103 | output_lst.append(process.remote(i)) 104 | 105 | out = ray.get(output_lst) 106 | 107 | tracker = get_trajectory_tracker() 108 | ray.get(tracker.wait_for_hdfs.remote()) 109 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | import os 16 | 17 | 18 | def initialize_global_process_group(timeout_second=36000): 19 | import torch.distributed 20 | from datetime import timedelta 21 | torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second)) 22 | local_rank = int(os.environ["LOCAL_RANK"]) 23 | rank = int(os.environ["RANK"]) 24 | world_size = int(os.environ["WORLD_SIZE"]) 25 | 26 | if torch.distributed.is_initialized(): 27 | torch.cuda.set_device(local_rank) 28 | return local_rank, rank, world_size 29 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/fs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # -*- coding: utf-8 -*- 17 | """File-system agnostic IO APIs""" 18 | import os 19 | import tempfile 20 | import hashlib 21 | 22 | try: 23 | from hdfs_io import copy, makedirs, exists # for internal use only 24 | except ImportError: 25 | from .hdfs_io import copy, makedirs, exists 26 | 27 | __all__ = ["copy", "exists", "makedirs"] 28 | 29 | _HDFS_PREFIX = "hdfs://" 30 | 31 | 32 | def is_non_local(path): 33 | return path.startswith(_HDFS_PREFIX) 34 | 35 | 36 | def md5_encode(path: str) -> str: 37 | return hashlib.md5(path.encode()).hexdigest() 38 | 39 | 40 | def get_local_temp_path(hdfs_path: str, cache_dir: str) -> str: 41 | """Return a local temp path that joins cache_dir and basename of hdfs_path 42 | 43 | Args: 44 | hdfs_path: 45 | cache_dir: 46 | 47 | Returns: 48 | 49 | """ 50 | # make a base64 encoding of hdfs_path to avoid directory conflict 51 | encoded_hdfs_path = md5_encode(hdfs_path) 52 | temp_dir = os.path.join(cache_dir, encoded_hdfs_path) 53 | os.makedirs(temp_dir, exist_ok=True) 54 | dst = os.path.join(temp_dir, os.path.basename(hdfs_path)) 55 | return dst 56 | 57 | 58 | def copy_local_path_from_hdfs(src: str, cache_dir=None, filelock='.file.lock', verbose=False) -> str: 59 | """Copy src from hdfs to local if src is on hdfs or directly return src. 60 | If cache_dir is None, we will use the default cache dir of the system. Note that this may cause conflicts if 61 | the src name is the same between calls 62 | 63 | Args: 64 | src (str): a HDFS path of a local path 65 | 66 | Returns: 67 | a local path of the copied file 68 | """ 69 | from filelock import FileLock 70 | 71 | assert src[-1] != '/', f'Make sure the last char in src is not / because it will cause error. 
Got {src}' 72 | 73 | if is_non_local(src): 74 | # download from hdfs to local 75 | if cache_dir is None: 76 | # get a temp folder 77 | cache_dir = tempfile.gettempdir() 78 | os.makedirs(cache_dir, exist_ok=True) 79 | assert os.path.exists(cache_dir) 80 | local_path = get_local_temp_path(src, cache_dir) 81 | # get a specific lock 82 | filelock = md5_encode(src) + '.lock' 83 | lock_file = os.path.join(cache_dir, filelock) 84 | with FileLock(lock_file=lock_file): 85 | if not os.path.exists(local_path): 86 | if verbose: 87 | print(f'Copy from {src} to {local_path}') 88 | copy(src, local_path) 89 | return local_path 90 | else: 91 | return src 92 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/import_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Utilities to check if packages are available. 16 | We assume package availability won't change during runtime. 17 | """ 18 | 19 | from functools import cache 20 | from typing import List 21 | 22 | 23 | @cache 24 | def is_megatron_core_available(): 25 | try: 26 | from megatron.core import parallel_state as mpu 27 | return True 28 | except ImportError: 29 | return False 30 | 31 | 32 | @cache 33 | def is_vllm_available(): 34 | try: 35 | import vllm 36 | return True 37 | except ImportError: 38 | return False 39 | 40 | 41 | def import_external_libs(external_libs=None): 42 | if external_libs is None: 43 | return 44 | if not isinstance(external_libs, List): 45 | external_libs = [external_libs] 46 | import importlib 47 | for external_lib in external_libs: 48 | importlib.import_module(external_lib) 49 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | A Ray logger will receive logging info from different processes. 16 | """ 17 | import numbers 18 | from typing import Dict 19 | 20 | 21 | def concat_dict_to_str(dict: Dict, step): 22 | output = [f'step:{step}'] 23 | for k, v in dict.items(): 24 | if isinstance(v, numbers.Number): 25 | output.append(f'{k}:{v:.3f}') 26 | output_str = ' - '.join(output) 27 | return output_str 28 | 29 | 30 | class LocalLogger: 31 | 32 | def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False): 33 | self.print_to_console = print_to_console 34 | if print_to_console: 35 | print('Using LocalLogger is deprecated. The constructor API will change ') 36 | 37 | def flush(self): 38 | pass 39 | 40 | def log(self, data, step): 41 | if self.print_to_console: 42 | print(concat_dict_to_str(data, step=step), flush=True) -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | 17 | 18 | def set_basic_config(level): 19 | """ 20 | This function sets the global logging format and level. It will be called when import verl 21 | """ 22 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 23 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
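A usage sketch, not from the repo, for `concat_dict_to_str` and `LocalLogger` in `aggregate_logger.py` above; the metric names are illustrative and the import assumes verl is installed.

```python
from verl.utils.logger.aggregate_logger import LocalLogger, concat_dict_to_str

# Format a metrics dict the same way the console output does: step first, then
# every numeric value rounded to three decimals, joined by " - ".
line = concat_dict_to_str({"actor/loss": 0.4213, "response_length": 512}, step=10)
print(line)  # step:10 - actor/loss:0.421 - response_length:512.000

# LocalLogger only prints when print_to_console=True; non-numeric values are skipped.
logger = LocalLogger(print_to_console=True)
logger.log({"critic/vf_loss": 0.1287, "run_name": "warmup"}, step=11)  # run_name is ignored
```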
14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | 17 | 18 | class MemoryBuffer: 19 | 20 | def __init__(self, numel, numel_padded, dtype): 21 | self.numel = numel 22 | self.numel_padded = numel_padded 23 | self.dtype = dtype 24 | self.data = torch.zeros(self.numel_padded, 25 | dtype=self.dtype, 26 | device=torch.cuda.current_device(), 27 | requires_grad=False) 28 | 29 | def zero(self): 30 | """Reset the buffer to zero.""" 31 | self.data.zero_() 32 | 33 | def get(self, shape, start_index): 34 | """Return a tensor with the input `shape` as a view into the 35 | 1-D data starting at `start_index`.""" 36 | end_index = start_index + shape.numel() 37 | assert end_index <= self.numel, \ 38 | 'requested tensor is out of the buffer range.' 39 | buffer_tensor = self.data[start_index:end_index] 40 | buffer_tensor = buffer_tensor.view(shape) 41 | return buffer_tensor 42 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
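A usage sketch, not from the repo, for the `MemoryBuffer` above. It assumes a CUDA device is available, since the buffer is allocated on `torch.cuda.current_device()`; the shape and dtype are illustrative.

```python
import torch
from verl.utils.megatron.memory import MemoryBuffer

param_shape = torch.Size([4, 1024])
numel = param_shape.numel()

# One flat, padded allocation that several parameter views can share.
buffer = MemoryBuffer(numel=numel, numel_padded=numel, dtype=torch.bfloat16)

view = buffer.get(shape=param_shape, start_index=0)  # (4, 1024) view into the flat storage
view.fill_(1.0)                                      # writes go straight through to buffer.data
assert torch.all(buffer.data[:numel] == 1)

buffer.zero()                                        # reset the whole buffer in place
assert torch.all(buffer.data == 0)
```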
15 | 16 | import torch 17 | from megatron.core import parallel_state as mpu 18 | 19 | from .sequence_parallel import pad_to_sequence_parallel 20 | 21 | 22 | def compute_transformers_input_shapes(batches, meta_info): 23 | from flash_attn.bert_padding import unpad_input # flash 2 is a must for Megatron 24 | # pre-compute input shapes for each micro-batch at each pp stage 25 | input_shapes = [] 26 | for model_inputs in batches: 27 | input_ids = model_inputs['input_ids'] 28 | attention_mask = model_inputs['attention_mask'] 29 | input_ids_rmpad = unpad_input(input_ids.unsqueeze(dim=-1), attention_mask)[0] # (total_nnz, 1) 30 | if meta_info['sequence_parallel']: 31 | input_ids_rmpad = pad_to_sequence_parallel(input_ids_rmpad) 32 | # compute shapes for model_inputs 33 | input_shapes.append( 34 | torch.Size([ 35 | input_ids_rmpad.shape[0] // mpu.get_tensor_model_parallel_world_size(), 1, meta_info['hidden_size'] 36 | ])) 37 | else: 38 | # compute shapes for model_inputs 39 | input_shapes.append(torch.Size([input_ids_rmpad.shape[0], 1, meta_info['hidden_size']])) 40 | return input_shapes 41 | 42 | 43 | def make_batch_generator(batches, vpp_size): 44 | if vpp_size > 1: 45 | # has vpp 46 | batch_generator = [batches] * vpp_size # number of vpp chunks 47 | batch_generator = [iter(b) for b in batch_generator] 48 | else: 49 | # no vpp 50 | batch_generator = iter(batches) 51 | return batch_generator 52 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import torch 17 | import torch.nn.functional as F 18 | from megatron.core import parallel_state as mpu 19 | 20 | 21 | def mark_parameter_as_sequence_parallel(parameter): 22 | setattr(parameter, 'sequence_parallel', True) 23 | 24 | 25 | def is_sequence_parallel_param(param): 26 | return hasattr(param, 'sequence_parallel') and param.sequence_parallel 27 | 28 | 29 | def pad_to_sequence_parallel(unpad_tokens: torch.Tensor): 30 | """pad the tokens such that the total length is a multiple of sp world size 31 | 32 | Args: 33 | unpad_tokens: (total_nnz, ...). 
Tokens after removing padding 34 | 35 | Returns: 36 | 37 | """ 38 | total_nnz = unpad_tokens.shape[0] 39 | sp_world_size = mpu.get_tensor_model_parallel_world_size() 40 | 41 | if total_nnz % sp_world_size == 0: 42 | pad_size = 0 43 | else: 44 | pad_size = sp_world_size - total_nnz % sp_world_size 45 | 46 | if pad_size > 0: 47 | if unpad_tokens.ndim == 1: 48 | unpad_tokens = F.pad(unpad_tokens, (0, pad_size)) 49 | elif unpad_tokens.ndim == 2: 50 | unpad_tokens = F.pad(unpad_tokens, (0, 0, 0, pad_size)) 51 | else: 52 | raise NotImplementedError(f'Padding dim {unpad_tokens.ndim} is not supported') 53 | 54 | return unpad_tokens 55 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/py_functional.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains small Python utility functions 16 | """ 17 | 18 | from typing import Dict 19 | from types import SimpleNamespace 20 | 21 | 22 | def union_two_dict(dict1: Dict, dict2: Dict): 23 | """Union two dicts. Raises an error if the two dicts hold different values for the same key. 24 | 25 | Args: 26 | dict1: 27 | dict2: 28 | 29 | Returns: 30 | 31 | """ 32 | for key, val in dict2.items(): 33 | if key in dict1: 34 | assert dict2[key] == dict1[key], \ 35 | f'{key} in meta_dict1 and meta_dict2 are not the same object' 36 | dict1[key] = val 37 | 38 | return dict1 39 | 40 | 41 | def append_to_dict(data: Dict, new_data: Dict): 42 | for key, val in new_data.items(): 43 | if key not in data: 44 | data[key] = [] 45 | data[key].append(val) 46 | 47 | 48 | class NestedNamespace(SimpleNamespace): 49 | 50 | def __init__(self, dictionary, **kwargs): 51 | super().__init__(**kwargs) 52 | for key, value in dictionary.items(): 53 | if isinstance(value, dict): 54 | self.__setattr__(key, NestedNamespace(value)) 55 | else: 56 | self.__setattr__(key, value) 57 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
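A usage sketch, not from the repo, for the small helpers in `py_functional.py` above; the keys and values are illustrative.

```python
from verl.utils.py_functional import NestedNamespace, append_to_dict, union_two_dict

# append_to_dict accumulates per-step metrics into lists keyed by metric name.
metrics = {}
append_to_dict(metrics, {"actor/loss": 0.5})
append_to_dict(metrics, {"actor/loss": 0.4})
assert metrics == {"actor/loss": [0.5, 0.4]}

# union_two_dict merges two meta-info dicts and asserts that shared keys agree.
merged = union_two_dict({"micro_batch_size": 4}, {"temperature": 1.0})
assert merged == {"micro_batch_size": 4, "temperature": 1.0}

# NestedNamespace gives attribute-style access to a nested config dict.
cfg = NestedNamespace({"rollout": {"top_p": 0.9, "n": 4}})
assert cfg.rollout.top_p == 0.9
```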
14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import ray 19 | 20 | import concurrent.futures 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | 25 | def put_data(index, data): 26 | return index, ray.put(data) 27 | 28 | if max_workers is None: 29 | max_workers = min(len(data_list), 16) 30 | 31 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 32 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 33 | res_lst = [] 34 | for future in concurrent.futures.as_completed(data_list_f): 35 | res_lst.append(future.result()) 36 | 37 | # reorder based on index 38 | output = [None for _ in range(len(data_list))] 39 | for res in res_lst: 40 | index, data_ref = res 41 | output[index] = data_ref 42 | 43 | return output 44 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
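A usage sketch, not from the repo, for `parallel_put` above: it puts several large objects into the Ray object store concurrently while preserving input order. It assumes Ray is installed and can be initialized locally.

```python
import ray
from verl.utils.ray_utils import parallel_put

ray.init(ignore_reinit_error=True)

batches = [list(range(1000 * (i + 1))) for i in range(4)]
refs = parallel_put(batches)            # one ObjectRef per input, in the same order
assert ray.get(refs[2]) == batches[2]   # order is preserved despite threaded puts

ray.shutdown()
```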
14 | 15 | import logging 16 | import time 17 | 18 | from cupy.cuda.nccl import NcclCommunicator, get_unique_id 19 | 20 | import ray 21 | from ray.util import list_named_actors 22 | 23 | 24 | @ray.remote 25 | class NCCLIDStore: 26 | 27 | def __init__(self, nccl_id): 28 | self._nccl_id = nccl_id 29 | 30 | def get(self): 31 | return self._nccl_id 32 | 33 | 34 | def get_nccl_id_store_by_name(name): 35 | all_actors = list_named_actors(all_namespaces=True) 36 | matched_actors = [actor for actor in all_actors if actor.get("name", None) == name] 37 | if len(matched_actors) == 1: 38 | actor = matched_actors[0] 39 | return ray.get_actor(**actor) 40 | elif len(matched_actors) > 1: 41 | logging.warning(f"multiple actors with same name found: {matched_actors}") 42 | elif len(matched_actors) == 0: 43 | logging.info(f"failed to get any actor named {name}") 44 | return None 45 | 46 | 47 | def create_nccl_communicator_in_ray(rank: int, 48 | world_size: int, 49 | group_name: str, 50 | max_retries: int = 100, 51 | interval_s: int = 5): 52 | if rank == 0: 53 | nccl_id = get_unique_id() 54 | nccl_id_store = NCCLIDStore.options(name=group_name).remote(nccl_id) 55 | 56 | assert ray.get(nccl_id_store.get.remote()) == nccl_id 57 | communicator = NcclCommunicator( 58 | ndev=world_size, 59 | commId=nccl_id, 60 | rank=0, 61 | ) 62 | return communicator 63 | else: 64 | for i in range(max_retries): 65 | nccl_id_store = get_nccl_id_store_by_name(group_name) 66 | if nccl_id_store is not None: 67 | logging.info(f"nccl_id_store {group_name} got") 68 | nccl_id = ray.get(nccl_id_store.get.remote()) 69 | logging.info(f"nccl id for {group_name} got: {nccl_id}") 70 | communicator = NcclCommunicator( 71 | ndev=world_size, 72 | commId=nccl_id, 73 | rank=rank, 74 | ) 75 | return communicator 76 | logging.info(f"failed to get nccl_id for {i+1} time, sleep for {interval_s} seconds") 77 | time.sleep(interval_s) 78 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # from . import gsm8k, math, prime_math, prime_code 15 | 16 | 17 | def _default_compute_score(data_source, solution_str, ground_truth): 18 | if data_source == 'openai/gsm8k': 19 | from . import gsm8k 20 | res = gsm8k.compute_score(solution_str, ground_truth) 21 | elif data_source in ['lighteval/MATH', 'DigitalLearningGmbH/MATH-lighteval']: 22 | from . import math 23 | res = math.compute_score(solution_str, ground_truth) 24 | elif data_source in [ 25 | 'numina_aops_forum', 'numina_synthetic_math', 'numina_amc_aime', 'numina_synthetic_amc', 'numina_cn_k12', 26 | 'numina_olympiads' 27 | ]: 28 | from . import prime_math 29 | res = prime_math.compute_score(solution_str, ground_truth) 30 | elif data_source in ['codecontests', 'apps', 'codeforces', 'taco']: 31 | from . 
import prime_code 32 | res = prime_code.compute_score(solution_str, ground_truth, continuous=True) 33 | else: 34 | raise NotImplementedError 35 | 36 | if isinstance(res, (int, float, bool)): 37 | return float(res) 38 | else: 39 | return float(res[0]) 40 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | 17 | 18 | def extract_solution(solution_str, method='strict'): 19 | assert method in ['strict', 'flexible'] 20 | 21 | if method == 'strict': 22 | # this also tests the formatting of the model 23 | solution = re.search("#### (\\-?[0-9\\.\\,]+)", solution_str) 24 | if solution is None: 25 | final_answer = None 26 | else: 27 | final_answer = solution.group(0) 28 | final_answer = final_answer.split('#### ')[1].replace(',', '').replace('$', '') 29 | elif method == 'flexible': 30 | answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str) 31 | final_answer = None 32 | if len(answer) == 0: 33 | # no reward is there is no answer 34 | pass 35 | else: 36 | invalid_str = ['', '.'] 37 | # find the last number that is not '.' 38 | for final_answer in reversed(answer): 39 | if final_answer not in invalid_str: 40 | break 41 | return final_answer 42 | 43 | 44 | def compute_score(solution_str, ground_truth, method='strict', format_score=0., score=1.): 45 | """The scoring function for GSM8k. 46 | 47 | Reference: Trung, Luong, et al. "Reft: Reasoning with reinforced fine-tuning." Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024. 48 | 49 | Args: 50 | solution_str: the solution text 51 | ground_truth: the ground truth 52 | method: the method to extract the solution, choices are 'strict' and 'flexible' 53 | format_score: the score for the format 54 | score: the score for the correct answer 55 | """ 56 | answer = extract_solution(solution_str=solution_str, method=method) 57 | if answer is None: 58 | return 0 59 | else: 60 | if answer == ground_truth: 61 | return score 62 | else: 63 | return format_score -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import check_correctness as apps_check_correctness 16 | import json 17 | import re 18 | import traceback 19 | 20 | 21 | def compute_score(completion, test_cases, continuous=False): 22 | # try to get code solution from completion. if the completion is pure code, this will not take effect. 23 | solution = completion.split('```python')[-1].split('```')[0] 24 | try: 25 | try: 26 | if not isinstance(test_cases, dict): 27 | test_cases = json.loads(test_cases) 28 | except Exception as e: 29 | print(f"Error:{e}") 30 | 31 | # Complete check on all in-out pairs first. If there is no failure, per-sample test can be skipped. 32 | try: 33 | res, metadata = apps_check_correctness(in_outs=test_cases, generation=solution, timeout=5, debug=False) 34 | metadata = dict(enumerate(metadata))[0] 35 | success = all(map(lambda x: x == True, res)) 36 | if success: 37 | return success, metadata 38 | except Exception as e: 39 | pass 40 | 41 | test_cases_list = [] 42 | inputs = test_cases["inputs"] 43 | outputs = test_cases["outputs"] 44 | for i in range(len(inputs)): 45 | test_cases_list.append({"inputs": [inputs[i]], "outputs": [outputs[i]]}) 46 | 47 | if continuous: 48 | # per sample test: if continuous score is needed, test first 10 samples regardless of failures 49 | # do not test all samples cuz some problems have enormous test cases 50 | metadata_list = [] 51 | res_list = [] 52 | for test_case_id, test_case in enumerate(test_cases_list): 53 | res, metadata = apps_check_correctness(in_outs=test_case, generation=solution, timeout=5, debug=False) 54 | try: 55 | metadata = dict(enumerate(metadata))[0] # metadata can be empty occasionally 56 | except Exception as e: 57 | metadata = {} 58 | metadata["test_case"] = {} 59 | metadata["test_case"]["input"] = str(test_case["inputs"][0]) 60 | metadata["test_case"]["output"] = str(test_case["outputs"][0]) 61 | metadata["test_case"]["res"] = str(res) 62 | metadata_list.append(metadata) 63 | res_list.extend(res) 64 | 65 | if test_case_id >= 9: 66 | break 67 | res_count = len(res_list) if len(res_list) > 0 else 1 68 | success = sum(map(lambda x: x == True, res_list)) / res_count 69 | except Exception as e: 70 | traceback.print_exc(10) 71 | success = False 72 | metadata_list = None 73 | return success, metadata_list 74 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Borrowed from: https://huggingface.co/spaces/codeparrot/apps_metric/blob/main/utils.py 16 | 17 | import multiprocessing 18 | from typing import Dict, Optional 19 | from datasets import load_dataset 20 | from .testing_util import run_test 21 | import traceback 22 | import os, sys 23 | 24 | 25 | def _temp_run(sample, generation, debug, result, metadata_list, timeout): 26 | with open(os.devnull, 'w') as devnull: 27 | sys.stdout = devnull 28 | sys.stderr = devnull 29 | try: 30 | res, metadata = run_test(in_outs=sample, test=generation, debug=debug, timeout=timeout) 31 | result.append(res) 32 | metadata_list.append(metadata) 33 | except Exception as e: 34 | # print(e) # some tracebacks are extremely long. 35 | traceback.print_exc(10) 36 | result.append([-1 for i in range(len(sample['inputs']))]) 37 | metadata_list.append({}) 38 | 39 | 40 | def check_correctness(in_outs: Optional[dict], generation, timeout=10, debug=True): 41 | """Check correctness of code generation with a global timeout. 42 | The global timeout is to catch some extreme/rare cases not handled by the timeouts 43 | inside `run_test`""" 44 | 45 | manager = multiprocessing.Manager() 46 | result = manager.list() 47 | metadata_list = manager.list() 48 | p = multiprocessing.Process(target=_temp_run, args=(in_outs, generation, debug, result, metadata_list, timeout)) 49 | p.start() 50 | p.join(timeout=timeout + 1) 51 | if p.is_alive(): 52 | p.kill() 53 | # p.terminate() 54 | if not result: 55 | # consider that all tests failed 56 | result = [[-1 for i in range(len(in_outs["inputs"]))]] 57 | if debug: 58 | print(f"global timeout") 59 | return result[0], metadata_list 60 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utils for tokenization.""" 15 | import warnings 16 | 17 | __all__ = ['hf_tokenizer'] 18 | 19 | 20 | def set_pad_token_id(tokenizer): 21 | """Set pad_token_id to eos_token_id if it is None. 22 | 23 | Args: 24 | tokenizer (transformers.PreTrainedTokenizer): The tokenizer to be set. 25 | 26 | """ 27 | if tokenizer.pad_token_id is None: 28 | tokenizer.pad_token_id = tokenizer.eos_token_id 29 | warnings.warn(f'tokenizer.pad_token_id is None. Now set to {tokenizer.eos_token_id}') 30 | if tokenizer.pad_token is None: 31 | tokenizer.pad_token = tokenizer.eos_token 32 | warnings.warn(f'tokenizer.pad_token is None. Now set to {tokenizer.eos_token}') 33 | 34 | 35 | def hf_tokenizer(name_or_path, correct_pad_token=True, correct_gemma2=True, **kwargs): 36 | """Create a huggingface pretrained tokenizer. 37 | 38 | Args: 39 | name (str): The name of the tokenizer. 
40 | correct_pad_token (bool): Whether to correct the pad token id. 41 | correct_gemma2 (bool): Whether to correct the gemma2 tokenizer. 42 | **kwargs: The keyword arguments for the tokenizer. 43 | 44 | Returns: 45 | transformers.PreTrainedTokenizer: The pretrained tokenizer. 46 | 47 | """ 48 | from transformers import AutoTokenizer 49 | if correct_gemma2 and isinstance(name_or_path, str) and 'gemma-2-2b-it' in name_or_path: 50 | # the EOS token in gemma2 is ambiguous, which may worsen RL performance. 51 | # https://huggingface.co/google/gemma-2-2b-it/commit/17a01657f5c87135bcdd0ec7abb4b2dece04408a 52 | warnings.warn('Found gemma-2-2b-it tokenizer. Set eos_token and eos_token_id to <end_of_turn> and 107.') 53 | kwargs['eos_token'] = '<end_of_turn>' 54 | kwargs['eos_token_id'] = 107 55 | tokenizer = AutoTokenizer.from_pretrained(name_or_path, **kwargs) 56 | if correct_pad_token: 57 | set_pad_token_id(tokenizer) 58 | return tokenizer -------------------------------------------------------------------------------- /AgentGym-RL/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Adapted from Cruise. 16 | """ 17 | 18 | import torch 19 | 20 | from typing import Union 21 | 22 | HALF_LIST = [16, "16", "fp16", "float16"] 23 | FLOAT_LIST = [32, "32", "fp32", "float32"] 24 | BFLOAT_LIST = ["bf16", "bfloat16"] 25 | 26 | 27 | class PrecisionType(object): 28 | """Type of precision used.
29 | 30 | >>> PrecisionType.HALF == 16 31 | True 32 | >>> PrecisionType.HALF in (16, "16") 33 | True 34 | """ 35 | 36 | HALF = "16" 37 | FLOAT = "32" 38 | FULL = "64" 39 | BFLOAT = "bf16" 40 | MIXED = "mixed" 41 | 42 | @staticmethod 43 | def supported_type(precision: Union[str, int]) -> bool: 44 | return any(x == precision for x in PrecisionType) 45 | 46 | @staticmethod 47 | def supported_types() -> list[str]: 48 | return [x.value for x in PrecisionType] 49 | 50 | @staticmethod 51 | def is_fp16(precision): 52 | return precision in HALF_LIST 53 | 54 | @staticmethod 55 | def is_fp32(precision): 56 | return precision in FLOAT_LIST 57 | 58 | @staticmethod 59 | def is_bf16(precision): 60 | return precision in BFLOAT_LIST 61 | 62 | @staticmethod 63 | def to_dtype(precision): 64 | if precision in HALF_LIST: 65 | return torch.float16 66 | elif precision in FLOAT_LIST: 67 | return torch.float32 68 | elif precision in BFLOAT_LIST: 69 | return torch.bfloat16 70 | else: 71 | raise RuntimeError(f"unexpected precision: {precision}") 72 | 73 | @staticmethod 74 | def to_str(precision): 75 | if precision == torch.float16: 76 | return 'fp16' 77 | elif precision == torch.float32: 78 | return 'fp32' 79 | elif precision == torch.bfloat16: 80 | return 'bf16' 81 | else: 82 | raise RuntimeError(f"unexpected precision: {precision}") 83 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2.0.post2 2 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/agent_actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
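A usage sketch, not from the repo, exercising the string/dtype conversion helpers of `PrecisionType` above; the precision strings are illustrative.

```python
import torch
from verl.utils.torch_dtypes import PrecisionType

assert PrecisionType.to_dtype("bf16") is torch.bfloat16
assert PrecisionType.to_dtype("fp16") is torch.float16
assert PrecisionType.to_dtype(32) is torch.float32   # integer spellings are accepted too
assert PrecisionType.to_str(torch.bfloat16) == "bf16"
assert PrecisionType.is_bf16("bfloat16")
```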
14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/agent_actor/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for Actor 16 | """ 17 | from abc import ABC, abstractmethod 18 | from typing import Iterable, Dict 19 | 20 | from verl import DataProto 21 | import torch 22 | 23 | __all__ = ['BasePPOActor'] 24 | 25 | 26 | class BasePPOActor(ABC): 27 | 28 | def __init__(self, config): 29 | """The base class for PPO actor 30 | 31 | Args: 32 | config (DictConfig): a config passed to the PPOActor. We expect the type to be 33 | DictConfig (https://omegaconf.readthedocs.io/), but it can be any namedtuple in general. 34 | """ 35 | super().__init__() 36 | self.config = config 37 | 38 | @abstractmethod 39 | def compute_log_prob(self, data: DataProto) -> torch.Tensor: 40 | """Compute log probabilities given a batch of data. 41 | 42 | Args: 43 | data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```, 44 | ```attention_mask``` and ```position_ids```. 45 | 46 | Returns: 47 | torch.Tensor: a tensor of log probabilities for the response tokens 48 | 49 | 50 | """ 51 | pass 52 | 53 | @abstractmethod 54 | def update_policy(self, data: DataProto) -> Dict: 55 | """Update the policy with an iterator of DataProto 56 | 57 | Args: 58 | data (DataProto): an iterator over the DataProto returned by 59 | ```make_minibatch_iterator``` 60 | 61 | Returns: 62 | Dict: a dictionary that may contain anything. Typically, it contains statistics collected while updating the model, 63 | such as ```loss```, ```grad_norm```, etc. 64 | 65 | """ 66 | pass 67 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/agent_critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
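A skeleton sketch, not from the repo, of the minimal surface a custom actor must provide to satisfy `BasePPOActor` above. The log-prob and update logic are placeholders, not verl's `DataParallelPPOActor`; the batch key `responses` follows the convention used elsewhere in this codebase.

```python
import torch
from verl import DataProto
from verl.workers.agent_actor import BasePPOActor


class DummyPPOActor(BasePPOActor):
    """Placeholder actor: returns zero log-probs and reports dummy statistics."""

    def compute_log_prob(self, data: DataProto) -> torch.Tensor:
        # A real actor runs the policy model over input_ids/attention_mask/position_ids.
        responses = data.batch["responses"]
        return torch.zeros_like(responses, dtype=torch.float32)

    def update_policy(self, data: DataProto) -> dict:
        # A real actor runs PPO minibatch updates here and returns training statistics.
        return {"actor/loss": 0.0, "actor/grad_norm": 0.0}


# The config is stored as-is by the base class; any namespace/DictConfig-like object works.
actor = DummyPPOActor(config=None)
```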
14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/agent_critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from verl import DataProto 16 | from verl.utils.reward_score import _default_compute_score 17 | import torch 18 | 19 | 20 | class NaiveRewardManager: 21 | """The reward manager. 22 | """ 23 | 24 | def __init__(self, tokenizer, num_examine, compute_score=None) -> None: 25 | self.tokenizer = tokenizer 26 | self.num_examine = num_examine # the number of batches of decoded responses to print to the console 27 | self.compute_score = compute_score or _default_compute_score 28 | 29 | def __call__(self, data: DataProto): 30 | """We will expand this function gradually based on the available datasets""" 31 | 32 | # If there is rm score, we directly return rm score. Otherwise, we compute via rm_score_fn 33 | if 'rm_scores' in data.batch.keys(): 34 | return data.batch['rm_scores'] 35 | 36 | reward_tensor = torch.zeros_like(data.batch['responses'], dtype=torch.float32) 37 | 38 | already_print_data_sources = {} 39 | 40 | for i in range(len(data)): 41 | data_item = data[i] # DataProtoItem 42 | 43 | prompt_ids = data_item.batch['prompts'] 44 | 45 | prompt_length = prompt_ids.shape[-1] 46 | 47 | valid_prompt_length = data_item.batch['attention_mask'][:prompt_length].sum() 48 | valid_prompt_ids = prompt_ids[-valid_prompt_length:] 49 | 50 | response_ids = data_item.batch['responses'] 51 | valid_response_length = data_item.batch['attention_mask'][prompt_length:].sum() 52 | valid_response_ids = response_ids[:valid_response_length] 53 | 54 | # decode 55 | sequences = torch.cat((valid_prompt_ids, valid_response_ids)) 56 | sequences_str = self.tokenizer.decode(sequences) 57 | 58 | ground_truth = data_item.non_tensor_batch['reward_model']['ground_truth'] 59 | 60 | data_source = data_item.non_tensor_batch['data_source'] 61 | 62 | score = self.compute_score( 63 | data_source=data_source, 64 | solution_str=sequences_str, 65 | ground_truth=ground_truth, 66 | ) 67 | reward_tensor[i, valid_response_length - 1] = score 68 | 69 | if data_source not in already_print_data_sources: 70 | already_print_data_sources[data_source] = 0 71 | 72 | if already_print_data_sources[data_source] < self.num_examine: 73 | already_print_data_sources[data_source] += 1 74 | print(sequences_str) 75 | 76 | return reward_tensor 77 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for reward model 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | from verl import DataProto 21 | 22 | 23 | class BasePPORewardModel(ABC): 24 | 25 | def __init__(self, config): 26 | self.config = config 27 | 28 | @abstractmethod 29 | def compute_reward(self, data: DataProto) -> DataProto: 30 | """Computing reward given input_ids. The transformers should output a tensor with shape 31 | [batch_size, sequence_length], and the value at [EOS] mask should be gathered. 32 | 33 | Args: 34 | data: must contain keys "input_ids", "attention_mask" and "position_ids". 35 | - input_ids: [batch_size, sequence_length] 36 | - attention_mask: [batch_size, sequence_length] 37 | - position_ids: [batch_size, sequence_length] 38 | 39 | Returns: a data pass protocol containing "reward". Only the [EOS] position contains the reward. 40 | Other position should have zero reward. Note that this may change in the future if we use 41 | dense reward. So, we leave the interface for general case. 42 | - reward: [batch_size, sequence_length]. 43 | 44 | """ 45 | pass 46 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/rollout/agent_vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from importlib.metadata import version, PackageNotFoundError 16 | 17 | 18 | def get_version(pkg): 19 | try: 20 | return version(pkg) 21 | except PackageNotFoundError: 22 | return None 23 | 24 | 25 | package_name = 'vllm' 26 | package_version = get_version(package_name) 27 | 28 | if package_version is not None and package_version <= '0.6.3':  # guard: get_version returns None when vllm is not installed 29 | vllm_mode = 'customized' 30 | from .vllm_rollout import vLLMRollout 31 | # else: 32 | # vllm_mode = 'spmd' 33 | # from .vllm_rollout_spmd import vLLMRollout -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available 16 | 17 | from .base import BaseShardingManager 18 | from .fsdp_ulysses import FSDPUlyssesShardingManager 19 | 20 | AllGatherPPModel = None 21 | 22 | if is_megatron_core_available() and is_vllm_available(): 23 | from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager 24 | elif AllGatherPPModel is not None: 25 | pass 26 | else: 27 | AllGatherPPModel = None 28 | MegatronVLLMShardingManager = None 29 | 30 | if is_vllm_available(): 31 | from .fsdp_vllm import FSDPVLLMShardingManager 32 | else: 33 | FSDPVLLMShardingManager = None 34 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | -------------------------------------------------------------------------------- /AgentGym-RL/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT 16 | """ 17 | from .base import BaseShardingManager 18 | 19 | from torch.distributed.device_mesh import DeviceMesh 20 | 21 | from verl.utils.torch_functional import allgather_dict_tensors 22 | from verl.utils.ulysses import set_ulysses_sequence_parallel_group, get_ulysses_sequence_parallel_group 23 | import numpy as np 24 | 25 | import torch 26 | import torch.distributed 27 | 28 | from verl import DataProto 29 | 30 | 31 | class FSDPUlyssesShardingManager(BaseShardingManager): 32 | """ 33 | Sharding manager to support data resharding when using FSDP + Ulysses 34 | """ 35 | 36 | def __init__(self, device_mesh: DeviceMesh): 37 | super().__init__() 38 | self.device_mesh = device_mesh 39 | self.seed_offset = 12345 40 | 41 | def __enter__(self): 42 | if self.device_mesh is not None: 43 | # We have a global SP group 44 | # so we have to change to use model-specific sp group 45 | self.prev_sp_group = get_ulysses_sequence_parallel_group() 46 | set_ulysses_sequence_parallel_group(self.device_mesh['sp'].get_group()) 47 | # TODO: check how to set seed for each model 48 | 49 | def __exit__(self, exc_type, exc_value, traceback): 50 | # restore random states 51 | if self.device_mesh is not None: 52 | # revert to previous sp group 53 | set_ulysses_sequence_parallel_group(self.prev_sp_group) 54 | # TODO: check how to set seed for each model 55 | 56 | def preprocess_data(self, data: DataProto) -> DataProto: 57 | """ 58 | AllGather data from sp region 59 | This is because the data is first sharded along the FSDP dimension as we utilize the DP_COMPUTE 60 | In Ulysses, we need to make sure the same data is used across a SP group 61 | """ 62 | if self.device_mesh is not None: 63 | sp_size = self.device_mesh['sp'].size() 64 | group = self.device_mesh['sp'].get_group() 65 | 66 | prev_device = data.batch.device 67 | data.batch = data.batch.cuda(device=torch.cuda.current_device()) 68 | data.batch = allgather_dict_tensors(data.batch.contiguous(), size=sp_size, group=group, dim=0) 69 | data.batch = data.batch.to(prev_device) 70 | # all gather non_tensor_batch 71 | all_non_tensor_batch = [None for _ in range(sp_size)] 72 | 
torch.distributed.all_gather_object(all_non_tensor_batch, data.non_tensor_batch, group=group) 73 | data.non_tensor_batch = { 74 | k: np.concatenate([d[k] for d in all_non_tensor_batch]) for k in data.non_tensor_batch 75 | } 76 | return data 77 | 78 | def postprocess_data(self, data: DataProto) -> DataProto: 79 | """ 80 | Split the data to follow FSDP partition 81 | """ 82 | if self.device_mesh is not None: 83 | sp_size = self.device_mesh['sp'].size() 84 | sp_rank = self.device_mesh['sp'].get_local_rank() 85 | data = data.chunk(chunks=sp_size)[sp_rank] 86 | return data -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 FudanNLP-Agent 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/AgentGym-RL-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/AgentGym-RL-main.png -------------------------------------------------------------------------------- /assets/ScalingInter-RL-Method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/ScalingInter-RL-Method.png -------------------------------------------------------------------------------- /assets/bytedance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/bytedance.jpg -------------------------------------------------------------------------------- /assets/env.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/env.jpg -------------------------------------------------------------------------------- /assets/fudannlp_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/fudannlp_logo.png -------------------------------------------------------------------------------- /assets/main_greedy_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/main_greedy_performance.jpg -------------------------------------------------------------------------------- /assets/main_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/main_performance.jpg -------------------------------------------------------------------------------- /assets/pseudo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/pseudo.jpg -------------------------------------------------------------------------------- /assets/searchqa_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/searchqa_performance.jpg -------------------------------------------------------------------------------- /assets/shanghai_innovation_institute_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/shanghai_innovation_institute_logo.png -------------------------------------------------------------------------------- /assets/webarena_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/AgentGym-RL/cd49e7776784c3021db63c181eee4b26f18c7ee6/assets/webarena_performance.png 
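Note on the evaluation scripts that follow: each one first merges the sharded FSDP actor checkpoint (e.g. global_step_150/actor) into a HuggingFace-format model with scripts/model_merger.py, and then launches verl.agent_trainer.main_generation with Hydra-style overrides pointing at a running AgentGym environment server. The Python sketch below is a rough, non-authoritative illustration of that same two-step flow; the checkpoint path, data path, and server address are copied from the babyai script and are placeholders, and the subprocess wrapper itself is an assumption, not an additional entry point provided by the repository.

import subprocess

# Sketch only: assumes the agentgym-rl environment is active, the AgentGym
# environment server is already listening on env_addr, and we run from the repo root.
ckpt_path = "global_step_150/actor"        # sharded FSDP actor checkpoint (placeholder)
model_path = f"{ckpt_path}/huggingface"    # merged HF checkpoint written by model_merger.py
env_addr = "http://127.0.0.1:36005"

# Step 1: merge FSDP shards into a HuggingFace-format model.
subprocess.run(
    ["python", "AgentGym-RL/scripts/model_merger.py", "--local_dir", ckpt_path],
    check=True,
)

# Step 2: roll out the merged model against the environment server.
subprocess.run(
    [
        "python", "-m", "verl.agent_trainer.main_generation",
        "data.path=AgentEval/babyai",
        "data.n_samples=1",
        "agentgym.task_name=babyai",
        f"agentgym.env_addr={env_addr}",
        "agentgym.max_rounds=20",
        f"model.path={model_path}",
        "rollout.tensor_model_parallel_size=1",
    ],
    check=True,
)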
-------------------------------------------------------------------------------- /examples/eval/babyai_eval.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="babyai" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | 13 | env_server_url="http://127.0.0.1:36005" 14 | 15 | sample_num=1 16 | max_rounds=20 17 | 18 | ckpt_path="global_step_150/actor" 19 | model_path=${ckpt_path}/huggingface 20 | 21 | cd AgentGym-RL/scripts 22 | python model_merger.py \ 23 | --local_dir ${ckpt_path} 24 | 25 | HYDRA_FULL_ERROR=1 python3 -m verl.agent_trainer.main_generation \ 26 | data.path=AgentEval/${task_name} \ 27 | data.max_prompt_length=512 \ 28 | data.max_response_length=8192 \ 29 | data.n_samples=${sample_num} \ 30 | data.batch_size=32 \ 31 | agentgym.task_name=${task_name} \ 32 | agentgym.env_addr=${env_server_url} \ 33 | agentgym.max_rounds=${max_rounds} \ 34 | agentgym.timeout=500 \ 35 | model.path=${model_path} \ 36 | rollout.gpu_memory_utilization=0.95 \ 37 | rollout.temperature=1 \ 38 | rollout.max_model_len=32768 \ 39 | rollout.max_tokens=512 \ 40 | rollout.tensor_model_parallel_size=1 \ 41 | rollout.rollout_log_dir=executer_logs 42 | status=$? 43 | exit $status -------------------------------------------------------------------------------- /examples/eval/sciworld_eval.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="sciworld" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | 13 | env_server_url="http://127.0.0.1:36005" 14 | 15 | sample_num=1 16 | max_rounds=30 17 | 18 | ckpt_path="global_step_150/actor" 19 | model_path=${ckpt_path}/huggingface 20 | 21 | cd AgentGym-RL/scripts 22 | python model_merger.py \ 23 | --local_dir ${ckpt_path} 24 | 25 | HYDRA_FULL_ERROR=1 python3 -m verl.agent_trainer.main_generation \ 26 | data.path=AgentEval/${task_name} \ 27 | data.max_prompt_length=1024 \ 28 | data.max_response_length=8192 \ 29 | data.n_samples=${sample_num} \ 30 | data.batch_size=32 \ 31 | agentgym.task_name=${task_name} \ 32 | agentgym.env_addr=${env_server_url} \ 33 | agentgym.max_rounds=${max_rounds} \ 34 | agentgym.timeout=500 \ 35 | model.path=${model_path} \ 36 | rollout.gpu_memory_utilization=0.95 \ 37 | rollout.temperature=1 \ 38 | rollout.max_model_len=32768 \ 39 | rollout.max_tokens=200 \ 40 | rollout.tensor_model_parallel_size=1 \ 41 | rollout.rollout_log_dir=executer_logs 42 | status=$? 
43 | exit $status -------------------------------------------------------------------------------- /examples/eval/searchqa_eval.sh: -------------------------------------------------------------------------------- 1 | # TODO 2 | set -x 3 | export VLLM_USE_MODELSCOPE=0 4 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 5 | export VLLM_ATTENTION_BACKEND=XFORMERS 6 | 7 | task_name="searchqa" 8 | 9 | cd AgentGym-RL 10 | source activate 11 | conda activate agentgym-rl 12 | export VLLM_ATTENTION_BACKEND=XFORMERS 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | sample_num=1 17 | max_rounds=30 18 | 19 | ckpt_path="global_step_150/actor" 20 | model_path=${ckpt_path}/huggingface 21 | 22 | cd AgentGym-RL/scripts 23 | python model_merger.py \ 24 | --local_dir ${ckpt_path} 25 | 26 | HYDRA_FULL_ERROR=1 python3 -m verl.agent_trainer.main_generation \ 27 | data.path=AgentEval/${task_name} \ 28 | data.max_prompt_length=750 \ 29 | data.max_response_length=14098 \ 30 | data.n_samples=${sample_num} \ 31 | data.batch_size=32 \ 32 | agentgym.task_name=${task_name} \ 33 | agentgym.env_addr=${env_server_url} \ 34 | agentgym.max_rounds=${max_rounds} \ 35 | agentgym.timeout=500 \ 36 | model.path=${model_path} \ 37 | rollout.gpu_memory_utilization=0.95 \ 38 | rollout.temperature=1 \ 39 | rollout.max_model_len=32768 \ 40 | rollout.max_tokens=512 \ 41 | rollout.tensor_model_parallel_size=1 \ 42 | rollout.rollout_log_dir=executer_logs 43 | status=$? 44 | exit $status -------------------------------------------------------------------------------- /examples/eval/textcraft_eval.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="textcraft" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | 13 | env_server_url="http://127.0.0.1:36005" 14 | 15 | sample_num=1 16 | max_rounds=30 17 | 18 | ckpt_path="global_step_150/actor" 19 | model_path=${ckpt_path}/huggingface 20 | 21 | cd AgentGym-RL/scripts 22 | python model_merger.py \ 23 | --local_dir ${ckpt_path} 24 | 25 | HYDRA_FULL_ERROR=1 python3 -m verl.agent_trainer.main_generation \ 26 | data.path=AgentEval/${task_name} \ 27 | data.max_prompt_length=750 \ 28 | data.max_response_length=14098 \ 29 | data.n_samples=${sample_num} \ 30 | data.batch_size=32 \ 31 | agentgym.task_name=${task_name} \ 32 | agentgym.env_addr=${env_server_url} \ 33 | agentgym.max_rounds=${max_rounds} \ 34 | agentgym.timeout=500 \ 35 | model.path=${model_path} \ 36 | rollout.gpu_memory_utilization=0.95 \ 37 | rollout.temperature=1 \ 38 | rollout.max_model_len=32768 \ 39 | rollout.max_tokens=512 \ 40 | rollout.tensor_model_parallel_size=1 \ 41 | rollout.rollout_log_dir=executer_logs 42 | status=$? 
43 | exit $status -------------------------------------------------------------------------------- /examples/eval/webarena_eval.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="webarena" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | 13 | env_server_url="http://127.0.0.1:36005" 14 | 15 | sample_num=1 16 | max_rounds=15 17 | 18 | ckpt_path="global_step_150/actor" 19 | model_path=${ckpt_path}/huggingface 20 | 21 | cd AgentGym-RL/scripts 22 | python model_merger.py \ 23 | --local_dir ${ckpt_path} 24 | 25 | HYDRA_FULL_ERROR=1 python3 -m verl.agent_trainer.main_generation \ 26 | data.path=AgentEval/${task_name} \ 27 | data.max_prompt_length=750 \ 28 | data.max_response_length=14098 \ 29 | data.n_samples=${sample_num} \ 30 | data.batch_size=32 \ 31 | agentgym.task_name=${task_name} \ 32 | agentgym.env_addr=${env_server_url} \ 33 | agentgym.max_rounds=${max_rounds} \ 34 | agentgym.timeout=500 \ 35 | model.path=${model_path} \ 36 | rollout.gpu_memory_utilization=0.95 \ 37 | rollout.temperature=1 \ 38 | rollout.max_model_len=32768 \ 39 | rollout.max_tokens=512 \ 40 | rollout.tensor_model_parallel_size=1 \ 41 | rollout.rollout_log_dir=executer_logs 42 | status=$? 43 | exit $status -------------------------------------------------------------------------------- /examples/train/AgentGym-RL/babyai_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="babyai" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=8 25 | train_batch_size=16 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=1 29 | 30 | total_epoches=10 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=fixed \ 42 | algorithm.rounds_ctrl.rounds=20 \ 43 | data.train_file=AgentItemId/${task_name}_train.json \ 44 | data.train_batch_size=${train_batch_size} \ 45 | data.max_prompt_length=1024 \ 46 | data.max_response_length=4096 \ 47 | actor_rollout_ref.agentgym.task_name=${task_name} \ 48 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 49 | actor_rollout_ref.agentgym.timeout=600 \ 50 | actor_rollout_ref.model.path=${agent_model_path} \ 51 | actor_rollout_ref.actor.use_kl_loss=True \ 52 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 53 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 54 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 55 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 56 | 
actor_rollout_ref.rollout.max_model_len=32768 \ 57 | actor_rollout_ref.rollout.max_tokens=200 \ 58 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 59 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 60 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 61 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 62 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 63 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 64 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 65 | trainer.default_local_dir=${model_save_path} \ 66 | trainer.project_name=xxx \ 67 | trainer.experiment_name=${exp_name} \ 68 | trainer.save_freq=25 \ 69 | trainer.total_epochs=${total_epoches} 70 | status=$? 71 | exit $status -------------------------------------------------------------------------------- /examples/train/AgentGym-RL/sciworld_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="sciworld" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=8 25 | train_batch_size=16 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=1 29 | 30 | total_epoches=10 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=fixed \ 42 | algorithm.rounds_ctrl.rounds=20 \ 43 | data.train_file=AgentItemId/${task_name}_train.json \ 44 | data.train_batch_size=${train_batch_size} \ 45 | data.max_prompt_length=1024 \ 46 | data.max_response_length=4096 \ 47 | actor_rollout_ref.agentgym.task_name=${task_name} \ 48 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 49 | actor_rollout_ref.agentgym.timeout=600 \ 50 | actor_rollout_ref.model.path=${agent_model_path} \ 51 | actor_rollout_ref.actor.use_kl_loss=True \ 52 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 53 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 54 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 55 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 56 | actor_rollout_ref.rollout.max_model_len=32768 \ 57 | actor_rollout_ref.rollout.max_tokens=200 \ 58 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 59 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 60 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 61 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 62 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 63 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 64 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 65 | trainer.default_local_dir=${model_save_path} \ 66 | trainer.project_name=xxx \ 67 | 
trainer.experiment_name=${exp_name} \ 68 | trainer.save_freq=25 \ 69 | trainer.total_epochs=${total_epoches} 70 | status=$? 71 | exit $status -------------------------------------------------------------------------------- /examples/train/AgentGym-RL/searchqa_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="searchqa" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=4 25 | train_batch_size=32 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=2 29 | 30 | total_epoches=20 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=offline python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=fixed \ 42 | algorithm.rounds_ctrl.rounds=5 \ 43 | data.train_file=AgentItemId/${task_name}_train.json \ 44 | data.train_batch_size=${train_batch_size} \ 45 | data.max_prompt_length=1024 \ 46 | data.max_response_length=8192 \ 47 | actor_rollout_ref.agentgym.task_name=${task_name} \ 48 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 49 | actor_rollout_ref.agentgym.timeout=600 \ 50 | actor_rollout_ref.model.path=${agent_model_path} \ 51 | actor_rollout_ref.actor.use_kl_loss=True \ 52 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 53 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 54 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 55 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 56 | actor_rollout_ref.rollout.max_model_len=32768 \ 57 | actor_rollout_ref.rollout.max_tokens=512 \ 58 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 59 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 60 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 61 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 62 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 63 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 64 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 65 | trainer.default_local_dir=${model_save_path} \ 66 | trainer.project_name=xxx \ 67 | trainer.experiment_name=${exp_name} \ 68 | trainer.save_freq=25 \ 69 | trainer.total_epochs=${total_epoches} 70 | status=$? 
71 | exit $status 72 | -------------------------------------------------------------------------------- /examples/train/AgentGym-RL/textcraft_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="textcraft" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=8 25 | train_batch_size=32 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=2 29 | 30 | total_epoches=30 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=fixed \ 42 | algorithm.rounds_ctrl.rounds=30 \ 43 | data.train_file=AgentItemId/${task_name}_train.json \ 44 | data.train_batch_size=${train_batch_size} \ 45 | data.max_prompt_length=512 \ 46 | data.max_response_length=10240 \ 47 | actor_rollout_ref.agentgym.task_name=${task_name} \ 48 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 49 | actor_rollout_ref.agentgym.timeout=600 \ 50 | actor_rollout_ref.model.path=${agent_model_path} \ 51 | actor_rollout_ref.actor.use_kl_loss=True \ 52 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 53 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 54 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 55 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 56 | actor_rollout_ref.rollout.max_model_len=32768 \ 57 | actor_rollout_ref.rollout.max_tokens=512 \ 58 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 59 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 60 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 61 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 62 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 63 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 64 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 65 | trainer.default_local_dir=${model_save_path} \ 66 | trainer.project_name=xxx \ 67 | trainer.experiment_name=${exp_name} \ 68 | trainer.save_freq=25 \ 69 | trainer.total_epochs=${total_epoches} 70 | status=$? 
71 | exit $status -------------------------------------------------------------------------------- /examples/train/AgentGym-RL/webarena_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="webarena" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=4 25 | train_batch_size=32 26 | ppo_mini_batch_size=4 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=2 29 | 30 | total_epoches=25 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=fixed \ 42 | algorithm.rounds_ctrl.rounds=15 \ 43 | data.train_file=AgentItemId/${task_name}_train.json \ 44 | data.train_batch_size=${train_batch_size} \ 45 | data.max_prompt_length=750 \ 46 | data.max_response_length=14098 \ 47 | actor_rollout_ref.agentgym.task_name=${task_name} \ 48 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 49 | actor_rollout_ref.agentgym.timeout=600 \ 50 | actor_rollout_ref.model.path=${agent_model_path} \ 51 | actor_rollout_ref.actor.use_kl_loss=True \ 52 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 53 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 54 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 55 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 56 | actor_rollout_ref.rollout.max_model_len=32768 \ 57 | actor_rollout_ref.rollout.max_tokens=512 \ 58 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 59 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 60 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 61 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 62 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 63 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 64 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 65 | trainer.default_local_dir=${model_save_path} \ 66 | trainer.project_name=xxx \ 67 | trainer.experiment_name=${exp_name} \ 68 | trainer.save_freq=25 \ 69 | trainer.total_epochs=${total_epoches} 70 | status=$? 
71 | exit $status -------------------------------------------------------------------------------- /examples/train/ScalingInter-RL/babyai_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="babyai" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=4 25 | train_batch_size=32 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=2 29 | 30 | total_epoches=20 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=scaling_inter_stepwise \ 42 | algorithm.rounds_ctrl.steps_scaling_inter=100 \ 43 | algorithm.rounds_ctrl.rounds=[6,13,20] \ 44 | data.train_file=AgentItemId/${task_name}_train.json \ 45 | data.train_batch_size=${train_batch_size} \ 46 | data.max_prompt_length=512 \ 47 | data.max_response_length=8192 \ 48 | actor_rollout_ref.agentgym.task_name=${task_name} \ 49 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 50 | actor_rollout_ref.agentgym.timeout=600 \ 51 | actor_rollout_ref.model.path=${agent_model_path} \ 52 | actor_rollout_ref.actor.use_kl_loss=True \ 53 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 54 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 55 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 56 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 57 | actor_rollout_ref.rollout.max_model_len=32768 \ 58 | actor_rollout_ref.rollout.max_tokens=512 \ 59 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 60 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 61 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 62 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 63 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 64 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 65 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 66 | trainer.default_local_dir=${model_save_path} \ 67 | trainer.project_name=xxx \ 68 | trainer.experiment_name=${exp_name} \ 69 | trainer.save_freq=25 \ 70 | trainer.total_epochs=${total_epoches} 71 | status=$? 
72 | exit $status -------------------------------------------------------------------------------- /examples/train/ScalingInter-RL/sciworld_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="sciworld" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=8 25 | train_batch_size=16 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=1 29 | 30 | total_epoches=10 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=scaling_inter_stepwise \ 42 | algorithm.rounds_ctrl.steps_scaling_inter=100 \ 43 | algorithm.rounds_ctrl.rounds=[10,20,30] \ 44 | data.train_file=AgentItemId/${task_name}_train.json \ 45 | data.train_batch_size=${train_batch_size} \ 46 | data.max_prompt_length=1024 \ 47 | data.max_response_length=8192 \ 48 | actor_rollout_ref.agentgym.task_name=${task_name} \ 49 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 50 | actor_rollout_ref.agentgym.timeout=600 \ 51 | actor_rollout_ref.model.path=${agent_model_path} \ 52 | actor_rollout_ref.actor.use_kl_loss=True \ 53 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 54 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 55 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 56 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 57 | actor_rollout_ref.rollout.max_model_len=32768 \ 58 | actor_rollout_ref.rollout.max_tokens=200 \ 59 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 60 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 61 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 62 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 63 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 64 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 65 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 66 | trainer.default_local_dir=${model_save_path} \ 67 | trainer.project_name=xxx \ 68 | trainer.experiment_name=${exp_name} \ 69 | trainer.save_freq=25 \ 70 | trainer.total_epochs=${total_epoches} 71 | status=$? 
72 | exit $status -------------------------------------------------------------------------------- /examples/train/ScalingInter-RL/searchqa_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="searchqa" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | # start training 16 | wandb login xxx 17 | 18 | pure_agent_model_name="Qwen2.5-7B-Instruct" 19 | agent_model_path="models/${pure_agent_model_name}" 20 | 21 | kl_coef=0.001 22 | policy_learning_rate=1e-6 23 | rollout_sample_num=4 24 | train_batch_size=32 25 | ppo_mini_batch_size=8 26 | ppo_micro_batch_size_per_gpu=1 27 | ppo_inner_epochs=2 28 | 29 | total_epoches=20 30 | 31 | model_save_dir="saves" 32 | mkdir -p ${model_save_dir} 33 | exp_name="test" 34 | model_save_path=${model_save_dir}/${exp_name} 35 | 36 | mkdir -p ${model_save_path} 37 | 38 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 39 | algorithm.adv_estimator=grpo \ 40 | algorithm.rounds_ctrl.type=scaling_inter_stepwise \ 41 | algorithm.rounds_ctrl.steps_scaling_inter=100 \ 42 | algorithm.rounds_ctrl.rounds=[5,8,10] \ 43 | data.train_file=AgentItemId/${task_name}_train.json \ 44 | data.train_batch_size=${train_batch_size} \ 45 | data.max_prompt_length=1024 \ 46 | data.max_response_length=8192 \ 47 | actor_rollout_ref.agentgym.task_name=${task_name} \ 48 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 49 | actor_rollout_ref.agentgym.timeout=600 \ 50 | actor_rollout_ref.model.path=${agent_model_path} \ 51 | actor_rollout_ref.actor.use_kl_loss=True \ 52 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 53 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 54 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 55 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 56 | actor_rollout_ref.rollout.max_model_len=32768 \ 57 | actor_rollout_ref.rollout.max_tokens=512 \ 58 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 59 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 60 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 61 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 62 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 63 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 64 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 65 | trainer.default_local_dir=${model_save_path} \ 66 | trainer.project_name=xxx \ 67 | trainer.experiment_name=${exp_name} \ 68 | trainer.save_freq=25 \ 69 | trainer.total_epochs=${total_epoches} 70 | status=$? 
71 | exit $status 72 | -------------------------------------------------------------------------------- /examples/train/ScalingInter-RL/textcraft_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="textcraft" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=8 25 | train_batch_size=32 26 | ppo_mini_batch_size=8 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=2 29 | 30 | total_epoches=30 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=scaling_inter_stepwise \ 42 | algorithm.rounds_ctrl.steps_scaling_inter=100 \ 43 | algorithm.rounds_ctrl.rounds=[10,20,30] \ 44 | data.train_file=AgentItemId/${task_name}_train.json \ 45 | data.train_batch_size=${train_batch_size} \ 46 | data.max_prompt_length=512 \ 47 | data.max_response_length=10240 \ 48 | actor_rollout_ref.agentgym.task_name=${task_name} \ 49 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 50 | actor_rollout_ref.agentgym.timeout=600 \ 51 | actor_rollout_ref.model.path=${agent_model_path} \ 52 | actor_rollout_ref.actor.use_kl_loss=True \ 53 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 54 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 55 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 56 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 57 | actor_rollout_ref.rollout.max_model_len=32768 \ 58 | actor_rollout_ref.rollout.max_tokens=512 \ 59 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 60 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 61 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 62 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 63 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 64 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 65 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 66 | trainer.default_local_dir=${model_save_path} \ 67 | trainer.project_name=xxx \ 68 | trainer.experiment_name=${exp_name} \ 69 | trainer.save_freq=25 \ 70 | trainer.total_epochs=${total_epoches} 71 | status=$? 
72 | exit $status -------------------------------------------------------------------------------- /examples/train/ScalingInter-RL/webarena_train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | export VLLM_USE_MODELSCOPE=0 3 | export VLLM_WORKER_MULTIPROC_METHOD=spawn 4 | export VLLM_ATTENTION_BACKEND=XFORMERS 5 | 6 | task_name="webarena" 7 | 8 | cd AgentGym-RL 9 | source activate 10 | conda activate agentgym-rl 11 | export VLLM_ATTENTION_BACKEND=XFORMERS 12 | export WANDB_BASE_URL=https://api.bandw.top 13 | 14 | env_server_url="http://127.0.0.1:36005" 15 | 16 | # start training 17 | wandb login xxx 18 | 19 | pure_agent_model_name="Qwen2.5-7B-Instruct" 20 | agent_model_path="models/${pure_agent_model_name}" 21 | 22 | kl_coef=0.001 23 | policy_learning_rate=1e-6 24 | rollout_sample_num=4 25 | train_batch_size=32 26 | ppo_mini_batch_size=4 27 | ppo_micro_batch_size_per_gpu=1 28 | ppo_inner_epochs=2 29 | 30 | total_epoches=25 31 | 32 | model_save_dir="saves" 33 | mkdir -p ${model_save_dir} 34 | exp_name="test" 35 | model_save_path=${model_save_dir}/${exp_name} 36 | 37 | mkdir -p ${model_save_path} 38 | 39 | HYDRA_FULL_ERROR=1 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True WANDB_MODE=online python3 -m verl.agent_trainer.main_ppo \ 40 | algorithm.adv_estimator=grpo \ 41 | algorithm.rounds_ctrl.type=scaling_inter_stepwise \ 42 | algorithm.rounds_ctrl.steps_scaling_inter=80 \ 43 | algorithm.rounds_ctrl.rounds=[8,12,15] \ 44 | data.train_file=AgentItemId/${task_name}_train.json \ 45 | data.train_batch_size=${train_batch_size} \ 46 | data.max_prompt_length=750 \ 47 | data.max_response_length=14098 \ 48 | actor_rollout_ref.agentgym.task_name=${task_name} \ 49 | actor_rollout_ref.agentgym.env_addr=${env_server_url} \ 50 | actor_rollout_ref.agentgym.timeout=600 \ 51 | actor_rollout_ref.model.path=${agent_model_path} \ 52 | actor_rollout_ref.actor.use_kl_loss=True \ 53 | actor_rollout_ref.actor.kl_loss_coef=0.001 \ 54 | actor_rollout_ref.actor.kl_loss_type=low_var_kl \ 55 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \ 56 | actor_rollout_ref.rollout.n=${rollout_sample_num} \ 57 | actor_rollout_ref.rollout.max_model_len=32768 \ 58 | actor_rollout_ref.rollout.max_tokens=512 \ 59 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 60 | actor_rollout_ref.actor.ppo_epochs=${ppo_inner_epochs} \ 61 | actor_rollout_ref.actor.optim.lr=${policy_learning_rate} \ 62 | actor_rollout_ref.actor.ppo_mini_batch_size=${ppo_mini_batch_size} \ 63 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${ppo_micro_batch_size_per_gpu} \ 64 | actor_rollout_ref.rollout.rollout_log_dir=${model_save_path}/executer_logs \ 65 | algorithm.kl_ctrl.kl_coef=${kl_coef} \ 66 | trainer.default_local_dir=${model_save_path} \ 67 | trainer.project_name=xxx \ 68 | trainer.experiment_name=${exp_name} \ 69 | trainer.save_freq=25 \ 70 | trainer.total_epochs=${total_epoches} 71 | status=$? 72 | exit $status --------------------------------------------------------------------------------
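The ScalingInter-RL training scripts above differ from their AgentGym-RL counterparts mainly in the rounds controller: instead of algorithm.rounds_ctrl.type=fixed with a single rounds value, they pass type=scaling_inter_stepwise together with steps_scaling_inter and a list of rounds. The parameter names suggest a stepwise schedule in which the per-episode interaction-round cap is raised every steps_scaling_inter training steps, walking through the listed values. The sketch below only illustrates that reading (the actual controller lives in verl.agent_trainer and may differ in detail); rounds_for_step is a hypothetical helper, not part of the repository.

# Hypothetical helper illustrating the assumed stepwise schedule behind
# algorithm.rounds_ctrl.type=scaling_inter_stepwise; not part of AgentGym-RL itself.
def rounds_for_step(global_step: int, rounds: list[int], steps_scaling_inter: int) -> int:
    """Return the interaction-round cap assumed to be in effect at a given training step."""
    stage = min(global_step // steps_scaling_inter, len(rounds) - 1)
    return rounds[stage]

# With the babyai overrides above (rounds=[6,13,20], steps_scaling_inter=100):
assert rounds_for_step(0, [6, 13, 20], 100) == 6     # steps 0-99 would use 6 rounds
assert rounds_for_step(150, [6, 13, 20], 100) == 13  # steps 100-199 would use 13 rounds
assert rounds_for_step(999, [6, 13, 20], 100) == 20  # later steps stay capped at 20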