├── .gitignore ├── LICENSE ├── README.md ├── assets ├── concept.png └── showcase.png ├── lang_wm ├── README.md ├── assets │ └── lang_wm.png ├── data_process │ └── text_game │ │ ├── README.md │ │ ├── calculate_r1_response.py │ │ ├── call_deepseek_r1.py │ │ ├── generate_sft_data.py │ │ ├── process_jsonl.py │ │ ├── process_jsonl_train.py │ │ └── text_game.py ├── verl │ ├── .gitattributes │ ├── .gitignore │ ├── .readthedocs.yaml │ ├── .style.yapf │ ├── LICENSE │ ├── Notice.txt │ ├── docker │ │ ├── Dockerfile.megatron │ │ ├── Dockerfile.ngc.vllm │ │ ├── Dockerfile.rocm │ │ └── Dockerfile.vemlp.vllm.te │ ├── docs │ │ ├── Makefile │ │ ├── README.md │ │ ├── README_vllm0.7.md │ │ ├── _static │ │ │ └── logo.png │ │ ├── advance │ │ │ ├── dpo_extension.rst │ │ │ ├── fsdp_extension.rst │ │ │ ├── megatron_extension.rst │ │ │ └── placement.rst │ │ ├── amd_tutorial │ │ │ ├── amd_build_dockerfile.md │ │ │ └── amd_existing_docker.md │ │ ├── conf.py │ │ ├── data.rst │ │ ├── examples │ │ │ ├── config.rst │ │ │ ├── gsm8k_example.rst │ │ │ └── ppo_code_architecture.rst │ │ ├── experiment │ │ │ └── ppo.rst │ │ ├── faq │ │ │ └── faq.rst │ │ ├── hybrid_flow.rst │ │ ├── index.rst │ │ ├── perf │ │ │ └── perf_tuning.rst │ │ ├── preparation │ │ │ ├── prepare_data.rst │ │ │ └── reward_function.rst │ │ ├── requirements-docs.txt │ │ ├── start │ │ │ ├── install.rst │ │ │ └── quickstart.rst │ │ └── workers │ │ │ ├── fsdp_workers.rst │ │ │ ├── megatron_workers.rst │ │ │ └── ray_trainer.rst │ ├── examples │ │ ├── data_preprocess │ │ │ ├── full_hh_rlhf.py │ │ │ ├── geo3k.py │ │ │ ├── gsm8k.py │ │ │ ├── hellaswag.py │ │ │ └── math_dataset.py │ │ ├── generation │ │ │ └── run_deepseek_v2_lite_math.sh │ │ ├── grpo_trainer │ │ │ ├── run_deepseek7b_llm.sh │ │ │ ├── run_deepseek7b_llm_seq_balance.sh │ │ │ ├── run_qwen2-7b.sh │ │ │ ├── run_qwen2-7b_seq_balance.sh │ │ │ ├── run_qwen2_5_vl-7b.sh │ │ │ ├── run_text_game_rl.sh │ │ │ └── run_web_agent_rl.sh │ │ ├── ppo_trainer │ │ │ ├── run_deepseek7b_llm.sh │ │ │ ├── run_deepseek7b_llm_modelscope.sh │ │ │ ├── run_deepseek7b_llm_sp2.sh │ │ │ ├── run_deepseek_full_hh_rlhf.sh │ │ │ ├── run_deepseek_math_gsm8k_megatron.sh │ │ │ ├── run_deepseek_megatron.sh │ │ │ ├── run_gemma.sh │ │ │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ │ │ ├── run_qwen2-7b_megatron.sh │ │ │ ├── run_qwen2-7b_rm.sh │ │ │ ├── run_qwen2-7b_rm_seq_balance.sh │ │ │ ├── run_qwen2-7b_seq_balance.sh │ │ │ ├── run_qwen2.5-32b.sh │ │ │ └── verl_getting_started.ipynb │ │ ├── ray │ │ │ └── tutorial.ipynb │ │ ├── remax_trainer │ │ │ ├── run_qwen2.5-3b_seq_balance.sh │ │ │ └── run_qwen2.5-7b_seq_balance.sh │ │ ├── rloo_trainer │ │ │ └── run_qwen2-7b.sh │ │ ├── sft │ │ │ ├── gsm8k │ │ │ │ ├── run_deepseek_6b7.sh │ │ │ │ ├── run_gemma_2b.sh │ │ │ │ ├── run_gemma_7b.sh │ │ │ │ ├── run_qwen_05_peft.sh │ │ │ │ ├── run_qwen_05_sp2.sh │ │ │ │ └── run_qwen_05_sp2_liger.sh │ │ │ ├── text_game │ │ │ │ └── run_text_game_sft.sh │ │ │ └── web_agent │ │ │ │ └── run_web_agent_sft.sh │ │ ├── slurm │ │ │ └── ray_on_slurm.slurm │ │ └── split_placement │ │ │ ├── README.md │ │ │ ├── config │ │ │ └── ppo_trainer_split.yaml │ │ │ ├── main_ppo_split.py │ │ │ ├── run_deepseek7b_llm.sh │ │ │ └── split_monkey_patch.py │ ├── merge_lora.py │ ├── patches │ │ └── megatron_v4.patch │ ├── pyproject.toml │ ├── requirements.txt │ ├── scripts │ │ ├── format.sh │ │ └── model_merger.py │ ├── setup.py │ ├── tests │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ ├── test_fsdp_ckpt.py │ │ │ └── test_megatron_ckpt.py │ │ ├── distributed │ │ │ ├── run_all.sh │ │ │ └── 
test_tensor_dict.py │ │ ├── distro │ │ │ └── requirements.py │ │ ├── e2e │ │ │ ├── __init__.py │ │ │ ├── arithmetic_sequence │ │ │ │ ├── data │ │ │ │ │ └── create_dataset.py │ │ │ │ ├── model │ │ │ │ │ ├── config.json │ │ │ │ │ ├── create_model_tokenizer.py │ │ │ │ │ ├── generation_config.json │ │ │ │ │ ├── model.safetensors │ │ │ │ │ └── tokenizer_config.json │ │ │ │ └── rl │ │ │ │ │ ├── README.md │ │ │ │ │ └── main_trainer.py │ │ │ ├── check_results.py │ │ │ ├── envs │ │ │ │ ├── __init__.py │ │ │ │ └── digit_completion │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── task.py │ │ │ │ │ └── tokenizer.py │ │ │ ├── run_deepseek_megatron.sh │ │ │ ├── run_deepseek_megatron_parallelism.sh │ │ │ ├── run_qwen2vl_geo3k_function_rm.sh │ │ │ ├── run_qwen_gsm8k_function_rm.sh │ │ │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ │ │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ │ │ ├── run_qwen_gsm8k_function_rm_remax.sh │ │ │ ├── run_qwen_gsm8k_model_rm.sh │ │ │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ │ │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ │ │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ │ │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ │ │ ├── run_qwen_megatron.sh │ │ │ ├── run_qwen_megatron_parallelism.sh │ │ │ ├── run_ray_trainer.sh │ │ │ ├── run_ray_trainer_fire_sampling.sh │ │ │ └── run_ray_trainer_rmpad.sh │ │ ├── generation │ │ │ └── run_gen_qwen05.sh │ │ ├── gpu_utility │ │ │ ├── test_memory_buffers.py │ │ │ ├── test_ops.py │ │ │ └── test_torch_functional.py │ │ ├── kill_github_tests.sh │ │ ├── model │ │ │ ├── test_transformer.py │ │ │ └── test_transformers_ulysses.py │ │ ├── ray │ │ │ ├── check_worker_alive │ │ │ │ └── main.py │ │ │ ├── detached_worker │ │ │ │ ├── README.md │ │ │ │ ├── client.py │ │ │ │ ├── run.sh │ │ │ │ └── server.py │ │ │ ├── test_check_worker_alive.py │ │ │ ├── test_colocated_workers.py │ │ │ ├── test_data_transfer.py │ │ │ ├── test_driverfunc_to_worker.py │ │ │ ├── test_high_level_scheduling_api.py │ │ │ ├── test_ray_local_envs.py │ │ │ ├── test_rvdz.py │ │ │ ├── test_worker_group_basics.py │ │ │ └── test_worker_group_torch.py │ │ ├── rollout │ │ │ ├── run_fsdp_vllm.py │ │ │ ├── test_vllm_hf_loader.py │ │ │ └── test_vllm_spmd.py │ │ ├── sandbox │ │ │ └── test_sandbox.py │ │ ├── sanity │ │ │ ├── check_license.py │ │ │ └── test_import.py │ │ ├── sft │ │ │ ├── run_sft.sh │ │ │ ├── run_sft_qwen05_peft.sh │ │ │ ├── run_sft_qwen05_sp2_liger.sh │ │ │ ├── run_sft_sp_loss_match.sh │ │ │ └── test_sp_loss_match.py │ │ ├── utility │ │ │ └── test_tensor_dict_utilities.py │ │ └── verl │ │ │ └── utils │ │ │ └── dataset │ │ │ ├── test_rl_dataset.py │ │ │ ├── test_rm_dataset.py │ │ │ └── test_sft_dataset.py │ └── verl │ │ ├── __init__.py │ │ ├── models │ │ ├── README.md │ │ ├── __init__.py │ │ ├── llama │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── checkpoint_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── llama_loader.py │ │ │ │ └── llama_saver.py │ │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── parallel_attention.py │ │ │ │ ├── parallel_decoder.py │ │ │ │ ├── parallel_linear.py │ │ │ │ ├── parallel_mlp.py │ │ │ │ └── parallel_rmsnorm.py │ │ │ │ └── modeling_llama_megatron.py │ │ ├── qwen2 │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── checkpoint_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen2_loader.py │ │ │ │ └── qwen2_saver.py │ │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── parallel_attention.py │ │ │ │ ├── parallel_decoder.py │ │ │ │ ├── parallel_linear.py │ │ │ │ ├── parallel_mlp.py │ │ │ │ └── parallel_rmsnorm.py │ │ │ │ 
└── modeling_qwen2_megatron.py │ │ ├── registry.py │ │ ├── transformers │ │ │ ├── __init__.py │ │ │ ├── llama.py │ │ │ ├── monkey_patch.py │ │ │ ├── qwen2.py │ │ │ └── qwen2_vl.py │ │ └── weight_loader_registry.py │ │ ├── protocol.py │ │ ├── single_controller │ │ ├── __init__.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── decorator.py │ │ │ ├── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── worker.py │ │ │ │ └── worker_group.py │ │ │ ├── register_center │ │ │ │ ├── __init__.py │ │ │ │ └── ray.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ └── ray │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── megatron.py │ │ ├── third_party │ │ ├── __init__.py │ │ └── vllm │ │ │ ├── __init__.py │ │ │ ├── vllm_spmd │ │ │ ├── __init__.py │ │ │ └── dtensor_weight_loaders.py │ │ │ ├── vllm_v_0_3_1 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── tokenizer.py │ │ │ ├── weight_loaders.py │ │ │ └── worker.py │ │ │ ├── vllm_v_0_4_2 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── dtensor_weight_loaders.py │ │ │ ├── hf_weight_loader.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── megatron_weight_loaders.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── spmd_gpu_executor.py │ │ │ ├── tokenizer.py │ │ │ └── worker.py │ │ │ ├── vllm_v_0_5_4 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── dtensor_weight_loaders.py │ │ │ ├── hf_weight_loader.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── megatron_weight_loaders.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── spmd_gpu_executor.py │ │ │ ├── tokenizer.py │ │ │ └── worker.py │ │ │ └── vllm_v_0_6_3 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── dtensor_weight_loaders.py │ │ │ ├── hf_weight_loader.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── megatron_weight_loaders.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── spmd_gpu_executor.py │ │ │ ├── tokenizer.py │ │ │ └── worker.py │ │ ├── trainer │ │ ├── __init__.py │ │ ├── config │ │ │ ├── evaluation.yaml │ │ │ ├── generation.yaml │ │ │ ├── ppo_megatron_trainer.yaml │ │ │ ├── ppo_trainer.yaml │ │ │ └── sft_trainer.yaml │ │ ├── fsdp_sft_trainer.py │ │ ├── main_eval.py │ │ ├── main_generation.py │ │ ├── main_ppo.py │ │ ├── ppo │ │ │ ├── __init__.py │ │ │ ├── core_algos.py │ │ │ └── ray_trainer.py │ │ └── runtime_env.yaml │ │ ├── utils │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ ├── __init__.py │ │ │ ├── checkpoint_manager.py │ │ │ └── fsdp_checkpoint_manager.py │ │ ├── config.py │ │ ├── dataset │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── rl_dataset.py │ │ │ ├── rl_dataset_text_game.py │ │ │ ├── rm_dataset.py │ │ │ └── sft_dataset.py │ │ ├── debug │ │ │ ├── __init__.py │ │ │ ├── performance.py │ │ │ └── trajectory_tracker.py │ │ ├── distributed.py │ │ ├── flops_counter.py │ │ ├── fs.py │ │ ├── fsdp_utils.py │ │ ├── hdfs_io.py │ │ ├── import_utils.py │ │ ├── logger │ │ │ ├── __init__.py │ │ │ └── aggregate_logger.py │ │ ├── logging_utils.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── memory.py │ │ │ ├── optimizer.py │ │ │ ├── pipeline_parallel.py │ │ │ ├── sequence_parallel.py │ │ │ └── tensor_parallel.py │ │ ├── megatron_utils.py │ │ ├── memory_buffer.py │ │ ├── model.py │ │ ├── py_functional.py │ │ ├── ray_utils.py │ │ ├── 
rendezvous │ │ │ ├── __init__.py │ │ │ └── ray_backend.py │ │ ├── reward_score │ │ │ ├── __init__.py │ │ │ ├── geo3k.py │ │ │ ├── gsm8k.py │ │ │ ├── math.py │ │ │ ├── prime_code │ │ │ │ ├── __init__.py │ │ │ │ ├── testing_util.py │ │ │ │ └── utils.py │ │ │ ├── prime_math │ │ │ │ ├── __init__.py │ │ │ │ ├── grader.py │ │ │ │ └── math_normalize.py │ │ │ ├── text_game.py │ │ │ └── web_agent.py │ │ ├── seqlen_balancing.py │ │ ├── tokenizer.py │ │ ├── torch_dtypes.py │ │ ├── torch_functional.py │ │ ├── tracking.py │ │ └── ulysses.py │ │ ├── version │ │ └── version │ │ └── workers │ │ ├── __init__.py │ │ ├── actor │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dp_actor.py │ │ └── megatron_actor.py │ │ ├── critic │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dp_critic.py │ │ └── megatron_critic.py │ │ ├── fsdp_workers.py │ │ ├── megatron_workers.py │ │ ├── reward_manager │ │ ├── __init__.py │ │ ├── naive.py │ │ └── prime.py │ │ ├── reward_model │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ └── reward_model.py │ │ ├── rollout │ │ ├── __init__.py │ │ ├── base.py │ │ ├── hf_rollout.py │ │ ├── naive │ │ │ ├── __init__.py │ │ │ └── naive_rollout.py │ │ ├── tokenizer.py │ │ └── vllm_rollout │ │ │ ├── __init__.py │ │ │ ├── fire_vllm_rollout.py │ │ │ ├── vllm_rollout.py │ │ │ └── vllm_rollout_spmd.py │ │ └── sharding_manager │ │ ├── __init__.py │ │ ├── base.py │ │ ├── fsdp_ulysses.py │ │ ├── fsdp_vllm.py │ │ └── megatron_vllm.py └── webagent │ ├── README.md │ ├── agent │ ├── __init__.py │ ├── agent.py │ ├── prompts │ │ ├── __init__.py │ │ ├── jsons │ │ │ ├── p_cot_id_actree_2s.json │ │ │ ├── p_cot_id_actree_2s_no_na.json │ │ │ ├── p_cot_id_actree_3s.json │ │ │ ├── p_multimodal_cot_id_actree_3s.json │ │ │ ├── p_som_cot_id_actree_3s.json │ │ │ ├── refine_tao.json │ │ │ ├── rlvr_translate_prompt.json │ │ │ ├── state_prediction │ │ │ │ ├── rlvr_world_model_prompt.json │ │ │ │ ├── text_only_acctree_format.json │ │ │ │ └── text_only_description_with_tao_format.json │ │ │ └── value_function │ │ │ │ ├── rlvr_value_prompt.json │ │ │ │ ├── text_only_value_function.json │ │ │ │ └── text_only_value_function_likert.json │ │ ├── prompt_constructor.py │ │ ├── raw │ │ │ ├── p_cot_id_actree_2s.py │ │ │ ├── p_cot_id_actree_2s_no_na.py │ │ │ ├── p_cot_id_actree_3s.py │ │ │ ├── p_multimodal_cot_id_actree_3s.py │ │ │ └── p_som_cot_id_actree_3s.py │ │ └── to_json.py │ ├── value_function.py │ └── world_model_agent.py │ ├── browser_env │ ├── __init__.py │ ├── actions.py │ ├── async_envs.py │ ├── auto_login.py │ ├── constants.py │ ├── env_config.py │ ├── envs.py │ ├── helper_functions.py │ ├── javascript │ │ ├── frame_mark_elements.js │ │ └── frame_unmark_elements.js │ ├── processors.py │ ├── py.typed │ ├── trajectory.py │ └── utils.py │ ├── config_files │ └── wa │ │ └── test_webarena.raw.json │ ├── evaluation_harness │ ├── __init__.py │ ├── evaluators.py │ ├── helper_functions.py │ └── image_utils.py │ ├── llms │ ├── __init__.py │ ├── lm_config.py │ ├── providers │ │ ├── gemini_utils.py │ │ ├── hf_utils.py │ │ └── openai_utils.py │ ├── tokenizers.py │ └── utils.py │ ├── prepare.sh │ ├── pyproject.toml │ ├── requirements.txt │ ├── run_for_trajectory.py │ ├── run_w_world_model.py │ ├── scripts │ ├── check_error_runs.py │ ├── collect_obs.py │ ├── generate_test_data.py │ └── parallel_run_webarena_rlvr.sh │ ├── setup.cfg │ └── setup.py └── vid_wm ├── README.md ├── assets └── vid_wm.png ├── ivideogpt ├── .gitignore ├── configs │ ├── ctx_vae256 │ │ └── config.json │ ├── ctx_vae64 │ │ └── config.json │ └── 
vgpt │ │ ├── ctx_llama_small.json │ │ ├── frac_action_ranges.pth │ │ ├── llama.json │ │ └── llama_small.json ├── eval_runenv.py ├── eval_vgpt.py ├── eval_vgpt_multiturn.py ├── ivideogpt │ ├── ctx_tokenizer │ │ ├── __init__.py │ │ ├── compressive_vq_model_fsq.py │ │ ├── conditional_vae.py │ │ └── vae.py │ ├── data │ │ ├── __init__.py │ │ ├── dataset_mixes.py │ │ ├── simple_dataloader.py │ │ └── sthsth_dataloader.py │ ├── processor.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── vae.py │ │ └── vq_model.py │ └── utils │ │ ├── discriminator.py │ │ ├── finite_scalar_quantize.py │ │ ├── lpips.py │ │ └── video_metric.py ├── rt1_inference.py ├── scripts │ ├── eval_multi_step_prediction.sh │ ├── eval_policy.sh │ ├── eval_single_step_prediction.sh │ ├── train_compressive_tokenizer.sh │ ├── train_multi_step_prediction.sh │ ├── train_perframe_tokenizer.sh │ └── train_single_step_prediction.sh ├── train_ctx_tokenizer.py ├── train_tokenizer.py ├── train_vgpt.py └── transform_vgpt_checkpoint.py ├── oxe_data_converter.py ├── requirements.txt └── verl ├── .gitignore ├── .readthedocs.yaml ├── .style.yapf ├── LICENSE ├── Notice.txt ├── README.md ├── docker ├── Dockerfile.megatron ├── Dockerfile.ngc.vllm ├── Dockerfile.ngc.vllm0.8 ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.rocm ├── Dockerfile.sglang └── Dockerfile.vemlp.vllm.te ├── docs ├── Makefile ├── README.md ├── README_vllm0.7.md ├── README_vllm0.8.md ├── _static │ └── logo.png ├── advance │ ├── checkpoint.rst │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ └── placement.rst ├── amd_tutorial │ └── amd_build_dockerfile_page.rst ├── conf.py ├── data.rst ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ └── ppo_code_architecture.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ └── perf_tuning.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── start │ ├── install.rst │ ├── multinode.rst │ └── quickstart.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ └── ray_trainer.rst ├── examples ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ └── run_qwen_megatron_ckpt.sh ├── data_preprocess │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── hellaswag.py │ ├── math_dataset.py │ └── multiturn.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_ctx_msp_vgpt.sh │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2_5_vl-7b.sh │ └── run_vgpt.sh ├── ppo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sglang_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── 
run_qwen2.5-7b_seq_balance.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh │ ├── multiturn │ │ └── run_qwen_05_sp2.sh │ └── videogpt │ │ └── run.sh ├── slurm │ └── ray_on_slurm.slurm └── split_placement │ ├── README.md │ ├── config │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── ivideogpt ├── configs │ ├── frac_action_ranges.pth │ ├── llama.json │ └── llama_small.json ├── ctx_tokenizer │ ├── compressive_vq_model.py │ ├── conditional_vae.py │ └── vae.py ├── data │ ├── __init__.py │ ├── dataset_mixes.py │ ├── simple_dataloader.py │ └── sthsth_dataloader.py ├── lpips.py ├── processor.py ├── scripts │ └── summarize_action_ranges.py └── tokenizer │ ├── __init__.py │ ├── finite_scalar_quantize.py │ ├── vae.py │ └── vq_model.py ├── merge_sharded_ckpts.py ├── patches └── megatron_v4.patch ├── pyproject.toml ├── recipe ├── dapo │ ├── README.md │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── src │ │ ├── config │ │ │ └── dapo_trainer.yaml │ │ ├── dapo_ray_trainer.py │ │ └── main_dapo.py │ └── test_dapo_7b.sh ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ └── run_prime_qwen.sh └── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ ├── __init__.py │ ├── gpqa.py │ ├── livecodebench.py │ └── math.py ├── requirements.txt ├── requirements_sglang.txt ├── scripts ├── diagnose.py ├── format.sh └── model_merger.py ├── setup.py ├── tests ├── __init__.py ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ ├── run_qwen_megatron_ckpt.sh │ └── test_fsdp_ckpt.py ├── distributed │ ├── run_all.sh │ └── test_tensor_dict.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── run_deepseek_grpo.sh │ ├── run_deepseek_grpo_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_deepseek_megatron_parallelism.sh │ ├── run_qwen2vl_geo3k_function_rm.sh │ ├── run_qwen_grpo.sh │ ├── run_qwen_grpo_megatron.sh │ ├── run_qwen_gsm8k_custom_function_rm.sh │ ├── run_qwen_gsm8k_dapo.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_both_kl.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_qwen_gsm8k_prime.sh │ ├── run_qwen_megatron.sh │ ├── run_qwen_megatron_parallelism.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_ray_trainer.sh │ ├── run_ray_trainer_fire_sampling.sh │ └── run_ray_trainer_rmpad.sh ├── generation │ └── 
run_gen_qwen05.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── kill_github_tests.sh ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── rollout │ ├── run_fsdp_vllm.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ └── test_vllm_spmd.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ ├── run_sft.sh │ ├── run_sft_multiturn.sh │ ├── run_sft_qwen05_peft.sh │ ├── run_sft_qwen05_sp2_liger.sh │ ├── run_sft_sp_loss_match.sh │ └── test_sp_loss_match.py ├── utility │ └── test_tensor_dict_utilities.py └── verl │ └── utils │ └── dataset │ ├── test_multiturn_sft_dataset.py │ ├── test_rl_dataset.py │ ├── test_rm_dataset.py │ └── test_sft_dataset.py └── verl ├── __init__.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ ├── llama_loader_depracated.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── mcore │ ├── __init__.py │ ├── gpt_model.py │ ├── loader.py │ └── saver.py ├── qwen2 │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── qwen2_loader.py │ │ ├── qwen2_loader_depracated.py │ │ └── qwen2_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_qwen2_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ ├── qwen2.py │ └── qwen2_vl.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ ├── base.py │ └── megatron.py ├── third_party ├── __init__.py ├── sglang │ ├── __init__.py │ └── parallel_state.py └── vllm │ ├── __init__.py │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── 
parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ ├── sft_trainer.yaml │ └── vgpt_ppo_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── main_vgpt_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ ├── metric_utils.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_manager.py │ ├── fsdp_checkpoint_manager.py │ └── megatron_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── multiturn_sft_dataset.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── performance.py │ └── trajectory_tracker.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── math.py │ ├── math_dapo.py │ ├── math_verify.py │ ├── prime_code │ │ ├── __init__.py │ │ ├── testing_util.py │ │ └── utils.py │ └── prime_math │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py └── ulysses.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_manager ├── __init__.py ├── dapo.py ├── naive.py └── prime.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── sglang_rollout │ ├── __init__.py │ └── sglang_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ ├── fire_vllm_rollout.py │ ├── vllm_rollout.py │ └── vllm_rollout_spmd.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_sglang.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.idea 3 | __pycache__ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 THUML @ Tsinghua University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/assets/concept.png -------------------------------------------------------------------------------- /assets/showcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/assets/showcase.png -------------------------------------------------------------------------------- /lang_wm/assets/lang_wm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/assets/lang_wm.png -------------------------------------------------------------------------------- /lang_wm/verl/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /lang_wm/verl/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 20 | -------------------------------------------------------------------------------- /lang_wm/verl/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true -------------------------------------------------------------------------------- /lang_wm/verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. 
and/or its affiliates -------------------------------------------------------------------------------- /lang_wm/verl/docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . -------------------------------------------------------------------------------- /lang_wm/verl/docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 7 | 8 | # Set working directory 9 | # WORKDIR $PWD/app 10 | 11 | # Set environment variables 12 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 13 | 14 | # Install vllm 15 | RUN pip uninstall -y vllm && \ 16 | rm -rf vllm && \ 17 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 18 | cd vllm && \ 19 | MAX_JOBS=$(nproc) python3 setup.py install && \ 20 | cd .. && \ 21 | rm -rf vllm 22 | 23 | # Copy the entire project directory 24 | COPY . . 25 | 26 | # Install dependencies 27 | RUN pip install "tensordict<0.6" --no-deps && \ 28 | pip install accelerate \ 29 | codetiming \ 30 | datasets \ 31 | dill \ 32 | hydra-core \ 33 | liger-kernel \ 34 | numpy \ 35 | pandas \ 36 | peft \ 37 | "pyarrow>=15.0.0" \ 38 | pylatexenc \ 39 | "ray[data,train,tune,serve]" \ 40 | torchdata \ 41 | transformers \ 42 | wandb \ 43 | orjson \ 44 | pybind11 && \ 45 | pip install -e . --no-deps -------------------------------------------------------------------------------- /lang_wm/verl/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /lang_wm/verl/docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and open localhost:8000. 
-------------------------------------------------------------------------------- /lang_wm/verl/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/verl/docs/_static/logo.png -------------------------------------------------------------------------------- /lang_wm/verl/docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /lang_wm/verl/docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_generation \ 2 | trainer.nnodes=1 \ 3 | trainer.n_gpus_per_node=8 \ 4 | data.path=~/data/rlhf/gsm8k/test.parquet \ 5 | data.prompt_key=prompt \ 6 | data.n_samples=1 \ 7 | data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \ 8 | model.path=deepseek-ai/deepseek-llm-7b-chat \ 9 | +model.trust_remote_code=True \ 10 | rollout.temperature=1.0 \ 11 | rollout.top_k=50 \ 12 | rollout.top_p=0.7 \ 13 | rollout.prompt_length=2048 \ 14 | rollout.response_length=1024 \ 15 | rollout.tensor_model_parallel_size=2 \ 16 | rollout.gpu_memory_utilization=0.8 17 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | +data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_gemma_2b.sh:
-------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | +data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32 \ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 |
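Note: the model.lora_rank, model.lora_alpha, and model.target_modules overrides in run_qwen_05_peft.sh above configure LoRA fine-tuning through PEFT. As a rough mental model only (this is not the verl code path; the SFT trainer builds its adapter configuration internally), they correspond to a Hugging Face `peft.LoraConfig` roughly like the sketch below, which assumes a `peft` release recent enough to accept the "all-linear" shorthand.

```python
# Illustrative sketch only: what the LoRA overrides above roughly express in peft terms.
# This is NOT how the verl trainer is invoked; it constructs its own adapter config.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
lora_cfg = LoraConfig(
    r=32,                         # model.lora_rank=32
    lora_alpha=16,                # model.lora_alpha=16
    target_modules="all-linear",  # model.target_modules=all-linear (or e.g. ["q_proj", "v_proj"])
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(base, lora_cfg)
peft_model.print_trainable_parameters()  # only the low-rank adapter weights are trainable
```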
-------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2_liger.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/text_game/run_text_game_sft.sh: -------------------------------------------------------------------------------- 1 | # using 4x80G A100 GPUs 2 | 3 | torchrun --standalone --nnodes=1 --nproc_per_node=4 \ 4 | -m verl.trainer.fsdp_sft_trainer \ 5 | data.train_files=thuml/bytesized32-world-model-cot/generated_cot.parquet \ 6 | data.val_files=thuml/bytesized32-world-model-cot/generated_cot.parquet \ 7 | data.train_batch_size=16 \ 8 | data.prompt_key=prompt \ 9 | data.response_key=reward_model \ 10 | +data.prompt_dict_keys=['content'] \ 11 | +data.response_dict_keys=['ground_truth'] \ 12 | data.micro_batch_size_per_gpu=1 \ 13 | data.max_length=11384 \ 14 | model.partial_pretrain=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 15 | trainer.default_local_dir=log/sft_text_game_simulator_experiment \ 16 | trainer.project_name=text_game_simulator_sft \ 17 |
trainer.experiment_name=text_game_simulator_deepseek_generated_data_sft \ 18 | trainer.total_epochs=15 \ 19 | trainer.logger=['console','wandb'] \ 20 | model.lora_rank=32 \ 21 | model.lora_alpha=16 $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/web_agent/run_web_agent_sft.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | torchrun -m --nnodes 1 --nproc_per_node=8 \ 4 | verl.trainer.fsdp_sft_trainer \ 5 | data.train_files=thuml/webarena-world-model-cot/train.parquet \ 6 | data.val_files=thuml/webarena-world-model-cot/test.parquet \ 7 | data.prompt_key=question \ 8 | data.response_key=answer \ 9 | data.micro_batch_size_per_gpu=1 \ 10 | model.partial_pretrain=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 11 | model.lora_rank=32 \ 12 | model.lora_alpha=16 \ 13 | trainer.default_hdfs_dir=hdfs://user/verl/experiments/webagent/DeepSeek-R1-Distill-Qwen-1.5B \ 14 | trainer.project_name=webagent-sft \ 15 | trainer.experiment_name=webagent-sft \ 16 | trainer.total_epochs=40 \ 17 | trainer.logger="['console','wandb']" \ 18 | data.train_batch_size=8 \ 19 | trainer.default_local_dir=log/webagent-sft -------------------------------------------------------------------------------- /lang_wm/verl/merge_lora.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoModelForCausalLM 2 | from peft import PeftModel 3 | 4 | base_model = AutoModelForCausalLM.from_pretrained("DeepSeek-R1-Distill-Qwen-1.5B") 5 | peft_model_id = "log/webagent-sft-DeepSeek-R1-Distill-Qwen-1.5B/global_step_xxxxxx" # the output path 6 | model = PeftModel.from_pretrained(base_model, peft_model_id) 7 | merged_model = model.merge_and_unload() 8 | print(type(merged_model)) 9 | merged_model.save_pretrained("webagent-sft-DeepSeek-R1-Distill-Qwen-1.5B-merged-lowest") # where to save the merged model -------------------------------------------------------------------------------- /lang_wm/verl/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | peft 11 | pyarrow>=15.0.0 12 | pybind11 13 | pylatexenc 14 | ray[data,train,tune,serve] 15 | tensordict<0.6 16 | torchdata 17 | transformers 18 | wandb 19 | json-repair 20 | dirtyjson 21 | bytes32==1.1.0 22 | openai==1.6.1 23 | tiktoken==0.5.2 24 | pandas==2.1.4 25 | plotly==5.19.0 26 | kaleido==0.2.1 27 | termcolor==2.4.0 28 | -------------------------------------------------------------------------------- /lang_wm/verl/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests examples -------------------------------------------------------------------------------- /lang_wm/verl/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /lang_wm/verl/tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as follows: 4 | 5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should wrap around via a modulo operation. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is in verl/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for a better understanding.
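To make the wrap-around rule above concrete, here is a minimal sketch that reproduces the worked answer, assuming values wrap modulo (max_number + 1) once they exceed max_number. The helper name `complete_sequence` is hypothetical; the authoritative task logic lives in the digit_completion task.py referenced in the Environment definition section.

```python
# Hypothetical helper for illustration only; not the repo's task.py.
def complete_sequence(prompt, n, max_number):
    diff = prompt[1] - prompt[0]   # fixed difference of the arithmetic sequence
    value = prompt[-1]
    response = []
    for _ in range(n):
        value = (value + diff) % (max_number + 1)  # assumed wrap-around rule
        response.append(value)
    return response

assert complete_sequence([1, 2, 3], n=5, max_number=6) == [4, 5, 6, 0, 1]
```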
20 | 21 | 22 | 23 | # Run experiments 24 | 25 | Users are required to specify the config path and config name (and the model config path relative to the current working directory). 26 | 27 | ```bash 28 | # cd examples/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of the model config is 'config/model_config'; if you want to change it, you can rewrite it in ray_megatron.yaml or override it on the command line: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=grpo \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=remax \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 
30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 6 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 7 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 8 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 9 | actor_rollout_ref.rollout.name=vllm \ 10 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 11 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 12 | critic.model.path=Qwen/Qwen2.5-0.5B \ 13 | critic.model.use_remove_padding=True \ 14 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /lang_wm/verl/tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | # Tested with 1 & 4 GPUs 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_gen_qwen05.sh [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | python3 -m verl.trainer.main_generation \ 16 | trainer.nnodes=1 \ 17 | trainer.n_gpus_per_node=$nproc_per_node \ 18 | data.path=$HOME/data/gsm8k/test.parquet \ 19 | data.prompt_key=prompt \ 20 | data.n_samples=1 \ 21 | data.output_path=$save_path \ 22 | model.path=Qwen/Qwen2.5-0.5B-Instruct \ 23 | +model.trust_remote_code=True \ 24 | rollout.temperature=1.0 \ 25 | rollout.top_k=50 \ 26 | rollout.top_p=0.7 \ 27 | rollout.prompt_length=2048 \ 28 | rollout.response_length=1024 \ 29 | rollout.tensor_model_parallel_size=2 \ 30 | rollout.gpu_memory_utilization=0.8 31 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/kill_github_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "Usage: $0 YOUR_GITHUB_TOKEN" 5 | echo "Please provide exactly one input argument for your github token." 6 | exit 1 7 | fi 8 | 9 | # Set your GitHub repository details 10 | OWNER="volcengine" 11 | REPO="verl" 12 | TOKEN=$1 13 | 14 | # API URL for workflow runs 15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued" 16 | 17 | # Check required commands 18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; } 19 | 20 | # Get queued workflow runs 21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL") 22 | 23 | # Run this for debugging 24 | # echo $response 25 | 26 | # Extract run IDs 27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id') 28 | 29 | if [ -z "$queued_run_ids" ]; then 30 | echo "No queued workflow runs found." 31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs." 
42 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /lang_wm/verl/tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet 
\ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | 29 | from . import single_controller 30 | 31 | __all__ = ['DataProto', "__version__"] 32 | 33 | if os.getenv('VERL_USE_MODELSCOPE', 'False').lower() == 'true': 34 | import importlib 35 | if importlib.util.find_spec("modelscope") is None: 36 | raise ImportError(f'You are using the modelscope hub, please install modelscope by `pip install modelscope -U`') 37 | # Patch hub to download models from modelscope to speed up. 38 | from modelscope.utils.hf_util import patch_hub 39 | patch_hub() 40 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | # original model with megatron 17 | ParallelQwen2Model, 18 | ParallelQwen2ForCausalLM, 19 | # rmpad with megatron 20 | ParallelQwen2ForCausalLMRmPad, 21 | ParallelQwen2ForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelQwen2ForCausalLMRmPadPP, 24 | ParallelQwen2ForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | # Note(haibin.lin): single_controller.__version__ is deprecated 20 | with open(os.path.join(os.path.join(version_folder, os.pardir), 'version/version')) as f: 21 | __version__ = f.read().strip() 22 | 23 | from . import base 24 | from .base import * 25 | 26 | __all__ = base.__all__ -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | 18 | __all__ = ['Worker', 'WorkerGroup', 'ClassWithInitArgs', 'ResourcePool'] -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | 21 | def __init__(self, rank_zero_info): 22 | self.rank_zero_info = rank_zero_info 23 | 24 | def get_rank_zero_info(self): 25 | return self.rank_zero_info 26 | 27 | 28 | def create_worker_group_register_center(name, info): 29 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 30 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_spmd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | prompt_key: question 8 | response_key: answer 9 | max_length: 5000 10 | truncation: error 11 | balance_dp_token: False 12 | chat_template: null 13 | model: 14 | partial_pretrain: ~/models/gemma-1.1-7b-it 15 | fsdp_config: 16 | wrap_policy: 17 | min_num_params: 0 18 | cpu_offload: False 19 | offload_params: False 20 | external_lib: null 21 | enable_gradient_checkpointing: False 22 | trust_remote_code: False 23 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 24 | lora_alpha: 16 # LoRA scaling factor 25 | target_modules: all-linear # Target modules for LoRA adaptation 26 | use_liger: False 27 | optim: 28 | lr: 1e-5 29 | betas: [0.9, 0.95] 30 | weight_decay: 0.01 31 | warmup_steps_ratio: 0.1 32 | clip_grad: 1.0 33 | ulysses_sequence_parallel_size: 1 34 | use_remove_padding: False 35 | trainer: 36 | default_local_dir: ./log/sft_model 37 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 38 | resume_path: null 39 | project_name: gsm8k-sft 40 | experiment_name: test 41 | total_epochs: 4 42 | total_training_steps: null 43 | logger: ['console'] 44 | seed: 1 45 | 46 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_tokenizer, hf_processor 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We organize the prompt directly in the chat format so that multi-turn chats can be incorporated easily. In the prompt, we may add instruction-following text that guides the model to output the answer in a particular format so that it can be extracted. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May.
How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | import torch.distributed as dist 17 | import logging 18 | 19 | 20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0): 21 | if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank): 22 | memory_allocated = torch.cuda.memory_allocated() / 1024**3 23 | memory_reserved = torch.cuda.memory_reserved() / 1024**3 24 | 25 | message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}' 26 | 27 | if logger is None: 28 | print(message) 29 | else: 30 | logger.log(msg=message, level=level) 31 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | import os 16 | 17 | 18 | def initialize_global_process_group(timeout_second=36000): 19 | import torch.distributed 20 | from datetime import timedelta 21 | torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second)) 22 | local_rank = int(os.environ["LOCAL_RANK"]) 23 | rank = int(os.environ["RANK"]) 24 | world_size = int(os.environ["WORLD_SIZE"]) 25 | 26 | if torch.distributed.is_initialized(): 27 | torch.cuda.set_device(local_rank) 28 | return local_rank, rank, world_size 29 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/import_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Utilities to check if packages are available. 16 | We assume package availability won't change during runtime. 
17 | """ 18 | 19 | from functools import cache 20 | from typing import List 21 | 22 | 23 | @cache 24 | def is_megatron_core_available(): 25 | try: 26 | from megatron.core import parallel_state as mpu 27 | return True 28 | except ImportError: 29 | return False 30 | 31 | 32 | @cache 33 | def is_vllm_available(): 34 | try: 35 | import vllm 36 | return True 37 | except ImportError: 38 | return False 39 | 40 | 41 | def import_external_libs(external_libs=None): 42 | if external_libs is None: 43 | return 44 | if not isinstance(external_libs, List): 45 | external_libs = [external_libs] 46 | import importlib 47 | for external_lib in external_libs: 48 | importlib.import_module(external_lib) 49 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | import torch 18 | 19 | 20 | def set_basic_config(level): 21 | """ 22 | This function sets the global logging format and level. It will be called when import verl 23 | """ 24 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 25 | 26 | 27 | def log_to_file(string): 28 | print(string) 29 | if os.path.isdir('logs'): 30 | with open(f'logs/log_{torch.distributed.get_rank()}', 'a+') as f: 31 | f.write(string + '\n') 32 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import ray 19 | 20 | import concurrent.futures 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | 25 | def put_data(index, data): 26 | return index, ray.put(data) 27 | 28 | if max_workers is None: 29 | max_workers = min(len(data_list), 16) 30 | 31 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 32 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 33 | res_lst = [] 34 | for future in concurrent.futures.as_completed(data_list_f): 35 | res_lst.append(future.result()) 36 | 37 | # reorder based on index 38 | output = [None for _ in range(len(data_list))] 39 | for res in res_lst: 40 | index, data_ref = res 41 | output[index] = data_ref 42 | 43 | return output 44 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r'<think>.*</think>.*\\boxed\{.*\}.*', re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str) -> float: 26 | answer = extract_boxed_content(predict_str) 27 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 28 | 29 | 30 | def compute_score(predict_str: str, ground_truth: str) -> float: 31 | return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str) 32 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2.0.dev 2 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd.
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available 16 | 17 | from .base import BaseShardingManager 18 | from .fsdp_ulysses import FSDPUlyssesShardingManager 19 | 20 | AllGatherPPModel = None 21 | 22 | if is_megatron_core_available() and is_vllm_available(): 23 | from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager 24 | elif AllGatherPPModel is not None: 25 | pass 26 | else: 27 | AllGatherPPModel = None 28 | MegatronVLLMShardingManager = None 29 | 30 | if is_vllm_available(): 31 | from .fsdp_vllm import FSDPVLLMShardingManager 32 | else: 33 | FSDPVLLMShardingManager = None 34 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | -------------------------------------------------------------------------------- /lang_wm/webagent/agent/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | from .agent import ( 5 | Agent, 6 | PromptAgent, 7 | TeacherForcingAgent, 8 | construct_agent, 9 | ) 10 | # from .world_model_agent import WMAgent 11 | 12 | __all__ = ["Agent", "TeacherForcingAgent", "PromptAgent", "construct_agent"] 13 | -------------------------------------------------------------------------------- /lang_wm/webagent/agent/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .prompt_constructor import * 2 | -------------------------------------------------------------------------------- /lang_wm/webagent/agent/prompts/jsons/refine_tao.json: -------------------------------------------------------------------------------- 1 | { 2 | "intro": "Summarize the key changes in the web page based on the following information:\nNew items: {new_items}\nUpdated items: {updated_items}\nDeleted items: {deleted_items}\n\nWhen summarizing, follow these output format:\n1. [First key change]\n2. [Second key change]\n3. [Third key change]\n...\n10. 
[Tenth key change]", 3 | "examples":[], 4 | "template": "", 5 | "meta_data": { 6 | "keywords": [ 7 | "new_items", 8 | "updated_items", 9 | "deleted_items" 10 | ] 11 | } 12 | } -------------------------------------------------------------------------------- /lang_wm/webagent/agent/prompts/to_json.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import importlib 3 | import json 4 | import os 5 | 6 | 7 | # use the current directory as the root 8 | def run() -> None: 9 | """Convert all python files in agent/prompts to json files in agent/prompts/jsons 10 | 11 | Python files are easiser to edit 12 | """ 13 | for p_file in glob.glob(f"agent/prompts/raw/*.py"): 14 | # import the file as a module 15 | base_name = os.path.basename(p_file).replace(".py", "") 16 | module = importlib.import_module(f"agent.prompts.raw.{base_name}") 17 | prompt = module.prompt 18 | # save the prompt as a json file 19 | os.makedirs("agent/prompts/jsons", exist_ok=True) 20 | with open(f"agent/prompts/jsons/{base_name}.json", "w+") as f: 21 | json.dump(prompt, f, indent=2) 22 | print(f"Done convert python files to json") 23 | 24 | 25 | if __name__ == "__main__": 26 | run() 27 | -------------------------------------------------------------------------------- /lang_wm/webagent/browser_env/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/webagent/browser_env/py.typed -------------------------------------------------------------------------------- /lang_wm/webagent/browser_env/trajectory.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .actions import Action 4 | from .utils import StateInfo 5 | 6 | Trajectory = list[Union[StateInfo, Action]] 7 | -------------------------------------------------------------------------------- /lang_wm/webagent/evaluation_harness/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | from .evaluators import * 5 | from .helper_functions import ( 6 | get_query_text, 7 | get_query_text_lowercase, 8 | reddit_get_latest_comment_content_by_username, 9 | reddit_get_latest_comment_obj_by_username, 10 | reddit_get_parent_comment_username_of_latest_comment_by_username, 11 | shopping_get_latest_order_url, 12 | shopping_get_num_reviews, 13 | shopping_get_order_product_name_list, 14 | shopping_get_order_product_option, 15 | shopping_get_order_product_quantity, 16 | shopping_get_product_attributes, 17 | shopping_get_product_price, 18 | shopping_get_rating_as_percentage, 19 | shopping_get_sku_latest_review_author, 20 | shopping_get_sku_latest_review_rating, 21 | shopping_get_sku_latest_review_text, 22 | ) 23 | -------------------------------------------------------------------------------- /lang_wm/webagent/llms/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | """This module is adapt from https://github.com/zeno-ml/zeno-build""" 5 | try: 6 | from .providers.gemini_utils import generate_from_gemini_completion 7 | except: 8 | print('Google Cloud not set up, skipping import of providers.gemini_utils.generate_from_gemini_completion') 9 | 10 | from .providers.hf_utils import 
generate_from_huggingface_completion 11 | from .providers.openai_utils import ( 12 | generate_from_openai_chat_completion, 13 | generate_from_openai_completion, 14 | ) 15 | from .utils import call_llm 16 | 17 | __all__ = [ 18 | "generate_from_openai_completion", 19 | "generate_from_openai_chat_completion", 20 | "generate_from_huggingface_completion", 21 | "generate_from_gemini_completion", 22 | "call_llm", 23 | ] 24 | -------------------------------------------------------------------------------- /lang_wm/webagent/llms/providers/hf_utils.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | from text_generation import Client # type: ignore 5 | 6 | 7 | def generate_from_huggingface_completion( 8 | prompt: str, 9 | model_endpoint: str, 10 | temperature: float, 11 | top_p: float, 12 | max_new_tokens: int, 13 | stop_sequences: list[str] | None = None, 14 | ) -> str: 15 | client = Client(model_endpoint, timeout=60) 16 | generation: str = client.generate( 17 | prompt=prompt, 18 | temperature=temperature, 19 | top_p=top_p, 20 | max_new_tokens=max_new_tokens, 21 | stop_sequences=stop_sequences, 22 | ).generated_text 23 | 24 | return generation 25 | -------------------------------------------------------------------------------- /lang_wm/webagent/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # re-validate login information 3 | export DATASET=webarena 4 | mkdir -p ./.auth 5 | python -m browser_env.auto_login -------------------------------------------------------------------------------- /lang_wm/webagent/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.mypy] 2 | explicit_package_bases = true 3 | exclude = [ 4 | "environment_docker" 5 | ] 6 | 7 | [[tool.mypy.overrides]] 8 | module = [ 9 | "setuptools.*", 10 | "pytest.*", 11 | "pytest_asyncio.*", 12 | "py.*", 13 | "munkres.*", 14 | "weave.*", 15 | "gradio_client.*", 16 | "datasets.*", 17 | "google.*", 18 | "vertexai.*", 19 | "transformers.*" 20 | ] 21 | ignore_missing_imports = true -------------------------------------------------------------------------------- /lang_wm/webagent/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = webarena 3 | 4 | [tool.pytest.ini_options] 5 | testpaths = ["tests"] 6 | python_files = "test_*.py" 7 | 8 | [options.extras_require] 9 | dev = 10 | pre-commit==3.0.1 11 | pytest==7.1.2 12 | mypy==0.991 13 | nbmake 14 | pytest-asyncio 15 | types-requests 16 | 17 | [options] 18 | python_requires = >=3.7, <4 19 | packages = 20 | browser_env 21 | agent 22 | evaluation_harness 23 | llms 24 | [mypy] 25 | strict = true -------------------------------------------------------------------------------- /lang_wm/webagent/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup() -------------------------------------------------------------------------------- /vid_wm/assets/vid_wm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/assets/vid_wm.png -------------------------------------------------------------------------------- /vid_wm/ivideogpt/.gitignore: 
-------------------------------------------------------------------------------- 1 | trm-eval 2 | eval_jsonl -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/ctx_vae256/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "VQModel", 3 | "_diffusers_version": "0.25.0.dev0", 4 | "act_fn": "silu", 5 | "block_out_channels": [ 6 | 128, 7 | 256, 8 | 256, 9 | 512 10 | ], 11 | "down_block_types": [ 12 | "DownEncoderBlock2D", 13 | "DownEncoderBlock2D", 14 | "DownEncoderBlock2D", 15 | "DownEncoderBlock2D" 16 | ], 17 | "in_channels": 3, 18 | "latent_channels": 64, 19 | "layers_per_block": 2, 20 | "lookup_from_codebook": true, 21 | "mid_block_add_attention": false, 22 | "norm_num_groups": 32, 23 | "norm_type": "group", 24 | "vq_fsq_levels": 12, 25 | "out_channels": 3, 26 | "sample_size": 32, 27 | "scaling_factor": 0.18215, 28 | "up_block_types": [ 29 | "UpDecoderBlock2D", 30 | "UpDecoderBlock2D", 31 | "UpDecoderBlock2D", 32 | "UpDecoderBlock2D" 33 | ], 34 | "force_upcast": true, 35 | "dyn_fsq_levels": 12, 36 | "context_length": 1, 37 | "resolution": 256, 38 | "max_att_resolution": 32 39 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/ctx_vae64/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "VQModel", 3 | "_diffusers_version": "0.25.0.dev0", 4 | "act_fn": "silu", 5 | "block_out_channels": [ 6 | 128, 7 | 256, 8 | 512 9 | ], 10 | "down_block_types": [ 11 | "DownEncoderBlock2D", 12 | "DownEncoderBlock2D", 13 | "DownEncoderBlock2D" 14 | ], 15 | "in_channels": 3, 16 | "latent_channels": 64, 17 | "layers_per_block": 2, 18 | "lookup_from_codebook": true, 19 | "mid_block_add_attention": false, 20 | "norm_num_groups": 32, 21 | "norm_type": "group", 22 | "num_vq_embeddings": 8192, 23 | "out_channels": 3, 24 | "sample_size": 32, 25 | "scaling_factor": 0.18215, 26 | "up_block_types": [ 27 | "UpDecoderBlock2D", 28 | "UpDecoderBlock2D", 29 | "UpDecoderBlock2D" 30 | ], 31 | "vq_embed_dim": null, 32 | "force_upcast": true, 33 | "num_dyn_embeddings": 8192, 34 | "context_length": 1, 35 | "resolution": 64, 36 | "max_att_resolution": 16 37 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/ctx_llama_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 9008, 6 | "bos_token_id": 9006, 7 | "eos_token_id": 9007, 8 | "hidden_act": "silu", 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "num_key_value_heads": 12, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/frac_action_ranges.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/ivideogpt/configs/vgpt/frac_action_ranges.pth 
-------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/llama.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 1024, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 4096, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 16, 16 | "num_hidden_layers": 24, 17 | "num_key_value_heads": 16, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/llama_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "num_key_value_heads": 12, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/ivideogpt/ctx_tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .compressive_vq_model_fsq import CompressiveVQModelFSQ 2 | -------------------------------------------------------------------------------- /vid_wm/ivideogpt/ivideogpt/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_dataloader import SimpleRoboticDataLoaderv2, EvalDataLoader 2 | from .dataset_mixes import DATASET_NAMED_MIXES 3 | -------------------------------------------------------------------------------- /vid_wm/ivideogpt/ivideogpt/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .vq_model import CNNFSQModel256 -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/eval_multi_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using one GPU 2 | 3 | python eval_vgpt_multiturn.py --per_device_eval_batch_size 4 \ 4 | --config_name configs/vgpt/ctx_llama_small.json\ 5 | --dataset_path /dev/null \ 6 | --pretrained_model_name_or_path thuml/rt1-compressive-tokenizer \ 7 | --pretrained_transformer_path thuml/rt1-world-model-multi-step-rlvr \ 8 | --processor_type ctx_msp \ 9 | --output_jsonl eval_jsonl/vgpt_small_ctx_msp8_head12_fulleval_release.jsonl \ 10 | --max_decode_batchsize 1 \ 11 | --segment_length 8 \ 12 | --use_eval_dataset \ 13 | --max_eval_iters 400 \ 14 | --exp_name vgpt_small_ctx_msp8_head12_fulleval $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/eval_policy.sh: -------------------------------------------------------------------------------- 1 | # using one GPU 2 | 3 | python 
eval_runenv.py --per_device_eval_batch_size 1 \ 4 | --config_name configs/vgpt/ctx_llama_small.json\ 5 | --dataset_path /dev/null \ 6 | --pretrained_model_name_or_path thuml/rt1-compressive-tokenizer \ 7 | --pretrained_transformer_path thuml/rt1-world-model-multi-step-rlvr \ 8 | --processor_type ctx_msp \ 9 | --max_decode_batchsize 1 \ 10 | --segment_length 8 \ 11 | --gpu_memory_utilization 0.75 \ 12 | --repetition_penalty 1.2 \ 13 | --output_dir policy_eval \ 14 | --policy_model_path pretrained_models/rt_1_tf_trained_for_000400120 \ 15 | --task_instruction "open middle drawer" $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/eval_single_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using one GPU 2 | 3 | python eval_vgpt.py --per_device_eval_batch_size 4 \ 4 | --dataset_path /dev/null \ 5 | --pretrained_model_name_or_path thuml/rt1-frame-tokenizer \ 6 | --pretrained_transformer_path thuml/rt1-world-model-single-step-rlvr \ 7 | --processor_type simple \ 8 | --output_jsonl eval_jsonl/vgpt_small_multi1_head12_fulleval_release.jsonl \ 9 | --use_eval_dataset \ 10 | --max_eval_iters 400 \ 11 | --exp_name vgpt_small_multi1_head12_fulleval $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/train_compressive_tokenizer.sh: -------------------------------------------------------------------------------- 1 | # using 16x40G A100 GPUs 2 | 3 | accelerate launch train_ctx_tokenizer.py \ 4 | --exp_name ctx_cnn_fsq12_frac_res320_seg8 \ 5 | --dataset_path /dev/null \ 6 | --train_batch_size 1 --gradient_accumulation_steps 1 --log_code_util \ 7 | --resolution 256 320 \ 8 | --output_dir vqgan-output \ 9 | --vae_loss l1 --disc_weight 0.1 --perc_weight 1.0 \ 10 | --start_global_step 0 --disc_start 10000 --max_train_steps 600000 \ 11 | --discr_learning_rate 5e-4 --learning_rate 5e-4 \ 12 | --disc_depth 6 --segment_length 8 $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/train_multi_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | accelerate launch train_vgpt.py \ 4 | --per_device_train_batch_size 4 \ 5 | --config_name configs/vgpt/ctx_llama_small.json \ 6 | --dataset_path /dev/null \ 7 | --pretrained_model_name_or_path thuml/rt1-compressive-tokenizer \ 8 | --output_dir trm-output \ 9 | --skip_first_val \ 10 | --exp_name vgpt_small_ctx_msp8_head12 \ 11 | --processor_type ctx_msp \ 12 | --segment_length 8 $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/train_perframe_tokenizer.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | accelerate launch train_tokenizer.py \ 4 | --exp_name cnn_fsq12_frac_res320_seg8 \ 5 | --dataset_path /dev/null \ 6 | --train_batch_size 2 --gradient_accumulation_steps 1 --log_code_util \ 7 | --resolution 256 320 --fsq_level 12 \ 8 | --output_dir vqgan-output \ 9 | --vae_loss l1 --disc_weight 0.1 --perc_weight 1.0 \ 10 | --start_global_step 0 --disc_start 10000 --max_train_steps 600000 \ 11 | --discr_learning_rate 5e-4 --learning_rate 5e-4 \ 12 | --disc_depth 6 --segment_length 8\ 13 | --checkpointing_steps 50000 $@ -------------------------------------------------------------------------------- 
/vid_wm/ivideogpt/scripts/train_single_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | accelerate launch train_vgpt.py \ 4 | --per_device_train_batch_size 4 \ 5 | --config_name configs/vgpt/llama_small.json \ 6 | --dataset_path /dev/null \ 7 | --pretrained_model_name_or_path thuml/rt1-frame-tokenizer \ 8 | --output_dir trm-output \ 9 | --skip_first_val \ 10 | --exp_name vgpt_small_multi1_head12 $@ -------------------------------------------------------------------------------- /vid_wm/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.27.0 2 | imageio==2.9.0 3 | imageio-ffmpeg==0.4.4 4 | piqa==1.3.2 5 | scipy==1.13.0 6 | lpips==0.1.4 -------------------------------------------------------------------------------- /vid_wm/verl/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 20 | -------------------------------------------------------------------------------- /vid_wm/verl/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true -------------------------------------------------------------------------------- /vid_wm/verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /vid_wm/verl/docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . -------------------------------------------------------------------------------- /vid_wm/verl/docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 7 | 8 | # Set working directory 9 | # WORKDIR $PWD/app 10 | 11 | # Set environment variables 12 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 13 | 14 | # Install vllm 15 | RUN pip uninstall -y vllm && \ 16 | rm -rf vllm && \ 17 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 18 | cd vllm && \ 19 | MAX_JOBS=$(nproc) python3 setup.py install && \ 20 | cd .. && \ 21 | rm -rf vllm 22 | 23 | # Copy the entire project directory 24 | COPY . . 
25 | 26 | # Install dependencies 27 | RUN pip install "tensordict<0.6" --no-deps && \ 28 | pip install accelerate \ 29 | codetiming \ 30 | datasets \ 31 | dill \ 32 | hydra-core \ 33 | liger-kernel \ 34 | numpy \ 35 | pandas \ 36 | peft \ 37 | "pyarrow>=15.0.0" \ 38 | pylatexenc \ 39 | "ray[data,train,tune,serve]" \ 40 | torchdata \ 41 | transformers \ 42 | wandb \ 43 | orjson \ 44 | pybind11 && \ 45 | pip install -e . --no-deps -------------------------------------------------------------------------------- /vid_wm/verl/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /vid_wm/verl/docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and navigate to http://localhost:8000 to view the documentation. -------------------------------------------------------------------------------- /vid_wm/verl/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/verl/docs/_static/logo.png -------------------------------------------------------------------------------- /vid_wm/verl/docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. 
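For orientation, the core pattern looks roughly like this (a minimal sketch: the ``RayResourcePool`` / ``RayClassWithInitArgs`` / ``RayWorkerGroup`` names and signatures below follow the tutorial and may differ slightly across verl versions):

.. code-block:: python

    import ray

    from verl.single_controller.base import Worker
    from verl.single_controller.base.decorator import Dispatch, register
    from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup


    @ray.remote
    class EchoWorker(Worker):
        """Toy worker: every rank in the resource pool answers the same broadcast call."""

        @register(dispatch_mode=Dispatch.ONE_TO_ALL)
        def echo(self, msg):
            # ONE_TO_ALL broadcasts msg to every worker; the replies are collected into a list
            return msg


    if __name__ == "__main__":
        ray.init()
        # one node contributing 4 worker processes, each bound to one GPU
        resource_pool = RayResourcePool(process_on_nodes=[4], use_gpu=True)
        worker_cls = RayClassWithInitArgs(cls=EchoWorker)
        worker_group = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=worker_cls)
        print(worker_group.echo("hello"))  # one reply per worker in the pool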
-------------------------------------------------------------------------------- /vid_wm/verl/docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=2 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path\ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=16 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path \ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=2 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | +data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@
-------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | +data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 |
model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 16 | -m verl.trainer.fsdp_sft_trainer \ 17 | data.train_files=$HOME/data/multiturn/train.parquet \ 18 | data.val_files=$HOME/data/multiturn/test.parquet \ 19 | data.multiturn.enable=true \ 20 | data.multiturn.messages_key=messages \ 21 | data.micro_batch_size=4 \ 22 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 23 |
trainer.default_local_dir=$save_path \ 24 | trainer.project_name=multiturn-sft \ 25 | trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \ 26 | trainer.logger=['console'] \ 27 | trainer.total_training_steps=1 \ 28 | trainer.default_hdfs_dir=null $@ \ 29 | ulysses_sequence_parallel_size=2 \ 30 | use_remove_padding=true -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/videogpt/run.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_vgpt_trainer \ 16 | data.train_batch_size=16 \ 17 | data.micro_batch_size_per_gpu=2 \ 18 | model.tokenizer_path=ivideogpt/pretrained_models/checkpoint-tokenizer400000 \ 19 | trainer.total_training_steps=1000000 \ 20 | trainer.default_local_dir=$save_path \ 21 | trainer.project_name=vgpt-pt \ 22 | trainer.experiment_name=vgpt-pt \ 23 | trainer.logger=['console','wandb'] \ 24 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/configs/frac_action_ranges.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/verl/ivideogpt/configs/frac_action_ranges.pth -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/configs/llama.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 1024, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 4096, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 16, 16 | "num_hidden_layers": 24, 17 | "num_key_value_heads": 16, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/configs/llama_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "num_key_value_heads": 12, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_dataloader import SimpleRoboticDataLoaderv2, EvalDataLoader 2 | from .dataset_mixes import 
DATASET_NAMED_MIXES 3 | -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/scripts/summarize_action_ranges.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import numpy as np 4 | from tqdm import tqdm 5 | import torch 6 | 7 | root_path = '/home/NAS/rl_data/frame_action_datasets/fractal20220817_data/' 8 | files = glob.glob(os.path.join(root_path, '*.npz')) 9 | 10 | max_actions = np.ones((13, ), dtype=np.float32) * -1e5 11 | min_actions = np.ones((13, ), dtype=np.float32) * 1e5 12 | 13 | for file in tqdm(files): 14 | data = np.load(file) 15 | actions = data['action'] 16 | max_actions = np.maximum(max_actions, actions.max(axis=0)) 17 | min_actions = np.minimum(min_actions, actions.min(axis=0)) 18 | 19 | action_ranges = np.stack([min_actions, max_actions], axis=1) 20 | action_ranges = torch.from_numpy(action_ranges).float() 21 | torch.save(action_ranges, 'ivideogpt/configs/frac_action_ranges.pth') -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .vq_model import CNNFSQModel256 2 | from ivideogpt.ctx_tokenizer.compressive_vq_model import CompressiveVQModelFSQ 3 | 4 | TOKENIZER = { 5 | "cnn": CNNFSQModel256, 6 | "ctx_cnn": CompressiveVQModelFSQ, 7 | } -------------------------------------------------------------------------------- /vid_wm/verl/recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -uxo pipefail 3 | 4 | export VERL_HOME=${VERL_HOME:-"${HOME}/verl"} 5 | export TRAIN_FILE=${TRAIN_FILE:-"${VERL_HOME}/data/dapo-math-17k.parquet"} 6 | export TEST_FILE=${TEST_FILE:-"${VERL_HOME}/data/aime-2024.parquet"} 7 | 8 | mkdir -p "${VERL_HOME}/data" 9 | 10 | wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k/resolve/main/data/dapo-math-17k.parquet?download=true" 11 | 12 | wget -O "${TEST_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/AIME-2024/resolve/main/data/aime-2024.parquet?download=true" -------------------------------------------------------------------------------- /vid_wm/verl/recipe/prime/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek R1 Reproduction 2 | 3 | This recipe is under development, if you are interested, checkout the TODO list and join this project! 
https://github.com/volcengine/verl/issues/708 4 | 5 | ## Reproducing Evaluation 6 | 7 | Eval Results of DS-R1-Distill-Qwen2.5-1.5B (k=8) 8 | 9 | Dataset | Test Results | Reported 10 | -- | -- | -- 11 | GPQA Diamond | 35.3 | 33.8 12 | LiveCodeBench | 16.9 | 16.9 13 | AIME 2024 | 30.4 | 28.9 14 | CNMO 2024 (en) | 45.1 | - 15 | CNMO 2024 (zh) | 41.0 | - 16 | 17 | --- 18 | 19 | Eval Results (DS-R1) 20 | 21 | Dataset | Test Results (k=1) | Test Results (k=4) | Reported 22 | -- | -- | -- | -- 23 | GPQA Diamond | 67.7 | 69.6 | 71.5 24 | LiveCodeBench | 64.7 | 63.1 | 65.9 25 | AIME 2024 | 86.7 | 79.2 | 79.8 26 | CNMO 2024 | 75.0 | 78.5 | 78.8 27 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/reward_score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def reward_func(data_source, solution_str, ground_truth, extra_info=None): 17 | if data_source in ['Maxwell-Jia/AIME_2024', "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]: 18 | from recipe.r1.tasks import math 19 | return math.compute_score(solution_str, ground_truth) 20 | elif data_source == 'Idavidrein/gpqa': 21 | from recipe.r1.tasks import gpqa 22 | return gpqa.compute_score(solution_str, ground_truth) 23 | elif data_source in ['livecodebench/code_generation_lite', 'livecodebench/code_generation']: 24 | from recipe.r1.tasks import livecodebench 25 | return livecodebench.compute_score(solution_str, ground_truth) 26 | else: 27 | raise NotImplementedError 28 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=Qwen/DeepSeek-R1-Distill-Qwen-1.5B 2 | DATA_PATH=/workspace/datasets/r1_bench 3 | 4 | # Eval Data Process 5 | python3 -m recipe.r1.data_process \ 6 | --local_dir $DATA_PATH \ 7 | --tasks all 8 | 9 | # Generation 10 | python3 -m verl.trainer.main_generation \ 11 | trainer.nnodes=1 \ 12 | trainer.n_gpus_per_node=8 \ 13 | data.path=$DATA_PATH/test.parquet \ 14 | data.prompt_key=prompt \ 15 | data.batch_size=1024 \ 16 | data.n_samples=8 \ 17 | data.output_path=$DATA_PATH/test-output-8.parquet \ 18 | model.path=$MODEL_PATH \ 19 | rollout.temperature=0.6 \ 20 | rollout.top_p=0.95 \ 21 | rollout.prompt_length=1024 \ 22 | rollout.response_length=32768 \ 23 | rollout.tensor_model_parallel_size=1 \ 24 | rollout.gpu_memory_utilization=0.9 \ 25 | rollout.max_num_batched_tokens=65536 26 | 27 | # Evaluation 28 | python3 -m recipe.r1.main_eval \ 29 | data.path=$DATA_PATH/test-output-8.parquet \ 30 | data.prompt_key=prompt \ 31 | data.response_key=responses \ 32 | custom_reward_function.path=recipe/r1/reward_score.py \ 33 | custom_reward_function.name=reward_func 34 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | 17 | # Extraction Template from https://github.com/openai/simple-evals/blob/90e3e821cabba2aeb6be651dcb662b253df04225/common.py#L25 18 | ANSWER_PATTERN_MULTICHOICE = r"(?i)Answer[ \t]*:[ \t]*\$?([A-D])\$?" 19 | 20 | 21 | def compute_score(solution_str, ground_truth) -> float: 22 | match = re.search(ANSWER_PATTERN_MULTICHOICE, solution_str) 23 | extracted_answer = match.group(1) if match else None 24 | score = 1.0 if extracted_answer == ground_truth else 0.0 25 | return score 26 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | try: 16 | from math_verify.metric import math_metric 17 | from math_verify.parser import LatexExtractionConfig, ExprExtractionConfig 18 | except ImportError: 19 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 20 | 21 | 22 | def compute_score(model_output: str, ground_truth: str) -> bool: 23 | verify_func = math_metric( 24 | gold_extraction_target=(LatexExtractionConfig(),), 25 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 26 | ) 27 | ret_score = 0. 
28 | 29 | # Wrap the ground truth in \boxed{} format for verification 30 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 31 | try: 32 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 33 | except Exception as e: 34 | pass 35 | 36 | return ret_score 37 | -------------------------------------------------------------------------------- /vid_wm/verl/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | datasets 12 | peft 13 | pyarrow>=15.0.0 14 | pybind11 15 | pylatexenc 16 | pylint==3.3.6 17 | ray[default] 18 | tensordict<=0.6.2 19 | torchdata 20 | transformers 21 | # vllm==0.6.3.post1 22 | wandb 23 | -------------------------------------------------------------------------------- /vid_wm/verl/requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy 9 | pandas 10 | datasets 11 | peft 12 | pyarrow>=15.0.0 13 | pybind11 14 | pylatexenc 15 | ray[default]>=2.10 16 | tensordict<=0.6.2 17 | torchdata 18 | torchvision 19 | transformers 20 | wandb 21 | sglang[all]==0.4.4.post3 22 | torch-memory-saver>=0.0.5 -------------------------------------------------------------------------------- /vid_wm/verl/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests examples recipe scripts 4 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token 
}}{% endif %}" 18 | } -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as below: 4 | 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should be modulo with max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is defined in verl/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for better understanding. 20 | 21 | 22 | 23 | # Run experiments 24 | 25 | The users are required to specify the config path and config name (and the relative model config path to the current working directory) 26 | 27 | ```bash 28 | # cd examples/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | 18 | def check_congratulations_in_file(output_file): 19 | with open(output_file, 'r') as f: 20 | output = f.read() 21 | 22 | success_message = "Congratulations!!! You have called my_reward_function successfully!!!" 23 | assert success_message in output, f'Success message of my_reward_function not found in {output_file}' 24 | print("Check passes") 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--output_file', required=True, type=str) 30 | 31 | args = parser.parse_args() 32 | 33 | check_congratulations_in_file(args.output_file) 34 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ['DigitCompletion'] -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 4 | --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 5 | 6 | python3 -m verl.trainer.main_generation \ 7 | trainer.nnodes=1 \ 8 | trainer.n_gpus_per_node=8 \ 9 | data.path=$HOME/data/r1/test.parquet \ 10 | data.prompt_key=prompt \ 11 | data.batch_size=1024 \ 12 | data.n_samples=1 \ 13 | data.output_path=$HOME/data/r1/test-output-k1.parquet \ 14 | model.path=$HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 15 | rollout.temperature=0.6 \ 16 | rollout.top_p=0.95 \ 17 | rollout.prompt_length=1024 \ 18 | rollout.response_length=32768 \ 19 | rollout.tensor_model_parallel_size=1 \ 20 | rollout.gpu_memory_utilization=0.95 \ 21 | rollout.max_num_batched_tokens=65536 \ 22 | rollout.enforce_eager=False \ 23 | rollout.free_cache_engine=False 24 | 25 | python3 -m recipe.r1.main_eval \ 26 | data.path=$HOME/data/r1/test-output-k1.parquet \ 27 | data.prompt_key=prompt \ 28 | data.response_key=responses \ 29 | custom_reward_function.path=recipe/r1/reward_score.py \ 30 | custom_reward_function.name=reward_func -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | huggingface-cli 
download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 6 | 7 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 8 | algorithm.adv_estimator=gae \ 9 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 10 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 11 | actor_rollout_ref.actor.use_kl_loss=False \ 12 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 13 | actor_rollout_ref.rollout.name=vllm \ 14 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 15 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 16 | critic.model.path=Qwen/Qwen2.5-0.5B \ 17 | critic.model.use_remove_padding=True \ 18 | algorithm.use_kl_in_reward=False \ 19 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /vid_wm/verl/tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | # Tested with 1 & 4 GPUs 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_gen_qwen05.sh <nproc_per_node> <save_path> [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | infer_tp=${3:-2} # Default tensor parallel size to 2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-0.5B-Instruct 17 | 18 | python3 -m verl.trainer.main_generation \ 19 | trainer.nnodes=1 \ 20 | trainer.n_gpus_per_node=$nproc_per_node \ 21 | data.path=$HOME/data/gsm8k/test.parquet \ 22 | data.prompt_key=prompt \ 23 | data.n_samples=1 \ 24 | data.output_path=$save_path \ 25 | model.path=$HOME/models/Qwen/Qwen2.5-0.5B-Instruct \ 26 | +model.trust_remote_code=True \ 27 | rollout.temperature=1.0 \ 28 | rollout.top_k=50 \ 29 | rollout.top_p=0.7 \ 30 | rollout.prompt_length=2048 \ 31 | rollout.response_length=1024 \ 32 | rollout.tensor_model_parallel_size=$infer_tp \ 33 | rollout.gpu_memory_utilization=0.8 34 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/kill_github_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "Usage: $0 YOUR_GITHUB_TOKEN" 5 | echo "Please provide exactly one input argument for your github token." 6 | exit 1 7 | fi 8 | 9 | # Set your GitHub repository details 10 | OWNER="volcengine" 11 | REPO="verl" 12 | TOKEN=$1 13 | 14 | # API URL for workflow runs 15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued" 16 | 17 | # Check required commands 18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; } 19 | 20 | # Get queued workflow runs 21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL") 22 | 23 | # Run this for debugging 24 | # echo $response 25 | 26 | # Extract run IDs 27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id') 28 | 29 | if [ -z "$queued_run_ids" ]; then 30 | echo "No queued workflow runs found." 31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs."
42 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /vid_wm/verl/tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_multiturn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 16 | -m verl.trainer.fsdp_sft_trainer \ 17 | data.train_files=$HOME/data/multiturn/train.parquet \ 18 |
data.val_files=$HOME/data/multiturn/test.parquet \ 19 | data.multiturn.enable=true \ 20 | data.multiturn.messages_key=messages \ 21 | data.micro_batch_size=4 \ 22 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 23 | trainer.default_local_dir=$save_path \ 24 | trainer.project_name=multiturn-sft \ 25 | trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \ 26 | trainer.logger=['console'] \ 27 | trainer.total_training_steps=1 \ 28 | trainer.default_hdfs_dir=null $@ \ 29 | ulysses_sequence_parallel_size=2 \ 30 | use_remove_padding=true -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone
--nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | 29 | from . import single_controller 30 | 31 | __all__ = ['DataProto', "__version__"] 32 | 33 | if os.getenv('VERL_USE_MODELSCOPE', 'False').lower() == 'true': 34 | import importlib 35 | if importlib.util.find_spec("modelscope") is None: 36 | raise ImportError(f'You are using the modelscope hub, please install modelscope by `pip install modelscope -U`') 37 | # Patch hub to download models from modelscope to speed up. 38 | from modelscope.utils.hf_util import patch_hub 39 | patch_hub() 40 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .gpt_model import gptmodel_forward -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_qwen2_megatron import ( 16 | # original model with megatron 17 | ParallelQwen2Model, 18 | ParallelQwen2ForCausalLM, 19 | # rmpad with megatron 20 | ParallelQwen2ForCausalLMRmPad, 21 | ParallelQwen2ForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelQwen2ForCausalLMRmPadPP, 24 | ParallelQwen2ForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | # Note(haibin.lin): single_controller.__version__ is deprecated 20 | with open(os.path.join(os.path.join(version_folder, os.pardir), 'version/version')) as f: 21 | __version__ = f.read().strip() 22 | 23 | from . import base 24 | from .base import * 25 | 26 | __all__ = base.__all__ -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | 18 | __all__ = ['Worker', 'WorkerGroup', 'ClassWithInitArgs', 'ResourcePool'] -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | 21 | def __init__(self, rank_zero_info): 22 | self.rank_zero_info = rank_zero_info 23 | 24 | def get_rank_zero_info(self): 25 | return self.rank_zero_info 26 | 27 | 28 | def create_worker_group_register_center(name, info): 29 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 30 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | temperature: 1.0 18 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 19 | top_p: 0.7 20 | prompt_length: 1536 21 | response_length: 512 22 | # for vllm rollout 23 | dtype: bfloat16 # should align with FSDP 24 | gpu_memory_utilization: 0.5 25 | ignore_eos: False 26 | enforce_eager: True 27 | free_cache_engine: True 28 | load_format: dummy_dtensor 29 | tensor_model_parallel_size: 1 30 | max_num_batched_tokens: 8192 31 | max_model_len: null 32 | max_num_seqs: 1024 33 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 34 | log_prob_micro_batch_size_per_gpu: 8 35 | # for fire vllm rollout 36 | use_fire_sampling: False # enable FIRE https://arxiv.org/abs/2410.21236 37 | # for hf rollout 38 | do_sample: True 39 | disable_log_stats: True 40 | enable_chunked_prefill: True 41 | n: 1 42 | actor: 43 | strategy: fsdp # This is for backward-compatibility 44 | ulysses_sequence_parallel_size: 1 # sp size 45 | fsdp_config: 46 | fsdp_size: -1 -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_tokenizer, hf_processor 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answers in a particular format so that we can extract the answers. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May.
How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | import torch.distributed as dist 17 | import logging 18 | 19 | 20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0): 21 | if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank): 22 | memory_allocated = torch.cuda.memory_allocated() / 1024**3 23 | memory_reserved = torch.cuda.memory_reserved() / 1024**3 24 | 25 | message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}' 26 | 27 | if logger is None: 28 | print(message) 29 | else: 30 | logger.log(msg=message, level=level) 31 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | import os 16 | 17 | 18 | def initialize_global_process_group(timeout_second=36000): 19 | import torch.distributed 20 | from datetime import timedelta 21 | torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second)) 22 | local_rank = int(os.environ["LOCAL_RANK"]) 23 | rank = int(os.environ["RANK"]) 24 | world_size = int(os.environ["WORLD_SIZE"]) 25 | 26 | if torch.distributed.is_initialized(): 27 | torch.cuda.set_device(local_rank) 28 | return local_rank, rank, world_size 29 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | import torch 18 | 19 | 20 | def set_basic_config(level): 21 | """ 22 | This function sets the global logging format and level. It is called when verl is imported 23 | """ 24 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 25 | 26 | 27 | def log_to_file(string): 28 | print(string) 29 | if os.path.isdir('logs'): 30 | with open(f'logs/log_{torch.distributed.get_rank()}', 'a+') as f: 31 | f.write(string + '\n') 32 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import ray 19 | 20 | import concurrent.futures 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | 25 | def put_data(index, data): 26 | return index, ray.put(data) 27 | 28 | if max_workers is None: 29 | max_workers = min(len(data_list), 16) 30 | 31 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 32 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 33 | res_lst = [] 34 | for future in concurrent.futures.as_completed(data_list_f): 35 | res_lst.append(future.result()) 36 | 37 | # reorder based on index 38 | output = [None for _ in range(len(data_list))] 39 | for res in res_lst: 40 | index, data_ref = res 41 | output[index] = data_ref 42 | 43 | return output 44 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r'<think>.*</think>.*\\boxed\{.*\}.*', re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str) -> float: 26 | answer = extract_boxed_content(predict_str) 27 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 28 | 29 | 30 | def compute_score(predict_str: str, ground_truth: str) -> float: 31 | return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str) 32 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2.0.dev 2 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager 17 | from .dapo import DAPORewardManager 18 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 
31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | from .sglang_rollout import SGLangRollout 15 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | --------------------------------------------------------------------------------