├── .gitignore ├── LICENSE ├── README.md ├── assets ├── concept.png └── showcase.png ├── lang_wm ├── README.md ├── assets │ └── lang_wm.png ├── data_process │ └── text_game │ │ ├── README.md │ │ ├── calculate_r1_response.py │ │ ├── call_deepseek_r1.py │ │ ├── generate_sft_data.py │ │ ├── process_jsonl.py │ │ ├── process_jsonl_train.py │ │ └── text_game.py ├── verl │ ├── .gitattributes │ ├── .gitignore │ ├── .readthedocs.yaml │ ├── .style.yapf │ ├── LICENSE │ ├── Notice.txt │ ├── docker │ │ ├── Dockerfile.megatron │ │ ├── Dockerfile.ngc.vllm │ │ ├── Dockerfile.rocm │ │ └── Dockerfile.vemlp.vllm.te │ ├── docs │ │ ├── Makefile │ │ ├── README.md │ │ ├── README_vllm0.7.md │ │ ├── _static │ │ │ └── logo.png │ │ ├── advance │ │ │ ├── dpo_extension.rst │ │ │ ├── fsdp_extension.rst │ │ │ ├── megatron_extension.rst │ │ │ └── placement.rst │ │ ├── amd_tutorial │ │ │ ├── amd_build_dockerfile.md │ │ │ └── amd_existing_docker.md │ │ ├── conf.py │ │ ├── data.rst │ │ ├── examples │ │ │ ├── config.rst │ │ │ ├── gsm8k_example.rst │ │ │ └── ppo_code_architecture.rst │ │ ├── experiment │ │ │ └── ppo.rst │ │ ├── faq │ │ │ └── faq.rst │ │ ├── hybrid_flow.rst │ │ ├── index.rst │ │ ├── perf │ │ │ └── perf_tuning.rst │ │ ├── preparation │ │ │ ├── prepare_data.rst │ │ │ └── reward_function.rst │ │ ├── requirements-docs.txt │ │ ├── start │ │ │ ├── install.rst │ │ │ └── quickstart.rst │ │ └── workers │ │ │ ├── fsdp_workers.rst │ │ │ ├── megatron_workers.rst │ │ │ └── ray_trainer.rst │ ├── examples │ │ ├── data_preprocess │ │ │ ├── full_hh_rlhf.py │ │ │ ├── geo3k.py │ │ │ ├── gsm8k.py │ │ │ ├── hellaswag.py │ │ │ └── math_dataset.py │ │ ├── generation │ │ │ └── run_deepseek_v2_lite_math.sh │ │ ├── grpo_trainer │ │ │ ├── run_deepseek7b_llm.sh │ │ │ ├── run_deepseek7b_llm_seq_balance.sh │ │ │ ├── run_qwen2-7b.sh │ │ │ ├── run_qwen2-7b_seq_balance.sh │ │ │ ├── run_qwen2_5_vl-7b.sh │ │ │ ├── run_text_game_rl.sh │ │ │ └── run_web_agent_rl.sh │ │ ├── ppo_trainer │ │ │ ├── run_deepseek7b_llm.sh │ │ │ ├── run_deepseek7b_llm_modelscope.sh │ │ │ ├── run_deepseek7b_llm_sp2.sh │ │ │ ├── run_deepseek_full_hh_rlhf.sh │ │ │ ├── run_deepseek_math_gsm8k_megatron.sh │ │ │ ├── run_deepseek_megatron.sh │ │ │ ├── run_gemma.sh │ │ │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ │ │ ├── run_qwen2-7b_megatron.sh │ │ │ ├── run_qwen2-7b_rm.sh │ │ │ ├── run_qwen2-7b_rm_seq_balance.sh │ │ │ ├── run_qwen2-7b_seq_balance.sh │ │ │ ├── run_qwen2.5-32b.sh │ │ │ └── verl_getting_started.ipynb │ │ ├── ray │ │ │ └── tutorial.ipynb │ │ ├── remax_trainer │ │ │ ├── run_qwen2.5-3b_seq_balance.sh │ │ │ └── run_qwen2.5-7b_seq_balance.sh │ │ ├── rloo_trainer │ │ │ └── run_qwen2-7b.sh │ │ ├── sft │ │ │ ├── gsm8k │ │ │ │ ├── run_deepseek_6b7.sh │ │ │ │ ├── run_gemma_2b.sh │ │ │ │ ├── run_gemma_7b.sh │ │ │ │ ├── run_qwen_05_peft.sh │ │ │ │ ├── run_qwen_05_sp2.sh │ │ │ │ └── run_qwen_05_sp2_liger.sh │ │ │ ├── text_game │ │ │ │ └── run_text_game_sft.sh │ │ │ └── web_agent │ │ │ │ └── run_web_agent_sft.sh │ │ ├── slurm │ │ │ └── ray_on_slurm.slurm │ │ └── split_placement │ │ │ ├── README.md │ │ │ ├── config │ │ │ └── ppo_trainer_split.yaml │ │ │ ├── main_ppo_split.py │ │ │ ├── run_deepseek7b_llm.sh │ │ │ └── split_monkey_patch.py │ ├── merge_lora.py │ ├── patches │ │ └── megatron_v4.patch │ ├── pyproject.toml │ ├── requirements.txt │ ├── scripts │ │ ├── format.sh │ │ └── model_merger.py │ ├── setup.py │ ├── tests │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ ├── test_fsdp_ckpt.py │ │ │ └── test_megatron_ckpt.py │ │ ├── distributed │ │ │ ├── run_all.sh │ │ │ └── 
test_tensor_dict.py │ │ ├── distro │ │ │ └── requirements.py │ │ ├── e2e │ │ │ ├── __init__.py │ │ │ ├── arithmetic_sequence │ │ │ │ ├── data │ │ │ │ │ └── create_dataset.py │ │ │ │ ├── model │ │ │ │ │ ├── config.json │ │ │ │ │ ├── create_model_tokenizer.py │ │ │ │ │ ├── generation_config.json │ │ │ │ │ ├── model.safetensors │ │ │ │ │ └── tokenizer_config.json │ │ │ │ └── rl │ │ │ │ │ ├── README.md │ │ │ │ │ └── main_trainer.py │ │ │ ├── check_results.py │ │ │ ├── envs │ │ │ │ ├── __init__.py │ │ │ │ └── digit_completion │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── task.py │ │ │ │ │ └── tokenizer.py │ │ │ ├── run_deepseek_megatron.sh │ │ │ ├── run_deepseek_megatron_parallelism.sh │ │ │ ├── run_qwen2vl_geo3k_function_rm.sh │ │ │ ├── run_qwen_gsm8k_function_rm.sh │ │ │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ │ │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ │ │ ├── run_qwen_gsm8k_function_rm_remax.sh │ │ │ ├── run_qwen_gsm8k_model_rm.sh │ │ │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ │ │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ │ │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ │ │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ │ │ ├── run_qwen_megatron.sh │ │ │ ├── run_qwen_megatron_parallelism.sh │ │ │ ├── run_ray_trainer.sh │ │ │ ├── run_ray_trainer_fire_sampling.sh │ │ │ └── run_ray_trainer_rmpad.sh │ │ ├── generation │ │ │ └── run_gen_qwen05.sh │ │ ├── gpu_utility │ │ │ ├── test_memory_buffers.py │ │ │ ├── test_ops.py │ │ │ └── test_torch_functional.py │ │ ├── kill_github_tests.sh │ │ ├── model │ │ │ ├── test_transformer.py │ │ │ └── test_transformers_ulysses.py │ │ ├── ray │ │ │ ├── check_worker_alive │ │ │ │ └── main.py │ │ │ ├── detached_worker │ │ │ │ ├── README.md │ │ │ │ ├── client.py │ │ │ │ ├── run.sh │ │ │ │ └── server.py │ │ │ ├── test_check_worker_alive.py │ │ │ ├── test_colocated_workers.py │ │ │ ├── test_data_transfer.py │ │ │ ├── test_driverfunc_to_worker.py │ │ │ ├── test_high_level_scheduling_api.py │ │ │ ├── test_ray_local_envs.py │ │ │ ├── test_rvdz.py │ │ │ ├── test_worker_group_basics.py │ │ │ └── test_worker_group_torch.py │ │ ├── rollout │ │ │ ├── run_fsdp_vllm.py │ │ │ ├── test_vllm_hf_loader.py │ │ │ └── test_vllm_spmd.py │ │ ├── sandbox │ │ │ └── test_sandbox.py │ │ ├── sanity │ │ │ ├── check_license.py │ │ │ └── test_import.py │ │ ├── sft │ │ │ ├── run_sft.sh │ │ │ ├── run_sft_qwen05_peft.sh │ │ │ ├── run_sft_qwen05_sp2_liger.sh │ │ │ ├── run_sft_sp_loss_match.sh │ │ │ └── test_sp_loss_match.py │ │ ├── utility │ │ │ └── test_tensor_dict_utilities.py │ │ └── verl │ │ │ └── utils │ │ │ └── dataset │ │ │ ├── test_rl_dataset.py │ │ │ ├── test_rm_dataset.py │ │ │ └── test_sft_dataset.py │ └── verl │ │ ├── __init__.py │ │ ├── models │ │ ├── README.md │ │ ├── __init__.py │ │ ├── llama │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── checkpoint_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── llama_loader.py │ │ │ │ └── llama_saver.py │ │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── parallel_attention.py │ │ │ │ ├── parallel_decoder.py │ │ │ │ ├── parallel_linear.py │ │ │ │ ├── parallel_mlp.py │ │ │ │ └── parallel_rmsnorm.py │ │ │ │ └── modeling_llama_megatron.py │ │ ├── qwen2 │ │ │ ├── __init__.py │ │ │ └── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── checkpoint_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen2_loader.py │ │ │ │ └── qwen2_saver.py │ │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── parallel_attention.py │ │ │ │ ├── parallel_decoder.py │ │ │ │ ├── parallel_linear.py │ │ │ │ ├── parallel_mlp.py │ │ │ │ └── parallel_rmsnorm.py │ │ │ │ 
└── modeling_qwen2_megatron.py │ │ ├── registry.py │ │ ├── transformers │ │ │ ├── __init__.py │ │ │ ├── llama.py │ │ │ ├── monkey_patch.py │ │ │ ├── qwen2.py │ │ │ └── qwen2_vl.py │ │ └── weight_loader_registry.py │ │ ├── protocol.py │ │ ├── single_controller │ │ ├── __init__.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── decorator.py │ │ │ ├── megatron │ │ │ │ ├── __init__.py │ │ │ │ ├── worker.py │ │ │ │ └── worker_group.py │ │ │ ├── register_center │ │ │ │ ├── __init__.py │ │ │ │ └── ray.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ └── ray │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── megatron.py │ │ ├── third_party │ │ ├── __init__.py │ │ └── vllm │ │ │ ├── __init__.py │ │ │ ├── vllm_spmd │ │ │ ├── __init__.py │ │ │ └── dtensor_weight_loaders.py │ │ │ ├── vllm_v_0_3_1 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── tokenizer.py │ │ │ ├── weight_loaders.py │ │ │ └── worker.py │ │ │ ├── vllm_v_0_4_2 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── dtensor_weight_loaders.py │ │ │ ├── hf_weight_loader.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── megatron_weight_loaders.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── spmd_gpu_executor.py │ │ │ ├── tokenizer.py │ │ │ └── worker.py │ │ │ ├── vllm_v_0_5_4 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── dtensor_weight_loaders.py │ │ │ ├── hf_weight_loader.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── megatron_weight_loaders.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── spmd_gpu_executor.py │ │ │ ├── tokenizer.py │ │ │ └── worker.py │ │ │ └── vllm_v_0_6_3 │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── dtensor_weight_loaders.py │ │ │ ├── hf_weight_loader.py │ │ │ ├── llm.py │ │ │ ├── llm_engine_sp.py │ │ │ ├── megatron_weight_loaders.py │ │ │ ├── model_loader.py │ │ │ ├── model_runner.py │ │ │ ├── parallel_state.py │ │ │ ├── spmd_gpu_executor.py │ │ │ ├── tokenizer.py │ │ │ └── worker.py │ │ ├── trainer │ │ ├── __init__.py │ │ ├── config │ │ │ ├── evaluation.yaml │ │ │ ├── generation.yaml │ │ │ ├── ppo_megatron_trainer.yaml │ │ │ ├── ppo_trainer.yaml │ │ │ └── sft_trainer.yaml │ │ ├── fsdp_sft_trainer.py │ │ ├── main_eval.py │ │ ├── main_generation.py │ │ ├── main_ppo.py │ │ ├── ppo │ │ │ ├── __init__.py │ │ │ ├── core_algos.py │ │ │ └── ray_trainer.py │ │ └── runtime_env.yaml │ │ ├── utils │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ ├── __init__.py │ │ │ ├── checkpoint_manager.py │ │ │ └── fsdp_checkpoint_manager.py │ │ ├── config.py │ │ ├── dataset │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── rl_dataset.py │ │ │ ├── rl_dataset_text_game.py │ │ │ ├── rm_dataset.py │ │ │ └── sft_dataset.py │ │ ├── debug │ │ │ ├── __init__.py │ │ │ ├── performance.py │ │ │ └── trajectory_tracker.py │ │ ├── distributed.py │ │ ├── flops_counter.py │ │ ├── fs.py │ │ ├── fsdp_utils.py │ │ ├── hdfs_io.py │ │ ├── import_utils.py │ │ ├── logger │ │ │ ├── __init__.py │ │ │ └── aggregate_logger.py │ │ ├── logging_utils.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── memory.py │ │ │ ├── optimizer.py │ │ │ ├── pipeline_parallel.py │ │ │ ├── sequence_parallel.py │ │ │ └── tensor_parallel.py │ │ ├── megatron_utils.py │ │ ├── memory_buffer.py │ │ ├── model.py │ │ ├── py_functional.py │ │ ├── ray_utils.py │ │ ├── 
rendezvous │ │ │ ├── __init__.py │ │ │ └── ray_backend.py │ │ ├── reward_score │ │ │ ├── __init__.py │ │ │ ├── geo3k.py │ │ │ ├── gsm8k.py │ │ │ ├── math.py │ │ │ ├── prime_code │ │ │ │ ├── __init__.py │ │ │ │ ├── testing_util.py │ │ │ │ └── utils.py │ │ │ ├── prime_math │ │ │ │ ├── __init__.py │ │ │ │ ├── grader.py │ │ │ │ └── math_normalize.py │ │ │ ├── text_game.py │ │ │ └── web_agent.py │ │ ├── seqlen_balancing.py │ │ ├── tokenizer.py │ │ ├── torch_dtypes.py │ │ ├── torch_functional.py │ │ ├── tracking.py │ │ └── ulysses.py │ │ ├── version │ │ └── version │ │ └── workers │ │ ├── __init__.py │ │ ├── actor │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dp_actor.py │ │ └── megatron_actor.py │ │ ├── critic │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dp_critic.py │ │ └── megatron_critic.py │ │ ├── fsdp_workers.py │ │ ├── megatron_workers.py │ │ ├── reward_manager │ │ ├── __init__.py │ │ ├── naive.py │ │ └── prime.py │ │ ├── reward_model │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ └── reward_model.py │ │ ├── rollout │ │ ├── __init__.py │ │ ├── base.py │ │ ├── hf_rollout.py │ │ ├── naive │ │ │ ├── __init__.py │ │ │ └── naive_rollout.py │ │ ├── tokenizer.py │ │ └── vllm_rollout │ │ │ ├── __init__.py │ │ │ ├── fire_vllm_rollout.py │ │ │ ├── vllm_rollout.py │ │ │ └── vllm_rollout_spmd.py │ │ └── sharding_manager │ │ ├── __init__.py │ │ ├── base.py │ │ ├── fsdp_ulysses.py │ │ ├── fsdp_vllm.py │ │ └── megatron_vllm.py └── webagent │ ├── README.md │ ├── agent │ ├── __init__.py │ ├── agent.py │ ├── prompts │ │ ├── __init__.py │ │ ├── jsons │ │ │ ├── p_cot_id_actree_2s.json │ │ │ ├── p_cot_id_actree_2s_no_na.json │ │ │ ├── p_cot_id_actree_3s.json │ │ │ ├── p_multimodal_cot_id_actree_3s.json │ │ │ ├── p_som_cot_id_actree_3s.json │ │ │ ├── refine_tao.json │ │ │ ├── rlvr_translate_prompt.json │ │ │ ├── state_prediction │ │ │ │ ├── rlvr_world_model_prompt.json │ │ │ │ ├── text_only_acctree_format.json │ │ │ │ └── text_only_description_with_tao_format.json │ │ │ └── value_function │ │ │ │ ├── rlvr_value_prompt.json │ │ │ │ ├── text_only_value_function.json │ │ │ │ └── text_only_value_function_likert.json │ │ ├── prompt_constructor.py │ │ ├── raw │ │ │ ├── p_cot_id_actree_2s.py │ │ │ ├── p_cot_id_actree_2s_no_na.py │ │ │ ├── p_cot_id_actree_3s.py │ │ │ ├── p_multimodal_cot_id_actree_3s.py │ │ │ └── p_som_cot_id_actree_3s.py │ │ └── to_json.py │ ├── value_function.py │ └── world_model_agent.py │ ├── browser_env │ ├── __init__.py │ ├── actions.py │ ├── async_envs.py │ ├── auto_login.py │ ├── constants.py │ ├── env_config.py │ ├── envs.py │ ├── helper_functions.py │ ├── javascript │ │ ├── frame_mark_elements.js │ │ └── frame_unmark_elements.js │ ├── processors.py │ ├── py.typed │ ├── trajectory.py │ └── utils.py │ ├── config_files │ └── wa │ │ └── test_webarena.raw.json │ ├── evaluation_harness │ ├── __init__.py │ ├── evaluators.py │ ├── helper_functions.py │ └── image_utils.py │ ├── llms │ ├── __init__.py │ ├── lm_config.py │ ├── providers │ │ ├── gemini_utils.py │ │ ├── hf_utils.py │ │ └── openai_utils.py │ ├── tokenizers.py │ └── utils.py │ ├── prepare.sh │ ├── pyproject.toml │ ├── requirements.txt │ ├── run_for_trajectory.py │ ├── run_w_world_model.py │ ├── scripts │ ├── check_error_runs.py │ ├── collect_obs.py │ ├── generate_test_data.py │ └── parallel_run_webarena_rlvr.sh │ ├── setup.cfg │ └── setup.py └── vid_wm ├── README.md ├── assets └── vid_wm.png ├── ivideogpt ├── .gitignore ├── configs │ ├── ctx_vae256 │ │ └── config.json │ ├── ctx_vae64 │ │ └── config.json │ └── 
vgpt │ │ ├── ctx_llama_small.json │ │ ├── frac_action_ranges.pth │ │ ├── llama.json │ │ └── llama_small.json ├── eval_runenv.py ├── eval_vgpt.py ├── eval_vgpt_multiturn.py ├── ivideogpt │ ├── ctx_tokenizer │ │ ├── __init__.py │ │ ├── compressive_vq_model_fsq.py │ │ ├── conditional_vae.py │ │ └── vae.py │ ├── data │ │ ├── __init__.py │ │ ├── dataset_mixes.py │ │ ├── simple_dataloader.py │ │ └── sthsth_dataloader.py │ ├── processor.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── vae.py │ │ └── vq_model.py │ └── utils │ │ ├── discriminator.py │ │ ├── finite_scalar_quantize.py │ │ ├── lpips.py │ │ └── video_metric.py ├── rt1_inference.py ├── scripts │ ├── eval_multi_step_prediction.sh │ ├── eval_policy.sh │ ├── eval_single_step_prediction.sh │ ├── train_compressive_tokenizer.sh │ ├── train_multi_step_prediction.sh │ ├── train_perframe_tokenizer.sh │ └── train_single_step_prediction.sh ├── train_ctx_tokenizer.py ├── train_tokenizer.py ├── train_vgpt.py └── transform_vgpt_checkpoint.py ├── oxe_data_converter.py ├── requirements.txt └── verl ├── .gitignore ├── .readthedocs.yaml ├── .style.yapf ├── LICENSE ├── Notice.txt ├── README.md ├── docker ├── Dockerfile.megatron ├── Dockerfile.ngc.vllm ├── Dockerfile.ngc.vllm0.8 ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.rocm ├── Dockerfile.sglang └── Dockerfile.vemlp.vllm.te ├── docs ├── Makefile ├── README.md ├── README_vllm0.7.md ├── README_vllm0.8.md ├── _static │ └── logo.png ├── advance │ ├── checkpoint.rst │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ └── placement.rst ├── amd_tutorial │ └── amd_build_dockerfile_page.rst ├── conf.py ├── data.rst ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ └── ppo_code_architecture.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── hybrid_flow.rst ├── index.rst ├── perf │ └── perf_tuning.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── start │ ├── install.rst │ ├── multinode.rst │ └── quickstart.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ └── ray_trainer.rst ├── examples ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ └── run_qwen_megatron_ckpt.sh ├── data_preprocess │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── hellaswag.py │ ├── math_dataset.py │ └── multiturn.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_ctx_msp_vgpt.sh │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2_5_vl-7b.sh │ └── run_vgpt.sh ├── ppo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sglang_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── 
run_qwen2.5-7b_seq_balance.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh │ ├── multiturn │ │ └── run_qwen_05_sp2.sh │ └── videogpt │ │ └── run.sh ├── slurm │ └── ray_on_slurm.slurm └── split_placement │ ├── README.md │ ├── config │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── ivideogpt ├── configs │ ├── frac_action_ranges.pth │ ├── llama.json │ └── llama_small.json ├── ctx_tokenizer │ ├── compressive_vq_model.py │ ├── conditional_vae.py │ └── vae.py ├── data │ ├── __init__.py │ ├── dataset_mixes.py │ ├── simple_dataloader.py │ └── sthsth_dataloader.py ├── lpips.py ├── processor.py ├── scripts │ └── summarize_action_ranges.py └── tokenizer │ ├── __init__.py │ ├── finite_scalar_quantize.py │ ├── vae.py │ └── vq_model.py ├── merge_sharded_ckpts.py ├── patches └── megatron_v4.patch ├── pyproject.toml ├── recipe ├── dapo │ ├── README.md │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── src │ │ ├── config │ │ │ └── dapo_trainer.yaml │ │ ├── dapo_ray_trainer.py │ │ └── main_dapo.py │ └── test_dapo_7b.sh ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ └── run_prime_qwen.sh └── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ ├── __init__.py │ ├── gpqa.py │ ├── livecodebench.py │ └── math.py ├── requirements.txt ├── requirements_sglang.txt ├── scripts ├── diagnose.py ├── format.sh └── model_merger.py ├── setup.py ├── tests ├── __init__.py ├── checkpoint │ ├── run_deepseek_megatron_ckpt.sh │ ├── run_qwen_megatron_ckpt.sh │ └── test_fsdp_ckpt.py ├── distributed │ ├── run_all.sh │ └── test_tensor_dict.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── run_deepseek_grpo.sh │ ├── run_deepseek_grpo_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_deepseek_megatron_parallelism.sh │ ├── run_qwen2vl_geo3k_function_rm.sh │ ├── run_qwen_grpo.sh │ ├── run_qwen_grpo_megatron.sh │ ├── run_qwen_gsm8k_custom_function_rm.sh │ ├── run_qwen_gsm8k_dapo.sh │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_both_kl.sh │ ├── run_qwen_gsm8k_function_rm_grpo.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_function_rm_remax.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_liger_kernel.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_qwen_gsm8k_prime.sh │ ├── run_qwen_megatron.sh │ ├── run_qwen_megatron_parallelism.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_ray_trainer.sh │ ├── run_ray_trainer_fire_sampling.sh │ └── run_ray_trainer_rmpad.sh ├── generation │ └── 
run_gen_qwen05.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── kill_github_tests.sh ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── check_worker_alive │ │ └── main.py │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── rollout │ ├── run_fsdp_vllm.py │ ├── test_sglang_spmd.py │ ├── test_vllm_hf_loader.py │ └── test_vllm_spmd.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ ├── run_sft.sh │ ├── run_sft_multiturn.sh │ ├── run_sft_qwen05_peft.sh │ ├── run_sft_qwen05_sp2_liger.sh │ ├── run_sft_sp_loss_match.sh │ └── test_sp_loss_match.py ├── utility │ └── test_tensor_dict_utilities.py └── verl │ └── utils │ └── dataset │ ├── test_multiturn_sft_dataset.py │ ├── test_rl_dataset.py │ ├── test_rm_dataset.py │ └── test_sft_dataset.py └── verl ├── __init__.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ ├── llama_loader_depracated.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── mcore │ ├── __init__.py │ ├── gpt_model.py │ ├── loader.py │ └── saver.py ├── qwen2 │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── qwen2_loader.py │ │ ├── qwen2_loader_depracated.py │ │ └── qwen2_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_qwen2_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ ├── qwen2.py │ └── qwen2_vl.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ ├── base.py │ └── megatron.py ├── third_party ├── __init__.py ├── sglang │ ├── __init__.py │ └── parallel_state.py └── vllm │ ├── __init__.py │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── 
parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ ├── sft_trainer.yaml │ └── vgpt_ppo_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── main_vgpt_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ ├── metric_utils.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_manager.py │ ├── fsdp_checkpoint_manager.py │ └── megatron_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── multiturn_sft_dataset.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── performance.py │ └── trajectory_tracker.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── math.py │ ├── math_dapo.py │ ├── math_verify.py │ ├── prime_code │ │ ├── __init__.py │ │ ├── testing_util.py │ │ └── utils.py │ └── prime_math │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py └── ulysses.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_manager ├── __init__.py ├── dapo.py ├── naive.py └── prime.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── sglang_rollout │ ├── __init__.py │ └── sglang_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ ├── fire_vllm_rollout.py │ ├── vllm_rollout.py │ └── vllm_rollout_spmd.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_sglang.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.idea 3 | __pycache__ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 THUML @ Tsinghua University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/assets/concept.png -------------------------------------------------------------------------------- /assets/showcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/assets/showcase.png -------------------------------------------------------------------------------- /lang_wm/assets/lang_wm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/assets/lang_wm.png -------------------------------------------------------------------------------- /lang_wm/verl/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /lang_wm/verl/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 20 | -------------------------------------------------------------------------------- /lang_wm/verl/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true -------------------------------------------------------------------------------- /lang_wm/verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. 
and/or its affiliates -------------------------------------------------------------------------------- /lang_wm/verl/docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . -------------------------------------------------------------------------------- /lang_wm/verl/docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 7 | 8 | # Set working directory 9 | # WORKDIR $PWD/app 10 | 11 | # Set environment variables 12 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 13 | 14 | # Install vllm 15 | RUN pip uninstall -y vllm && \ 16 | rm -rf vllm && \ 17 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 18 | cd vllm && \ 19 | MAX_JOBS=$(nproc) python3 setup.py install && \ 20 | cd .. && \ 21 | rm -rf vllm 22 | 23 | # Copy the entire project directory 24 | COPY . . 25 | 26 | # Install dependencies 27 | RUN pip install "tensordict<0.6" --no-deps && \ 28 | pip install accelerate \ 29 | codetiming \ 30 | datasets \ 31 | dill \ 32 | hydra-core \ 33 | liger-kernel \ 34 | numpy \ 35 | pandas \ 36 | peft \ 37 | "pyarrow>=15.0.0" \ 38 | pylatexenc \ 39 | "ray[data,train,tune,serve]" \ 40 | torchdata \ 41 | transformers \ 42 | wandb \ 43 | orjson \ 44 | pybind11 && \ 45 | pip install -e . --no-deps -------------------------------------------------------------------------------- /lang_wm/verl/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /lang_wm/verl/docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and open localhost:8000. 
-------------------------------------------------------------------------------- /lang_wm/verl/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/verl/docs/_static/logo.png -------------------------------------------------------------------------------- /lang_wm/verl/docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /lang_wm/verl/docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_generation \ 2 | trainer.nnodes=1 \ 3 | trainer.n_gpus_per_node=8 \ 4 | data.path=~/data/rlhf/gsm8k/test.parquet \ 5 | data.prompt_key=prompt \ 6 | data.n_samples=1 \ 7 | data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \ 8 | model.path=deepseek-ai/deepseek-llm-7b-chat \ 9 | +model.trust_remote_code=True \ 10 | rollout.temperature=1.0 \ 11 | rollout.top_k=50 \ 12 | rollout.top_p=0.7 \ 13 | rollout.prompt_length=2048 \ 14 | rollout.response_length=1024 \ 15 | rollout.tensor_model_parallel_size=2 \ 16 | rollout.gpu_memory_utilization=0.8 17 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | +data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_gemma_2b.sh:
-------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | +data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32 \ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 |
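Note: the model.lora_rank, model.lora_alpha, and model.target_modules overrides in run_qwen_05_peft.sh above configure LoRA fine-tuning through PEFT. As a rough mental model only (this is not the verl code path; the SFT trainer builds its adapter configuration internally), they correspond to a Hugging Face `peft.LoraConfig` roughly like the sketch below, which assumes a `peft` release recent enough to accept the "all-linear" shorthand.

```python
# Illustrative sketch only: what the LoRA overrides above roughly express in peft terms.
# This is NOT how the verl trainer is invoked; it constructs its own adapter config.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
lora_cfg = LoraConfig(
    r=32,                         # model.lora_rank=32
    lora_alpha=16,                # model.lora_alpha=16
    target_modules="all-linear",  # model.target_modules=all-linear (or e.g. ["q_proj", "v_proj"])
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(base, lora_cfg)
peft_model.print_trainable_parameters()  # only the low-rank adapter weights are trainable
```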
-------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2_liger.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/text_game/run_text_game_sft.sh: -------------------------------------------------------------------------------- 1 | # using 4x80G A100 GPUs 2 | 3 | torchrun --standalone --nnodes=1 --nproc_per_node=4 \ 4 | -m verl.trainer.fsdp_sft_trainer \ 5 | data.train_files=thuml/bytesized32-world-model-cot/generated_cot.parquet \ 6 | data.val_files=thuml/bytesized32-world-model-cot/generated_cot.parquet \ 7 | data.train_batch_size=16 \ 8 | data.prompt_key=prompt \ 9 | data.response_key=reward_model \ 10 | +data.prompt_dict_keys=['content'] \ 11 | +data.response_dict_keys=['ground_truth'] \ 12 | data.micro_batch_size_per_gpu=1 \ 13 | data.max_length=11384 \ 14 | model.partial_pretrain=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 15 | trainer.default_local_dir=log/sft_text_game_simulator_experiment \ 16 | trainer.project_name=text_game_simulator_sft \ 17 |
trainer.experiment_name=text_game_simulator_deepseek_generated_data_sft \ 18 | trainer.total_epochs=15 \ 19 | trainer.logger=['console','wandb'] \ 20 | model.lora_rank=32 \ 21 | model.lora_alpha=16 $@ -------------------------------------------------------------------------------- /lang_wm/verl/examples/sft/web_agent/run_web_agent_sft.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | torchrun -m --nnodes 1 --nproc_per_node=8 \ 4 | verl.trainer.fsdp_sft_trainer \ 5 | data.train_files=thuml/webarena-world-model-cot/train.parquet \ 6 | data.val_files=thuml/webarena-world-model-cot/test.parquet \ 7 | data.prompt_key=question \ 8 | data.response_key=answer \ 9 | data.micro_batch_size_per_gpu=1 \ 10 | model.partial_pretrain=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 11 | model.lora_rank=32 \ 12 | model.lora_alpha=16 \ 13 | trainer.default_hdfs_dir=hdfs://user/verl/experiments/webagent/DeepSeek-R1-Distill-Qwen-1.5B \ 14 | trainer.project_name=webagent-sft \ 15 | trainer.experiment_name=webagent-sft \ 16 | trainer.total_epochs=40 \ 17 | trainer.logger="['console','wandb']" \ 18 | data.train_batch_size=8 \ 19 | trainer.default_local_dir=log/webagent-sft -------------------------------------------------------------------------------- /lang_wm/verl/merge_lora.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoModelForCausalLM 2 | from peft import PeftModel 3 | 4 | base_model = AutoModelForCausalLM.from_pretrained("DeepSeek-R1-Distill-Qwen-1.5B") 5 | peft_model_id = "log/webagent-sft-DeepSeek-R1-Distill-Qwen-1.5B/global_step_xxxxxx" # the output path 6 | model = PeftModel.from_pretrained(base_model, peft_model_id) 7 | merged_model = model.merge_and_unload() 8 | print(type(merged_model)) 9 | merged_model.save_pretrained("webagent-sft-DeepSeek-R1-Distill-Qwen-1.5B-merged-lowest") # where to save the merged model -------------------------------------------------------------------------------- /lang_wm/verl/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | peft 11 | pyarrow>=15.0.0 12 | pybind11 13 | pylatexenc 14 | ray[data,train,tune,serve] 15 | tensordict<0.6 16 | torchdata 17 | transformers 18 | wandb 19 | json-repair 20 | dirtyjson 21 | bytes32==1.1.0 22 | openai==1.6.1 23 | tiktoken==0.5.2 24 | pandas==2.1.4 25 | plotly==5.19.0 26 | kaleido==0.2.1 27 | termcolor==2.4.0 28 | -------------------------------------------------------------------------------- /lang_wm/verl/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests examples -------------------------------------------------------------------------------- /lang_wm/verl/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /lang_wm/verl/tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as follows: 4 | 5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should wrap around via a modulo operation. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is in verl/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for a better understanding.
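To make the wrap-around rule above concrete, here is a minimal sketch that reproduces the worked answer, assuming values wrap modulo (max_number + 1) once they exceed max_number. The helper name `complete_sequence` is hypothetical; the authoritative task logic lives in the digit_completion task.py referenced in the Environment definition section.

```python
# Hypothetical helper for illustration only; not the repo's task.py.
def complete_sequence(prompt, n, max_number):
    diff = prompt[1] - prompt[0]   # fixed difference of the arithmetic sequence
    value = prompt[-1]
    response = []
    for _ in range(n):
        value = (value + diff) % (max_number + 1)  # assumed wrap-around rule
        response.append(value)
    return response

assert complete_sequence([1, 2, 3], n=5, max_number=6) == [4, 5, 6, 0, 1]
```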
20 | 21 | 22 | 23 | # Run experiments 24 | 25 | Users are required to specify the config path and config name (and the model config path relative to the current working directory). 26 | 27 | ```bash 28 | # cd examples/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of the model config is 'config/model_config'; if you want to change it, you can rewrite it in ray_megatron.yaml or override it on the command line: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=grpo \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | export VLLM_ATTENTION_BACKEND=XFORMERS 4 | 5 | python3 -m verl.trainer.main_ppo \ 6 | data.train_files=$HOME/data/gsm8k/train.parquet \ 7 | data.val_files=$HOME/data/gsm8k/test.parquet \ 8 | data.train_batch_size=1024 \ 9 | data.max_prompt_length=512 \ 10 | data.max_response_length=512 \ 11 | actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ 12 | actor_rollout_ref.actor.optim.lr=1e-6 \ 13 | actor_rollout_ref.model.use_remove_padding=True \ 14 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 15 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \ 16 | actor_rollout_ref.actor.fsdp_config.param_offload=False \ 17 | actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ 18 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \ 19 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 20 | actor_rollout_ref.rollout.name=vllm \ 21 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 22 | actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \ 23 | actor_rollout_ref.ref.fsdp_config.param_offload=True \ 24 | algorithm.kl_ctrl.kl_coef=0.001 \ 25 | algorithm.adv_estimator=remax \ 26 | trainer.critic_warmup=0 \ 27 | trainer.logger=['console'] \ 28 | trainer.project_name='verl_example_gsm8k' \ 29 | trainer.experiment_name='qwen_e2e_ci_function_rm' \ 
30 | trainer.n_gpus_per_node=8 \ 31 | trainer.nnodes=1 \ 32 | trainer.save_freq=-1 \ 33 | trainer.total_training_steps=1 $@ 34 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 6 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 7 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 8 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 9 | actor_rollout_ref.rollout.name=vllm \ 10 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 11 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 12 | critic.model.path=Qwen/Qwen2.5-0.5B \ 13 | critic.model.use_remove_padding=True \ 14 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /lang_wm/verl/tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | # Tested with 1 & 4 GPUs 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_gen_qwen05.sh [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | python3 -m verl.trainer.main_generation \ 16 | trainer.nnodes=1 \ 17 | trainer.n_gpus_per_node=$nproc_per_node \ 18 | data.path=$HOME/data/gsm8k/test.parquet \ 19 | data.prompt_key=prompt \ 20 | data.n_samples=1 \ 21 | data.output_path=$save_path \ 22 | model.path=Qwen/Qwen2.5-0.5B-Instruct \ 23 | +model.trust_remote_code=True \ 24 | rollout.temperature=1.0 \ 25 | rollout.top_k=50 \ 26 | rollout.top_p=0.7 \ 27 | rollout.prompt_length=2048 \ 28 | rollout.response_length=1024 \ 29 | rollout.tensor_model_parallel_size=2 \ 30 | rollout.gpu_memory_utilization=0.8 31 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/kill_github_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "Usage: $0 YOUR_GITHUB_TOKEN" 5 | echo "Please provide exactly one input argument for your github token." 6 | exit 1 7 | fi 8 | 9 | # Set your GitHub repository details 10 | OWNER="volcengine" 11 | REPO="verl" 12 | TOKEN=$1 13 | 14 | # API URL for workflow runs 15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued" 16 | 17 | # Check required commands 18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; } 19 | 20 | # Get queued workflow runs 21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL") 22 | 23 | # Run this for debugging 24 | # echo $response 25 | 26 | # Extract run IDs 27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id') 28 | 29 | if [ -z "$queued_run_ids" ]; then 30 | echo "No queued workflow runs found." 31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs." 
42 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /lang_wm/verl/tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet 
\ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /lang_wm/verl/tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | 29 | from . import single_controller 30 | 31 | __all__ = ['DataProto', "__version__"] 32 | 33 | if os.getenv('VERL_USE_MODELSCOPE', 'False').lower() == 'true': 34 | import importlib 35 | if importlib.util.find_spec("modelscope") is None: 36 | raise ImportError(f'You are using the modelscope hub, please install modelscope by `pip install modelscope -U`') 37 | # Patch hub to download models from modelscope to speed up. 38 | from modelscope.utils.hf_util import patch_hub 39 | patch_hub() 40 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_qwen2_megatron import ( 16 | # original model with megatron 17 | ParallelQwen2Model, 18 | ParallelQwen2ForCausalLM, 19 | # rmpad with megatron 20 | ParallelQwen2ForCausalLMRmPad, 21 | ParallelQwen2ForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelQwen2ForCausalLMRmPadPP, 24 | ParallelQwen2ForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | # Note(haibin.lin): single_controller.__version__ is deprecated 20 | with open(os.path.join(os.path.join(version_folder, os.pardir), 'version/version')) as f: 21 | __version__ = f.read().strip() 22 | 23 | from . import base 24 | from .base import * 25 | 26 | __all__ = base.__all__ -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | 18 | __all__ = ['Worker', 'WorkerGroup', 'ClassWithInitArgs', 'ResourcePool'] -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | 21 | def __init__(self, rank_zero_info): 22 | self.rank_zero_info = rank_zero_info 23 | 24 | def get_rank_zero_info(self): 25 | return self.rank_zero_info 26 | 27 | 28 | def create_worker_group_register_center(name, info): 29 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 30 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_spmd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: null # will be deprecated, use micro_batch_size_per_gpu 4 | micro_batch_size_per_gpu: 4 # this is also val batch size 5 | train_files: ~/data/gsm8k/train.parquet 6 | val_files: ~/data/gsm8k/test.parquet 7 | prompt_key: question 8 | response_key: answer 9 | max_length: 5000 10 | truncation: error 11 | balance_dp_token: False 12 | chat_template: null 13 | model: 14 | partial_pretrain: ~/models/gemma-1.1-7b-it 15 | fsdp_config: 16 | wrap_policy: 17 | min_num_params: 0 18 | cpu_offload: False 19 | offload_params: False 20 | external_lib: null 21 | enable_gradient_checkpointing: False 22 | trust_remote_code: False 23 | lora_rank: 0 # Set to positive value to enable LoRA (e.g., 32) 24 | lora_alpha: 16 # LoRA scaling factor 25 | target_modules: all-linear # Target modules for LoRA adaptation 26 | use_liger: False 27 | optim: 28 | lr: 1e-5 29 | betas: [0.9, 0.95] 30 | weight_decay: 0.01 31 | warmup_steps_ratio: 0.1 32 | clip_grad: 1.0 33 | ulysses_sequence_parallel_size: 1 34 | use_remove_padding: False 35 | trainer: 36 | default_local_dir: ./log/sft_model 37 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 38 | resume_path: null 39 | project_name: gsm8k-sft 40 | experiment_name: test 41 | total_epochs: 4 42 | total_training_steps: null 43 | logger: ['console'] 44 | seed: 1 45 | 46 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_tokenizer, hf_processor 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We organize the prompt directly in the chat format so that multi-turn chats can be incorporated easily. In the prompt, we may add instruction-following text that guides the model to output the answer in a particular format so that it can be extracted. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May.
How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | import torch.distributed as dist 17 | import logging 18 | 19 | 20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0): 21 | if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank): 22 | memory_allocated = torch.cuda.memory_allocated() / 1024**3 23 | memory_reserved = torch.cuda.memory_reserved() / 1024**3 24 | 25 | message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}' 26 | 27 | if logger is None: 28 | print(message) 29 | else: 30 | logger.log(msg=message, level=level) 31 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | import os 16 | 17 | 18 | def initialize_global_process_group(timeout_second=36000): 19 | import torch.distributed 20 | from datetime import timedelta 21 | torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second)) 22 | local_rank = int(os.environ["LOCAL_RANK"]) 23 | rank = int(os.environ["RANK"]) 24 | world_size = int(os.environ["WORLD_SIZE"]) 25 | 26 | if torch.distributed.is_initialized(): 27 | torch.cuda.set_device(local_rank) 28 | return local_rank, rank, world_size 29 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/import_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Utilities to check if packages are available. 16 | We assume package availability won't change during runtime. 
17 | """ 18 | 19 | from functools import cache 20 | from typing import List 21 | 22 | 23 | @cache 24 | def is_megatron_core_available(): 25 | try: 26 | from megatron.core import parallel_state as mpu 27 | return True 28 | except ImportError: 29 | return False 30 | 31 | 32 | @cache 33 | def is_vllm_available(): 34 | try: 35 | import vllm 36 | return True 37 | except ImportError: 38 | return False 39 | 40 | 41 | def import_external_libs(external_libs=None): 42 | if external_libs is None: 43 | return 44 | if not isinstance(external_libs, List): 45 | external_libs = [external_libs] 46 | import importlib 47 | for external_lib in external_libs: 48 | importlib.import_module(external_lib) 49 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | import torch 18 | 19 | 20 | def set_basic_config(level): 21 | """ 22 | This function sets the global logging format and level. It will be called when import verl 23 | """ 24 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 25 | 26 | 27 | def log_to_file(string): 28 | print(string) 29 | if os.path.isdir('logs'): 30 | with open(f'logs/log_{torch.distributed.get_rank()}', 'a+') as f: 31 | f.write(string + '\n') 32 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import ray 19 | 20 | import concurrent.futures 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | 25 | def put_data(index, data): 26 | return index, ray.put(data) 27 | 28 | if max_workers is None: 29 | max_workers = min(len(data_list), 16) 30 | 31 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 32 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 33 | res_lst = [] 34 | for future in concurrent.futures.as_completed(data_list_f): 35 | res_lst.append(future.result()) 36 | 37 | # reorder based on index 38 | output = [None for _ in range(len(data_list))] 39 | for res in res_lst: 40 | index, data_ref = res 41 | output[index] = data_ref 42 | 43 | return output 44 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r'<think>.*</think>.*\\boxed\{.*\}.*', re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str) -> float: 26 | answer = extract_boxed_content(predict_str) 27 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 28 | 29 | 30 | def compute_score(predict_str: str, ground_truth: str) -> float: 31 | return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str) 32 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2.0.dev 2 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd.
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available 16 | 17 | from .base import BaseShardingManager 18 | from .fsdp_ulysses import FSDPUlyssesShardingManager 19 | 20 | AllGatherPPModel = None 21 | 22 | if is_megatron_core_available() and is_vllm_available(): 23 | from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager 24 | elif AllGatherPPModel is not None: 25 | pass 26 | else: 27 | AllGatherPPModel = None 28 | MegatronVLLMShardingManager = None 29 | 30 | if is_vllm_available(): 31 | from .fsdp_vllm import FSDPVLLMShardingManager 32 | else: 33 | FSDPVLLMShardingManager = None 34 | -------------------------------------------------------------------------------- /lang_wm/verl/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | -------------------------------------------------------------------------------- /lang_wm/webagent/agent/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | from .agent import ( 5 | Agent, 6 | PromptAgent, 7 | TeacherForcingAgent, 8 | construct_agent, 9 | ) 10 | # from .world_model_agent import WMAgent 11 | 12 | __all__ = ["Agent", "TeacherForcingAgent", "PromptAgent", "construct_agent"] 13 | -------------------------------------------------------------------------------- /lang_wm/webagent/agent/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .prompt_constructor import * 2 | -------------------------------------------------------------------------------- /lang_wm/webagent/agent/prompts/jsons/refine_tao.json: -------------------------------------------------------------------------------- 1 | { 2 | "intro": "Summarize the key changes in the web page based on the following information:\nNew items: {new_items}\nUpdated items: {updated_items}\nDeleted items: {deleted_items}\n\nWhen summarizing, follow these output format:\n1. [First key change]\n2. [Second key change]\n3. [Third key change]\n...\n10. 
[Tenth key change]", 3 | "examples":[], 4 | "template": "", 5 | "meta_data": { 6 | "keywords": [ 7 | "new_items", 8 | "updated_items", 9 | "deleted_items" 10 | ] 11 | } 12 | } -------------------------------------------------------------------------------- /lang_wm/webagent/agent/prompts/to_json.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import importlib 3 | import json 4 | import os 5 | 6 | 7 | # use the current directory as the root 8 | def run() -> None: 9 | """Convert all python files in agent/prompts to json files in agent/prompts/jsons 10 | 11 | Python files are easiser to edit 12 | """ 13 | for p_file in glob.glob(f"agent/prompts/raw/*.py"): 14 | # import the file as a module 15 | base_name = os.path.basename(p_file).replace(".py", "") 16 | module = importlib.import_module(f"agent.prompts.raw.{base_name}") 17 | prompt = module.prompt 18 | # save the prompt as a json file 19 | os.makedirs("agent/prompts/jsons", exist_ok=True) 20 | with open(f"agent/prompts/jsons/{base_name}.json", "w+") as f: 21 | json.dump(prompt, f, indent=2) 22 | print(f"Done convert python files to json") 23 | 24 | 25 | if __name__ == "__main__": 26 | run() 27 | -------------------------------------------------------------------------------- /lang_wm/webagent/browser_env/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/lang_wm/webagent/browser_env/py.typed -------------------------------------------------------------------------------- /lang_wm/webagent/browser_env/trajectory.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .actions import Action 4 | from .utils import StateInfo 5 | 6 | Trajectory = list[Union[StateInfo, Action]] 7 | -------------------------------------------------------------------------------- /lang_wm/webagent/evaluation_harness/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | from .evaluators import * 5 | from .helper_functions import ( 6 | get_query_text, 7 | get_query_text_lowercase, 8 | reddit_get_latest_comment_content_by_username, 9 | reddit_get_latest_comment_obj_by_username, 10 | reddit_get_parent_comment_username_of_latest_comment_by_username, 11 | shopping_get_latest_order_url, 12 | shopping_get_num_reviews, 13 | shopping_get_order_product_name_list, 14 | shopping_get_order_product_option, 15 | shopping_get_order_product_quantity, 16 | shopping_get_product_attributes, 17 | shopping_get_product_price, 18 | shopping_get_rating_as_percentage, 19 | shopping_get_sku_latest_review_author, 20 | shopping_get_sku_latest_review_rating, 21 | shopping_get_sku_latest_review_text, 22 | ) 23 | -------------------------------------------------------------------------------- /lang_wm/webagent/llms/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | """This module is adapt from https://github.com/zeno-ml/zeno-build""" 5 | try: 6 | from .providers.gemini_utils import generate_from_gemini_completion 7 | except: 8 | print('Google Cloud not set up, skipping import of providers.gemini_utils.generate_from_gemini_completion') 9 | 10 | from .providers.hf_utils import 
generate_from_huggingface_completion 11 | from .providers.openai_utils import ( 12 | generate_from_openai_chat_completion, 13 | generate_from_openai_completion, 14 | ) 15 | from .utils import call_llm 16 | 17 | __all__ = [ 18 | "generate_from_openai_completion", 19 | "generate_from_openai_chat_completion", 20 | "generate_from_huggingface_completion", 21 | "generate_from_gemini_completion", 22 | "call_llm", 23 | ] 24 | -------------------------------------------------------------------------------- /lang_wm/webagent/llms/providers/hf_utils.py: -------------------------------------------------------------------------------- 1 | # This file is from WMA project: 2 | # https://github.com/kyle8581/WMA-Agents 3 | 4 | from text_generation import Client # type: ignore 5 | 6 | 7 | def generate_from_huggingface_completion( 8 | prompt: str, 9 | model_endpoint: str, 10 | temperature: float, 11 | top_p: float, 12 | max_new_tokens: int, 13 | stop_sequences: list[str] | None = None, 14 | ) -> str: 15 | client = Client(model_endpoint, timeout=60) 16 | generation: str = client.generate( 17 | prompt=prompt, 18 | temperature=temperature, 19 | top_p=top_p, 20 | max_new_tokens=max_new_tokens, 21 | stop_sequences=stop_sequences, 22 | ).generated_text 23 | 24 | return generation 25 | -------------------------------------------------------------------------------- /lang_wm/webagent/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # re-validate login information 3 | export DATASET=webarena 4 | mkdir -p ./.auth 5 | python -m browser_env.auto_login -------------------------------------------------------------------------------- /lang_wm/webagent/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.mypy] 2 | explicit_package_bases = true 3 | exclude = [ 4 | "environment_docker" 5 | ] 6 | 7 | [[tool.mypy.overrides]] 8 | module = [ 9 | "setuptools.*", 10 | "pytest.*", 11 | "pytest_asyncio.*", 12 | "py.*", 13 | "munkres.*", 14 | "weave.*", 15 | "gradio_client.*", 16 | "datasets.*", 17 | "google.*", 18 | "vertexai.*", 19 | "transformers.*" 20 | ] 21 | ignore_missing_imports = true -------------------------------------------------------------------------------- /lang_wm/webagent/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = webarena 3 | 4 | [tool.pytest.ini_options] 5 | testpaths = ["tests"] 6 | python_files = "test_*.py" 7 | 8 | [options.extras_require] 9 | dev = 10 | pre-commit==3.0.1 11 | pytest==7.1.2 12 | mypy==0.991 13 | nbmake 14 | pytest-asyncio 15 | types-requests 16 | 17 | [options] 18 | python_requires = >=3.7, <4 19 | packages = 20 | browser_env 21 | agent 22 | evaluation_harness 23 | llms 24 | [mypy] 25 | strict = true -------------------------------------------------------------------------------- /lang_wm/webagent/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup() -------------------------------------------------------------------------------- /vid_wm/assets/vid_wm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/assets/vid_wm.png -------------------------------------------------------------------------------- /vid_wm/ivideogpt/.gitignore: 
-------------------------------------------------------------------------------- 1 | trm-eval 2 | eval_jsonl -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/ctx_vae256/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "VQModel", 3 | "_diffusers_version": "0.25.0.dev0", 4 | "act_fn": "silu", 5 | "block_out_channels": [ 6 | 128, 7 | 256, 8 | 256, 9 | 512 10 | ], 11 | "down_block_types": [ 12 | "DownEncoderBlock2D", 13 | "DownEncoderBlock2D", 14 | "DownEncoderBlock2D", 15 | "DownEncoderBlock2D" 16 | ], 17 | "in_channels": 3, 18 | "latent_channels": 64, 19 | "layers_per_block": 2, 20 | "lookup_from_codebook": true, 21 | "mid_block_add_attention": false, 22 | "norm_num_groups": 32, 23 | "norm_type": "group", 24 | "vq_fsq_levels": 12, 25 | "out_channels": 3, 26 | "sample_size": 32, 27 | "scaling_factor": 0.18215, 28 | "up_block_types": [ 29 | "UpDecoderBlock2D", 30 | "UpDecoderBlock2D", 31 | "UpDecoderBlock2D", 32 | "UpDecoderBlock2D" 33 | ], 34 | "force_upcast": true, 35 | "dyn_fsq_levels": 12, 36 | "context_length": 1, 37 | "resolution": 256, 38 | "max_att_resolution": 32 39 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/ctx_vae64/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "VQModel", 3 | "_diffusers_version": "0.25.0.dev0", 4 | "act_fn": "silu", 5 | "block_out_channels": [ 6 | 128, 7 | 256, 8 | 512 9 | ], 10 | "down_block_types": [ 11 | "DownEncoderBlock2D", 12 | "DownEncoderBlock2D", 13 | "DownEncoderBlock2D" 14 | ], 15 | "in_channels": 3, 16 | "latent_channels": 64, 17 | "layers_per_block": 2, 18 | "lookup_from_codebook": true, 19 | "mid_block_add_attention": false, 20 | "norm_num_groups": 32, 21 | "norm_type": "group", 22 | "num_vq_embeddings": 8192, 23 | "out_channels": 3, 24 | "sample_size": 32, 25 | "scaling_factor": 0.18215, 26 | "up_block_types": [ 27 | "UpDecoderBlock2D", 28 | "UpDecoderBlock2D", 29 | "UpDecoderBlock2D" 30 | ], 31 | "vq_embed_dim": null, 32 | "force_upcast": true, 33 | "num_dyn_embeddings": 8192, 34 | "context_length": 1, 35 | "resolution": 64, 36 | "max_att_resolution": 16 37 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/ctx_llama_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 9008, 6 | "bos_token_id": 9006, 7 | "eos_token_id": 9007, 8 | "hidden_act": "silu", 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "num_key_value_heads": 12, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/frac_action_ranges.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/ivideogpt/configs/vgpt/frac_action_ranges.pth 
-------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/llama.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 1024, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 4096, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 16, 16 | "num_hidden_layers": 24, 17 | "num_key_value_heads": 16, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/configs/vgpt/llama_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "num_key_value_heads": 12, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/ivideogpt/ivideogpt/ctx_tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .compressive_vq_model_fsq import CompressiveVQModelFSQ 2 | -------------------------------------------------------------------------------- /vid_wm/ivideogpt/ivideogpt/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_dataloader import SimpleRoboticDataLoaderv2, EvalDataLoader 2 | from .dataset_mixes import DATASET_NAMED_MIXES 3 | -------------------------------------------------------------------------------- /vid_wm/ivideogpt/ivideogpt/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .vq_model import CNNFSQModel256 -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/eval_multi_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using one GPU 2 | 3 | python eval_vgpt_multiturn.py --per_device_eval_batch_size 4 \ 4 | --config_name configs/vgpt/ctx_llama_small.json\ 5 | --dataset_path /dev/null \ 6 | --pretrained_model_name_or_path thuml/rt1-compressive-tokenizer \ 7 | --pretrained_transformer_path thuml/rt1-world-model-multi-step-rlvr \ 8 | --processor_type ctx_msp \ 9 | --output_jsonl eval_jsonl/vgpt_small_ctx_msp8_head12_fulleval_release.jsonl \ 10 | --max_decode_batchsize 1 \ 11 | --segment_length 8 \ 12 | --use_eval_dataset \ 13 | --max_eval_iters 400 \ 14 | --exp_name vgpt_small_ctx_msp8_head12_fulleval $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/eval_policy.sh: -------------------------------------------------------------------------------- 1 | # using one GPU 2 | 3 | python 
eval_runenv.py --per_device_eval_batch_size 1 \ 4 | --config_name configs/vgpt/ctx_llama_small.json\ 5 | --dataset_path /dev/null \ 6 | --pretrained_model_name_or_path thuml/rt1-compressive-tokenizer \ 7 | --pretrained_transformer_path thuml/rt1-world-model-multi-step-rlvr \ 8 | --processor_type ctx_msp \ 9 | --max_decode_batchsize 1 \ 10 | --segment_length 8 \ 11 | --gpu_memory_utilization 0.75 \ 12 | --repetition_penalty 1.2 \ 13 | --output_dir policy_eval \ 14 | --policy_model_path pretrained_models/rt_1_tf_trained_for_000400120 \ 15 | --task_instruction "open middle drawer" $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/eval_single_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using one GPU 2 | 3 | python eval_vgpt.py --per_device_eval_batch_size 4 \ 4 | --dataset_path /dev/null \ 5 | --pretrained_model_name_or_path thuml/rt1-frame-tokenizer \ 6 | --pretrained_transformer_path thuml/rt1-world-model-single-step-rlvr \ 7 | --processor_type simple \ 8 | --output_jsonl eval_jsonl/vgpt_small_multi1_head12_fulleval_release.jsonl \ 9 | --use_eval_dataset \ 10 | --max_eval_iters 400 \ 11 | --exp_name vgpt_small_multi1_head12_fulleval $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/train_compressive_tokenizer.sh: -------------------------------------------------------------------------------- 1 | # using 16x40G A100 GPUs 2 | 3 | accelerate launch train_ctx_tokenizer.py \ 4 | --exp_name ctx_cnn_fsq12_frac_res320_seg8 \ 5 | --dataset_path /dev/null \ 6 | --train_batch_size 1 --gradient_accumulation_steps 1 --log_code_util \ 7 | --resolution 256 320 \ 8 | --output_dir vqgan-output \ 9 | --vae_loss l1 --disc_weight 0.1 --perc_weight 1.0 \ 10 | --start_global_step 0 --disc_start 10000 --max_train_steps 600000 \ 11 | --discr_learning_rate 5e-4 --learning_rate 5e-4 \ 12 | --disc_depth 6 --segment_length 8 $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/train_multi_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | accelerate launch train_vgpt.py \ 4 | --per_device_train_batch_size 4 \ 5 | --config_name configs/vgpt/ctx_llama_small.json \ 6 | --dataset_path /dev/null \ 7 | --pretrained_model_name_or_path thuml/rt1-compressive-tokenizer \ 8 | --output_dir trm-output \ 9 | --skip_first_val \ 10 | --exp_name vgpt_small_ctx_msp8_head12 \ 11 | --processor_type ctx_msp \ 12 | --segment_length 8 $@ -------------------------------------------------------------------------------- /vid_wm/ivideogpt/scripts/train_perframe_tokenizer.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | accelerate launch train_tokenizer.py \ 4 | --exp_name cnn_fsq12_frac_res320_seg8 \ 5 | --dataset_path /dev/null \ 6 | --train_batch_size 2 --gradient_accumulation_steps 1 --log_code_util \ 7 | --resolution 256 320 --fsq_level 12 \ 8 | --output_dir vqgan-output \ 9 | --vae_loss l1 --disc_weight 0.1 --perc_weight 1.0 \ 10 | --start_global_step 0 --disc_start 10000 --max_train_steps 600000 \ 11 | --discr_learning_rate 5e-4 --learning_rate 5e-4 \ 12 | --disc_depth 6 --segment_length 8\ 13 | --checkpointing_steps 50000 $@ -------------------------------------------------------------------------------- 
/vid_wm/ivideogpt/scripts/train_single_step_prediction.sh: -------------------------------------------------------------------------------- 1 | # using 8x40G A100 GPUs 2 | 3 | accelerate launch train_vgpt.py \ 4 | --per_device_train_batch_size 4 \ 5 | --config_name configs/vgpt/llama_small.json \ 6 | --dataset_path /dev/null \ 7 | --pretrained_model_name_or_path thuml/rt1-frame-tokenizer \ 8 | --output_dir trm-output \ 9 | --skip_first_val \ 10 | --exp_name vgpt_small_multi1_head12 $@ -------------------------------------------------------------------------------- /vid_wm/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.27.0 2 | imageio==2.9.0 3 | imageio-ffmpeg==0.4.4 4 | piqa==1.3.2 5 | scipy==1.13.0 6 | lpips==0.1.4 -------------------------------------------------------------------------------- /vid_wm/verl/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-22.04 8 | tools: 9 | python: "3.11" 10 | rust: "1.70" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements-docs.txt 18 | - method: pip 19 | path: . 20 | -------------------------------------------------------------------------------- /vid_wm/verl/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | column_limit = 120 4 | indent_width = 4 5 | split_arguments_when_comma_terminated: true -------------------------------------------------------------------------------- /vid_wm/verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /vid_wm/verl/docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3 2 | 3 | RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable 4 | 5 | RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM 6 | 7 | # only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed 8 | # unset for now 9 | RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e . -------------------------------------------------------------------------------- /vid_wm/verl/docker/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | # Build the docker in the repo dir: 2 | # docker build -f docker/Dockerfile.rocm -t verl-rocm:03.04.2015 . 3 | # docker images # you can find your built docker 4 | 5 | 6 | FROM rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4 7 | 8 | # Set working directory 9 | # WORKDIR $PWD/app 10 | 11 | # Set environment variables 12 | ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942" 13 | 14 | # Install vllm 15 | RUN pip uninstall -y vllm && \ 16 | rm -rf vllm && \ 17 | git clone -b v0.6.3 https://github.com/vllm-project/vllm.git && \ 18 | cd vllm && \ 19 | MAX_JOBS=$(nproc) python3 setup.py install && \ 20 | cd .. && \ 21 | rm -rf vllm 22 | 23 | # Copy the entire project directory 24 | COPY . . 
25 | 26 | # Install dependencies 27 | RUN pip install "tensordict<0.6" --no-deps && \ 28 | pip install accelerate \ 29 | codetiming \ 30 | datasets \ 31 | dill \ 32 | hydra-core \ 33 | liger-kernel \ 34 | numpy \ 35 | pandas \ 36 | peft \ 37 | "pyarrow>=15.0.0" \ 38 | pylatexenc \ 39 | "ray[data,train,tune,serve]" \ 40 | torchdata \ 41 | transformers \ 42 | wandb \ 43 | orjson \ 44 | pybind11 && \ 45 | pip install -e . --no-deps -------------------------------------------------------------------------------- /vid_wm/verl/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /vid_wm/verl/docs/README.md: -------------------------------------------------------------------------------- 1 | # verl documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and navigate to http://localhost:8000 to view the documentation. -------------------------------------------------------------------------------- /vid_wm/verl/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/verl/docs/_static/logo.png -------------------------------------------------------------------------------- /vid_wm/verl/docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. 
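For orientation, the core pattern looks roughly like this (a minimal sketch: the ``RayResourcePool`` / ``RayClassWithInitArgs`` / ``RayWorkerGroup`` names and signatures below follow the tutorial and may differ slightly across verl versions):

.. code-block:: python

    import ray

    from verl.single_controller.base import Worker
    from verl.single_controller.base.decorator import Dispatch, register
    from verl.single_controller.ray import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup


    @ray.remote
    class EchoWorker(Worker):
        """Toy worker: every rank in the resource pool answers the same broadcast call."""

        @register(dispatch_mode=Dispatch.ONE_TO_ALL)
        def echo(self, msg):
            # ONE_TO_ALL broadcasts msg to every worker; the replies are collected into a list
            return msg


    if __name__ == "__main__":
        ray.init()
        # one node contributing 4 worker processes, each bound to one GPU
        resource_pool = RayResourcePool(process_on_nodes=[4], use_gpu=True)
        worker_cls = RayClassWithInitArgs(cls=EchoWorker)
        worker_group = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=worker_cls)
        print(worker_group.echo("hello"))  # one reply per worker in the pool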
-------------------------------------------------------------------------------- /vid_wm/verl/docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme 10 | 11 | # pin tokenizers version to avoid env_logger version req 12 | tokenizers==0.19.1 13 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=2 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path\ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=16 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | data_path=$HOME/data/rlhf/gsm8k/test.parquet 4 | save_path=$HOME/data/rlhf/math/deepseek_v2_lite_gen_test.parquet 5 | model_path=deepseek-ai/deepseek-llm-7b-chat 6 | 7 | python3 -m verl.trainer.main_generation \ 8 | trainer.nnodes=1 \ 9 | trainer.n_gpus_per_node=8 \ 10 | data.path=$data_path \ 11 | data.prompt_key=prompt \ 12 | data.n_samples=1 \ 13 | data.output_path=$save_path \ 14 | model.path=$model_path \ 15 | +model.trust_remote_code=True \ 16 | rollout.temperature=1.0 \ 17 | rollout.top_k=50 \ 18 | rollout.top_p=0.7 \ 19 | rollout.prompt_length=2048 \ 20 | rollout.response_length=1024 \ 21 | rollout.tensor_model_parallel_size=2 \ 22 | rollout.gpu_memory_utilization=0.8 23 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_deepseek_6b7.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | +data.prompt_dict_keys=['question'] \ 21 | +data.response_dict_keys=['answer'] \ 22 | data.micro_batch_size_per_gpu=4 \ 23 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 24 | trainer.default_local_dir=$save_path \ 25 | trainer.project_name=gsm8k-sft \ 26 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 27 | trainer.total_epochs=4 \ 28 | trainer.logger=['console','wandb'] \ 29 | trainer.default_hdfs_dir=null $@
-------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | +data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size_per_gpu=4 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_gemma_7b.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=prompt \ 19 | data.response_key=answer \ 20 | data.micro_batch_size_per_gpu=4 \ 21 | model.partial_pretrain=google/gemma-1.1-7b-it \ 22 | trainer.default_local_dir=$save_path \ 23 | trainer.project_name=gsm8k-sft \ 24 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 25 | trainer.total_epochs=4 \ 26 | trainer.logger=['console','wandb'] \ 27 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_qwen_05_peft.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_epochs=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 |
model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | trainer.default_local_dir=$save_path \ 26 | trainer.project_name=gsm8k-sft \ 27 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2 \ 28 | trainer.logger=['console'] \ 29 | trainer.total_training_steps=1 \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.default_hdfs_dir=null $@ \ 31 | ulysses_sequence_parallel_size=2 \ 32 | use_remove_padding=true 33 | -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 16 | -m verl.trainer.fsdp_sft_trainer \ 17 | data.train_files=$HOME/data/multiturn/train.parquet \ 18 | data.val_files=$HOME/data/multiturn/test.parquet \ 19 | data.multiturn.enable=true \ 20 | data.multiturn.messages_key=messages \ 21 | data.micro_batch_size=4 \ 22 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 23 |
trainer.default_local_dir=$save_path \ 24 | trainer.project_name=multiturn-sft \ 25 | trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \ 26 | trainer.logger=['console'] \ 27 | trainer.total_training_steps=1 \ 28 | trainer.default_hdfs_dir=null $@ \ 29 | ulysses_sequence_parallel_size=2 \ 30 | use_remove_padding=true -------------------------------------------------------------------------------- /vid_wm/verl/examples/sft/videogpt/run.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run.sh [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_vgpt_trainer \ 16 | data.train_batch_size=16 \ 17 | data.micro_batch_size_per_gpu=2 \ 18 | model.tokenizer_path=ivideogpt/pretrained_models/checkpoint-tokenizer400000 \ 19 | trainer.total_training_steps=1000000 \ 20 | trainer.default_local_dir=$save_path \ 21 | trainer.project_name=vgpt-pt \ 22 | trainer.experiment_name=vgpt-pt \ 23 | trainer.logger=['console','wandb'] \ 24 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/configs/frac_action_ranges.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/verl/ivideogpt/configs/frac_action_ranges.pth -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/configs/llama.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 1024, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 4096, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 16, 16 | "num_hidden_layers": 24, 17 | "num_key_value_heads": 16, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/configs/llama_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaModel" 4 | ], 5 | "vocab_size": 4633, 6 | "bos_token_id": 4631, 7 | "eos_token_id": 4632, 8 | "hidden_act": "silu", 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "max_length": 8192, 13 | "max_position_embeddings": 8192, 14 | "model_type": "llama", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "num_key_value_heads": 12, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.32.0.dev0", 22 | "use_cache": true 23 | } -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_dataloader import SimpleRoboticDataLoaderv2, EvalDataLoader 2 | from .dataset_mixes import 
DATASET_NAMED_MIXES 3 | -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/scripts/summarize_action_ranges.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import numpy as np 4 | from tqdm import tqdm 5 | import torch 6 | 7 | root_path = '/home/NAS/rl_data/frame_action_datasets/fractal20220817_data/' 8 | files = glob.glob(os.path.join(root_path, '*.npz')) 9 | 10 | max_actions = np.ones((13, ), dtype=np.float32) * -1e5 11 | min_actions = np.ones((13, ), dtype=np.float32) * 1e5 12 | 13 | for file in tqdm(files): 14 | data = np.load(file) 15 | actions = data['action'] 16 | max_actions = np.maximum(max_actions, actions.max(axis=0)) 17 | min_actions = np.minimum(min_actions, actions.min(axis=0)) 18 | 19 | action_ranges = np.stack([min_actions, max_actions], axis=1) 20 | action_ranges = torch.from_numpy(action_ranges).float() 21 | torch.save(action_ranges, 'ivideogpt/configs/frac_action_ranges.pth') -------------------------------------------------------------------------------- /vid_wm/verl/ivideogpt/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .vq_model import CNNFSQModel256 2 | from ivideogpt.ctx_tokenizer.compressive_vq_model import CompressiveVQModelFSQ 3 | 4 | TOKENIZER = { 5 | "cnn": CNNFSQModel256, 6 | "ctx_cnn": CompressiveVQModelFSQ, 7 | } -------------------------------------------------------------------------------- /vid_wm/verl/recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -uxo pipefail 3 | 4 | export VERL_HOME=${VERL_HOME:-"${HOME}/verl"} 5 | export TRAIN_FILE=${TRAIN_FILE:-"${VERL_HOME}/data/dapo-math-17k.parquet"} 6 | export TEST_FILE=${TEST_FILE:-"${VERL_HOME}/data/aime-2024.parquet"} 7 | 8 | mkdir -p "${VERL_HOME}/data" 9 | 10 | wget -O "${TRAIN_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k/resolve/main/data/dapo-math-17k.parquet?download=true" 11 | 12 | wget -O "${TEST_FILE}" "https://huggingface.co/datasets/BytedTsinghua-SIA/AIME-2024/resolve/main/data/aime-2024.parquet?download=true" -------------------------------------------------------------------------------- /vid_wm/verl/recipe/prime/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek R1 Reproduction 2 | 3 | This recipe is under development, if you are interested, checkout the TODO list and join this project! 
https://github.com/volcengine/verl/issues/708 4 | 5 | ## Reproducing Evaluation 6 | 7 | Eval Results of DS-R1-Distill-Qwen2.5-1.5B (k=8) 8 | 9 | Dataset | Test Results | Reported 10 | -- | -- | -- 11 | GPQA Diamond | 35.3 | 33.8 12 | LiveCodeBench | 16.9 | 16.9 13 | AIME 2024 | 30.4 | 28.9 14 | CNMO 2024 (en) | 45.1 | - 15 | CNMO 2024 (zh) | 41.0 | - 16 | 17 | --- 18 | 19 | Eval Results (DS-R1) 20 | 21 | Dataset | Test Results (k=1) | Test Results (k=4) | Reported 22 | -- | -- | -- | -- 23 | GPQA Diamond | 67.7 | 69.6 | 71.5 24 | LiveCodeBench | 64.7 | 63.1 | 65.9 25 | AIME 2024 | 86.7 | 79.2 | 79.8 26 | CNMO 2024 | 75.0 | 78.5 | 78.8 27 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/reward_score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | def reward_func(data_source, solution_str, ground_truth, extra_info=None): 17 | if data_source in ['Maxwell-Jia/AIME_2024', "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]: 18 | from recipe.r1.tasks import math 19 | return math.compute_score(solution_str, ground_truth) 20 | elif data_source == 'Idavidrein/gpqa': 21 | from recipe.r1.tasks import gpqa 22 | return gpqa.compute_score(solution_str, ground_truth) 23 | elif data_source in ['livecodebench/code_generation_lite', 'livecodebench/code_generation']: 24 | from recipe.r1.tasks import livecodebench 25 | return livecodebench.compute_score(solution_str, ground_truth) 26 | else: 27 | raise NotImplementedError 28 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=Qwen/DeepSeek-R1-Distill-Qwen-1.5B 2 | DATA_PATH=/workspace/datasets/r1_bench 3 | 4 | # Eval Data Process 5 | python3 -m recipe.r1.data_process \ 6 | --local_dir $DATA_PATH \ 7 | --tasks all 8 | 9 | # Generation 10 | python3 -m verl.trainer.main_generation \ 11 | trainer.nnodes=1 \ 12 | trainer.n_gpus_per_node=8 \ 13 | data.path=$DATA_PATH/test.parquet \ 14 | data.prompt_key=prompt \ 15 | data.batch_size=1024 \ 16 | data.n_samples=8 \ 17 | data.output_path=$DATA_PATH/test-output-8.parquet \ 18 | model.path=$MODEL_PATH \ 19 | rollout.temperature=0.6 \ 20 | rollout.top_p=0.95 \ 21 | rollout.prompt_length=1024 \ 22 | rollout.response_length=32768 \ 23 | rollout.tensor_model_parallel_size=1 \ 24 | rollout.gpu_memory_utilization=0.9 \ 25 | rollout.max_num_batched_tokens=65536 26 | 27 | # Evaluation 28 | python3 -m recipe.r1.main_eval \ 29 | data.path=$DATA_PATH/test-output-8.parquet \ 30 | data.prompt_key=prompt \ 31 | data.response_key=responses \ 32 | custom_reward_function.path=recipe/r1/reward_score.py \ 33 | custom_reward_function.name=reward_func 34 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | 17 | # Extraction Template from https://github.com/openai/simple-evals/blob/90e3e821cabba2aeb6be651dcb662b253df04225/common.py#L25 18 | ANSWER_PATTERN_MULTICHOICE = r"(?i)Answer[ \t]*:[ \t]*\$?([A-D])\$?" 19 | 20 | 21 | def compute_score(solution_str, ground_truth) -> float: 22 | match = re.search(ANSWER_PATTERN_MULTICHOICE, solution_str) 23 | extracted_answer = match.group(1) if match else None 24 | score = 1.0 if extracted_answer == ground_truth else 0.0 25 | return score 26 | -------------------------------------------------------------------------------- /vid_wm/verl/recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | try: 16 | from math_verify.metric import math_metric 17 | from math_verify.parser import LatexExtractionConfig, ExprExtractionConfig 18 | except ImportError: 19 | print("To use Math-Verify, please install it first by running `pip install math-verify`.") 20 | 21 | 22 | def compute_score(model_output: str, ground_truth: str) -> bool: 23 | verify_func = math_metric( 24 | gold_extraction_target=(LatexExtractionConfig(),), 25 | pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), 26 | ) 27 | ret_score = 0. 
28 | 29 | # Wrap the ground truth in \boxed{} format for verification 30 | ground_truth_boxed = "\\boxed{" + ground_truth + "}" 31 | try: 32 | ret_score, _ = verify_func([ground_truth_boxed], [model_output]) 33 | except Exception as e: 34 | pass 35 | 36 | return ret_score 37 | -------------------------------------------------------------------------------- /vid_wm/verl/requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | liger-kernel 9 | numpy 10 | pandas 11 | datasets 12 | peft 13 | pyarrow>=15.0.0 14 | pybind11 15 | pylatexenc 16 | pylint==3.3.6 17 | ray[default] 18 | tensordict<=0.6.2 19 | torchdata 20 | transformers 21 | # vllm==0.6.3.post1 22 | wandb 23 | -------------------------------------------------------------------------------- /vid_wm/verl/requirements_sglang.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt records the full set of dependencies for development 2 | accelerate 3 | codetiming 4 | datasets 5 | dill 6 | flash-attn 7 | hydra-core 8 | numpy 9 | pandas 10 | datasets 11 | peft 12 | pyarrow>=15.0.0 13 | pybind11 14 | pylatexenc 15 | ray[default]>=2.10 16 | tensordict<=0.6.2 17 | torchdata 18 | torchvision 19 | transformers 20 | wandb 21 | sglang[all]==0.4.4.post3 22 | torch-memory-saver>=0.0.5 -------------------------------------------------------------------------------- /vid_wm/verl/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | python3 -m yapf -ir -vv --style ./.style.yapf verl tests examples recipe scripts 4 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/tests/distributed/run_all.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | #!/usr/bin/env bash 16 | 17 | set -e -x 18 | torchrun --nproc-per-node=4 --standalone tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thuml/RLVR-World/66bce39e195ea3521025d94bdd82eafe8823dc45/vid_wm/verl/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token 
}}{% endif %}" 18 | } -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as below: 4 | 5 | The prompt is a sequence of numbers with fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should be modulo with max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is defined in verl/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for better understanding. 20 | 21 | 22 | 23 | # Run experiments 24 | 25 | The users are required to specify the config path and config name (and the relative model config path to the current working directory) 26 | 27 | ```bash 28 | # cd examples/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | 17 | 18 | def check_congratulations_in_file(output_file): 19 | with open(output_file, 'r') as f: 20 | output = f.read() 21 | 22 | success_message = "Congratulations!!! You have called my_reward_function successfully!!!" 23 | assert success_message in output, f'Success message of my_reward_function not found in {output_file}' 24 | print("Check passes") 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--output_file', required=True, type=str) 30 | 31 | args = parser.parse_args() 32 | 33 | check_congratulations_in_file(args.output_file) 34 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ['DigitCompletion'] -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 4 | --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B 5 | 6 | python3 -m verl.trainer.main_generation \ 7 | trainer.nnodes=1 \ 8 | trainer.n_gpus_per_node=8 \ 9 | data.path=$HOME/data/r1/test.parquet \ 10 | data.prompt_key=prompt \ 11 | data.batch_size=1024 \ 12 | data.n_samples=1 \ 13 | data.output_path=$HOME/data/r1/test-output-k1.parquet \ 14 | model.path=$HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ 15 | rollout.temperature=0.6 \ 16 | rollout.top_p=0.95 \ 17 | rollout.prompt_length=1024 \ 18 | rollout.response_length=32768 \ 19 | rollout.tensor_model_parallel_size=1 \ 20 | rollout.gpu_memory_utilization=0.95 \ 21 | rollout.max_num_batched_tokens=65536 \ 22 | rollout.enforce_eager=False \ 23 | rollout.free_cache_engine=False 24 | 25 | python3 -m recipe.r1.main_eval \ 26 | data.path=$HOME/data/r1/test-output-k1.parquet \ 27 | data.prompt_key=prompt \ 28 | data.response_key=responses \ 29 | custom_reward_function.path=recipe/r1/reward_score.py \ 30 | custom_reward_function.name=reward_func -------------------------------------------------------------------------------- /vid_wm/verl/tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | huggingface-cli 
download Qwen/Qwen2.5-0.5B --local-dir $HOME/models/Qwen/Qwen2.5-0.5B 6 | 7 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 8 | algorithm.adv_estimator=gae \ 9 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 10 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 11 | actor_rollout_ref.actor.use_kl_loss=False \ 12 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 13 | actor_rollout_ref.rollout.name=vllm \ 14 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 15 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 16 | critic.model.path=Qwen/Qwen2.5-0.5B \ 17 | critic.model.use_remove_padding=True \ 18 | algorithm.use_kl_in_reward=False \ 19 | trainer.total_epochs=1 -------------------------------------------------------------------------------- /vid_wm/verl/tests/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- 1 | # Tested with 1 & 4 GPUs 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_gen_qwen05.sh <nproc_per_node> <save_path> [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | infer_tp=${3:-2} # Default tensor parallel size to 2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-0.5B-Instruct 17 | 18 | python3 -m verl.trainer.main_generation \ 19 | trainer.nnodes=1 \ 20 | trainer.n_gpus_per_node=$nproc_per_node \ 21 | data.path=$HOME/data/gsm8k/test.parquet \ 22 | data.prompt_key=prompt \ 23 | data.n_samples=1 \ 24 | data.output_path=$save_path \ 25 | model.path=$HOME/models/Qwen/Qwen2.5-0.5B-Instruct \ 26 | +model.trust_remote_code=True \ 27 | rollout.temperature=1.0 \ 28 | rollout.top_k=50 \ 29 | rollout.top_p=0.7 \ 30 | rollout.prompt_length=2048 \ 31 | rollout.response_length=1024 \ 32 | rollout.tensor_model_parallel_size=$infer_tp \ 33 | rollout.gpu_memory_utilization=0.8 34 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/kill_github_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "Usage: $0 YOUR_GITHUB_TOKEN" 5 | echo "Please provide exactly one input argument for your github token." 6 | exit 1 7 | fi 8 | 9 | # Set your GitHub repository details 10 | OWNER="volcengine" 11 | REPO="verl" 12 | TOKEN=$1 13 | 14 | # API URL for workflow runs 15 | API_URL="https://api.github.com/repos/$OWNER/$REPO/actions/runs?status=queued" 16 | 17 | # Check required commands 18 | command -v jq >/dev/null 2>&1 || { echo "jq is required but not installed. Aborting."; exit 1; } 19 | 20 | # Get queued workflow runs 21 | response=$(curl -s -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$API_URL") 22 | 23 | # Run this for debugging 24 | # echo $response 25 | 26 | # Extract run IDs 27 | queued_run_ids=$(echo "$response" | jq -r '.workflow_runs[] | .id') 28 | 29 | if [ -z "$queued_run_ids" ]; then 30 | echo "No queued workflow runs found." 31 | exit 0 32 | fi 33 | 34 | # Cancel each queued run 35 | for run_id in $queued_run_ids; do 36 | echo "Cancelling run $run_id" 37 | cancel_url="https://api.github.com/repos/$OWNER/$REPO/actions/runs/$run_id/cancel" 38 | curl -s -X POST -H "Authorization: token $TOKEN" -H "Accept: application/vnd.github.v3+json" "$cancel_url" 39 | done 40 | 41 | echo "Cancelled all queued workflow runs."
42 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (Only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - On another terminal, Run the client 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /vid_wm/verl/tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone --nnodes=1 --nproc_per_node=8 \ 6 | -m verl.trainer.fsdp_sft_trainer \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size_per_gpu=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | trainer.default_local_dir=$HOME/ckpts/ \ 16 | trainer.project_name=qwen2.5-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 18 | trainer.total_training_steps=1 \ 19 | trainer.logger=['console'] \ 20 | trainer.default_hdfs_dir=null $@ 21 | 22 | rm -rf $HOME/ckpts/ -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_multiturn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ "$#" -lt 2 ]; then 5 | echo "Usage: run_qwen_05_sp2.sh <nproc_per_node> <save_path> [other_configs...]" 6 | exit 1 7 | fi 8 | 9 | nproc_per_node=$1 10 | save_path=$2 11 | 12 | # Shift the arguments so $@ refers to the rest 13 | shift 2 14 | 15 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 16 | -m verl.trainer.fsdp_sft_trainer \ 17 | data.train_files=$HOME/data/multiturn/train.parquet \ 18 |
data.val_files=$HOME/data/multiturn/test.parquet \ 19 | data.multiturn.enable=true \ 20 | data.multiturn.messages_key=messages \ 21 | data.micro_batch_size=4 \ 22 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 23 | trainer.default_local_dir=$save_path \ 24 | trainer.project_name=multiturn-sft \ 25 | trainer.experiment_name=multiturn-sft-qwen-2.5-0.5b-instruct-sp2 \ 26 | trainer.logger=['console'] \ 27 | trainer.total_training_steps=1 \ 28 | trainer.default_hdfs_dir=null $@ \ 29 | ulysses_sequence_parallel_size=2 \ 30 | use_remove_padding=true -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_qwen05_peft.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_sft_qwen05_peft.sh <nproc_per_node> <save_path> [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1 11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | optim.lr=1e-4 \ 23 | +data.prompt_dict_keys=['question'] \ 24 | +data.response_dict_keys=['answer'] \ 25 | data.micro_batch_size_per_gpu=4 \ 26 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 27 | trainer.default_local_dir=$save_path \ 28 | trainer.project_name=gsm8k-sft \ 29 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct \ 30 | trainer.logger=['console'] \ 31 | trainer.total_training_steps=1 \ 32 | trainer.default_hdfs_dir=null $@ \ 33 | model.lora_rank=32\ 34 | model.lora_alpha=16 \ 35 | model.target_modules=all-linear 36 | 37 | # Or you can do this: 38 | # model.target_modules=[q_proj,v_proj] \ 39 | -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_qwen05_sp2_liger.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | if [ "$#" -lt 2 ]; then 4 | echo "Usage: run_sft_qwen05_sp2_liger.sh <nproc_per_node> <save_path> [other_configs...]" 5 | exit 1 6 | fi 7 | 8 | nproc_per_node=$1 9 | save_path=$2 10 | 11 | # Shift the arguments so $@ refers to the rest 12 | shift 2 13 | 14 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 15 | -m verl.trainer.fsdp_sft_trainer \ 16 | data.train_files=$HOME/data/gsm8k/train.parquet \ 17 | data.val_files=$HOME/data/gsm8k/test.parquet \ 18 | data.prompt_key=extra_info \ 19 | data.response_key=extra_info \ 20 | optim.lr=1e-4 \ 21 | +data.prompt_dict_keys=['question'] \ 22 | +data.response_dict_keys=['answer'] \ 23 | data.micro_batch_size=4 \ 24 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 25 | model.use_liger=True \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-qwen-2.5-0.5b-instruct-sp2-liger \ 29 | trainer.logger=['console'] \ 30 | trainer.total_training_steps=1 \ 31 | trainer.default_hdfs_dir=null $@ \ 32 | ulysses_sequence_parallel_size=2 \ 33 | use_remove_padding=true -------------------------------------------------------------------------------- /vid_wm/verl/tests/sft/run_sft_sp_loss_match.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | torchrun --standalone
--nnodes=1 --nproc_per_node=8 \ 6 | tests/sft/test_sp_loss_match.py \ 7 | data.train_files=$HOME/data/gsm8k/train.parquet \ 8 | data.val_files=$HOME/data/gsm8k/test.parquet \ 9 | data.prompt_key=extra_info \ 10 | data.response_key=extra_info \ 11 | +data.prompt_dict_keys=['question'] \ 12 | +data.response_dict_keys=['answer'] \ 13 | data.micro_batch_size=32 \ 14 | model.partial_pretrain=Qwen/Qwen2.5-0.5B-Instruct \ 15 | ulysses_sequence_parallel_size=2 \ 16 | use_remove_padding=True \ 17 | trainer.default_local_dir=$HOME/ckpts/ \ 18 | trainer.project_name=qwen2.5-sft \ 19 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 20 | trainer.total_training_steps=1 \ 21 | trainer.logger=['console'] \ 22 | trainer.default_hdfs_dir=null $@ 23 | 24 | rm -rf $HOME/ckpts/ 25 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | 29 | from . import single_controller 30 | 31 | __all__ = ['DataProto', "__version__"] 32 | 33 | if os.getenv('VERL_USE_MODELSCOPE', 'False').lower() == 'true': 34 | import importlib 35 | if importlib.util.find_spec("modelscope") is None: 36 | raise ImportError(f'You are using the modelscope hub, please install modelscope by `pip install modelscope -U`') 37 | # Patch hub to download models from modelscope to speed up. 38 | from modelscope.utils.hf_util import patch_hub 39 | patch_hub() 40 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Bytedance Ltd. and/or its affiliates 2 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .gpt_model import gptmodel_forward -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .modeling_qwen2_megatron import ( 16 | # original model with megatron 17 | ParallelQwen2Model, 18 | ParallelQwen2ForCausalLM, 19 | # rmpad with megatron 20 | ParallelQwen2ForCausalLMRmPad, 21 | ParallelQwen2ForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelQwen2ForCausalLMRmPadPP, 24 | ParallelQwen2ForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelQwen2Attention 16 | from .parallel_decoder import ParallelQwen2DecoderLayer, ParallelQwen2DecoderLayerRmPad 17 | from .parallel_mlp import ParallelQwen2MLP 18 | from .parallel_rmsnorm import ParallelQwen2RMSNorm 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | # Note(haibin.lin): single_controller.__version__ is deprecated 20 | with open(os.path.join(os.path.join(version_folder, os.pardir), 'version/version')) as f: 21 | __version__ = f.read().strip() 22 | 23 | from . import base 24 | from .base import * 25 | 26 | __all__ = base.__all__ -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | 18 | __all__ = ['Worker', 'WorkerGroup', 'ClassWithInitArgs', 'ResourcePool'] -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | 21 | def __init__(self, rank_zero_info): 22 | self.rank_zero_info = rank_zero_info 23 | 24 | def get_rank_zero_info(self): 25 | return self.rank_zero_info 26 | 27 | 28 | def create_worker_group_register_center(name, info): 29 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 30 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023-2024 SGLang Team 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # ============================================================================== 14 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 15 | # 16 | # Licensed under the Apache License, Version 2.0 (the "License"); 17 | # you may not use this file except in compliance with the License. 18 | # You may obtain a copy of the License at 19 | # 20 | # http://www.apache.org/licenses/LICENSE-2.0 21 | # 22 | # Unless required by applicable law or agreed to in writing, software 23 | # distributed under the License is distributed on an "AS IS" BASIS, 24 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | # See the License for the specific language governing permissions and 26 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model 7 | 8 | custom_reward_function: 9 | path: null 10 | name: compute_score 11 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | temperature: 1.0 18 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 19 | top_p: 0.7 20 | prompt_length: 1536 21 | response_length: 512 22 | # for vllm rollout 23 | dtype: bfloat16 # should align with FSDP 24 | gpu_memory_utilization: 0.5 25 | ignore_eos: False 26 | enforce_eager: True 27 | free_cache_engine: True 28 | load_format: dummy_dtensor 29 | tensor_model_parallel_size: 1 30 | max_num_batched_tokens: 8192 31 | max_model_len: null 32 | max_num_seqs: 1024 33 | log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu 34 | log_prob_micro_batch_size_per_gpu: 8 35 | # for fire vllm rollout 36 | use_fire_sampling: False # enable FIRE https://arxiv.org/abs/2410.21236 37 | # for hf rollout 38 | do_sample: True 39 | disable_log_stats: True 40 | enable_chunked_prefill: True 41 | n: 1 42 | actor: 43 | strategy: fsdp # This is for backward-compatibility 44 | ulysses_sequence_parallel_size: 1 # sp size 45 | fsdp_config: 46 | fsdp_size: -1 -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import tokenizer 16 | from .tokenizer import hf_tokenizer, hf_processor 17 | 18 | __all__ = tokenizer.__all__ -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict 16 | 17 | from omegaconf import DictConfig 18 | 19 | 20 | def update_dict_with_config(dictionary: Dict, config: DictConfig): 21 | for key in dictionary: 22 | if hasattr(config, key): 23 | dictionary[key] = getattr(config, key) 24 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset Format 2 | ## RLHF dataset 3 | We combine all the data sources into a single parquet file. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answers in a particular format so that we can extract the answers. 4 | 5 | Math problems 6 | ```json 7 | { 8 | "data_source": "openai/gsm8k", 9 | "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May.
How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}], 10 | "ability": "math", 11 | "reward_model": { 12 | "style": "rule", 13 | "ground_truth": ["72"] 14 | }, 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | import torch.distributed as dist 17 | import logging 18 | 19 | 20 | def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0): 21 | if (not dist.is_initialized()) or (rank is None) or (dist.get_rank() == rank): 22 | memory_allocated = torch.cuda.memory_allocated() / 1024**3 23 | memory_reserved = torch.cuda.memory_reserved() / 1024**3 24 | 25 | message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}' 26 | 27 | if logger is None: 28 | print(message) 29 | else: 30 | logger.log(msg=message, level=level) 31 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities for distributed training.""" 15 | import os 16 | 17 | 18 | def initialize_global_process_group(timeout_second=36000): 19 | import torch.distributed 20 | from datetime import timedelta 21 | torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second)) 22 | local_rank = int(os.environ["LOCAL_RANK"]) 23 | rank = int(os.environ["RANK"]) 24 | world_size = int(os.environ["WORLD_SIZE"]) 25 | 26 | if torch.distributed.is_initialized(): 27 | torch.cuda.set_device(local_rank) 28 | return local_rank, rank, world_size 29 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import os 17 | import torch 18 | 19 | 20 | def set_basic_config(level): 21 | """ 22 | This function sets the global logging format and level. It is called when verl is imported 23 | """ 24 | logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level) 25 | 26 | 27 | def log_to_file(string): 28 | print(string) 29 | if os.path.isdir('logs'): 30 | with open(f'logs/log_{torch.distributed.get_rank()}', 'a+') as f: 31 | f.write(string + '\n') 32 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | """ 15 | Contains commonly used utilities for ray 16 | """ 17 | 18 | import ray 19 | 20 | import concurrent.futures 21 | 22 | 23 | def parallel_put(data_list, max_workers=None): 24 | 25 | def put_data(index, data): 26 | return index, ray.put(data) 27 | 28 | if max_workers is None: 29 | max_workers = min(len(data_list), 16) 30 | 31 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 32 | data_list_f = [executor.submit(put_data, i, data) for i, data in enumerate(data_list)] 33 | res_lst = [] 34 | for future in concurrent.futures.as_completed(data_list_f): 35 | res_lst.append(future.result()) 36 | 37 | # reorder based on index 38 | output = [None for _ in range(len(data_list))] 39 | for res in res_lst: 40 | index, data_ref = res 41 | output[index] = data_ref 42 | 43 | return output 44 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | from mathruler.grader import extract_boxed_content, grade_answer 17 | 18 | 19 | def format_reward(predict_str: str) -> float: 20 | pattern = re.compile(r'<think>.*</think>.*\\boxed\{.*\}.*', re.DOTALL) 21 | match_result = re.fullmatch(pattern, predict_str) 22 | return 1.0 if match_result else 0.0 23 | 24 | 25 | def acc_reward(predict_str: str, ground_truth: str) -> float: 26 | answer = extract_boxed_content(predict_str) 27 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 28 | 29 | 30 | def compute_score(predict_str: str, ground_truth: str) -> float: 31 | return 0.9 * acc_reward(predict_str, ground_truth) + 0.1 * format_reward(predict_str) 32 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.2.0.dev 2 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive import NaiveRewardManager 16 | from .prime import PrimeRewardManager 17 | from .dapo import DAPORewardManager 18 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 
31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | from .sglang_rollout import SGLangRollout 15 | -------------------------------------------------------------------------------- /vid_wm/verl/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | --------------------------------------------------------------------------------