├── .DS_Store ├── LICENSE ├── README.md ├── Synthetic_Data_RL.pdf ├── TinyZero ├── .DS_Store ├── Notice.txt ├── docker │ ├── Dockerfile.ngc.vllm │ └── Dockerfile.vemlp.vllm.te ├── docs │ ├── .DS_Store │ ├── Makefile │ ├── README.md │ ├── _static │ │ └── logo.png │ ├── advance │ │ ├── dpo_extension.rst │ │ ├── fsdp_extension.rst │ │ ├── megatron_extension.rst │ │ └── placement.rst │ ├── conf.py │ ├── examples │ │ ├── config.rst │ │ ├── gsm8k_example.rst │ │ └── ppo_code_architecture.rst │ ├── experiment │ │ └── ppo.rst │ ├── faq │ │ └── faq.rst │ ├── index.rst │ ├── preparation │ │ ├── prepare_data.rst │ │ └── reward_function.rst │ ├── requirements-docs.txt │ ├── start │ │ ├── install.rst │ │ └── quickstart.rst │ └── workers │ │ ├── fsdp_workers.rst │ │ ├── megatron_workers.rst │ │ └── ray_trainer.rst ├── examples │ ├── .DS_Store │ ├── data_preprocess │ │ ├── .DS_Store │ │ ├── BM25_retriever.py │ │ ├── __pycache__ │ │ │ └── BM25_retriever.cpython-310.pyc │ │ ├── arth.py │ │ ├── bfcl_simple.py │ │ ├── countdown.py │ │ ├── dentist_book.pt │ │ ├── dentist_qa.py │ │ ├── full_hh_rlhf.py │ │ ├── gsm8k.py │ │ ├── hellaswag.py │ │ ├── logiqa.py │ │ ├── math_dataset.py │ │ ├── mednli.py │ │ └── multiply.py │ ├── generation │ │ └── run_deepseek_v2_lite_math.sh │ ├── grpo_trainer │ │ ├── run_deepseek7b_llm.sh │ │ ├── run_deepseek7b_llm_seq_balance.sh │ │ ├── run_qwen2-7b.sh │ │ └── run_qwen2-7b_seq_balance.sh │ ├── ppo_trainer │ │ ├── run_deepseek7b_llm.sh │ │ ├── run_deepseek7b_llm_sp2.sh │ │ ├── run_deepseek_full_hh_rlhf.sh │ │ ├── run_deepseek_math_gsm8k_megatron.sh │ │ ├── run_deepseek_megatron.sh │ │ ├── run_gemma.sh │ │ ├── run_qwen2-7b.sh │ │ ├── run_qwen2-7b_rm.sh │ │ ├── run_qwen2-7b_rm_seq_balance.sh │ │ ├── run_qwen2-7b_seq_balance.sh │ │ ├── run_qwen2.5-32b.sh │ │ └── verl_getting_started.ipynb │ ├── ray │ │ └── tutorial.ipynb │ ├── sft │ │ └── gsm8k │ │ │ ├── run_deepseek_6b7.sh │ │ │ ├── run_gemma_2b.sh │ │ │ └── run_gemma_7b.sh │ └── split_placement │ │ ├── 
README.md │ │ ├── config │ │ └── ppo_trainer_split.yaml │ │ ├── main_ppo_split.py │ │ ├── run_deepseek7b_llm.sh │ │ └── split_monkey_patch.py ├── init.py ├── patches │ └── megatron_v4.patch ├── requirements.txt ├── retriever.py ├── scripts │ ├── format.sh │ ├── train_tiny_a100_grpo.sh │ ├── train_tiny_zero.sh │ ├── train_tiny_zero_a100_grpo.sh │ └── train_tiny_zero_a100_grpo_14b.sh ├── setup.py ├── test_results.py ├── tests │ ├── .DS_Store │ ├── __init__.py │ ├── e2e │ │ ├── __init__.py │ │ ├── arithmetic_sequence │ │ │ ├── data │ │ │ │ ├── create_dataset.py │ │ │ │ ├── test.parquet │ │ │ │ └── train.parquet │ │ │ ├── model │ │ │ │ ├── config.json │ │ │ │ ├── create_model_tokenizer.py │ │ │ │ ├── generation_config.json │ │ │ │ ├── model.safetensors │ │ │ │ └── tokenizer_config.json │ │ │ └── rl │ │ │ │ ├── README.md │ │ │ │ ├── config │ │ │ │ └── ray_trainer.yaml │ │ │ │ └── main_trainer.py │ │ ├── check_results.py │ │ ├── envs │ │ │ ├── __init__.py │ │ │ └── digit_completion │ │ │ │ ├── __init__.py │ │ │ │ ├── task.py │ │ │ │ └── tokenizer.py │ │ ├── run_qwen_gsm8k_function_rm.sh │ │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ │ ├── run_qwen_gsm8k_model_rm.sh │ │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ │ ├── run_ray_trainer.sh │ │ └── run_ray_trainer_rmpad.sh │ ├── gpu_utility │ │ ├── test_memory_buffers.py │ │ ├── test_ops.py │ │ └── test_torch_functional.py │ ├── model │ │ ├── test_transformer.py │ │ └── test_transformers_ulysses.py │ ├── ray │ │ ├── check_worker_alive │ │ │ └── main.py │ │ ├── detached_worker │ │ │ ├── README.md │ │ │ ├── client.py │ │ │ ├── run.sh │ │ │ └── server.py │ │ ├── test_check_worker_alive.py │ │ ├── test_colocated_workers.py │ │ ├── test_data_transfer.py │ │ ├── test_driverfunc_to_worker.py │ │ ├── test_high_level_scheduling_api.py │ │ ├── test_ray_local_envs.py │ │ ├── test_rvdz.py │ │ ├── test_worker_group_basics.py │ │ └── 
test_worker_group_torch.py │ ├── rollout │ │ ├── run_fsdp_vllm.py │ │ └── test_vllm_hf_loader.py │ ├── sanity │ │ ├── check_license.py │ │ └── test_import.py │ ├── utility │ │ └── test_tensor_dict_utilities.py │ └── verl │ │ └── utils │ │ └── dataset │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py ├── train_RL_base.sh ├── train_SFT_base.sh ├── train_base.sh └── verl.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt ├── activate.sh ├── img ├── Overviewv2.png ├── final-one.png ├── final1.4.png └── final2.4.png ├── requirements.txt ├── src ├── .DS_Store ├── data_generator │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ └── generator.cpython-39.pyc │ └── generator.py ├── eval │ ├── .DS_Store │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-39.pyc │ ├── model_eval.py │ └── tasks │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── task_manager.cpython-39.pyc │ │ ├── cfa │ │ ├── __init__.py │ │ ├── eval_function.py │ │ ├── get_fixed_options.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── cqa │ │ ├── __init__.py │ │ ├── eval_function.py │ │ ├── get_fixed_options.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ ├── process_prediction.py │ │ └── test.tsv │ │ ├── gpqa │ │ ├── __init__.py │ │ ├── eval_function.py │ │ ├── get_fixed_options.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── gsm8k │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── eval_function.cpython-39.pyc │ │ │ ├── get_output_instruction.cpython-39.pyc │ 
│ │ ├── process_and_save_dataset.cpython-39.pyc │ │ │ ├── process_label.cpython-39.pyc │ │ │ └── process_prediction.cpython-39.pyc │ │ ├── eval_function.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── logiqa │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── eval_function.cpython-39.pyc │ │ │ ├── get_output_instruction.cpython-39.pyc │ │ │ ├── process_and_save_dataset.cpython-39.pyc │ │ │ ├── process_label.cpython-39.pyc │ │ │ └── process_prediction.cpython-39.pyc │ │ ├── eval_function.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── math │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── eval_function.cpython-39.pyc │ │ │ ├── get_output_instruction.cpython-39.pyc │ │ │ ├── process_and_save_dataset.cpython-39.pyc │ │ │ ├── process_label.cpython-39.pyc │ │ │ └── process_prediction.cpython-39.pyc │ │ ├── eval_function.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── mednli │ │ ├── __init__.py │ │ ├── eval_function.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── medqa │ │ ├── __init__.py │ │ ├── eval_function.py │ │ ├── get_input_instruction.py │ │ ├── get_output_instruction.py │ │ ├── process_and_save_dataset.py │ │ ├── process_label.py │ │ └── process_prediction.py │ │ ├── task_manager.py │ │ └── test_to_sql │ │ ├── eval_function.py │ │ ├── process_label.py │ │ └── process_prediction.py ├── main.py ├── model_inference │ ├── __init__.py │ ├── batch_inference.py │ └── openai_call.py └── retriever │ ├── .DS_Store │ ├── 
BM25_retriever.py │ ├── __init__.py │ ├── __pycache__ │ ├── BM25_retriever.cpython-39.pyc │ └── __init__.cpython-39.pyc │ └── passages │ ├── .DS_Store │ └── __init__.py └── verl ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc ├── __init__.cpython-39.pyc ├── protocol.cpython-310.pyc └── protocol.cpython-39.pyc ├── i.py ├── init.py ├── models ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── registry.cpython-310.pyc │ └── registry.cpython-39.pyc ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ └── qwen2.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── __init__.cpython-39.pyc ├── base │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── decorator.cpython-310.pyc │ │ ├── decorator.cpython-39.pyc │ │ ├── worker.cpython-310.pyc │ │ ├── worker.cpython-39.pyc │ │ ├── worker_group.cpython-310.pyc │ │ └── worker_group.cpython-39.pyc │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── worker.cpython-310.pyc │ │ │ ├── worker.cpython-39.pyc │ │ │ ├── worker_group.cpython-310.pyc │ │ │ └── worker_group.cpython-39.pyc │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── ray.cpython-310.pyc │ │ │ └── ray.cpython-39.pyc │ │ └── ray.py │ ├── 
worker.py │ └── worker_group.py ├── ray │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── base.cpython-310.pyc │ │ ├── base.cpython-39.pyc │ │ ├── megatron.cpython-310.pyc │ │ └── megatron.cpython-39.pyc │ ├── base.py │ └── megatron.py └── version │ └── version ├── third_party ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── __init__.cpython-39.pyc └── vllm │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── __init__.cpython-39.pyc │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── arg_utils.cpython-310.pyc │ ├── arg_utils.cpython-39.pyc │ ├── config.cpython-310.pyc │ ├── config.cpython-39.pyc │ ├── dtensor_weight_loaders.cpython-310.pyc │ ├── dtensor_weight_loaders.cpython-39.pyc │ ├── hf_weight_loader.cpython-310.pyc │ ├── hf_weight_loader.cpython-39.pyc │ ├── llm.cpython-310.pyc │ ├── llm.cpython-39.pyc │ ├── llm_engine_sp.cpython-310.pyc │ ├── llm_engine_sp.cpython-39.pyc │ ├── megatron_weight_loaders.cpython-310.pyc 
│ ├── megatron_weight_loaders.cpython-39.pyc │ ├── model_loader.cpython-310.pyc │ ├── model_loader.cpython-39.pyc │ ├── model_runner.cpython-310.pyc │ ├── model_runner.cpython-39.pyc │ ├── parallel_state.cpython-310.pyc │ ├── parallel_state.cpython-39.pyc │ ├── spmd_gpu_executor.cpython-310.pyc │ ├── spmd_gpu_executor.cpython-39.pyc │ ├── tokenizer.cpython-310.pyc │ ├── tokenizer.cpython-39.pyc │ ├── worker.cpython-310.pyc │ └── worker.cpython-39.pyc │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── fsdp_sft_trainer.cpython-39.pyc │ ├── main_ppo.cpython-310.pyc │ └── main_ppo.cpython-39.pyc ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── core_algos.cpython-310.pyc │ │ ├── core_algos.cpython-39.pyc │ │ ├── ray_trainer.cpython-310.pyc │ │ └── ray_trainer.cpython-39.pyc │ ├── core_algos.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── ast.cpython-310.pyc │ ├── ast.cpython-39.pyc │ ├── countdown.cpython-310.pyc │ ├── countdown.cpython-39.pyc │ ├── dentist_qa.cpython-310.pyc │ ├── dentist_qa.cpython-39.pyc │ ├── distributed.cpython-39.pyc │ ├── flops_counter.cpython-310.pyc │ ├── flops_counter.cpython-39.pyc │ ├── fs.cpython-310.pyc │ ├── fs.cpython-39.pyc │ ├── fsdp_utils.cpython-310.pyc │ ├── fsdp_utils.cpython-39.pyc │ ├── 
gsm8k.cpython-310.pyc │ ├── gsm8k.cpython-39.pyc │ ├── hdfs_io.cpython-310.pyc │ ├── hdfs_io.cpython-39.pyc │ ├── import_utils.cpython-310.pyc │ ├── import_utils.cpython-39.pyc │ ├── logging_utils.cpython-310.pyc │ ├── logging_utils.cpython-39.pyc │ ├── logiqa.cpython-310.pyc │ ├── logiqa.cpython-39.pyc │ ├── math.cpython-310.pyc │ ├── math.cpython-39.pyc │ ├── mednli.cpython-310.pyc │ ├── mednli.cpython-39.pyc │ ├── model.cpython-310.pyc │ ├── model.cpython-39.pyc │ ├── multiply.cpython-310.pyc │ ├── multiply.cpython-39.pyc │ ├── py_functional.cpython-310.pyc │ ├── py_functional.cpython-39.pyc │ ├── seqlen_balancing.cpython-310.pyc │ ├── seqlen_balancing.cpython-39.pyc │ ├── tokenizer.cpython-310.pyc │ ├── tokenizer.cpython-39.pyc │ ├── torch_dtypes.cpython-310.pyc │ ├── torch_dtypes.cpython-39.pyc │ ├── torch_functional.cpython-310.pyc │ ├── torch_functional.cpython-39.pyc │ ├── tracking.cpython-310.pyc │ ├── tracking.cpython-39.pyc │ ├── ulysses.cpython-310.pyc │ └── ulysses.cpython-39.pyc ├── ast.py ├── config.py ├── countdown.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── rl_dataset.cpython-310.pyc │ │ ├── rl_dataset.cpython-39.pyc │ │ ├── rm_dataset.cpython-310.pyc │ │ ├── rm_dataset.cpython-39.pyc │ │ ├── sft_dataset.cpython-310.pyc │ │ └── sft_dataset.cpython-39.pyc │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── performance.cpython-310.pyc │ │ └── performance.cpython-39.pyc │ ├── performance.py │ └── trajectory_tracker.py ├── dentist_qa.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── gsm8k.py ├── gsm8k_2.py ├── hdfs_io.py ├── i.py ├── import_utils.py ├── init.py ├── logger │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── aggregate_logger.cpython-39.pyc │ └── aggregate_logger.py ├── 
logging_utils.py ├── logiqa.py ├── math.py ├── mednli.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── optimizer_config.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── multiply.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── ast.cpython-310.pyc │ │ ├── ast.cpython-39.pyc │ │ ├── countdown.cpython-310.pyc │ │ ├── countdown.cpython-39.pyc │ │ ├── dentist_qa.cpython-310.pyc │ │ ├── dentist_qa.cpython-39.pyc │ │ ├── gsm8k.cpython-310.pyc │ │ ├── gsm8k.cpython-39.pyc │ │ ├── logiqa.cpython-310.pyc │ │ ├── logiqa.cpython-39.pyc │ │ ├── math.cpython-310.pyc │ │ ├── math.cpython-39.pyc │ │ ├── mednli.cpython-310.pyc │ │ ├── mednli.cpython-39.pyc │ │ ├── multiply.cpython-310.pyc │ │ └── multiply.cpython-39.pyc │ ├── ast.py │ ├── countdown.py │ ├── dentist_qa.py │ ├── gsm8k.py │ ├── gsm8k_2.py │ ├── logiqa.py │ ├── math.py │ ├── mednli.py │ └── multiply.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py ├── ulysses.py ├── version │ └── version └── workers │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── fsdp_workers.cpython-310.pyc │ └── fsdp_workers.cpython-39.pyc │ ├── actor │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── base.cpython-310.pyc │ │ ├── base.cpython-39.pyc │ │ ├── dp_actor.cpython-310.pyc │ │ └── dp_actor.cpython-39.pyc │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── critic │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── base.cpython-310.pyc │ │ └── dp_critic.cpython-310.pyc │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── fsdp_workers.py │ ├── 
megatron_workers.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── rollout │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── base.cpython-310.pyc │ │ ├── base.cpython-39.pyc │ │ ├── hf_rollout.cpython-310.pyc │ │ ├── hf_rollout.cpython-39.pyc │ │ ├── tokenizer.cpython-310.pyc │ │ └── tokenizer.cpython-39.pyc │ ├── base.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── naive_rollout.cpython-310.pyc │ │ │ └── naive_rollout.cpython-39.pyc │ │ └── naive_rollout.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── vllm_rollout.cpython-310.pyc │ │ └── vllm_rollout.cpython-39.pyc │ │ └── vllm_rollout.py │ └── sharding_manager │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── base.cpython-310.pyc │ ├── base.cpython-39.pyc │ ├── fsdp_ulysses.cpython-310.pyc │ ├── fsdp_ulysses.cpython-39.pyc │ ├── fsdp_vllm.cpython-310.pyc │ └── fsdp_vllm.cpython-39.pyc │ ├── base.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ └── megatron_vllm.py ├── version └── version └── workers ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc ├── __init__.cpython-39.pyc ├── fsdp_workers.cpython-310.pyc └── fsdp_workers.cpython-39.pyc ├── actor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── base.cpython-310.pyc │ ├── base.cpython-39.pyc │ ├── dp_actor.cpython-310.pyc │ └── dp_actor.cpython-39.pyc ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── base.cpython-310.pyc │ └── dp_critic.cpython-310.pyc ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── 
megatron_workers.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── base.cpython-310.pyc │ ├── base.cpython-39.pyc │ ├── hf_rollout.cpython-310.pyc │ ├── hf_rollout.cpython-39.pyc │ ├── tokenizer.cpython-310.pyc │ └── tokenizer.cpython-39.pyc ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── naive_rollout.cpython-310.pyc │ │ └── naive_rollout.cpython-39.pyc │ └── naive_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── vllm_rollout.cpython-310.pyc │ └── vllm_rollout.cpython-39.pyc │ └── vllm_rollout.py └── sharding_manager ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc ├── __init__.cpython-39.pyc ├── base.cpython-310.pyc ├── base.cpython-39.pyc ├── fsdp_ulysses.cpython-310.pyc ├── fsdp_ulysses.cpython-39.pyc ├── fsdp_vllm.cpython-310.pyc └── fsdp_vllm.cpython-39.pyc ├── base.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/.DS_Store -------------------------------------------------------------------------------- /Synthetic_Data_RL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/Synthetic_Data_RL.pdf -------------------------------------------------------------------------------- /TinyZero/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/.DS_Store -------------------------------------------------------------------------------- /TinyZero/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /TinyZero/docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:24.05-py3 2 | 3 | # uninstall nv-pytorch fork 4 | RUN pip3 uninstall pytorch-quantization \ 5 | pytorch-triton \ 6 | torch \ 7 | torch-tensorrt \ 8 | torchvision \ 9 | xgboost transformer_engine flash_attn \ 10 | apex megatron-core -y 11 | 12 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124 13 | 14 | # make sure torch version is kept 15 | RUN pip3 install --no-cache-dir \ 16 | "torch==2.4.0" \ 17 | accelerate \ 18 | codetiming \ 19 | datasets \ 20 | dill \ 21 | hydra-core \ 22 | numpy \ 23 | pybind11 \ 24 | tensordict \ 25 | "transformers<=4.46.0" 26 | 27 | # ray is installed via vllm 28 | RUN pip3 install --no-cache-dir vllm==0.6.3 29 | 30 | # we choose flash-attn v2.7.0 or v2.7.2 which contain pre-built wheels 31 | RUN pip3 install --no-cache-dir --no-build-isolation flash-attn==2.7.0.post2 32 | 33 | # install apex, set MAX_JOBS to avoid OOMs 34 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \ 35 | --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \ 36 | git+https://github.com/NVIDIA/apex 37 | 38 | # install Transformer Engine, which requires FA 2.5.8 39 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation 40 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install 
git+https://github.com/NVIDIA/TransformerEngine.git@v1.7 41 | 42 | # Pin wandb to v0.18 since v0.19.1 is released with ImportError 43 | RUN pip3 install wandb==0.18.7 py-spy 44 | -------------------------------------------------------------------------------- /TinyZero/docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/docs/.DS_Store -------------------------------------------------------------------------------- /TinyZero/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = verl 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /TinyZero/docs/README.md: -------------------------------------------------------------------------------- 1 | # veRL documents 2 | 3 | ## Build the docs 4 | 5 | ```bash 6 | # Install dependencies. 7 | pip install -r requirements-docs.txt 8 | 9 | # Build the docs. 10 | make clean 11 | make html 12 | ``` 13 | 14 | ## Open the docs with your browser 15 | 16 | ```bash 17 | python -m http.server -d _build/html/ 18 | ``` 19 | Launch your browser and open localhost:8000. 
-------------------------------------------------------------------------------- /TinyZero/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/docs/_static/logo.png -------------------------------------------------------------------------------- /TinyZero/docs/advance/placement.rst: -------------------------------------------------------------------------------- 1 | Ray API Design Tutorial 2 | ======================================= 3 | 4 | We provide a tutorial for our Ray API design, including: 5 | 6 | - Ray basic concepts 7 | - Resource Pool and RayWorkerGroup 8 | - Data Dispatch, Execution and Collection 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool 10 | 11 | See details in `tutorial.ipynb `_. -------------------------------------------------------------------------------- /TinyZero/docs/faq/faq.rst: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ==================================== 3 | 4 | Ray related 5 | ------------ 6 | 7 | How to add a breakpoint for debugging with distributed Ray? 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | Please check out the official debugging guide from Ray: https://docs.ray.io/en/latest/ray-observability/ray-distributed-debugger.html 11 | 12 | 13 | Distributed training 14 | ------------------------ 15 | 16 | How to run multi-node post-training with Ray?
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | You can start a ray cluster and submit a ray job, following the official guide from Ray: https://docs.ray.io/en/latest/ray-core/starting-ray.html 20 | -------------------------------------------------------------------------------- /TinyZero/docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # markdown support 2 | recommonmark 3 | # markdown table support 4 | sphinx-markdown-tables 5 | 6 | # theme default rtd 7 | 8 | # crate-docs-theme 9 | sphinx-rtd-theme -------------------------------------------------------------------------------- /TinyZero/examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/.DS_Store -------------------------------------------------------------------------------- /TinyZero/examples/data_preprocess/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/data_preprocess/.DS_Store -------------------------------------------------------------------------------- /TinyZero/examples/data_preprocess/__pycache__/BM25_retriever.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/data_preprocess/__pycache__/BM25_retriever.cpython-310.pyc -------------------------------------------------------------------------------- /TinyZero/examples/data_preprocess/dentist_book.pt: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/data_preprocess/dentist_book.pt -------------------------------------------------------------------------------- /TinyZero/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_generation \ 2 | trainer.nnodes=1 \ 3 | trainer.n_gpus_per_node=8 \ 4 | data.path=~/data/rlhf/gsm8k/test.parquet \ 5 | data.prompt_key=prompt \ 6 | data.n_samples=1 \ 7 | data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \ 8 | model.path=deepseek-ai/deepseek-llm-7b-chat \ 9 | +model.trust_remote_code=True \ 10 | rollout.temperature=1.0 \ 11 | rollout.top_k=50 \ 12 | rollout.top_p=0.7 \ 13 | rollout.prompt_length=2048 \ 14 | rollout.response_length=1024 \ 15 | rollout.tensor_model_parallel_size=2 \ 16 | rollout.gpu_memory_utilization=0.8 17 | -------------------------------------------------------------------------------- /TinyZero/examples/ppo_trainer/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\ 4 | data.train_files=$HOME/data/gsm8k/train.parquet \ 5 | data.val_files=$HOME/data/gsm8k/test.parquet \ 6 | data.train_batch_size=1024 \ 7 | data.val_batch_size=1312 \ 8 | data.max_prompt_length=512 \ 9 | data.max_response_length=512 \ 10 | actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 11 | actor_rollout_ref.actor.optim.lr=2e-6 \ 12 | actor_rollout_ref.actor.ppo_mini_batch_size=256 \ 13 | actor_rollout_ref.actor.ppo_micro_batch_size=64 \ 14 | actor_rollout_ref.rollout.log_prob_micro_batch_size=64 \ 15 | actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ 16 | actor_rollout_ref.rollout.name=vllm \ 17 | actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \ 
18 | actor_rollout_ref.ref.log_prob_micro_batch_size=128 \ 19 | critic.optim.lr=2e-5 \ 20 | critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ 21 | critic.model.enable_gradient_checkpointing=False \ 22 | critic.ppo_micro_batch_size=64 \ 23 | algorithm.kl_ctrl.kl_coef=0.001 \ 24 | trainer.critic_warmup=0 \ 25 | trainer.logger=['console','wandb'] \ 26 | trainer.project_name='verl_megatron_gsm8k_examples' \ 27 | trainer.experiment_name='deepseek_llm_7b_function_rm' \ 28 | trainer.n_gpus_per_node=8 \ 29 | trainer.nnodes=1 \ 30 | trainer.save_freq=-1 \ 31 | trainer.total_epochs=15 \ 32 | +trainer.val_before_train=False $@ 33 | -------------------------------------------------------------------------------- /TinyZero/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace with your own hdfs/local path 4 | 5 | nproc_per_node=$1 6 | 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 8 | -m verl.trainer.fsdp_sft_trainer \ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.prompt_key=prompt \ 12 | data.response_key=answer \ 13 | data.micro_batch_size=8 \ 14 | model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \ 15 | trainer.default_hdfs_dir=$hdfs_path \ 16 | trainer.project_name=gsm8k-sft \ 17 | trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \ 18 | trainer.total_epochs=4 \ 19 | trainer.logger=['console','wandb'] -------------------------------------------------------------------------------- /TinyZero/examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- 1 | # Tested with 2 & 4 GPUs 2 | 3 | set -x 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Usage: run_gemma_2b.sh [other_configs...]" 7 | exit 1 8 | fi 9 | 10 | nproc_per_node=$1
11 | save_path=$2 12 | 13 | # Shift the arguments so $@ refers to the rest 14 | shift 2 15 | 16 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 17 | -m verl.trainer.fsdp_sft_trainer \ 18 | data.train_files=$HOME/data/gsm8k/train.parquet \ 19 | data.val_files=$HOME/data/gsm8k/test.parquet \ 20 | data.prompt_key=extra_info \ 21 | data.response_key=extra_info \ 22 | +data.prompt_dict_keys=['question'] \ 23 | +data.response_dict_keys=['answer'] \ 24 | data.micro_batch_size=8 \ 25 | model.partial_pretrain=google/gemma-2b-it \ 26 | trainer.default_local_dir=$save_path \ 27 | trainer.project_name=gsm8k-sft \ 28 | trainer.experiment_name=gsm8k-sft-gemma-2b-it \ 29 | trainer.total_epochs=2 \ 30 | trainer.logger=['console','wandb'] \ 31 | trainer.default_hdfs_dir=null $@ -------------------------------------------------------------------------------- /TinyZero/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace to your own hdfs/local path 4 | 5 | nproc_per_node=$1 6 | 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \ 8 | -m verl.trainer.fsdp_sft_trainer \ 9 | data.train_files=$HOME/data/gsm8k/train.parquet \ 10 | data.val_files=$HOME/data/gsm8k/test.parquet \ 11 | data.prompt_key=prompt \ 12 | data.response_key=answer \ 13 | data.micro_batch_size=8 \ 14 | model.partial_pretrain=google/gemma-1.1-7b-it \ 15 | trainer.default_hdfs_dir=$hdfs_path \ 16 | trainer.project_name=gsm8k-sft \ 17 | trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \ 18 | trainer.total_epochs=4 \ 19 | trainer.logger=['console','wandb'] -------------------------------------------------------------------------------- /TinyZero/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/TinyZero/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | codetiming 3 | datasets 4 | dill 5 | flash-attn 6 | hydra-core 7 | numpy 8 | pandas 9 | pybind11 10 | ray 11 | tensordict<0.6 12 | transformers<4.48 13 | vllm<=0.6.3 14 | wandb 15 | -------------------------------------------------------------------------------- /TinyZero/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip3 install --upgrade yapf 3 | yapf -ir -vv --style ./.style.yapf verl tests single_controller examples -------------------------------------------------------------------------------- /TinyZero/scripts/train_tiny_a100_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/scripts/train_tiny_a100_grpo.sh -------------------------------------------------------------------------------- /TinyZero/scripts/train_tiny_zero.sh: -------------------------------------------------------------------------------- 1 | python3 -m verl.trainer.main_ppo \ 2 | data.train_files=$DATA_DIR/train.parquet \ 3 | data.val_files=$DATA_DIR/test.parquet \ 4 | data.train_batch_size=256 \ 5 | data.val_batch_size=1312 \ 6 | data.max_prompt_length=256 \ 7 | data.max_response_length=1024 \ 8 | actor_rollout_ref.model.path=$BASE_MODEL \ 9 | actor_rollout_ref.actor.optim.lr=1e-6 \ 10 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \ 11 | actor_rollout_ref.actor.ppo_micro_batch_size=8 \ 12 | actor_rollout_ref.rollout.log_prob_micro_batch_size=8 \ 13 | actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TP_SIZE \ 14 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ 15 | actor_rollout_ref.ref.log_prob_micro_batch_size=4 \ 16 | critic.optim.lr=1e-5 \ 17 | critic.model.path=$BASE_MODEL \ 18 | critic.ppo_micro_batch_size=8 \ 19 | 
algorithm.kl_ctrl.kl_coef=0.001 \ 20 | trainer.logger=['wandb'] \ 21 | +trainer.val_before_train=False \ 22 | trainer.default_hdfs_dir=null \ 23 | trainer.n_gpus_per_node=$N_GPUS \ 24 | trainer.nnodes=1 \ 25 | trainer.save_freq=100 \ 26 | trainer.test_freq=100 \ 27 | trainer.project_name=TinyZero \ 28 | trainer.experiment_name=$EXPERIMENT_NAME \ 29 | trainer.total_epochs=15 2>&1 | tee verl_demo.log 30 | -------------------------------------------------------------------------------- /TinyZero/tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/.DS_Store -------------------------------------------------------------------------------- /TinyZero/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. -------------------------------------------------------------------------------- /TinyZero/tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/data/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/e2e/arithmetic_sequence/data/test.parquet -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/data/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/e2e/arithmetic_sequence/data/train.parquet -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaForCausalLM" 4 | ], 5 | "attention_bias": false, 6 | "attention_dropout": 0.0, 7 | "bos_token_id": null, 8 | "eos_token_id": 1, 9 | "hidden_act": "silu", 10 | "hidden_size": 128, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 344, 13 | "max_position_embeddings": 2048, 14 | "mlp_bias": false, 15 | "model_type": "llama", 16 | "num_attention_heads": 4, 17 | "num_hidden_layers": 4, 18 | "num_key_value_heads": 4, 19 | "pad_token_id": 2, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-06, 22 | "rope_scaling": null, 
23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "bfloat16", 26 | "transformers_version": "4.43.3", 27 | "use_cache": true, 28 | "vocab_size": 16 29 | } 30 | -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "eos_token_id": 1, 4 | "pad_token_id": 2, 5 | "transformers_version": "4.43.3" 6 | } 7 | -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "char_ords": [ 3 | 48, 4 | 49, 5 | 50, 6 | 51, 7 | 52, 8 | 53, 9 | 54, 10 | 55, 11 | 56, 12 | 57, 13 | 44, 14 | 58 15 | ], 16 | "model_max_length": 2048, 17 | "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}" 18 | } -------------------------------------------------------------------------------- /TinyZero/tests/e2e/arithmetic_sequence/rl/README.md: 
-------------------------------------------------------------------------------- 1 | # Digit completion 2 | 3 | This is an example of solving a digit completion problem. The problem is defined as follows: 4 | 5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers. 6 | If the max number is reached, the next number should be taken modulo the max number. 7 | 8 | For example, 9 | - prompt = [1, 2, 3] 10 | - N = 5 11 | - max_number = 6 12 | 13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1]. 14 | 15 | # Environment definition 16 | 17 | The core definition of the task is in tests/e2e/envs/digit_completion/task.py 18 | 19 | It is highly recommended to take a look at it for a better understanding. 20 | 21 | 22 | 23 | # Run experiments 24 | 25 | Users are required to specify the config path and config name (and the model config path relative to the current working directory). 26 | 27 | ```bash 28 | # cd examples/arithmetic_sequence/rl 29 | 30 | # Specify the config path and config name (current working dir) 31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' 32 | 33 | # The default relative path of the model config is 'config/model_config'; to change it, edit ray_megatron.yaml or override it on the command line: 34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config 35 | 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /TinyZero/tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .digit_completion import DigitCompletion 16 | 17 | __all__ = ['DigitCompletion'] -------------------------------------------------------------------------------- /TinyZero/tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .task import DigitCompletion, generate_ground_truth_response 16 | from .tokenizer import CharTokenizer 17 | 18 | from transformers import AutoTokenizer, LlamaConfig 19 | 20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True) 21 | 22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer'] -------------------------------------------------------------------------------- /TinyZero/tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt" 6 | 7 | export PATH=$PATH:~/.local/bin 8 | 9 | rm -rf $OUTPUT_FILE 10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 11 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 12 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 13 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 14 | critic.model.path=tests/e2e/arithmetic_sequence/model | tee $OUTPUT_FILE; 15 | 16 | python3 tests/e2e/check_results.py --output_file=$OUTPUT_FILE 17 | rm -rf $OUTPUT_FILE 18 | -------------------------------------------------------------------------------- /TinyZero/tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e -x 4 | 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ 6 | data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ 7 | data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ 8 | actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \ 9 | actor_rollout_ref.rollout.name=vllm \ 10 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ 11 | actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \ 12 | critic.model.path=Qwen/Qwen2.5-0.5B \ 13 | critic.model.use_remove_padding=True \ 14 | trainer.total_epochs=1 
-------------------------------------------------------------------------------- /TinyZero/tests/ray/detached_worker/README.md: -------------------------------------------------------------------------------- 1 | # Detached Worker 2 | ## How to run (only on a single node) 3 | - Start a local ray cluster: 4 | ```bash 5 | ray start --head --port=6379 6 | ``` 7 | - Run the server: 8 | ```bash 9 | python3 server.py 10 | ``` 11 | - In another terminal, run the client: 12 | ```bash 13 | python3 client.py 14 | ``` 15 | -------------------------------------------------------------------------------- /TinyZero/tests/ray/detached_worker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ray start --head --port=6379 3 | python3 server.py 4 | python3 client.py 5 | ray stop --force -------------------------------------------------------------------------------- /TinyZero/tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import time 16 | import os 17 | import subprocess 18 | 19 | 20 | def test(): 21 | wait_time = 10 22 | 23 | my_env = os.environ.copy() 24 | my_env["WAIT_TIME"] = str(wait_time) 25 | 26 | p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE) 27 | 28 | count = 0 29 | while b"foo started" not in p.stdout.read(): 30 | time.sleep(1) 31 | count += 1 32 | if count > 40: 33 | raise RuntimeError("timeout for start foo in check_worker_alive/main.py") 34 | 35 | print( 36 | time.time(), 37 | f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time") 38 | time.sleep(wait_time * 1.5) 39 | print(time.time(), f"start checking") 40 | assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort" 41 | assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code" 42 | print(f"test passed") 43 | 44 | 45 | if __name__ == "__main__": 46 | test() 47 | -------------------------------------------------------------------------------- /TinyZero/tests/sanity/check_license.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | license_head = "Copyright 2024 Bytedance Ltd. 
and/or its affiliates" 16 | 17 | from pathlib import Path 18 | from argparse import ArgumentParser 19 | 20 | if __name__ == '__main__': 21 | parser = ArgumentParser() 22 | parser.add_argument('--directory', '-d', required=True, type=str) 23 | args = parser.parse_args() 24 | directory_in_str = args.directory 25 | 26 | pathlist = Path(directory_in_str).glob('**/*.py') 27 | for path in pathlist: 28 | # because path is object not string 29 | path_in_str = str(path.absolute()) 30 | with open(path_in_str, 'r') as f: 31 | file_content = f.read() 32 | 33 | assert license_head in file_content, f'file {path_in_str} does not contain license' 34 | 35 | print(path_in_str) 36 | -------------------------------------------------------------------------------- /TinyZero/tests/sanity/test_import.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def test_import(): 17 | import verl 18 | print(verl.__version__) 19 | 20 | 21 | def test_single_controller_import(): 22 | import verl.single_controller 23 | print(verl.single_controller.__version__) 24 | -------------------------------------------------------------------------------- /TinyZero/tests/verl/utils/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | from transformers import AutoTokenizer 17 | from verl.utils import hf_tokenizer 18 | from verl.utils.dataset.rm_dataset import RMDataset 19 | 20 | 21 | def get_rm_data(): 22 | # prepare test dataset 23 | url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet" 24 | local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/') 25 | local_path = os.path.join(local_folder, 'test.parquet') 26 | os.makedirs(local_folder, exist_ok=True) 27 | return local_path 28 | 29 | 30 | def test_rm_dataset(): 31 | tokenizer = hf_tokenizer("facebook/opt-1.3b") 32 | local_path = get_rm_data() 33 | dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512) 34 | data = dataset[0]['input_ids'] 35 | output = tokenizer.batch_decode(data) 36 | assert len(output) > 1 37 | assert type(output[0]) == str 38 | -------------------------------------------------------------------------------- /TinyZero/train_RL_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # alias python='/home/weiji/anaconda3/envs/zero/bin/python' 3 | # alias python3='/home/weiji/anaconda3/envs/zero/bin/python3' 4 | # alias pip='/home/weiji/anaconda3/envs/zero/bin/pip' 5 | dataset_path=$1 6 | train_model_path=$2 7 | save_model_path=$3 8 | temperature=$4 9 | rollout=$5 
10 | batch_size=$6 11 | response_length=$7 12 | export N_GPUS=4 13 | export WANDB_API_KEY='xxx' 14 | #export WANDB_MODE=disabled 15 | export CUDA_VISIBLE_DEVICES=1,2,3,4 16 | 17 | ray stop --force && ray start --head --include-dashboard=True 18 | 19 | export BASE_MODEL="$train_model_path" 20 | export DATA_DIR="$dataset_path" 21 | export ROLLOUT_TP_SIZE=4 22 | export EXPERIMENT_NAME="$save_model_path" 23 | export VLLM_ATTENTION_BACKEND=XFORMERS 24 | 25 | bash TinyZero/scripts/train_tiny_zero_a100_grpo.sh $temperature $rollout $batch_size $response_length 26 | #bash ./scripts/train_tiny_zero.sh 27 | -------------------------------------------------------------------------------- /TinyZero/train_SFT_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # alias python='/home/weiji/anaconda3/envs/zero/bin/python' 3 | # alias python3='/home/weiji/anaconda3/envs/zero/bin/python3' 4 | # alias pip='/home/weiji/anaconda3/envs/zero/bin/pip' 5 | dataset_path=$1 6 | train_model_path=$2 7 | save_model_path=$3 8 | batch_size=$4 9 | max_length=$5 10 | 11 | export N_GPUS=8 12 | export WANDB_API_KEY='xxx' 13 | # export WANDB_MODE=disabled 14 | export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 15 | 16 | ray stop --force && ray start --head --include-dashboard=True 17 | 18 | torchrun --standalone --nnodes=1 --nproc_per_node=$N_GPUS \ 19 | -m verl.trainer.fsdp_sft_trainer \ 20 | data.train_files="$dataset_path"/train.parquet \ 21 | data.val_files="$dataset_path"/test.parquet \ 22 | data.prompt_key=input \ 23 | data.max_length=$max_length \ 24 | data.response_key=output \ 25 | data.train_batch_size=$batch_size \ 26 | data.micro_batch_size=$N_GPUS \ 27 | model.partial_pretrain="$train_model_path" \ 28 | trainer.default_hdfs_dir="$save_model_path" \ 29 | trainer.logger=['console','wandb'] \ 30 | trainer.project_name=sft \ 31 | trainer.experiment_name="SFT_experiment" \ 32 | trainer.total_epochs=3 \ 33 | optim.lr=1e-6 \ 34 | 
optim.weight_decay=0.01 35 | -------------------------------------------------------------------------------- /TinyZero/train_base.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dataset_path=$1 3 | train_model_path=$2 4 | save_model_path=$3 5 | temperature=$4 6 | rollout=$5 7 | batch_size=$6 8 | response_length=$7 9 | export N_GPUS=4 10 | export WANDB_API_KEY='xxx' 11 | #export WANDB_MODE=disabled 12 | export CUDA_VISIBLE_DEVICES=1,2,3,4 13 | 14 | ray stop --force && ray start --head --include-dashboard=True 15 | 16 | export BASE_MODEL="$train_model_path" 17 | export DATA_DIR="$dataset_path" 18 | export ROLLOUT_TP_SIZE=4 19 | export EXPERIMENT_NAME="$save_model_path" 20 | export VLLM_ATTENTION_BACKEND=XFORMERS 21 | 22 | bash TinyZero/scripts/train_tiny_zero_a100_grpo.sh $temperature $rollout $batch_size $response_length 23 | #bash ./scripts/train_tiny_zero.sh 24 | -------------------------------------------------------------------------------- /TinyZero/verl.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /TinyZero/verl.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | codetiming 3 | datasets 4 | dill 5 | hydra-core 6 | numpy 7 | pybind11 8 | ray 9 | tensordict 10 | transformers<4.48 11 | vllm<=0.6.3 12 | 13 | [test] 14 | pytest 15 | yapf 16 | -------------------------------------------------------------------------------- /TinyZero/verl.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | tests 2 | verl 3 | -------------------------------------------------------------------------------- /activate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create and activate a 
virtual environment 4 | #conda create --name myenv python=3.9 5 | #pip install --upgrade pip 6 | 7 | # Install PyTorch (optional, vLLM can install the correct version) 8 | 9 | 10 | # Install vLLM 11 | pip install vllm==0.6.3 # Change version if needed 12 | pip install ray 13 | 14 | pip install tensordict 15 | 16 | # Install verl 17 | pip install omegaconf 18 | pip install -e . 19 | pip install -r requirements.txt 20 | 21 | # Install FlashAttention 2 22 | 23 | # Install quality-of-life tools 24 | pip install wandb IPython matplotlib 25 | pip install openai anthropic tree_sitter 26 | pip install tenacity==8.2.2 pydantic==1.10.7 rank-bm25==0.2.2 27 | pip install -U "ray[default]" 28 | pip install "pydantic>=2" 29 | pip install huggingface_hub 30 | pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu121 31 | pip install flash-attn==2.7.3 32 | -------------------------------------------------------------------------------- /img/Overviewv2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/Overviewv2.png -------------------------------------------------------------------------------- /img/final-one.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/final-one.png -------------------------------------------------------------------------------- /img/final1.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/final1.4.png -------------------------------------------------------------------------------- /img/final2.4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/final2.4.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | codetiming 3 | datasets 4 | dill 5 | flash-attn 6 | hydra-core 7 | numpy 8 | pandas 9 | pybind11 10 | ray 11 | tensordict<0.6 12 | transformers<4.48 13 | vllm<=0.6.3 14 | wandb 15 | -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/.DS_Store -------------------------------------------------------------------------------- /src/data_generator/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/data_generator/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/data_generator/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /src/data_generator/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/data_generator/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/data_generator/__pycache__/generator.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/data_generator/__pycache__/generator.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/.DS_Store -------------------------------------------------------------------------------- /src/eval/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/eval/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/.DS_Store -------------------------------------------------------------------------------- /src/eval/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/eval/tasks/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- 
def eval_function(pred: str, label: str) -> bool:
    """Return True when the cfa reward scorer accepts ``pred`` for ``label``.

    Delegates to ``compute_score(pred, label, valid=True)`` and collapses the
    result to a boolean: a score of ``None`` or ``0`` means incorrect, any
    other score means correct.
    """
    # Bound to ``score`` (not ``eval``) so the builtin is not shadowed.
    score = compute_score(pred, label, valid=True)
    return not (score is None or score == 0)
def process_prediction(pred: str):
    """Return the stripped text of the last ``<answer>...</answer>`` span.

    Returns ``None`` when ``pred`` is empty or contains no complete answer
    span on a single line.
    """
    if not pred:
        return None
    # The pattern has to be anchored on the answer tags: a bare ``(.*?)``
    # matches the empty string at every position, which made this function
    # return '' for every input.
    # NOTE(review): tag name restored as ``answer``; confirm it matches the
    # tags requested by the task's output instruction and the reward scorer.
    answer_pattern = r'<answer>(.*?)</answer>'
    answers = re.findall(answer_pattern, pred)
    if answers:
        return answers[-1].strip()
    return None
def get_output_instruction():
    """Return the output-format instruction for the cqa (yes/no) task.

    The instruction asks the model to wrap its reasoning and its final
    Yes/No answer in separate tags.
    """
    # NOTE(review): the tag markup was missing from this string ("enclosed
    # within and tags"), so the model was never told which tags to emit.
    # Restored as think/answer; confirm the names match what the answer
    # extractor and reward scorer look for.
    return (
        'Your output thinking process and answer should be enclosed within '
        '<think> </think> and <answer> </answer> tags, respectively, i.e., '
        '<think> thinking process here </think> '
        '<answer> Yes or No here </answer>. '
    )
def process_prediction(pred: str):
    """Return the option letter (A-D) given in the last answer span of ``pred``.

    The last ``<answer>...</answer>`` span is located first; the first
    standalone capital letter A-D inside it is returned. Returns ``None``
    when there is no answer span or the span holds no such letter.
    """
    if not pred:
        return None
    # Anchor on the answer tags: the unanchored ``(.*?)`` only ever captured
    # the empty string, so no option letter could be found and the function
    # returned None for every input.
    # NOTE(review): tag name restored as ``answer``; confirm against the
    # task's output instruction and the reward scorer.
    answers = re.findall(r'<answer>(.*?)</answer>', pred)
    if not answers:
        return None
    option = re.search(r'\b[A-D]\b', answers[-1].strip())
    return option.group(0) if option else None
-------------------------------------------------------------------------------- /src/eval/tasks/gsm8k/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__init__.py -------------------------------------------------------------------------------- /src/eval/tasks/gsm8k/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/gsm8k/__pycache__/eval_function.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/eval_function.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/gsm8k/__pycache__/get_output_instruction.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/get_output_instruction.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/gsm8k/__pycache__/process_and_save_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/process_and_save_dataset.cpython-39.pyc -------------------------------------------------------------------------------- 
def eval_function(pred: str, label: str) -> bool:
    """Return True when the gsm8k reward scorer accepts ``pred`` for ``label``.

    Delegates to ``compute_score(pred, label, valid=True)``; a score of
    ``None`` or ``0`` means incorrect, any other score means correct.
    """
    # Bound to ``score`` (not ``eval``) so the builtin is not shadowed. The
    # unreachable string block that followed the returns has been removed.
    score = compute_score(pred, label, valid=True)
    return not (score is None or score == 0)
def process_label(label: str):
    """Extract the reference answer from a gsm8k label string.

    The whole label is first passed to ``process_prediction``. If that yields
    ``None`` and the label has more than one blank-line-separated paragraph,
    the second-to-last paragraph is tried instead. Returns ``None`` when
    neither attempt produces an answer.
    """
    direct = process_prediction(label)
    if direct is not None:
        return direct
    paragraphs = label.split('\n\n')
    if len(paragraphs) > 1:
        return process_prediction(paragraphs[-2])
    # Explicit None; the old trailing ``return label`` could never execute.
    return None
def eval_function(pred: str, label: str) -> bool:
    """Return True when the logiqa reward scorer accepts ``pred`` for ``label``.

    Delegates to ``compute_score(pred, label, valid=True)``; a score of
    ``None`` or ``0`` means incorrect, any other score means correct.
    """
    # Bound to ``score`` (not ``eval``) so the builtin is not shadowed.
    score = compute_score(pred, label, valid=True)
    return not (score is None or score == 0)
def process_label(label:str):
    """Return the first standalone capital letter A-E in ``label``, else None."""
    first_option = re.search(r'\b[A-E]\b', label)
    if first_option is None:
        return None
    return first_option.group(0)
matches[-1].group(1).strip() 10 | option_matches=re.findall(r'\b[A-E]\b', final_str) 11 | if option_matches: 12 | return option_matches[0] 13 | return None 14 | 15 | -------------------------------------------------------------------------------- /src/eval/tasks/math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__init__.py -------------------------------------------------------------------------------- /src/eval/tasks/math/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/math/__pycache__/eval_function.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/eval_function.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/math/__pycache__/get_output_instruction.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/get_output_instruction.cpython-39.pyc -------------------------------------------------------------------------------- /src/eval/tasks/math/__pycache__/process_and_save_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- 
def get_output_instruction():
    """Return the prompt text requesting step-by-step reasoning and a boxed final answer."""
    instruction = "Let's think step by step and output the final answer within \\boxed{}."
    return instruction
def remove_boxed(s):
    r"""Return the contents of a ``\boxed{...}`` or ``\boxed ...`` expression.

    Returns ``None`` when ``s`` is not a string or is not a single boxed
    expression (wrong prefix or missing closing brace). Malformed input no
    longer raises ``AssertionError``, and the checks no longer rely on
    ``assert`` statements, which are stripped under ``python -O``.
    """
    if not isinstance(s, str):
        return None
    spaced = "\\boxed "
    if spaced in s:
        # Space-delimited form is only accepted as a prefix of ``s``.
        return s[len(spaced):] if s.startswith(spaced) else None
    braced = "\\boxed{"
    if s.startswith(braced) and s.endswith("}"):
        return s[len(braced):-1]
    return None


def last_boxed_only_string(string):
    r"""Return the last ``\boxed``/``\fbox`` expression found in ``string``.

    When the space-delimited form ``\boxed <value>`` is present, the text
    after its last occurrence up to the next ``$`` is returned with the
    ``\boxed `` prefix. Otherwise the last ``\boxed`` (falling back to the
    last ``\fbox``) is located and returned together with its brace-balanced
    argument. Returns ``None`` when no such expression exists or its braces
    never balance.
    """
    spaced = "\\boxed "
    if spaced in string:
        return spaced + string.split(spaced)[-1].split("$")[0]

    idx = string.rfind("\\boxed")
    if idx < 0:
        idx = string.rfind("\\fbox")
        if idx < 0:
            return None

    # Scan forward for the brace that closes the expression's argument.
    depth = 0
    for i in range(idx, len(string)):
        ch = string[i]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return string[idx:i + 1]
    return None


def process_label(text):
    r"""Return the contents of the last ``\boxed`` expression in ``text``, or None."""
    return remove_boxed(last_boxed_only_string(text))
def eval_function(pred: str, label: str) -> bool:
    """Return True when the mednli reward scorer accepts ``pred`` for ``label``.

    Delegates to ``compute_score(pred, label, valid=True)``; a score of
    ``None`` or ``0`` means incorrect, any other score means correct.
    """
    # Bound to ``score`` (not ``eval``) so the builtin is not shadowed.
    score = compute_score(pred, label, valid=True)
    return not (score is None or score == 0)
def process_prediction(solution_str):
    """Return the stripped text of the last ``<answer>...</answer>`` span.

    Returns ``None`` when ``solution_str`` is empty or contains no complete
    answer span on a single line.
    """
    if not solution_str:
        return None
    # Anchor on the answer tags: the unanchored ``(.*?)`` matched the empty
    # string at every position, so the result was always ''.
    # NOTE(review): tag name restored as ``answer``; confirm against the
    # task's output instruction and the reward scorer.
    answers = re.findall(r'<answer>(.*?)</answer>', solution_str)
    return answers[-1].strip() if answers else None
def get_input_instruction():
    """Return the description of the expected medqa input layout (vignette, then question)."""
    instruction = (
        "First a clinical vignettes or diagrams. "
        "A clinical vignette is a short, descriptive medical case that simulates a real-life scenario involving a patient. "
        "It includes details like: Patient demographics (age, sex, etc.),Medical history,Symptoms and signs,Lab or imaging results,Progression or complication. "
        "Then a USMLE-style multiple-choice question with its four options. "
    )
    return instruction
def process_prediction(pred: str):
    """Return the option letter (A-D) given in the last answer span of ``pred``.

    The last ``<answer>...</answer>`` span is located first; the first
    standalone capital letter A-D inside it is returned. Returns ``None``
    when there is no answer span or the span holds no such letter.
    """
    if not pred:
        return None
    # Anchor on the answer tags: the unanchored ``(.*?)`` only ever captured
    # the empty string, so this function returned None for every input.
    # NOTE(review): tag name restored as ``answer``; confirm against the
    # task's output instruction and the reward scorer.
    answers = re.findall(r'<answer>(.*?)</answer>', pred)
    if not answers:
        return None
    option = re.search(r'\b[A-D]\b', answers[-1].strip())
    return option.group(0) if option else None
def eval_function(pred, label) -> bool:
    """Return True when ``pred`` and ``label`` hold the same set of items.

    Both arguments are compared as sets, so order and duplicates are ignored.
    Returns ``False`` when either side is ``None``.
    """
    # NOTE(review): presumably these are the row lists produced by the
    # sibling process_prediction/process_label -- confirm against the caller.
    if pred is None or label is None:
        return False
    return set(pred) == set(label)
predicted_sql,db_path = pred[0],pred[1] 4 | 5 | prior_pred=predicted_sql.split('final SQL')[0] 6 | try: 7 | predicted_sql = predicted_sql.split('final SQL')[1].strip() 8 | except: 9 | predicted_sql = 'SELECT'+predicted_sql.split('SELECT')[1] 10 | predicted_sql=predicted_sql.split(';')[0] 11 | predicted_sql=predicted_sql[predicted_sql.find('SELECT'):] #[1:] 12 | conn=sqlite3.connect(db_path) 13 | cursor = conn.cursor() 14 | try: 15 | cursor.execute(predicted_sql) 16 | predicted_res = cursor.fetchall() 17 | return predicted_res 18 | except: 19 | return None 20 | return None 21 | -------------------------------------------------------------------------------- /src/model_inference/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/retriever/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/.DS_Store -------------------------------------------------------------------------------- /src/retriever/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/retriever/__pycache__/BM25_retriever.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/__pycache__/BM25_retriever.cpython-39.pyc -------------------------------------------------------------------------------- /src/retriever/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /src/retriever/passages/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/passages/.DS_Store -------------------------------------------------------------------------------- /src/retriever/passages/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | 17 | version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) 18 | 19 | with open(os.path.join(version_folder, 'version/version')) as f: 20 | __version__ = f.read().strip() 21 | 22 | from .protocol import DataProto 23 | 24 | from .utils.logging_utils import set_basic_config 25 | import logging 26 | 27 | set_basic_config(level=logging.WARNING) 28 | -------------------------------------------------------------------------------- /verl/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/__pycache__/protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/protocol.cpython-310.pyc -------------------------------------------------------------------------------- /verl/__pycache__/protocol.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/protocol.cpython-39.pyc -------------------------------------------------------------------------------- /verl/i.py: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /verl/init.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/models/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/__pycache__/registry.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/registry.cpython-39.pyc -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .modeling_llama_megatron import ( 16 | # original model with megatron 17 | ParallelLlamaModel, 18 | ParallelLlamaForCausalLM, 19 | # rmpad with megatron 20 | ParallelLlamaForCausalLMRmPad, 21 | ParallelLlamaForValueRmPad, 22 | # rmpad with megatron and pipeline parallelism 23 | ParallelLlamaForCausalLMRmPadPP, 24 | ParallelLlamaForValueRmPadPP) 25 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .parallel_attention import ParallelLlamaAttention 16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad 17 | from .parallel_mlp import ParallelLlamaMLP 18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm 19 | -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
# File: verl/models/weight_loader_registry.py
def get_weight_loader(arch: str):
    """Return the Megatron weight-loading function registered for *arch*.

    Args:
        arch: Model architecture name, e.g. ``'LlamaForCausalLM'``.

    Returns:
        The loader callable registered under ``arch``.

    Raises:
        ValueError: ``arch`` has no registered loader.
    """
    # Imported inside the function, so the loader module is only imported
    # when this function is called.
    from verl.models.llama.megatron.checkpoint_utils.llama_loader import load_state_dict_to_megatron_llama

    loaders = {'LlamaForCausalLM': load_state_dict_to_megatron_llama}

    if arch not in loaders:
        raise ValueError(f"Model architectures {arch} are not supported for now. "
                         f"Supported architectures: {loaders.keys()}")
    return loaders[arch]
-------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool 17 | -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/decorator.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/decorator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/decorator.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker_group.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker_group.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__pycache__/worker.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__pycache__/worker_group.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker_group.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__pycache__/worker_group.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker_group.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
# File: verl/single_controller/base/megatron/worker.py
import os
from dataclasses import dataclass
from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo


class MegatronWorker(Worker):
    """Worker that reports Megatron parallel-state sizes and ranks."""

    def __init__(self, cuda_visible_devices=None) -> None:
        super().__init__(cuda_visible_devices)

    def get_megatron_global_info(self):
        """Return a ``DistGlobalInfo`` with the tensor/data/pipeline parallel world sizes."""
        # megatron is imported only when the method is called.
        from megatron.core import parallel_state as mpu
        return DistGlobalInfo(
            tp_size=mpu.get_tensor_model_parallel_world_size(),
            dp_size=mpu.get_data_parallel_world_size(),
            pp_size=mpu.get_pipeline_model_parallel_world_size(),
        )

    def get_megatron_rank_info(self):
        """Return a ``DistRankInfo`` with this process's tensor/data/pipeline parallel ranks."""
        from megatron.core import parallel_state as mpu
        return DistRankInfo(
            tp_rank=mpu.get_tensor_model_parallel_rank(),
            dp_rank=mpu.get_data_parallel_rank(),
            pp_rank=mpu.get_pipeline_model_parallel_rank(),
        )
14 | -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/ray.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/ray.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
# File: verl/single_controller/base/register_center/ray.py
import ray


@ray.remote
class WorkerGroupRegisterCenter:
    """Ray actor that stores the rank-zero info it is constructed with."""

    def __init__(self, rank_zero_info):
        self.rank_zero_info = rank_zero_info

    def get_rank_zero_info(self):
        """Return the stored rank-zero info unchanged."""
        return self.rank_zero_info


def create_worker_group_register_center(name, info):
    """Start a ``WorkerGroupRegisterCenter`` actor named *name* holding *info*.

    Returns:
        The handle returned by ``.remote(info)``.
    """
    named_actor_cls = WorkerGroupRegisterCenter.options(name=name)
    return named_actor_cls.remote(info)
14 | 15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls 16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo) -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/megatron.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/megatron.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/megatron.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/megatron.cpython-39.pyc -------------------------------------------------------------------------------- /verl/single_controller/version/version: -------------------------------------------------------------------------------- 1 | 0.0.2 -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/third_party/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-39.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/fsdp_sft_trainer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/fsdp_sft_trainer.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/trainer/__pycache__/main_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/main_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/main_ppo.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/main_ppo.cpython-39.pyc -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | path: /tmp/math_Qwen2-7B-Instruct.parquet 3 | prompt_key: prompt 4 | response_key: responses 5 | data_source_key: data_source 6 | reward_model_key: reward_model -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- 1 | trainer: 2 | nnodes: 1 3 | n_gpus_per_node: 8 4 | 5 | data: 6 | path: ~/data/rlhf/math/test.parquet 7 | prompt_key: prompt 8 | n_samples: 5 9 | output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet 10 | batch_size: 128 11 | 12 | model: 13 | path: ~/models/Qwen2-7B-Instruct 14 | external_lib: null 15 | rollout: 16 | name: vllm 17 | temperature: 1.0 18 | top_k: 50 # 0 for hf rollout, -1 for vllm rollout 19 | top_p: 0.7 20 | prompt_length: 1536 21 | response_length: 512 22 | # for vllm rollout 23 | dtype: bfloat16 # should align with FSDP 24 | gpu_memory_utilization: 0.5 25 | ignore_eos: False 26 | micro_batch_size: 256 27 | enforce_eager: True 28 | free_cache_engine: True 29 | 
load_format: dummy_dtensor 30 | tensor_model_parallel_size: 1 31 | max_num_batched_tokens: 8192 32 | max_num_seqs: 1024 33 | log_prob_micro_batch_size: 8 34 | # for hf rollout 35 | do_sample: True -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_batch_size: 256 3 | micro_batch_size: 16 # this is also val batch size 4 | train_files: ~/data/gsm8k/train.parquet 5 | val_files: ~/data/gsm8k/test.parquet 6 | prompt_key: question 7 | response_key: answer 8 | max_length: 1024 9 | truncation: error 10 | balance_dp_token: False 11 | chat_template: null 12 | model: 13 | partial_pretrain: ~/models/gemma-1.1-7b-it 14 | fsdp_config: 15 | wrap_policy: 16 | min_num_params: 0 17 | cpu_offload: False 18 | offload_params: False 19 | external_lib: null 20 | enable_gradient_checkpointing: False 21 | trust_remote_code: False 22 | optim: 23 | lr: 1e-5 24 | betas: [0.9, 0.95] 25 | weight_decay: 0.01 26 | warmup_steps_ratio: 0.1 27 | clip_grad: 1.0 28 | 29 | trainer: 30 | default_local_dir: /tmp/sft_model 31 | default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here 32 | resume_path: null 33 | project_name: gsm8k-sft 34 | experiment_name: test 35 | total_epochs: 4 36 | logger: ['console'] 37 | seed: 1 38 | 39 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/core_algos.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/core_algos.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/ray_trainer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/ray_trainer.cpython-39.pyc -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | VLLM_ATTENTION_BACKEND: "XFORMERS" -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from . 
import tokenizer 15 | from .tokenizer import * 16 | 17 | __all__ = tokenizer.__all__ 18 | -------------------------------------------------------------------------------- /verl/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ast.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ast.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ast.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ast.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/countdown.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/countdown.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/countdown.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/countdown.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/dentist_qa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/dentist_qa.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/dentist_qa.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/dentist_qa.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/distributed.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/distributed.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/flops_counter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/flops_counter.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/flops_counter.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/flops_counter.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fs.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fs.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fs.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fsdp_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fsdp_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fsdp_utils.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/gsm8k.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/gsm8k.cpython-310.pyc 
-------------------------------------------------------------------------------- /verl/utils/__pycache__/gsm8k.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/gsm8k.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/hdfs_io.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/hdfs_io.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/hdfs_io.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/hdfs_io.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/import_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/import_utils.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/logging_utils.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/logging_utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logging_utils.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/logiqa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logiqa.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/logiqa.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logiqa.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/math.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/math.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/math.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/math.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/mednli.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/mednli.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/mednli.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/mednli.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/multiply.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/multiply.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/utils/__pycache__/multiply.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/multiply.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/py_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/py_functional.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/py_functional.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/py_functional.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/seqlen_balancing.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/seqlen_balancing.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tokenizer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tokenizer.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_dtypes.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_dtypes.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_dtypes.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_functional.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_functional.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_functional.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/utils/__pycache__/tracking.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tracking.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tracking.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tracking.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ulysses.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ulysses.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def update_dict_with_config(dictionary: Dict, config: "DictConfig") -> None:
    """Overwrite entries of ``dictionary`` with same-named attributes of ``config``.

    Only keys that already exist in ``dictionary`` are considered; attributes
    of ``config`` that have no matching key are ignored, so the set of keys
    never changes. The update happens in place.

    Args:
        dictionary: Mapping of option name to default value. Mutated in place.
        config: Object whose attributes supply the overriding values.

    Returns:
        None. The result is the mutated ``dictionary``.
    """
    for key in dictionary:
        # hasattr() raises TypeError for a non-string name, and such a key can
        # never correspond to an attribute anyway, so leave it untouched.
        if isinstance(key, str) and hasattr(config, key):
            dictionary[key] = getattr(config, key)
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .rl_dataset import RLHFDataset 16 | from .rm_dataset import RMDataset 17 | from .sft_dataset import SFTDataset 18 | -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/utils/dataset/__pycache__/rl_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rl_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rm_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rm_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/sft_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/sft_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .performance import log_gpu_memory_usage -------------------------------------------------------------------------------- /verl/utils/debug/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/debug/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/debug/__pycache__/performance.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/performance.cpython-310.pyc -------------------------------------------------------------------------------- 
def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0):
    """Report the CUDA memory currently allocated and reserved by this process.

    Args:
        head: Text placed at the start of the message.
        logger: Logger that receives the message. When ``None`` the message
            is written with ``print`` instead.
        level: Logging level used when ``logger`` is given.
        rank: When the default ``torch.distributed`` process group is
            initialized, only the process with this rank reports. ``None``
            makes every rank report. Without an initialized process group the
            value is not consulted.
    """
    # Rank gate: stay silent on every rank except the selected one.
    if dist.is_initialized() and rank is not None and dist.get_rank() != rank:
        return
    # Skip the CUDA queries and string formatting when the logger would drop
    # the record anyway (the default level is DEBUG, which is usually off).
    if logger is not None and not logger.isEnabledFor(level):
        return

    bytes_per_unit = 1024**3
    memory_allocated = torch.cuda.memory_allocated() / bytes_per_unit
    memory_reserved = torch.cuda.memory_reserved() / bytes_per_unit

    message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}'

    if logger is None:
        print(message)
    else:
        logger.log(msg=message, level=level)
def initialize_global_process_group(timeout_second=36000):
    """Create the default NCCL process group and select this process's GPU.

    Args:
        timeout_second: Timeout, in seconds, handed to
            ``torch.distributed.init_process_group``.

    Returns:
        Tuple ``(local_rank, rank, world_size)`` parsed as integers from the
        ``LOCAL_RANK``, ``RANK`` and ``WORLD_SIZE`` environment variables.

    Raises:
        KeyError: If one of the three environment variables is not set.
    """
    from datetime import timedelta

    import torch.distributed

    torch.distributed.init_process_group('nccl', timeout=timedelta(seconds=timeout_second))

    # The variables are read only after the group exists, in this fixed order.
    local_rank, rank, world_size = (int(os.environ[name]) for name in ("LOCAL_RANK", "RANK", "WORLD_SIZE"))

    if torch.distributed.is_initialized():
        torch.cuda.set_device(local_rank)
    return local_rank, rank, world_size
@cache
def is_megatron_core_available():
    """Return ``True`` if ``megatron.core.parallel_state`` can be imported.

    The answer is cached after the first call.
    """
    try:
        from megatron.core import parallel_state as mpu  # noqa: F401 - the import itself is the probe
    except ImportError:
        return False
    return True


@cache
def is_vllm_available():
    """Return ``True`` if ``vllm`` can be imported.

    The answer is cached after the first call.
    """
    try:
        import vllm  # noqa: F401 - the import itself is the probe
    except ImportError:
        return False
    return True


def import_external_libs(external_libs=None):
    """Import the named modules so that their import-time side effects run.

    Args:
        external_libs: ``None`` (nothing is imported), a single module name,
            or an iterable of module names (list, tuple, or any other
            sequence type).

    Raises:
        ImportError: Propagated from ``importlib.import_module`` when a
            module cannot be imported.
    """
    if external_libs is None:
        return
    # Only a lone string needs wrapping. Testing for ``list`` instead would
    # treat a tuple (or any other non-list sequence) as one module name.
    if isinstance(external_libs, str):
        external_libs = [external_libs]
    import importlib
    for external_lib in external_libs:
        importlib.import_module(external_lib)
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/logger/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/logger/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/logger/__pycache__/aggregate_logger.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/logger/__pycache__/aggregate_logger.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A Ray logger will receive logging info from different processes.
"""
import numbers
from typing import Dict


def concat_dict_to_str(dict: Dict, step):
    """Render the numeric entries of a metrics mapping as a single log line.

    Args:
        dict: Mapping of metric name to value. The parameter name shadows the
            builtin; it is kept so existing keyword callers keep working.
        step: Step identifier placed at the start of the line.

    Returns:
        ``'step:<step>'`` followed by one ``'<key>:<value>'`` item (value
        formatted with three decimals) for every entry whose value is a
        ``numbers.Number``, all joined by ``' - '``. Entries with any other
        value type are skipped.
    """
    parts = [f'step:{step}']
    parts.extend(f'{k}:{v:.3f}' for k, v in dict.items() if isinstance(v, numbers.Number))
    return ' - '.join(parts)


class LocalLogger:
    """Logger that optionally prints metrics to standard output."""

    def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False):
        """Create the logger.

        Args:
            remote_logger: Accepted but not used by this implementation.
            enable_wandb: Accepted but not used by this implementation.
            print_to_console: When true, ``log`` prints each record and a
                deprecation notice is printed once here.
        """
        self.print_to_console = print_to_console
        if print_to_console:
            print('Using LocalLogger is deprecated. The constructor API will change ')

    def flush(self):
        """Do nothing; output is flushed on every ``log`` call."""
        pass

    def log(self, data, step):
        """Print the numeric entries of ``data`` for ``step`` if printing is enabled."""
        if self.print_to_console:
            print(concat_dict_to_str(data, step=step), flush=True)

# --------------------------------------------------------------------------------
# /verl/utils/logging_utils.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging


def set_basic_config(level):
    """
    Set the global logging format and level via ``logging.basicConfig``.
    It is called when verl is imported.

    Args:
        level: Logging level passed through to ``logging.basicConfig``.
    """
    logging.basicConfig(format='%(levelname)s:%(asctime)s:%(message)s', level=level)

# --------------------------------------------------------------------------------
# /verl/utils/megatron/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# --------------------------------------------------------------------------------
# /verl/utils/megatron/memory.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch


class MemoryBuffer:
    """Flat, zero-initialised 1-D tensor that hands out reshaped views of itself."""

    def __init__(self, numel, numel_padded, dtype):
        """Allocate the backing tensor on the current CUDA device.

        Args:
            numel: Number of addressable elements; ``get`` refuses requests
                that end past this index.
            numel_padded: Number of elements actually allocated.
            dtype: Torch dtype of the backing tensor.
        """
        self.numel = numel
        self.numel_padded = numel_padded
        self.dtype = dtype
        self.data = torch.zeros(self.numel_padded,
                                dtype=self.dtype,
                                device=torch.cuda.current_device(),
                                requires_grad=False)

    def zero(self):
        """Reset the buffer to zero."""
        self.data.zero_()

    def get(self, shape, start_index):
        """Return a tensor with the input `shape` as a view into the
        1-D data starting at `start_index`.

        Args:
            shape: Target shape; must provide ``numel()`` (e.g. ``torch.Size``).
            start_index: Non-negative offset of the first element in the buffer.

        Raises:
            AssertionError: if ``start_index`` is negative or the requested
                range ends beyond ``self.numel``.
        """
        # A negative offset would slice relative to the end of the tensor and
        # silently return a view of the wrong region (or fail inside ``view``).
        assert start_index >= 0, \
            'start_index must be non-negative.'
        end_index = start_index + shape.numel()
        assert end_index <= self.numel, \
            'requested tensor is out of the buffer range.'
        buffer_tensor = self.data[start_index:end_index]
        return buffer_tensor.view(shape)

# --------------------------------------------------------------------------------
# /verl/utils/ray_utils.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Contains commonly used utilities for ray
"""

import ray

import concurrent.futures


def parallel_put(data_list, max_workers=None):
    """Call ``ray.put`` on every item of ``data_list`` from a thread pool.

    Args:
        data_list: Sized collection of objects to put.
        max_workers: Number of threads to use. Defaults to
            ``min(len(data_list), 16)``.

    Returns:
        A list holding the value returned by ``ray.put`` for each item, in
        the same order as ``data_list``. An empty input yields an empty list.
    """
    # With no items the default worker count would be 0, which
    # ThreadPoolExecutor rejects with ValueError; there is nothing to do anyway.
    if len(data_list) == 0:
        return []

    if max_workers is None:
        max_workers = min(len(data_list), 16)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        pending = [executor.submit(ray.put, data) for data in data_list]
        # Reading results in submission order keeps the output aligned with
        # data_list without a separate re-ordering pass.
        return [future.result() for future in pending]

# --------------------------------------------------------------------------------
# /verl/utils/rendezvous/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/ast.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/ast.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/ast.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/ast.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/countdown.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/countdown.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/countdown.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/countdown.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/dentist_qa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/dentist_qa.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/dentist_qa.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/dentist_qa.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/gsm8k.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/gsm8k.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/logiqa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/logiqa.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/logiqa.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/logiqa.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/math.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/math.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/math.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/math.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/mednli.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/mednli.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/mednli.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/mednli.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/multiply.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/multiply.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/multiply.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/multiply.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/version/version: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- 
/verl/utils/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/utils/workers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/__pycache__/fsdp_workers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/fsdp_workers.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/utils/workers/__pycache__/fsdp_workers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/fsdp_workers.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/utils/workers/actor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/actor/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/actor/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/actor/__pycache__/base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/actor/__pycache__/dp_actor.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/dp_actor.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/actor/__pycache__/dp_actor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/dp_actor.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/utils/workers/critic/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/critic/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/critic/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/critic/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/critic/__pycache__/dp_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/critic/__pycache__/dp_critic.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Base class for a critic
"""
from abc import ABC, abstractmethod

import torch

from verl import DataProto

__all__ = ['BasePPOCritic']


class BasePPOCritic(ABC):
    """Abstract interface for a PPO critic.

    Subclasses must implement ``compute_values`` and ``update_critic``.
    """

    def __init__(self, config):
        """Store ``config`` on the instance; its schema is defined by the subclasses."""
        super().__init__()
        self.config = config

    @abstractmethod
    def compute_values(self, data: DataProto) -> torch.Tensor:
        """Compute values for ``data`` and return them as a tensor."""
        pass

    @abstractmethod
    def update_critic(self, data: DataProto):
        """Update the critic using ``data``."""
        pass

# --------------------------------------------------------------------------------
# /verl/utils/workers/reward_model/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /verl/utils/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/__pycache__/tokenizer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/tokenizer.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC, abstractmethod
from typing import Iterable, Union

from verl import DataProto

__all__ = ['BaseRollout']


class BaseRollout(ABC):
    """Abstract interface for a rollout: turns prompts into generated sequences."""

    def __init__(self):
        """Initialise the rollout; this constructor takes no arguments.

        NOTE(review): the previous docstring described a ``dataloader``
        argument (an Iterable of TensorDict that consistently generates
        prompts and handles when training stops). No such parameter is
        accepted here.
        """
        super().__init__()

    @abstractmethod
    def generate_sequences(self, prompts: DataProto) -> DataProto:
        """Generate sequences for ``prompts`` and return them as a ``DataProto``."""
        pass

# --------------------------------------------------------------------------------
# /verl/utils/workers/rollout/naive/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .vllm_rollout import vLLMRollout -------------------------------------------------------------------------------- /verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc 
-------------------------------------------------------------------------------- /verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available 16 | 17 | from .base import BaseShardingManager 18 | from .fsdp_ulysses import FSDPUlyssesShardingManager 19 | 20 | AllGatherPPModel = None 21 | 22 | if is_megatron_core_available() and is_vllm_available(): 23 | from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager 24 | elif AllGatherPPModel is not None: 25 | pass 26 | else: 27 | AllGatherPPModel = None 28 | MegatronVLLMShardingManager = None 29 | 30 | if is_vllm_available(): 31 | from .fsdp_vllm import FSDPVLLMShardingManager 32 | else: 33 | FSDPVLLMShardingManager = None 34 | -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/utils/workers/sharding_manager/__pycache__/base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/utils/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from verl import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | 23 | def __enter__(self): 24 | pass 25 | 26 | def __exit__(self, exc_type, exc_value, traceback): 27 | pass 28 | 29 | def preprocess_data(self, data: DataProto) -> DataProto: 30 | return data 31 | 32 | def postprocess_data(self, data: DataProto) -> DataProto: 33 | return data 34 | -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /verl/workers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/__pycache__/fsdp_workers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/__pycache__/fsdp_workers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/fsdp_workers.cpython-39.pyc 
-------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOActor 16 | from .dp_actor import DataParallelPPOActor 17 | 18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"] 19 | -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/dp_actor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/dp_actor.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPOCritic 16 | from .dp_critic import DataParallelPPOCritic 17 | 18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"] 19 | -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/critic/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/critic/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for a critic 16 | """ 17 | from abc import ABC, abstractmethod 18 | 19 | import torch 20 | 21 | from verl import DataProto 22 | 23 | __all__ = ['BasePPOCritic'] 24 | 25 | 26 | class BasePPOCritic(ABC): 27 | 28 | def __init__(self, config): 29 | super().__init__() 30 | self.config = config 31 | 32 | @abstractmethod 33 | def compute_values(self, data: DataProto) -> torch.Tensor: 34 | """Compute values""" 35 | pass 36 | 37 | @abstractmethod 38 | def update_critic(self, data: DataProto): 39 | """Update the critic""" 40 | pass 41 | -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BasePPORewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .reward_model import MegatronRewardModel 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import BaseRollout 16 | from .naive import NaiveRollout 17 | from .hf_rollout import HFRollout 18 | 19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"] 20 | -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/base.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/tokenizer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/tokenizer.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from typing import Iterable, Union 17 | 18 | from verl import DataProto 19 | 20 | __all__ = ['BaseRollout'] 21 | 22 | 23 | class BaseRollout(ABC): 24 | 25 | def __init__(self): 26 | """ 27 | 28 | Args: 29 | dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader 30 | should handle when the training stops. 31 | """ 32 | super().__init__() 33 | 34 | @abstractmethod 35 | def generate_sequences(self, prompts: DataProto) -> DataProto: 36 | """Generate sequences""" 37 | pass 38 | -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .naive_rollout import NaiveRollout 16 | -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .vllm_rollout import vLLMRollout -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc -------------------------------------------------------------------------------- 
/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 

from verl.utils.import_utils import is_vllm_available, is_megatron_core_available

from .base import BaseShardingManager
from .fsdp_ulysses import FSDPUlyssesShardingManager

# Optional backends: each name below is bound to the real class when its
# dependencies are importable and to ``None`` otherwise, so that importing
# this package never fails just because an optional backend is missing.

# The Megatron <-> vLLM manager needs both megatron-core and vLLM.
if is_megatron_core_available() and is_vllm_available():
    from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
else:
    AllGatherPPModel = None
    MegatronVLLMShardingManager = None

# The FSDP <-> vLLM manager only needs vLLM.
if is_vllm_available():
    from .fsdp_vllm import FSDPVLLMShardingManager
else:
    FSDPVLLMShardingManager = None
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/base.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: 
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Sharding manager to implement HybridEngine
"""

from verl import DataProto


class BaseShardingManager:
    """No-op sharding manager usable as a context manager.

    Entering and leaving the context do nothing, and both data hooks hand
    their argument back untouched. Presumably concrete sharding managers
    subclass this and override the hooks they need (verify against the
    sibling modules).
    """

    def __enter__(self):
        """Enter the managed region; nothing is set up and ``None`` is returned."""
        return None

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the managed region without any cleanup.

        Returns ``None``, so an exception raised inside the ``with`` body is
        not suppressed.
        """
        return None

    def preprocess_data(self, data: DataProto) -> DataProto:
        """Return ``data`` unchanged."""
        return data

    def postprocess_data(self, data: DataProto) -> DataProto:
        """Return ``data`` unchanged."""
        return data