├── .DS_Store
├── LICENSE
├── README.md
├── Synthetic_Data_RL.pdf
├── TinyZero
    ├── .DS_Store
    ├── Notice.txt
    ├── docker
    │   ├── Dockerfile.ngc.vllm
    │   └── Dockerfile.vemlp.vllm.te
    ├── docs
    │   ├── .DS_Store
    │   ├── Makefile
    │   ├── README.md
    │   ├── _static
    │   │   └── logo.png
    │   ├── advance
    │   │   ├── dpo_extension.rst
    │   │   ├── fsdp_extension.rst
    │   │   ├── megatron_extension.rst
    │   │   └── placement.rst
    │   ├── conf.py
    │   ├── examples
    │   │   ├── config.rst
    │   │   ├── gsm8k_example.rst
    │   │   └── ppo_code_architecture.rst
    │   ├── experiment
    │   │   └── ppo.rst
    │   ├── faq
    │   │   └── faq.rst
    │   ├── index.rst
    │   ├── preparation
    │   │   ├── prepare_data.rst
    │   │   └── reward_function.rst
    │   ├── requirements-docs.txt
    │   ├── start
    │   │   ├── install.rst
    │   │   └── quickstart.rst
    │   └── workers
    │   │   ├── fsdp_workers.rst
    │   │   ├── megatron_workers.rst
    │   │   └── ray_trainer.rst
    ├── examples
    │   ├── .DS_Store
    │   ├── data_preprocess
    │   │   ├── .DS_Store
    │   │   ├── BM25_retriever.py
    │   │   ├── __pycache__
    │   │   │   └── BM25_retriever.cpython-310.pyc
    │   │   ├── arth.py
    │   │   ├── bfcl_simple.py
    │   │   ├── countdown.py
    │   │   ├── dentist_book.pt
    │   │   ├── dentist_qa.py
    │   │   ├── full_hh_rlhf.py
    │   │   ├── gsm8k.py
    │   │   ├── hellaswag.py
    │   │   ├── logiqa.py
    │   │   ├── math_dataset.py
    │   │   ├── mednli.py
    │   │   └── multiply.py
    │   ├── generation
    │   │   └── run_deepseek_v2_lite_math.sh
    │   ├── grpo_trainer
    │   │   ├── run_deepseek7b_llm.sh
    │   │   ├── run_deepseek7b_llm_seq_balance.sh
    │   │   ├── run_qwen2-7b.sh
    │   │   └── run_qwen2-7b_seq_balance.sh
    │   ├── ppo_trainer
    │   │   ├── run_deepseek7b_llm.sh
    │   │   ├── run_deepseek7b_llm_sp2.sh
    │   │   ├── run_deepseek_full_hh_rlhf.sh
    │   │   ├── run_deepseek_math_gsm8k_megatron.sh
    │   │   ├── run_deepseek_megatron.sh
    │   │   ├── run_gemma.sh
    │   │   ├── run_qwen2-7b.sh
    │   │   ├── run_qwen2-7b_rm.sh
    │   │   ├── run_qwen2-7b_rm_seq_balance.sh
    │   │   ├── run_qwen2-7b_seq_balance.sh
    │   │   ├── run_qwen2.5-32b.sh
    │   │   └── verl_getting_started.ipynb
    │   ├── ray
    │   │   └── tutorial.ipynb
    │   ├── sft
    │   │   └── gsm8k
    │   │   │   ├── run_deepseek_6b7.sh
    │   │   │   ├── run_gemma_2b.sh
    │   │   │   └── run_gemma_7b.sh
    │   └── split_placement
    │   │   ├── README.md
    │   │   ├── config
    │   │       └── ppo_trainer_split.yaml
    │   │   ├── main_ppo_split.py
    │   │   ├── run_deepseek7b_llm.sh
    │   │   └── split_monkey_patch.py
    ├── init.py
    ├── patches
    │   └── megatron_v4.patch
    ├── requirements.txt
    ├── retriever.py
    ├── scripts
    │   ├── format.sh
    │   ├── train_tiny_a100_grpo.sh
    │   ├── train_tiny_zero.sh
    │   ├── train_tiny_zero_a100_grpo.sh
    │   └── train_tiny_zero_a100_grpo_14b.sh
    ├── setup.py
    ├── test_results.py
    ├── tests
    │   ├── .DS_Store
    │   ├── __init__.py
    │   ├── e2e
    │   │   ├── __init__.py
    │   │   ├── arithmetic_sequence
    │   │   │   ├── data
    │   │   │   │   ├── create_dataset.py
    │   │   │   │   ├── test.parquet
    │   │   │   │   └── train.parquet
    │   │   │   ├── model
    │   │   │   │   ├── config.json
    │   │   │   │   ├── create_model_tokenizer.py
    │   │   │   │   ├── generation_config.json
    │   │   │   │   ├── model.safetensors
    │   │   │   │   └── tokenizer_config.json
    │   │   │   └── rl
    │   │   │   │   ├── README.md
    │   │   │   │   ├── config
    │   │   │   │       └── ray_trainer.yaml
    │   │   │   │   └── main_trainer.py
    │   │   ├── check_results.py
    │   │   ├── envs
    │   │   │   ├── __init__.py
    │   │   │   └── digit_completion
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── task.py
    │   │   │   │   └── tokenizer.py
    │   │   ├── run_qwen_gsm8k_function_rm.sh
    │   │   ├── run_qwen_gsm8k_function_rm_no_rmpad.sh
    │   │   ├── run_qwen_gsm8k_model_rm.sh
    │   │   ├── run_qwen_gsm8k_model_rm_no_rmpad.sh
    │   │   ├── run_qwen_gsm8k_model_rm_seq_balance.sh
    │   │   ├── run_qwen_gsm8k_model_rm_ulysses.sh
    │   │   ├── run_ray_trainer.sh
    │   │   └── run_ray_trainer_rmpad.sh
    │   ├── gpu_utility
    │   │   ├── test_memory_buffers.py
    │   │   ├── test_ops.py
    │   │   └── test_torch_functional.py
    │   ├── model
    │   │   ├── test_transformer.py
    │   │   └── test_transformers_ulysses.py
    │   ├── ray
    │   │   ├── check_worker_alive
    │   │   │   └── main.py
    │   │   ├── detached_worker
    │   │   │   ├── README.md
    │   │   │   ├── client.py
    │   │   │   ├── run.sh
    │   │   │   └── server.py
    │   │   ├── test_check_worker_alive.py
    │   │   ├── test_colocated_workers.py
    │   │   ├── test_data_transfer.py
    │   │   ├── test_driverfunc_to_worker.py
    │   │   ├── test_high_level_scheduling_api.py
    │   │   ├── test_ray_local_envs.py
    │   │   ├── test_rvdz.py
    │   │   ├── test_worker_group_basics.py
    │   │   └── test_worker_group_torch.py
    │   ├── rollout
    │   │   ├── run_fsdp_vllm.py
    │   │   └── test_vllm_hf_loader.py
    │   ├── sanity
    │   │   ├── check_license.py
    │   │   └── test_import.py
    │   ├── utility
    │   │   └── test_tensor_dict_utilities.py
    │   └── verl
    │   │   └── utils
    │   │       └── dataset
    │   │           ├── test_rl_dataset.py
    │   │           ├── test_rm_dataset.py
    │   │           └── test_sft_dataset.py
    ├── train_RL_base.sh
    ├── train_SFT_base.sh
    ├── train_base.sh
    └── verl.egg-info
    │   ├── PKG-INFO
    │   ├── SOURCES.txt
    │   ├── dependency_links.txt
    │   ├── requires.txt
    │   └── top_level.txt
├── activate.sh
├── img
    ├── Overviewv2.png
    ├── final-one.png
    ├── final1.4.png
    └── final2.4.png
├── requirements.txt
├── src
    ├── .DS_Store
    ├── data_generator
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── __init__.cpython-39.pyc
    │   │   └── generator.cpython-39.pyc
    │   └── generator.py
    ├── eval
    │   ├── .DS_Store
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   └── __init__.cpython-39.pyc
    │   ├── model_eval.py
    │   └── tasks
    │   │   ├── .DS_Store
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │       ├── __init__.cpython-39.pyc
    │   │       └── task_manager.cpython-39.pyc
    │   │   ├── cfa
    │   │       ├── __init__.py
    │   │       ├── eval_function.py
    │   │       ├── get_fixed_options.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── cqa
    │   │       ├── __init__.py
    │   │       ├── eval_function.py
    │   │       ├── get_fixed_options.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       ├── process_prediction.py
    │   │       └── test.tsv
    │   │   ├── gpqa
    │   │       ├── __init__.py
    │   │       ├── eval_function.py
    │   │       ├── get_fixed_options.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── gsm8k
    │   │       ├── __init__.py
    │   │       ├── __pycache__
    │   │       │   ├── __init__.cpython-39.pyc
    │   │       │   ├── eval_function.cpython-39.pyc
    │   │       │   ├── get_output_instruction.cpython-39.pyc
    │   │       │   ├── process_and_save_dataset.cpython-39.pyc
    │   │       │   ├── process_label.cpython-39.pyc
    │   │       │   └── process_prediction.cpython-39.pyc
    │   │       ├── eval_function.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── logiqa
    │   │       ├── __init__.py
    │   │       ├── __pycache__
    │   │       │   ├── __init__.cpython-39.pyc
    │   │       │   ├── eval_function.cpython-39.pyc
    │   │       │   ├── get_output_instruction.cpython-39.pyc
    │   │       │   ├── process_and_save_dataset.cpython-39.pyc
    │   │       │   ├── process_label.cpython-39.pyc
    │   │       │   └── process_prediction.cpython-39.pyc
    │   │       ├── eval_function.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── math
    │   │       ├── __init__.py
    │   │       ├── __pycache__
    │   │       │   ├── __init__.cpython-39.pyc
    │   │       │   ├── eval_function.cpython-39.pyc
    │   │       │   ├── get_output_instruction.cpython-39.pyc
    │   │       │   ├── process_and_save_dataset.cpython-39.pyc
    │   │       │   ├── process_label.cpython-39.pyc
    │   │       │   └── process_prediction.cpython-39.pyc
    │   │       ├── eval_function.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── mednli
    │   │       ├── __init__.py
    │   │       ├── eval_function.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── medqa
    │   │       ├── __init__.py
    │   │       ├── eval_function.py
    │   │       ├── get_input_instruction.py
    │   │       ├── get_output_instruction.py
    │   │       ├── process_and_save_dataset.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    │   │   ├── task_manager.py
    │   │   └── test_to_sql
    │   │       ├── eval_function.py
    │   │       ├── process_label.py
    │   │       └── process_prediction.py
    ├── main.py
    ├── model_inference
    │   ├── __init__.py
    │   ├── batch_inference.py
    │   └── openai_call.py
    └── retriever
    │   ├── .DS_Store
    │   ├── BM25_retriever.py
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── BM25_retriever.cpython-39.pyc
    │       └── __init__.cpython-39.pyc
    │   └── passages
    │       ├── .DS_Store
    │       └── __init__.py
└── verl
    ├── __init__.py
    ├── __pycache__
        ├── __init__.cpython-310.pyc
        ├── __init__.cpython-39.pyc
        ├── protocol.cpython-310.pyc
        └── protocol.cpython-39.pyc
    ├── i.py
    ├── init.py
    ├── models
        ├── README.md
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-310.pyc
        │   ├── __init__.cpython-39.pyc
        │   ├── registry.cpython-310.pyc
        │   └── registry.cpython-39.pyc
        ├── llama
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── llama_loader.py
        │   │       └── llama_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_llama_megatron.py
        ├── registry.py
        ├── transformers
        │   ├── __init__.py
        │   ├── llama.py
        │   ├── monkey_patch.py
        │   └── qwen2.py
        └── weight_loader_registry.py
    ├── protocol.py
    ├── single_controller
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-310.pyc
        │   └── __init__.cpython-39.pyc
        ├── base
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-310.pyc
        │   │   ├── __init__.cpython-39.pyc
        │   │   ├── decorator.cpython-310.pyc
        │   │   ├── decorator.cpython-39.pyc
        │   │   ├── worker.cpython-310.pyc
        │   │   ├── worker.cpython-39.pyc
        │   │   ├── worker_group.cpython-310.pyc
        │   │   └── worker_group.cpython-39.pyc
        │   ├── decorator.py
        │   ├── megatron
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-310.pyc
        │   │   │   ├── __init__.cpython-39.pyc
        │   │   │   ├── worker.cpython-310.pyc
        │   │   │   ├── worker.cpython-39.pyc
        │   │   │   ├── worker_group.cpython-310.pyc
        │   │   │   └── worker_group.cpython-39.pyc
        │   │   ├── worker.py
        │   │   └── worker_group.py
        │   ├── register_center
        │   │   ├── __init__.py
        │   │   ├── __pycache__
        │   │   │   ├── __init__.cpython-310.pyc
        │   │   │   ├── __init__.cpython-39.pyc
        │   │   │   ├── ray.cpython-310.pyc
        │   │   │   └── ray.cpython-39.pyc
        │   │   └── ray.py
        │   ├── worker.py
        │   └── worker_group.py
        ├── ray
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-310.pyc
        │   │   ├── __init__.cpython-39.pyc
        │   │   ├── base.cpython-310.pyc
        │   │   ├── base.cpython-39.pyc
        │   │   ├── megatron.cpython-310.pyc
        │   │   └── megatron.cpython-39.pyc
        │   ├── base.py
        │   └── megatron.py
        └── version
        │   └── version
    ├── third_party
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-310.pyc
        │   └── __init__.cpython-39.pyc
        └── vllm
        │   ├── __init__.py
        │   ├── __pycache__
        │       ├── __init__.cpython-310.pyc
        │       └── __init__.cpython-39.pyc
        │   ├── vllm_v_0_3_1
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── tokenizer.py
        │       ├── weight_loaders.py
        │       └── worker.py
        │   ├── vllm_v_0_4_2
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   ├── vllm_v_0_5_4
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   └── vllm_v_0_6_3
        │       ├── __init__.py
        │       ├── __pycache__
        │           ├── __init__.cpython-310.pyc
        │           ├── __init__.cpython-39.pyc
        │           ├── arg_utils.cpython-310.pyc
        │           ├── arg_utils.cpython-39.pyc
        │           ├── config.cpython-310.pyc
        │           ├── config.cpython-39.pyc
        │           ├── dtensor_weight_loaders.cpython-310.pyc
        │           ├── dtensor_weight_loaders.cpython-39.pyc
        │           ├── hf_weight_loader.cpython-310.pyc
        │           ├── hf_weight_loader.cpython-39.pyc
        │           ├── llm.cpython-310.pyc
        │           ├── llm.cpython-39.pyc
        │           ├── llm_engine_sp.cpython-310.pyc
        │           ├── llm_engine_sp.cpython-39.pyc
        │           ├── megatron_weight_loaders.cpython-310.pyc
        │           ├── megatron_weight_loaders.cpython-39.pyc
        │           ├── model_loader.cpython-310.pyc
        │           ├── model_loader.cpython-39.pyc
        │           ├── model_runner.cpython-310.pyc
        │           ├── model_runner.cpython-39.pyc
        │           ├── parallel_state.cpython-310.pyc
        │           ├── parallel_state.cpython-39.pyc
        │           ├── spmd_gpu_executor.cpython-310.pyc
        │           ├── spmd_gpu_executor.cpython-39.pyc
        │           ├── tokenizer.cpython-310.pyc
        │           ├── tokenizer.cpython-39.pyc
        │           ├── worker.cpython-310.pyc
        │           └── worker.cpython-39.pyc
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
    ├── trainer
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-310.pyc
        │   ├── __init__.cpython-39.pyc
        │   ├── fsdp_sft_trainer.cpython-39.pyc
        │   ├── main_ppo.cpython-310.pyc
        │   └── main_ppo.cpython-39.pyc
        ├── config
        │   ├── evaluation.yaml
        │   ├── generation.yaml
        │   ├── ppo_megatron_trainer.yaml
        │   ├── ppo_trainer.yaml
        │   └── sft_trainer.yaml
        ├── fsdp_sft_trainer.py
        ├── main_eval.py
        ├── main_generation.py
        ├── main_ppo.py
        ├── ppo
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-310.pyc
        │   │   ├── __init__.cpython-39.pyc
        │   │   ├── core_algos.cpython-310.pyc
        │   │   ├── core_algos.cpython-39.pyc
        │   │   ├── ray_trainer.cpython-310.pyc
        │   │   └── ray_trainer.cpython-39.pyc
        │   ├── core_algos.py
        │   └── ray_trainer.py
        └── runtime_env.yaml
    ├── utils
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-310.pyc
        │   ├── __init__.cpython-39.pyc
        │   ├── ast.cpython-310.pyc
        │   ├── ast.cpython-39.pyc
        │   ├── countdown.cpython-310.pyc
        │   ├── countdown.cpython-39.pyc
        │   ├── dentist_qa.cpython-310.pyc
        │   ├── dentist_qa.cpython-39.pyc
        │   ├── distributed.cpython-39.pyc
        │   ├── flops_counter.cpython-310.pyc
        │   ├── flops_counter.cpython-39.pyc
        │   ├── fs.cpython-310.pyc
        │   ├── fs.cpython-39.pyc
        │   ├── fsdp_utils.cpython-310.pyc
        │   ├── fsdp_utils.cpython-39.pyc
        │   ├── gsm8k.cpython-310.pyc
        │   ├── gsm8k.cpython-39.pyc
        │   ├── hdfs_io.cpython-310.pyc
        │   ├── hdfs_io.cpython-39.pyc
        │   ├── import_utils.cpython-310.pyc
        │   ├── import_utils.cpython-39.pyc
        │   ├── logging_utils.cpython-310.pyc
        │   ├── logging_utils.cpython-39.pyc
        │   ├── logiqa.cpython-310.pyc
        │   ├── logiqa.cpython-39.pyc
        │   ├── math.cpython-310.pyc
        │   ├── math.cpython-39.pyc
        │   ├── mednli.cpython-310.pyc
        │   ├── mednli.cpython-39.pyc
        │   ├── model.cpython-310.pyc
        │   ├── model.cpython-39.pyc
        │   ├── multiply.cpython-310.pyc
        │   ├── multiply.cpython-39.pyc
        │   ├── py_functional.cpython-310.pyc
        │   ├── py_functional.cpython-39.pyc
        │   ├── seqlen_balancing.cpython-310.pyc
        │   ├── seqlen_balancing.cpython-39.pyc
        │   ├── tokenizer.cpython-310.pyc
        │   ├── tokenizer.cpython-39.pyc
        │   ├── torch_dtypes.cpython-310.pyc
        │   ├── torch_dtypes.cpython-39.pyc
        │   ├── torch_functional.cpython-310.pyc
        │   ├── torch_functional.cpython-39.pyc
        │   ├── tracking.cpython-310.pyc
        │   ├── tracking.cpython-39.pyc
        │   ├── ulysses.cpython-310.pyc
        │   └── ulysses.cpython-39.pyc
        ├── ast.py
        ├── config.py
        ├── countdown.py
        ├── dataset
        │   ├── README.md
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-310.pyc
        │   │   ├── __init__.cpython-39.pyc
        │   │   ├── rl_dataset.cpython-310.pyc
        │   │   ├── rl_dataset.cpython-39.pyc
        │   │   ├── rm_dataset.cpython-310.pyc
        │   │   ├── rm_dataset.cpython-39.pyc
        │   │   ├── sft_dataset.cpython-310.pyc
        │   │   └── sft_dataset.cpython-39.pyc
        │   ├── rl_dataset.py
        │   ├── rm_dataset.py
        │   └── sft_dataset.py
        ├── debug
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-310.pyc
        │   │   ├── __init__.cpython-39.pyc
        │   │   ├── performance.cpython-310.pyc
        │   │   └── performance.cpython-39.pyc
        │   ├── performance.py
        │   └── trajectory_tracker.py
        ├── dentist_qa.py
        ├── distributed.py
        ├── flops_counter.py
        ├── fs.py
        ├── fsdp_utils.py
        ├── gsm8k.py
        ├── gsm8k_2.py
        ├── hdfs_io.py
        ├── i.py
        ├── import_utils.py
        ├── init.py
        ├── logger
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-39.pyc
        │   │   └── aggregate_logger.cpython-39.pyc
        │   └── aggregate_logger.py
        ├── logging_utils.py
        ├── logiqa.py
        ├── math.py
        ├── mednli.py
        ├── megatron
        │   ├── __init__.py
        │   ├── memory.py
        │   ├── optimizer.py
        │   ├── optimizer_config.py
        │   ├── pipeline_parallel.py
        │   ├── sequence_parallel.py
        │   └── tensor_parallel.py
        ├── megatron_utils.py
        ├── memory_buffer.py
        ├── model.py
        ├── multiply.py
        ├── py_functional.py
        ├── ray_utils.py
        ├── rendezvous
        │   ├── __init__.py
        │   └── ray_backend.py
        ├── reward_score
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-310.pyc
        │   │   ├── __init__.cpython-39.pyc
        │   │   ├── ast.cpython-310.pyc
        │   │   ├── ast.cpython-39.pyc
        │   │   ├── countdown.cpython-310.pyc
        │   │   ├── countdown.cpython-39.pyc
        │   │   ├── dentist_qa.cpython-310.pyc
        │   │   ├── dentist_qa.cpython-39.pyc
        │   │   ├── gsm8k.cpython-310.pyc
        │   │   ├── gsm8k.cpython-39.pyc
        │   │   ├── logiqa.cpython-310.pyc
        │   │   ├── logiqa.cpython-39.pyc
        │   │   ├── math.cpython-310.pyc
        │   │   ├── math.cpython-39.pyc
        │   │   ├── mednli.cpython-310.pyc
        │   │   ├── mednli.cpython-39.pyc
        │   │   ├── multiply.cpython-310.pyc
        │   │   └── multiply.cpython-39.pyc
        │   ├── ast.py
        │   ├── countdown.py
        │   ├── dentist_qa.py
        │   ├── gsm8k.py
        │   ├── gsm8k_2.py
        │   ├── logiqa.py
        │   ├── math.py
        │   ├── mednli.py
        │   └── multiply.py
        ├── seqlen_balancing.py
        ├── tokenizer.py
        ├── torch_dtypes.py
        ├── torch_functional.py
        ├── tracking.py
        ├── ulysses.py
        ├── version
        │   └── version
        └── workers
        │   ├── __init__.py
        │   ├── __pycache__
        │       ├── __init__.cpython-310.pyc
        │       ├── __init__.cpython-39.pyc
        │       ├── fsdp_workers.cpython-310.pyc
        │       └── fsdp_workers.cpython-39.pyc
        │   ├── actor
        │       ├── __init__.py
        │       ├── __pycache__
        │       │   ├── __init__.cpython-310.pyc
        │       │   ├── __init__.cpython-39.pyc
        │       │   ├── base.cpython-310.pyc
        │       │   ├── base.cpython-39.pyc
        │       │   ├── dp_actor.cpython-310.pyc
        │       │   └── dp_actor.cpython-39.pyc
        │       ├── base.py
        │       ├── dp_actor.py
        │       └── megatron_actor.py
        │   ├── critic
        │       ├── __init__.py
        │       ├── __pycache__
        │       │   ├── __init__.cpython-310.pyc
        │       │   ├── base.cpython-310.pyc
        │       │   └── dp_critic.cpython-310.pyc
        │       ├── base.py
        │       ├── dp_critic.py
        │       └── megatron_critic.py
        │   ├── fsdp_workers.py
        │   ├── megatron_workers.py
        │   ├── reward_model
        │       ├── __init__.py
        │       ├── base.py
        │       └── megatron
        │       │   ├── __init__.py
        │       │   └── reward_model.py
        │   ├── rollout
        │       ├── __init__.py
        │       ├── __pycache__
        │       │   ├── __init__.cpython-310.pyc
        │       │   ├── __init__.cpython-39.pyc
        │       │   ├── base.cpython-310.pyc
        │       │   ├── base.cpython-39.pyc
        │       │   ├── hf_rollout.cpython-310.pyc
        │       │   ├── hf_rollout.cpython-39.pyc
        │       │   ├── tokenizer.cpython-310.pyc
        │       │   └── tokenizer.cpython-39.pyc
        │       ├── base.py
        │       ├── hf_rollout.py
        │       ├── naive
        │       │   ├── __init__.py
        │       │   ├── __pycache__
        │       │   │   ├── __init__.cpython-310.pyc
        │       │   │   ├── __init__.cpython-39.pyc
        │       │   │   ├── naive_rollout.cpython-310.pyc
        │       │   │   └── naive_rollout.cpython-39.pyc
        │       │   └── naive_rollout.py
        │       ├── tokenizer.py
        │       └── vllm_rollout
        │       │   ├── __init__.py
        │       │   ├── __pycache__
        │       │       ├── __init__.cpython-310.pyc
        │       │       ├── __init__.cpython-39.pyc
        │       │       ├── vllm_rollout.cpython-310.pyc
        │       │       └── vllm_rollout.cpython-39.pyc
        │       │   └── vllm_rollout.py
        │   └── sharding_manager
        │       ├── __init__.py
        │       ├── __pycache__
        │           ├── __init__.cpython-310.pyc
        │           ├── __init__.cpython-39.pyc
        │           ├── base.cpython-310.pyc
        │           ├── base.cpython-39.pyc
        │           ├── fsdp_ulysses.cpython-310.pyc
        │           ├── fsdp_ulysses.cpython-39.pyc
        │           ├── fsdp_vllm.cpython-310.pyc
        │           └── fsdp_vllm.cpython-39.pyc
        │       ├── base.py
        │       ├── fsdp_ulysses.py
        │       ├── fsdp_vllm.py
        │       └── megatron_vllm.py
    ├── version
        └── version
    └── workers
        ├── __init__.py
        ├── __pycache__
            ├── __init__.cpython-310.pyc
            ├── __init__.cpython-39.pyc
            ├── fsdp_workers.cpython-310.pyc
            └── fsdp_workers.cpython-39.pyc
        ├── actor
            ├── __init__.py
            ├── __pycache__
            │   ├── __init__.cpython-310.pyc
            │   ├── __init__.cpython-39.pyc
            │   ├── base.cpython-310.pyc
            │   ├── base.cpython-39.pyc
            │   ├── dp_actor.cpython-310.pyc
            │   └── dp_actor.cpython-39.pyc
            ├── base.py
            ├── dp_actor.py
            └── megatron_actor.py
        ├── critic
            ├── __init__.py
            ├── __pycache__
            │   ├── __init__.cpython-310.pyc
            │   ├── base.cpython-310.pyc
            │   └── dp_critic.cpython-310.pyc
            ├── base.py
            ├── dp_critic.py
            └── megatron_critic.py
        ├── fsdp_workers.py
        ├── megatron_workers.py
        ├── reward_model
            ├── __init__.py
            ├── base.py
            └── megatron
            │   ├── __init__.py
            │   └── reward_model.py
        ├── rollout
            ├── __init__.py
            ├── __pycache__
            │   ├── __init__.cpython-310.pyc
            │   ├── __init__.cpython-39.pyc
            │   ├── base.cpython-310.pyc
            │   ├── base.cpython-39.pyc
            │   ├── hf_rollout.cpython-310.pyc
            │   ├── hf_rollout.cpython-39.pyc
            │   ├── tokenizer.cpython-310.pyc
            │   └── tokenizer.cpython-39.pyc
            ├── base.py
            ├── hf_rollout.py
            ├── naive
            │   ├── __init__.py
            │   ├── __pycache__
            │   │   ├── __init__.cpython-310.pyc
            │   │   ├── __init__.cpython-39.pyc
            │   │   ├── naive_rollout.cpython-310.pyc
            │   │   └── naive_rollout.cpython-39.pyc
            │   └── naive_rollout.py
            ├── tokenizer.py
            └── vllm_rollout
            │   ├── __init__.py
            │   ├── __pycache__
            │       ├── __init__.cpython-310.pyc
            │       ├── __init__.cpython-39.pyc
            │       ├── vllm_rollout.cpython-310.pyc
            │       └── vllm_rollout.cpython-39.pyc
            │   └── vllm_rollout.py
        └── sharding_manager
            ├── __init__.py
            ├── __pycache__
                ├── __init__.cpython-310.pyc
                ├── __init__.cpython-39.pyc
                ├── base.cpython-310.pyc
                ├── base.cpython-39.pyc
                ├── fsdp_ulysses.cpython-310.pyc
                ├── fsdp_ulysses.cpython-39.pyc
                ├── fsdp_vllm.cpython-310.pyc
                └── fsdp_vllm.cpython-39.pyc
            ├── base.py
            ├── fsdp_ulysses.py
            ├── fsdp_vllm.py
            └── megatron_vllm.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/.DS_Store


--------------------------------------------------------------------------------
/Synthetic_Data_RL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/Synthetic_Data_RL.pdf


--------------------------------------------------------------------------------
/TinyZero/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/.DS_Store


--------------------------------------------------------------------------------
/TinyZero/Notice.txt:
--------------------------------------------------------------------------------
1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates 


--------------------------------------------------------------------------------
/TinyZero/docker/Dockerfile.ngc.vllm:
--------------------------------------------------------------------------------
 1 | FROM nvcr.io/nvidia/pytorch:24.05-py3
 2 | 
 3 | # uninstall nv-pytorch fork
 4 | RUN pip3 uninstall pytorch-quantization \
 5 |      pytorch-triton \
 6 |      torch \
 7 |      torch-tensorrt \
 8 |      torchvision \
 9 |      xgboost transformer_engine flash_attn \
10 |      apex megatron-core -y
11 | 
12 | RUN pip3 install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
13 | 
14 | # make sure torch version is kept
15 | RUN pip3 install --no-cache-dir \
16 |     "torch==2.4.0" \
17 |     accelerate \
18 |     codetiming \
19 |     datasets \
20 |     dill \
21 |     hydra-core \
22 |     numpy \
23 |     pybind11 \
24 |     tensordict \
25 |     "transformers<=4.46.0"
26 | 
27 | # ray is installed via vllm
28 | RUN pip3 install --no-cache-dir vllm==0.6.3
29 | 
30 | # we choose flash-attn v2.7.0 or v2.7.2 which contain pre-built wheels
31 | RUN pip3 install --no-cache-dir --no-build-isolation flash-attn==2.7.0.post2
32 | 
33 | # install apex, set MAX_JOBS to avoid OOMs
34 | RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
35 |     --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
36 |     git+https://github.com/NVIDIA/apex
37 | 
38 | # install Transformer Engine, which requires FA 2.5.8
39 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation
40 | RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install git+https://github.com/NVIDIA/TransformerEngine.git@v1.7
41 | 
42 | # Pin wandb to v0.18 since v0.19.1 is released with ImportError
43 | RUN pip3 install wandb==0.18.7 py-spy
44 | 


--------------------------------------------------------------------------------
/TinyZero/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/docs/.DS_Store


--------------------------------------------------------------------------------
/TinyZero/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = verl
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/TinyZero/docs/README.md:
--------------------------------------------------------------------------------
 1 | # veRL documents
 2 | 
 3 | ## Build the docs
 4 | 
 5 | ```bash
 6 | # Install dependencies.
 7 | pip install -r requirements-docs.txt
 8 | 
 9 | # Build the docs.
10 | make clean
11 | make html
12 | ```
13 | 
14 | ## Open the docs with your browser
15 | 
16 | ```bash
17 | python -m http.server -d _build/html/
18 | ```
19 | Launch your browser and open localhost:8000.


--------------------------------------------------------------------------------
/TinyZero/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/docs/_static/logo.png


--------------------------------------------------------------------------------
/TinyZero/docs/advance/placement.rst:
--------------------------------------------------------------------------------
 1 | Ray API Design Tutorial
 2 | =======================================
 3 | 
 4 | We provide a tutorial for our Ray API design, including:
 5 | 
 6 | - Ray basic concepts
 7 | - Resource Pool and RayWorkerGroup
 8 | - Data Dispatch, Execution and Collection
 9 | - Initialize the RayWorkerGroup and execute the distributed computation in the given Resource Pool
10 | 
11 | See details in `tutorial.ipynb <https://github.com/volcengine/verl/blob/main/examples/ray/tutorial.ipynb>`_.


--------------------------------------------------------------------------------
/TinyZero/docs/faq/faq.rst:
--------------------------------------------------------------------------------
 1 | Frequently Asked Questions
 2 | ====================================
 3 | 
 4 | Ray related
 5 | ------------
 6 | 
 7 | How to add breakpoint for debugging with distributed Ray?
 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 9 | 
10 | Please check out the official debugging guide from Ray: https://docs.ray.io/en/latest/ray-observability/ray-distributed-debugger.html
11 | 
12 | 
13 | Distributed training
14 | ------------------------
15 | 
16 | How to run multi-node post-training with Ray?
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18 | 
19 | You can start a ray cluster and submit a ray job, following the official guide from Ray: https://docs.ray.io/en/latest/ray-core/starting-ray.html
20 | 


--------------------------------------------------------------------------------
/TinyZero/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | # markdown support
2 | recommonmark
3 | # markdown table support
4 | sphinx-markdown-tables
5 | 
6 | # theme default rtd
7 | 
8 | # crate-docs-theme
9 | sphinx-rtd-theme


--------------------------------------------------------------------------------
/TinyZero/examples/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/.DS_Store


--------------------------------------------------------------------------------
/TinyZero/examples/data_preprocess/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/data_preprocess/.DS_Store


--------------------------------------------------------------------------------
/TinyZero/examples/data_preprocess/__pycache__/BM25_retriever.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/data_preprocess/__pycache__/BM25_retriever.cpython-310.pyc


--------------------------------------------------------------------------------
/TinyZero/examples/data_preprocess/dentist_book.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/examples/data_preprocess/dentist_book.pt


--------------------------------------------------------------------------------
/TinyZero/examples/generation/run_deepseek_v2_lite_math.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_generation \
 2 |     trainer.nnodes=1 \
 3 |     trainer.n_gpus_per_node=8 \
 4 |     data.path=~/data/rlhf/gsm8k/test.parquet \
 5 |     data.prompt_key=prompt \
 6 |     data.n_samples=1 \
 7 |     data.output_path=~/data/rlhf/math/deepseek_v2_lite_gen_test.parquet \
 8 |     model.path=deepseek-ai/deepseek-llm-7b-chat \
 9 |     +model.trust_remote_code=True \
10 |     rollout.temperature=1.0 \
11 |     rollout.top_k=50 \
12 |     rollout.top_p=0.7 \
13 |     rollout.prompt_length=2048 \
14 |     rollout.response_length=1024 \
15 |     rollout.tensor_model_parallel_size=2 \
16 |     rollout.gpu_memory_utilization=0.8
17 | 


--------------------------------------------------------------------------------
/TinyZero/examples/ppo_trainer/run_deepseek_megatron.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer'\
 4 |     data.train_files=$HOME/data/gsm8k/train.parquet \
 5 |     data.val_files=$HOME/data/gsm8k/test.parquet \
 6 |     data.train_batch_size=1024 \
 7 |     data.val_batch_size=1312 \
 8 |     data.max_prompt_length=512 \
 9 |     data.max_response_length=512 \
10 |     actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
11 |     actor_rollout_ref.actor.optim.lr=2e-6 \
12 |     actor_rollout_ref.actor.ppo_mini_batch_size=256 \
13 |     actor_rollout_ref.actor.ppo_micro_batch_size=64 \
14 |     actor_rollout_ref.rollout.log_prob_micro_batch_size=64 \
15 |     actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
16 |     actor_rollout_ref.rollout.name=vllm \
17 |     actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
18 |     actor_rollout_ref.ref.log_prob_micro_batch_size=128 \
19 |     critic.optim.lr=2e-5 \
20 |     critic.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \
21 |     critic.model.enable_gradient_checkpointing=False \
22 |     critic.ppo_micro_batch_size=64 \
23 |     algorithm.kl_ctrl.kl_coef=0.001 \
24 |     trainer.critic_warmup=0 \
25 |     trainer.logger=['console','wandb'] \
26 |     trainer.project_name='verl_megatron_gsm8k_examples' \
27 |     trainer.experiment_name='deepseek_llm_7b_function_rm' \
28 |     trainer.n_gpus_per_node=8 \
29 |     trainer.nnodes=1 \
30 |     trainer.save_freq=-1 \
31 |     trainer.total_epochs=15 \
32 |     +trainer.val_before_train=False $@
33 | 


--------------------------------------------------------------------------------
/TinyZero/examples/sft/gsm8k/run_deepseek_6b7.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace with your own hdfs/local path
 4 | 
 5 | nproc_per_node=$1
 6 | 
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.response_key=answer \
13 |     data.micro_batch_size=8 \
14 |     model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
15 |     trainer.default_hdfs_dir=$hdfs_path \
16 |     trainer.project_name=gsm8k-sft \
17 |     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
18 |     trainer.total_epochs=4 \
19 |     trainer.logger=['console','wandb']


--------------------------------------------------------------------------------
/TinyZero/examples/sft/gsm8k/run_gemma_2b.sh:
--------------------------------------------------------------------------------
 1 | # Tested with 2 & 4 GPUs
 2 | 
 3 | set -x
 4 | 
 5 | if [ "$#" -lt 2 ]; then
 6 |     echo "Usage: run_gemma_2b.sh <nproc_per_node> <save_path> [other_configs...]"
 7 |     exit 1
 8 | fi
 9 | 
10 | nproc_per_node=$1
11 | save_path=$2
12 | 
13 | # Shift the arguments so "$@" holds only the extra config overrides
14 | shift 2
15 | 
16 | torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
17 |      -m verl.trainer.fsdp_sft_trainer \
18 |     data.train_files="$HOME/data/gsm8k/train.parquet" \
19 |     data.val_files="$HOME/data/gsm8k/test.parquet" \
20 |     data.prompt_key=extra_info \
21 |     data.response_key=extra_info \
22 |     +data.prompt_dict_keys=['question'] \
23 |     +data.response_dict_keys=['answer'] \
24 |     data.micro_batch_size=8 \
25 |     model.partial_pretrain=google/gemma-2b-it \
26 |     trainer.default_local_dir="$save_path" \
27 |     trainer.project_name=gsm8k-sft \
28 |     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
29 |     trainer.total_epochs=2 \
30 |     trainer.logger=['console','wandb'] \
31 |     trainer.default_hdfs_dir=null "$@"


--------------------------------------------------------------------------------
/TinyZero/examples/sft/gsm8k/run_gemma_7b.sh:
--------------------------------------------------------------------------------
 1 | set -x
 2 | 
 3 | hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace with your own hdfs/local path
 4 | 
 5 | nproc_per_node=$1
 6 | 
 7 | torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
 8 |      -m verl.trainer.fsdp_sft_trainer \
 9 |     data.train_files=$HOME/data/gsm8k/train.parquet \
10 |     data.val_files=$HOME/data/gsm8k/test.parquet \
11 |     data.prompt_key=prompt \
12 |     data.response_key=answer \
13 |     data.micro_batch_size=8 \
14 |     model.partial_pretrain=google/gemma-1.1-7b-it \
15 |     trainer.default_hdfs_dir=$hdfs_path \
16 |     trainer.project_name=gsm8k-sft \
17 |     trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
18 |     trainer.total_epochs=4 \
19 |     trainer.logger=['console','wandb']


--------------------------------------------------------------------------------
/TinyZero/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/TinyZero/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate
 2 | codetiming
 3 | datasets
 4 | dill
 5 | flash-attn
 6 | hydra-core
 7 | numpy
 8 | pandas
 9 | pybind11
10 | ray
11 | tensordict<0.6
12 | transformers<4.48
13 | vllm<=0.6.3
14 | wandb
15 | 


--------------------------------------------------------------------------------
/TinyZero/scripts/format.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pip3 install --upgrade yapf
3 | yapf -ir -vv --style ./.style.yapf verl tests single_controller examples


--------------------------------------------------------------------------------
/TinyZero/scripts/train_tiny_a100_grpo.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/scripts/train_tiny_a100_grpo.sh


--------------------------------------------------------------------------------
/TinyZero/scripts/train_tiny_zero.sh:
--------------------------------------------------------------------------------
 1 | python3 -m verl.trainer.main_ppo \
 2 | data.train_files=$DATA_DIR/train.parquet \
 3 | data.val_files=$DATA_DIR/test.parquet \
 4 | data.train_batch_size=256 \
 5 | data.val_batch_size=1312 \
 6 | data.max_prompt_length=256 \
 7 | data.max_response_length=1024 \
 8 | actor_rollout_ref.model.path=$BASE_MODEL \
 9 | actor_rollout_ref.actor.optim.lr=1e-6 \
10 | actor_rollout_ref.actor.ppo_mini_batch_size=128 \
11 | actor_rollout_ref.actor.ppo_micro_batch_size=8 \
12 | actor_rollout_ref.rollout.log_prob_micro_batch_size=8 \
13 | actor_rollout_ref.rollout.tensor_model_parallel_size=$ROLLOUT_TP_SIZE \
14 | actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
15 | actor_rollout_ref.ref.log_prob_micro_batch_size=4 \
16 | critic.optim.lr=1e-5 \
17 | critic.model.path=$BASE_MODEL \
18 | critic.ppo_micro_batch_size=8 \
19 | algorithm.kl_ctrl.kl_coef=0.001 \
20 | trainer.logger=['wandb'] \
21 | +trainer.val_before_train=False \
22 | trainer.default_hdfs_dir=null \
23 | trainer.n_gpus_per_node=$N_GPUS \
24 | trainer.nnodes=1 \
25 | trainer.save_freq=100 \
26 | trainer.test_freq=100 \
27 | trainer.project_name=TinyZero \
28 | trainer.experiment_name=$EXPERIMENT_NAME \
29 | trainer.total_epochs=15 2>&1 | tee verl_demo.log
30 | 


--------------------------------------------------------------------------------
/TinyZero/tests/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/.DS_Store


--------------------------------------------------------------------------------
/TinyZero/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/data/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/e2e/arithmetic_sequence/data/test.parquet


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/data/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/e2e/arithmetic_sequence/data/train.parquet


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/model/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "LlamaForCausalLM"
 4 |   ],
 5 |   "attention_bias": false,
 6 |   "attention_dropout": 0.0,
 7 |   "bos_token_id": null,
 8 |   "eos_token_id": 1,
 9 |   "hidden_act": "silu",
10 |   "hidden_size": 128,
11 |   "initializer_range": 0.02,
12 |   "intermediate_size": 344,
13 |   "max_position_embeddings": 2048,
14 |   "mlp_bias": false,
15 |   "model_type": "llama",
16 |   "num_attention_heads": 4,
17 |   "num_hidden_layers": 4,
18 |   "num_key_value_heads": 4,
19 |   "pad_token_id": 2,
20 |   "pretraining_tp": 1,
21 |   "rms_norm_eps": 1e-06,
22 |   "rope_scaling": null,
23 |   "rope_theta": 10000.0,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.43.3",
27 |   "use_cache": true,
28 |   "vocab_size": 16
29 | }
30 | 


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/model/generation_config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "_from_model_config": true,
3 |   "eos_token_id": 1,
4 |   "pad_token_id": 2,
5 |   "transformers_version": "4.43.3"
6 | }
7 | 


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/model/model.safetensors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/TinyZero/tests/e2e/arithmetic_sequence/model/model.safetensors


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/model/tokenizer_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "char_ords": [
 3 |         48,
 4 |         49,
 5 |         50,
 6 |         51,
 7 |         52,
 8 |         53,
 9 |         54,
10 |         55,
11 |         56,
12 |         57,
13 |         44,
14 |         58
15 |     ],
16 |     "model_max_length": 2048,
17 |     "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ sep_token }}{% endif %}"
18 | }


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/arithmetic_sequence/rl/README.md:
--------------------------------------------------------------------------------
 1 | # Digit completion
 2 | 
 3 | This is an example of solving a digit completion problem. The problem is defined as below:
 4 | 
 5 | The prompt is a sequence of numbers with a fixed difference. The agent's goal is to complete the next N numbers.
 6 | If the max number is reached, the next number is taken modulo the max number.
 7 | 
 8 | For example,
 9 | - prompt = [1, 2, 3]
10 | - N = 5
11 | - max_number = 6
12 | 
13 | The response should be [4, 5, 6, 7%6, 8%6] = [4, 5, 6, 0, 1].
14 | 
15 | # Environment definition
16 | 
17 | The task is defined in tests/e2e/envs/digit_completion/task.py
18 | 
19 | It is highly recommended to take a look at it for better understanding.
20 | 
21 | 
22 | 
23 | # Run experiments
24 | 
25 | The users are required to specify the config path and config name (and the relative model config path to the current working directory)
26 | 
27 | ```bash
28 | # cd examples/arithmetic_sequence/rl
29 | 
30 | # Specify the config path and config name (current working dir)
31 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron'
32 | 
33 | # The default relative path of model config is 'config/model_config', if you want to change it, you can rewrite it in ray_megatron.yaml or using:
34 | python3 -m verl.trainer.ppo.ray_megatron_train_synchronous --config-path=$(pwd)/config --config-name='ray_megatron' ++model.base_path=config/model_config
35 | 
36 | ```
37 | 
38 | 


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/envs/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .digit_completion import DigitCompletion
16 | 
17 | __all__ = ['DigitCompletion']


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/envs/digit_completion/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .task import DigitCompletion, generate_ground_truth_response
16 | from .tokenizer import CharTokenizer
17 | 
18 | from transformers import AutoTokenizer, LlamaConfig
19 | 
20 | AutoTokenizer.register(LlamaConfig, CharTokenizer, exist_ok=True)
21 | 
22 | __all__ = ['DigitCompletion', 'generate_ground_truth_response', 'CharTokenizer']


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/run_ray_trainer.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # pipefail: otherwise `set -e` only sees tee's exit status and a trainer failure is masked.
 3 | set -e -x -o pipefail
 4 | 
 5 | OUTPUT_FILE="/tmp/output_ray_trainer.txt"
 6 | 
 7 | export PATH=$PATH:~/.local/bin
 8 | 
 9 | rm -rf "$OUTPUT_FILE"
10 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
11 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
12 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
13 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
14 |     critic.model.path=tests/e2e/arithmetic_sequence/model | tee "$OUTPUT_FILE";
15 | 
16 | python3 tests/e2e/check_results.py --output_file="$OUTPUT_FILE"
17 | rm -rf "$OUTPUT_FILE"
18 | 


--------------------------------------------------------------------------------
/TinyZero/tests/e2e/run_ray_trainer_rmpad.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e -x
 4 | 
 5 | python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \
 6 |     data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \
 7 |     data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \
 8 |     actor_rollout_ref.model.path=tests/e2e/arithmetic_sequence/model \
 9 |     actor_rollout_ref.rollout.name=vllm \
10 |     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
11 |     actor_rollout_ref.model.tokenizer_path=tests/e2e/arithmetic_sequence/model \
12 |     critic.model.path=Qwen/Qwen2.5-0.5B \
13 |     critic.model.use_remove_padding=True \
14 |     trainer.total_epochs=1


--------------------------------------------------------------------------------
/TinyZero/tests/ray/detached_worker/README.md:
--------------------------------------------------------------------------------
 1 | # Detached Worker
 2 | ## How to run (Only on a single node)
 3 | - Start a local ray cluster: 
 4 | ```bash
 5 | ray start --head --port=6379
 6 | ```
 7 | - Run the server
 8 | ```bash
 9 | python3 server.py
10 | ```
11 | - In another terminal, run the client
12 | ```bash
13 | python3 client.py
14 | ```
15 | 


--------------------------------------------------------------------------------
/TinyZero/tests/ray/detached_worker/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ray start --head --port=6379
3 | python3 server.py
4 | python3 client.py
5 | ray stop --force


--------------------------------------------------------------------------------
/TinyZero/tests/ray/test_check_worker_alive.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import time
16 | import os
17 | import subprocess
18 | 
19 | 
20 | def test():
21 |     wait_time = 10
22 | 
23 |     my_env = os.environ.copy()
24 |     my_env["WAIT_TIME"] = str(wait_time)
25 | 
26 |     p = subprocess.Popen(["python3", "-u", "./check_worker_alive/main.py"], env=my_env, stdout=subprocess.PIPE)
27 | 
28 |     count = 0
29 |     while b"foo started" not in p.stdout.read():
30 |         time.sleep(1)
31 |         count += 1
32 |         if count > 40:
33 |             raise RuntimeError("timeout for start foo in check_worker_alive/main.py")
34 | 
35 |     print(
36 |         time.time(),
37 |         f"wait 1.5 wait time {wait_time*1.5} to let signal returned to process but still not exceed process wait time")
38 |     time.sleep(wait_time * 1.5)
39 |     print(time.time(), f"start checking")
40 |     assert p.poll() is not None, f"process {p} still alive, expecting signal raised abort"
41 |     assert p.returncode != 0, f"process {p} exit with code 0, expecting not-zero exit code"
42 |     print(f"test passed")
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     test()
47 | 


--------------------------------------------------------------------------------
/TinyZero/tests/sanity/check_license.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | license_head = "Copyright 2024 Bytedance Ltd. and/or its affiliates"
16 | 
17 | from pathlib import Path
18 | from argparse import ArgumentParser
19 | 
20 | if __name__ == '__main__':
21 |     parser = ArgumentParser()
22 |     parser.add_argument('--directory', '-d', required=True, type=str)
23 |     args = parser.parse_args()
24 |     directory_in_str = args.directory
25 | 
26 |     pathlist = Path(directory_in_str).glob('**/*.py')
27 |     for path in pathlist:
28 |         # because path is object not string
29 |         path_in_str = str(path.absolute())
30 |         with open(path_in_str, 'r') as f:
31 |             file_content = f.read()
32 | 
33 |             assert license_head in file_content, f'file {path_in_str} does not contain license'
34 | 
35 |         print(path_in_str)
36 | 


--------------------------------------------------------------------------------
/TinyZero/tests/sanity/test_import.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
def test_import():
    """Check that the ``verl`` package imports and print its version."""
    import verl

    version = verl.__version__
    print(version)
19 | 
20 | 
def test_single_controller_import():
    """Check that ``verl.single_controller`` imports and print its version."""
    import verl.single_controller

    controller_pkg = verl.single_controller
    print(controller_pkg.__version__)
24 | 


--------------------------------------------------------------------------------
/TinyZero/tests/verl/utils/dataset/test_rm_dataset.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 | 
16 | from transformers import AutoTokenizer
17 | from verl.utils import hf_tokenizer
18 | from verl.utils.dataset.rm_dataset import RMDataset
19 | 
20 | 
def get_rm_data():
    """Return the local path of the reward-model test parquet file.

    The file is cached at ``~/verl-data/full_hh_rlhf/rm/test.parquet``. The
    folder is created when missing, and the file is downloaded from ``url``
    only when it is not already present locally.

    Returns:
        str: Path to the local ``test.parquet`` file.
    """
    # Imported locally so the module's top-level imports stay unchanged.
    from urllib.request import urlretrieve

    # prepare test dataset
    url = "https://github.com/eric-haibin-lin/verl-data/raw/refs/heads/main/full_hh_rlhf/rm/test.parquet"
    local_folder = os.path.expanduser('~/verl-data/full_hh_rlhf/rm/')
    local_path = os.path.join(local_folder, 'test.parquet')
    os.makedirs(local_folder, exist_ok=True)
    # ``url`` used to be assigned but never used, so on a fresh machine the
    # returned path pointed at a file that did not exist. Fetch it on demand.
    if not os.path.exists(local_path):
        # Download to a temporary name first so an interrupted transfer never
        # leaves a truncated file at the final path.
        partial_path = local_path + '.part'
        urlretrieve(url, partial_path)
        os.replace(partial_path, local_path)
    return local_path
28 | 
29 | 
def test_rm_dataset():
    """Load the first RM dataset item and check it decodes to several strings."""
    tokenizer = hf_tokenizer("facebook/opt-1.3b")
    local_path = get_rm_data()
    dataset = RMDataset(parquet_files=local_path, tokenizer=tokenizer, max_length=512)
    data = dataset[0]['input_ids']
    output = tokenizer.batch_decode(data)
    assert len(output) > 1
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(output[0], str)
38 | 


--------------------------------------------------------------------------------
/TinyZero/train_RL_base.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Launch script: restarts a local Ray head node, exports the settings below,
# and runs TinyZero/scripts/train_tiny_zero_a100_grpo.sh.
#
# Positional arguments:
#   $1 dataset_path      exported as DATA_DIR
#   $2 train_model_path  exported as BASE_MODEL
#   $3 save_model_path   exported as EXPERIMENT_NAME
#   $4 temperature       forwarded to the training script
#   $5 rollout           forwarded to the training script
#   $6 batch_size        forwarded to the training script
#   $7 response_length   forwarded to the training script
#
# The training script path is relative, so it is resolved against the current
# working directory.
# alias python='/home/weiji/anaconda3/envs/zero/bin/python'
# alias python3='/home/weiji/anaconda3/envs/zero/bin/python3'
# alias pip='/home/weiji/anaconda3/envs/zero/bin/pip'
dataset_path=$1
train_model_path=$2
save_model_path=$3
temperature=$4
rollout=$5
batch_size=$6
response_length=$7
export N_GPUS=4
# Placeholder value; replace with a real key, or enable WANDB_MODE=disabled below.
export WANDB_API_KEY='xxx'
#export WANDB_MODE=disabled
# Four devices, indices 1-4; device 0 is not exposed to the job.
export CUDA_VISIBLE_DEVICES=1,2,3,4

# Stop any running Ray instance, then start a fresh head node with the dashboard.
ray stop --force && ray start --head --include-dashboard=True

# Exported for the child script (presumably read there; verify against
# train_tiny_zero_a100_grpo.sh).
export BASE_MODEL="$train_model_path"
export DATA_DIR="$dataset_path"
export ROLLOUT_TP_SIZE=4
export EXPERIMENT_NAME="$save_model_path"
export VLLM_ATTENTION_BACKEND=XFORMERS

# The four values are passed unquoted, so an empty argument is dropped rather
# than forwarded as an empty string.
bash TinyZero/scripts/train_tiny_zero_a100_grpo.sh $temperature $rollout $batch_size $response_length
#bash ./scripts/train_tiny_zero.sh
27 | 


--------------------------------------------------------------------------------
/TinyZero/train_SFT_base.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # alias python='/home/weiji/anaconda3/envs/zero/bin/python'
 3 | # alias python3='/home/weiji/anaconda3/envs/zero/bin/python3'
 4 | # alias pip='/home/weiji/anaconda3/envs/zero/bin/pip'
 5 | dataset_path=$1
 6 | train_model_path=$2
 7 | save_model_path=$3
 8 | batch_size=$4
 9 | max_length=$5
10 | 
11 | export N_GPUS=8
12 | export WANDB_API_KEY='xxx'
13 | # export WANDB_MODE=disabled
14 | export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
15 | 
16 | ray stop --force && ray start --head --include-dashboard=True
17 | 
18 | torchrun --standalone --nnodes=1 --nproc_per_node=$N_GPUS \
19 |     -m verl.trainer.fsdp_sft_trainer \
20 |     data.train_files="$dataset_path"/train.parquet \
21 |     data.val_files="$dataset_path"/test.parquet \
22 |     data.prompt_key=input \
23 |     data.max_length=$max_length \
24 |     data.response_key=output \
25 |     data.train_batch_size=$batch_size \
26 |     data.micro_batch_size=$N_GPUS \
27 |     model.partial_pretrain="$train_model_path" \
28 |     trainer.default_hdfs_dir="$save_model_path" \
29 |     trainer.logger=['console','wandb'] \
30 |     trainer.project_name=sft \
31 |     trainer.experiment_name="SFT_experiment" \
32 |     trainer.total_epochs=3 \
33 |     optim.lr=1e-6 \
34 |     optim.weight_decay=0.01
35 | 


--------------------------------------------------------------------------------
/TinyZero/train_base.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Launch script: restarts a local Ray head node, exports the settings below,
# and runs TinyZero/scripts/train_tiny_zero_a100_grpo.sh.
#
# Positional arguments:
#   $1 dataset_path      exported as DATA_DIR
#   $2 train_model_path  exported as BASE_MODEL
#   $3 save_model_path   exported as EXPERIMENT_NAME
#   $4 temperature       forwarded to the training script
#   $5 rollout           forwarded to the training script
#   $6 batch_size        forwarded to the training script
#   $7 response_length   forwarded to the training script
#
# The training script path is relative, so it is resolved against the current
# working directory.
dataset_path=$1
train_model_path=$2
save_model_path=$3
temperature=$4
rollout=$5
batch_size=$6
response_length=$7
export N_GPUS=4
# Placeholder value; replace with a real key, or enable WANDB_MODE=disabled below.
export WANDB_API_KEY='xxx'
#export WANDB_MODE=disabled
# Four devices, indices 1-4; device 0 is not exposed to the job.
export CUDA_VISIBLE_DEVICES=1,2,3,4

# Stop any running Ray instance, then start a fresh head node with the dashboard.
ray stop --force && ray start --head --include-dashboard=True

# Exported for the child script (presumably read there; verify against
# train_tiny_zero_a100_grpo.sh).
export BASE_MODEL="$train_model_path"
export DATA_DIR="$dataset_path"
export ROLLOUT_TP_SIZE=4
export EXPERIMENT_NAME="$save_model_path"
export VLLM_ATTENTION_BACKEND=XFORMERS

# The four values are passed unquoted, so an empty argument is dropped rather
# than forwarded as an empty string.
bash TinyZero/scripts/train_tiny_zero_a100_grpo.sh $temperature $rollout $batch_size $response_length
#bash ./scripts/train_tiny_zero.sh
24 | 


--------------------------------------------------------------------------------
/TinyZero/verl.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/TinyZero/verl.egg-info/requires.txt:
--------------------------------------------------------------------------------
 1 | accelerate
 2 | codetiming
 3 | datasets
 4 | dill
 5 | hydra-core
 6 | numpy
 7 | pybind11
 8 | ray
 9 | tensordict
10 | transformers<4.48
11 | vllm<=0.6.3
12 | 
13 | [test]
14 | pytest
15 | yapf
16 | 


--------------------------------------------------------------------------------
/TinyZero/verl.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | tests
2 | verl
3 | 


--------------------------------------------------------------------------------
/activate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Create and activate a virtual environment
 4 | #conda create --name myenv python=3.9
 5 | #pip install --upgrade pip
 6 | 
 7 | # Install PyTorch (optional, vLLM can install the correct version)
 8 | 
 9 | 
10 | # Install vLLM
11 | pip install vllm==0.6.3  # Change version if needed
12 | pip install ray
13 | 
14 | pip install tensordict
15 | 
16 | # Install verl
17 | pip install omegaconf
18 | pip install -e .
19 | pip install -r requirements.txt
20 | 
21 | # Install FlashAttention 2
22 | 
23 | # Install quality-of-life tools
24 | pip install wandb IPython matplotlib
25 | pip install openai anthropic tree_sitter
26 | pip install tenacity==8.2.2 pydantic==1.10.7 rank-bm25==0.2.2
27 | pip install -U "ray[default]"
28 | pip install "pydantic>=2"
29 | pip install huggingface_hub
30 | pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu121
31 | pip install flash-attn==2.7.3
32 | 


--------------------------------------------------------------------------------
/img/Overviewv2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/Overviewv2.png


--------------------------------------------------------------------------------
/img/final-one.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/final-one.png


--------------------------------------------------------------------------------
/img/final1.4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/final1.4.png


--------------------------------------------------------------------------------
/img/final2.4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/img/final2.4.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate
 2 | codetiming
 3 | datasets
 4 | dill
 5 | flash-attn
 6 | hydra-core
 7 | numpy
 8 | pandas
 9 | pybind11
10 | ray
11 | tensordict<0.6
12 | transformers<4.48
13 | vllm<=0.6.3
14 | wandb
15 | 


--------------------------------------------------------------------------------
/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/.DS_Store


--------------------------------------------------------------------------------
/src/data_generator/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/data_generator/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/data_generator/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/src/data_generator/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/data_generator/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/data_generator/__pycache__/generator.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/data_generator/__pycache__/generator.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/.DS_Store


--------------------------------------------------------------------------------
/src/eval/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/eval/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/.DS_Store


--------------------------------------------------------------------------------
/src/eval/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/eval/tasks/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/__pycache__/task_manager.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/__pycache__/task_manager.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/cfa/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/eval_function.py:
--------------------------------------------------------------------------------
1 | from verl.utils.reward_score.cfa import compute_score
def eval_function(pred: str, label: str):
    """Return True when ``compute_score`` gives a non-zero score.

    Args:
        pred: Model output forwarded to ``compute_score``.
        label: Reference answer forwarded to ``compute_score``.

    Returns:
        bool: False if the score is ``None`` or equal to 0, True otherwise.
    """
    score = compute_score(pred, label, valid=True)
    return not (score is None or score == 0)
8 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/get_fixed_options.py:
--------------------------------------------------------------------------------
def get_fixed_options():
    """Return the fixed answer option labels for the CFA task."""
    return list("ABC")


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the input-format instruction string for the CFA task.

    The text is a plain literal: the original f-string had no placeholders,
    so the ``f`` prefix was unnecessary. The returned value is unchanged.
    """
    return (
        "Follow this format: 'Read the questions and answers carefully, "
        "and choose the one you think is appropriate among the three options "
        "A, B and C.' then Q:[Your question here] CHOICES: A: ...,B: ...,C: ..."
    )
3 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the output-format instruction (think/answer tags) for CFA."""
    instruction = (
        "Your output thinking process and answer should be enclosed within "
        "<think> </think> and <answer> </answer> tags, respectively, i.e., "
        "<think> thinking process here </think> "
        "<answer> a single option here </answer>. "
    )
    return instruction
3 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/process_label.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
def process_label(label: str):
    """Return ``label`` with leading and trailing whitespace removed."""
    cleaned = label.strip()
    return cleaned
5 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cfa/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(pred:str):
 3 |     answer_pattern = r'<answer>(.*?)</answer>'
 4 |     match = re.finditer(answer_pattern, pred)
 5 |     matches = list(match)
 6 |     if matches:
 7 |         final_str = matches[-1].group(1).strip()
 8 |         return final_str
 9 |     return None
10 |         
11 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/cqa/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/eval_function.py:
--------------------------------------------------------------------------------
1 | from verl.utils.reward_score.cqa import compute_score
def eval_function(pred: str, label: str):
    """Return whether ``compute_score`` rates ``pred`` as correct for ``label``.

    Returns:
        bool: True unless the score is ``None`` or equal to 0.
    """
    result = compute_score(pred, label, valid=True)
    failed = result is None or result == 0
    return not failed
8 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/get_fixed_options.py:
--------------------------------------------------------------------------------
def get_fixed_options():
    """Return the fixed answer options for the CQA task."""
    options = ["Yes", "No"]
    return options


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the input-format instruction string for the CQA task."""
    instruction = (
        "Your input should consists of a contract passage like 'Contract:...' "
        "and then a yes-or-no question like 'Question:...'"
    )
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the output-format instruction (think/answer tags) for CQA."""
    instruction = (
        "Your output thinking process and answer should be enclosed within "
        "<think> </think> and <answer> </answer> tags, respectively, i.e., "
        "<think> thinking process here </think> "
        "<answer> Yes or No here </answer>. "
    )
    return instruction
3 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/process_label.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
def process_label(label: str):
    """Return ``label`` with surrounding whitespace stripped."""
    trimmed = label.strip()
    return trimmed
5 | 


--------------------------------------------------------------------------------
/src/eval/tasks/cqa/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(pred:str):
 3 |     answer_pattern = r'<answer>(.*?)</answer>'
 4 |     match = re.finditer(answer_pattern, pred)
 5 |     matches = list(match)
 6 |     if matches:
 7 |         final_str = matches[-1].group(1).strip()
 8 |         return final_str
 9 |     return None
10 |         
11 | 


--------------------------------------------------------------------------------
/src/eval/tasks/gpqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gpqa/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/gpqa/get_fixed_options.py:
--------------------------------------------------------------------------------
def get_fixed_options():
    """Return the fixed answer option labels for the GPQA task."""
    return list("ABCD")


--------------------------------------------------------------------------------
/src/eval/tasks/gpqa/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the input-format description string for the GPQA task."""
    description = (
        "Each data instance typically consists of a scientific question and "
        "4 option labels and values are the corresponding answer texts."
    )
    return description


--------------------------------------------------------------------------------
/src/eval/tasks/gpqa/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the output-format instruction (think/answer tags) for GPQA."""
    instruction = (
        "Your output thinking process and answer should be enclosed within "
        "<think> </think> and <answer> </answer> tags, respectively, i.e., "
        "<think> thinking process here </think> "
        "<answer> the correct option here </answer>. "
    )
    return instruction
3 | 


--------------------------------------------------------------------------------
/src/eval/tasks/gpqa/process_label.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def extract_solution(solution_str):
 3 |     # Remove everything before the first "Assistant:"
 4 | 
 5 |     answer_pattern = r'<answer>(.*?)</answer>'
 6 |     match = re.finditer(answer_pattern, solution_str)
 7 |     matches = list(match)
 8 |     if matches:
 9 |         final_answer = matches[-1].group(1).strip()
10 |     else:
11 |         final_answer = None
12 | #    if final_answer is not None:
13 |  #       try:
14 |  #           int_final_answer = int(final_answer)
15 |  #       except ValueError:
16 |  #           final_answer = None
17 |     return final_answer
def process_label(label: str):
    """Return the first standalone option letter (A-D) in ``label``.

    Returns:
        The matched letter, or ``None`` when ``label`` contains no
        word-delimited ``A``-``D`` character.
    """
    first = re.search(r'\b[A-D]\b', label)
    return first.group(0) if first is not None else None
23 | 


--------------------------------------------------------------------------------
/src/eval/tasks/gpqa/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(pred:str):
 3 |     answer_pattern = r'<answer>(.*?)</answer>'
 4 |     match = re.finditer(answer_pattern, pred)
 5 |     matches = list(match)
 6 |     if matches:
 7 |         final_str = matches[-1].group(1).strip()
 8 |         option_matches=re.findall(r'\b[A-D]\b', final_str)
 9 |         if option_matches:
10 |             return option_matches[0]
11 |     return None
12 |         #sens=pred.split('.')
13 |         #final_sens=[sen for sen in sens if 'final' in sen]
14 |     
15 |         
16 | 


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__pycache__/eval_function.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/eval_function.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__pycache__/get_output_instruction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/get_output_instruction.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__pycache__/process_and_save_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/process_and_save_dataset.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__pycache__/process_label.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/process_label.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/__pycache__/process_prediction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/gsm8k/__pycache__/process_prediction.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/eval_function.py:
--------------------------------------------------------------------------------
 1 | from verl.utils.reward_score.gsm8k import compute_score
 2 | 
 3 | 
 4 | def eval_function(pred:str, label:str):
 5 |     eval=compute_score(pred, label,valid=True)
 6 |     if eval is None or eval==0:
 7 |         return False
 8 |     else:
 9 |         return True
10 |         '''
11 |         if pred is None or label is None:
12 |             return False
13 |         elif abs(float(pred)-float(label))>1e-3:
14 |             return False
15 |         else:
16 |             return True
17 |         '''
18 | 


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the input-format instruction for GSM8K; this task has none."""
    instruction = None
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the output-format instruction string for the GSM8K task."""
    instruction = (
        "Let's think step by step and output the final answer "
        'after "####".'
    )
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/gsm8k/process_label.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from eval.tasks.gsm8k.process_prediction import process_prediction
 3 | def process_label(label:str):
 4 |     try_1=process_prediction(label)
 5 |     if try_1 is not None:
 6 |         return try_1
 7 |     else:
 8 |         if len(label.split('\n\n'))>1:
 9 |           return process_prediction(label.split('\n\n')[-2])
10 |         else:
11 |           return
12 |     return label
13 | 


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__pycache__/eval_function.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__pycache__/eval_function.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__pycache__/get_output_instruction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__pycache__/get_output_instruction.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__pycache__/process_and_save_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__pycache__/process_and_save_dataset.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__pycache__/process_label.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__pycache__/process_label.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/__pycache__/process_prediction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/logiqa/__pycache__/process_prediction.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/eval_function.py:
--------------------------------------------------------------------------------
1 | from verl.utils.reward_score.logiqa import compute_score
def eval_function(pred: str, label: str):
    """Return whether ``compute_score`` rates ``pred`` as correct for ``label``.

    Returns:
        bool: False when the score is ``None`` or equal to 0, else True.
    """
    outcome = compute_score(pred, label, valid=True)
    return not (outcome is None or outcome == 0)
8 | 


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the input-format instruction string for the LogiQA task."""
    instruction = (
        "Your input should consists of a context passage like 'Context:...' "
        "and then a logic question like 'Question:...' "
        "and then ABCD four different options"
    )
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the output-format instruction (think/answer tags) for LogiQA."""
    instruction = (
        "Your output thinking process and answer should be enclosed within "
        "<think> </think> and <answer> </answer> tags, respectively, i.e., "
        "<think> thinking process here </think> "
        "<answer> the correct option here </answer>. "
    )
    return instruction
3 | 


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/process_label.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def extract_solution(solution_str):
 3 |     # Remove everything before the first "Assistant:"
 4 | 
 5 |     answer_pattern = r'<answer>(.*?)</answer>'
 6 |     match = re.finditer(answer_pattern, solution_str)
 7 |     matches = list(match)
 8 |     if matches:
 9 |         final_answer = matches[-1].group(1).strip()
10 |     else:
11 |         final_answer = None
12 | #    if final_answer is not None:
13 |  #       try:
14 |  #           int_final_answer = int(final_answer)
15 |  #       except ValueError:
16 |  #           final_answer = None
17 |     return final_answer
def process_label(label: str):
    """Return the first standalone option letter (A-E) in ``label``.

    Returns:
        The matched letter, or ``None`` when ``label`` contains no
        word-delimited ``A``-``E`` character.
    """
    first = re.search(r'\b[A-E]\b', label)
    if first is None:
        return None
    return first.group(0)
23 | 


--------------------------------------------------------------------------------
/src/eval/tasks/logiqa/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(pred:str):
 3 |         #sens=pred.split('.')
 4 |         #final_sens=[sen for sen in sens if 'final' in sen]
 5 |     answer_pattern = r'<answer>(.*?)</answer>'
 6 |     match = re.finditer(answer_pattern, pred)
 7 |     matches = list(match)
 8 |     if matches:
 9 |         final_str = matches[-1].group(1).strip()
10 |         option_matches=re.findall(r'\b[A-E]\b', final_str)
11 |         if option_matches:
12 |             return option_matches[0]
13 |     return None
14 |         
15 | 


--------------------------------------------------------------------------------
/src/eval/tasks/math/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/math/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/math/__pycache__/eval_function.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/eval_function.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/math/__pycache__/get_output_instruction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/get_output_instruction.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/math/__pycache__/process_and_save_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/process_and_save_dataset.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/math/__pycache__/process_label.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/process_label.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/math/__pycache__/process_prediction.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/math/__pycache__/process_prediction.cpython-39.pyc


--------------------------------------------------------------------------------
/src/eval/tasks/math/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the input-format instruction for the math task, which is ``None``."""
    instruction = None
    return instruction
3 | #"Let's think step by step and output the final answer within \\boxed{}."
4 |     


--------------------------------------------------------------------------------
/src/eval/tasks/math/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the fixed text asking for step-by-step reasoning and a boxed answer."""
    instruction = (
        "Let's think step by step "
        "and output the final answer within \\boxed{}."
    )
    return instruction
3 |     


--------------------------------------------------------------------------------
/src/eval/tasks/math/process_label.py:
--------------------------------------------------------------------------------
 1 | def remove_boxed(s):
 2 |     try:
 3 |         if "\\boxed " in s:
 4 |             left = "\\boxed "
 5 |             assert s[:len(left)] == left
 6 |             return s[len(left):]
 7 |     except:
 8 |         return None
 9 | 
10 |     left = "\\boxed{"
11 | 
12 |     assert s[:len(left)] == left
13 |     assert s[-1] == "}"
14 | 
15 |     return s[len(left):-1]
def last_boxed_only_string(string):
    r"""Return the last ``\boxed`` (or ``\fbox``) expression found in ``string``.

    If the space form ``\boxed `` occurs anywhere, the text after its last
    occurrence up to the next ``$`` is returned with ``\boxed `` prepended.
    Otherwise scanning starts at the last ``\boxed`` (falling back to the
    last ``\fbox``) and runs to the closing brace that brings the brace
    depth back to zero.

    Returns ``None`` when neither command is present or the braces never
    balance.
    """
    spaced = "\\boxed "
    start = string.rfind("\\boxed")
    if spaced in string:
        tail = string.split(spaced)[-1]
        return spaced + tail.split("$")[0]

    if start < 0:
        start = string.rfind("\\fbox")
    if start < 0:
        return None

    depth = 0
    for pos in range(start, len(string)):
        ch = string[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            # Only a closing brace can end the expression.
            if depth == 0:
                return string[start:pos + 1]
    return None
def process_label(text):
    """Return the payload of the last boxed expression in ``text``.

    Chains ``last_boxed_only_string`` and ``remove_boxed``.
    """
    boxed = last_boxed_only_string(text)
    return remove_boxed(boxed)
46 | 
47 | 


--------------------------------------------------------------------------------
/src/eval/tasks/math/process_prediction.py:
--------------------------------------------------------------------------------
 1 | def remove_boxed(s):
 2 |     if s is None:
 3 |         return None
 4 |     if "\\boxed " in s:
 5 |         left = "\\boxed "
 6 |         assert s[:len(left)] == left
 7 |         return s[len(left):]
 8 | 
 9 |     left = "\\boxed{"
10 |     try:
11 |         assert s[:len(left)] == left
12 |         assert s[-1] == "}"
13 |     except:
14 |         return None
15 | 
16 |     return s[len(left):-1]
def last_boxed_only_string(string):
    r"""Locate the final ``\boxed``/``\fbox`` expression inside ``string``.

    When ``\boxed `` (with a trailing space) appears, the result is
    ``\boxed `` followed by the text after its last occurrence, cut at the
    first ``$``. Otherwise the braces following the last ``\boxed`` (or,
    failing that, the last ``\fbox``) are matched and the whole command
    including its balanced braces is returned.

    Returns ``None`` if no command is found or no balancing ``}`` exists.
    """
    anchor = string.rfind("\\boxed")
    if "\\boxed " in string:
        after_last = string.split("\\boxed ")[-1]
        return "\\boxed " + after_last.split("$")[0]

    if anchor < 0:
        anchor = string.rfind("\\fbox")
        if anchor < 0:
            return None

    open_braces = 0
    end = None
    for offset, char in enumerate(string[anchor:]):
        if char == "{":
            open_braces += 1
        elif char == "}":
            open_braces -= 1
            if open_braces == 0:
                end = anchor + offset
                break

    return None if end is None else string[anchor:end + 1]
def process_prediction(text):
    """Return the payload of the last boxed expression in a model response.

    Chains ``last_boxed_only_string`` and ``remove_boxed``.
    """
    boxed = last_boxed_only_string(text)
    return remove_boxed(boxed)
47 | 
48 | 


--------------------------------------------------------------------------------
/src/eval/tasks/mednli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/mednli/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/mednli/eval_function.py:
--------------------------------------------------------------------------------
1 | from verl.utils.reward_score.mednli import compute_score
def eval_function(pred:str, label:str):
    """Report whether ``compute_score`` gives ``pred`` a non-zero score.

    ``compute_score`` is called with ``valid=True``; a result of ``None`` or
    ``0`` counts as incorrect.

    Returns:
        ``False`` when the score is ``None`` or equal to 0, else ``True``.
    """
    score = compute_score(pred, label, valid=True)
    return not (score is None or score == 0)
8 | 


--------------------------------------------------------------------------------
/src/eval/tasks/mednli/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the fixed text describing how a MedNLI input should be laid out."""
    instruction = (
        "Your input should start with 'Please classify the relationship between "
        "the premise and the hypothesis as 'entailment','neutral' or 'contradiction'.'. "
        "Then the premise sentence, and then the hypothesis sentence."
    )
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/mednli/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the fixed text asking for <think>/<answer> tagged output."""
    instruction = (
        'Your output thinking process and answer should be enclosed within '
        '<think> </think> and <answer> </answer> tags, respectively, i.e., '
        '<think> thinking process here </think> '
        '<answer> the correct option here </answer>. '
    )
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/mednli/process_label.py:
--------------------------------------------------------------------------------
1 | import re
def process_label(solution_str):
    """Return ``solution_str`` unchanged (identity pass-through)."""
    label = solution_str
    return label
5 | 


--------------------------------------------------------------------------------
/src/eval/tasks/mednli/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(solution_str):
 3 |     answer_pattern = r'<answer>(.*?)</answer>'
 4 |     match = re.finditer(answer_pattern, solution_str)
 5 |     matches = list(match)
 6 |     if matches:
 7 |         final_answer = matches[-1].group(1).strip()
 8 |     else:
 9 |         final_answer = None
10 | #    if final_answer is not None:
11 |  #       try:
12 |  #           int_final_answer = int(final_answer)
13 |  #       except ValueError:
14 |  #           final_answer = None
15 |     return final_answer


--------------------------------------------------------------------------------
/src/eval/tasks/medqa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/eval/tasks/medqa/__init__.py


--------------------------------------------------------------------------------
/src/eval/tasks/medqa/eval_function.py:
--------------------------------------------------------------------------------
1 | from verl.utils.reward_score.medqa import compute_score
def eval_function(pred:str, label:str):
    """Report whether ``compute_score`` gives ``pred`` a non-zero score.

    ``compute_score`` is called with ``valid=True``; a result of ``None`` or
    ``0`` counts as incorrect.

    Returns:
        ``False`` when the score is ``None`` or equal to 0, else ``True``.
    """
    score = compute_score(pred, label, valid=True)
    is_wrong = score is None or score == 0
    return False if is_wrong else True
8 | 


--------------------------------------------------------------------------------
/src/eval/tasks/medqa/get_input_instruction.py:
--------------------------------------------------------------------------------
def get_input_instruction():
    """Return the fixed text describing how a MedQA input should be laid out."""
    instruction = (
        "First a clinical vignettes or diagrams. "
        "A clinical vignette is a short, descriptive medical case that simulates "
        "a real-life scenario involving a patient. "
        "It includes details like: Patient demographics (age, sex, etc.),"
        "Medical history,Symptoms and signs,Lab or imaging results,"
        "Progression or complication. "
        "Then a USMLE-style multiple-choice question with its four options. "
    )
    return instruction


--------------------------------------------------------------------------------
/src/eval/tasks/medqa/get_output_instruction.py:
--------------------------------------------------------------------------------
def get_output_instruction():
    """Return the fixed text asking for <think>/<answer> tagged output."""
    head = 'Your output thinking process and answer should be enclosed within <think> </think> and <answer> </answer> tags, respectively, '
    example = 'i.e., <think> thinking process here </think> <answer> the correct option here </answer>. '
    return head + example
3 | 


--------------------------------------------------------------------------------
/src/eval/tasks/medqa/process_label.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def extract_solution(solution_str):
 3 |     # Remove everything before the first "Assistant:"
 4 | 
 5 |     answer_pattern = r'<answer>(.*?)</answer>'
 6 |     match = re.finditer(answer_pattern, solution_str)
 7 |     matches = list(match)
 8 |     if matches:
 9 |         final_answer = matches[-1].group(1).strip()
10 |     else:
11 |         final_answer = None
12 | #    if final_answer is not None:
13 |  #       try:
14 |  #           int_final_answer = int(final_answer)
15 |  #       except ValueError:
16 |  #           final_answer = None
17 |     return final_answer
def process_label(label:str):
    """Return the first stand-alone option letter (A-D) found in ``label``.

    Letters embedded in longer words do not count because the pattern
    requires a word boundary on each side. Returns ``None`` if none is found.
    """
    first = re.search(r'\b[A-D]\b', label)
    if first is None:
        return None
    return first.group(0)
23 | 


--------------------------------------------------------------------------------
/src/eval/tasks/medqa/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(pred:str):
 3 |         #sens=pred.split('.')
 4 |         #final_sens=[sen for sen in sens if 'final' in sen]
 5 |     answer_pattern = r'<answer>(.*?)</answer>'
 6 |     match = re.finditer(answer_pattern, pred)
 7 |     matches = list(match)
 8 |     if matches:
 9 |         final_str = matches[-1].group(1).strip()
10 |         option_matches=re.findall(r'\b[A-D]\b', final_str)
11 |         if option_matches:
12 |             return option_matches[0]
13 |     return None
14 |         
15 | 


--------------------------------------------------------------------------------
/src/eval/tasks/task_manager.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import importlib.util
 3 | class TaskManager:
 4 |     def __init__(self, tasks_folder="./src/eval/tasks"):
 5 |         self.tasks_folder = tasks_folder
 6 |     def _load_function(self, task_name, function_name):
 7 |         """Loads a specific function from a task folder."""
 8 |         task_path = os.path.join(self.tasks_folder, task_name, f"{function_name}.py")
 9 |         if not os.path.isfile(task_path):
10 |             raise FileNotFoundError(f"{task_path} does not exist!")
11 |         spec = importlib.util.spec_from_file_location(function_name, task_path)
12 |         module = importlib.util.module_from_spec(spec)
13 |         spec.loader.exec_module(module)
14 |         if not hasattr(module, function_name):
15 |             raise AttributeError(f"{function_name} not found in {task_path}")
16 |         return getattr(module, function_name)
17 |     def load_task(self, task_name):
18 |         """Loads all functions for a given task and binds them to the manager."""
19 |         for func_name in ["process_label", "process_prediction", "eval_function","process_and_save_dataset","get_input_instruction","get_output_instruction"]:
20 |             func = self._load_function(task_name, func_name)
21 |             setattr(self, func_name, func)
22 |     
23 | 


--------------------------------------------------------------------------------
/src/eval/tasks/test_to_sql/eval_function.py:
--------------------------------------------------------------------------------
def eval_function(pred, label):
    """Compare two SQL result sets, ignoring row order and duplicates.

    Args:
        pred: Rows returned by the predicted query, or ``None`` when the
            prediction could not be executed.
        label: Rows returned by the gold query, or ``None`` when the gold
            query could not be executed.

    Returns:
        ``True`` when both inputs are present and contain the same set of
        rows, otherwise ``False``.
    """
    # A missing result on either side can never count as a match.
    if pred is None or label is None:
        return False
    return set(pred) == set(label)
6 | 


--------------------------------------------------------------------------------
/src/eval/tasks/test_to_sql/process_label.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_label(label:str) -> Optional[str]:
 3 |     ground_truth,db_path=label[0],label[1]
 4 |     conn=sqlite3.connect(db_path)
 5 |     cursor = conn.cursor()
 6 |     try:
 7 |         cursor.execute(ground_truth)
 8 |         ground_truth_res = cursor.fetchall()
 9 |         return ground_truth_res
10 |     except:
11 |         return 
12 |         
13 | 


--------------------------------------------------------------------------------
/src/eval/tasks/test_to_sql/process_prediction.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | def process_prediction(pred:str) -> Optional[str]:
 3 |     predicted_sql,db_path = pred[0],pred[1]
 4 |         
 5 |     prior_pred=predicted_sql.split('final SQL')[0]
 6 |     try:
 7 |         predicted_sql = predicted_sql.split('final SQL')[1].strip()
 8 |     except:
 9 |         predicted_sql = 'SELECT'+predicted_sql.split('SELECT')[1]
10 |     predicted_sql=predicted_sql.split(';')[0]
11 |     predicted_sql=predicted_sql[predicted_sql.find('SELECT'):] #[1:]
12 |     conn=sqlite3.connect(db_path)
13 |     cursor = conn.cursor()
14 |     try:
15 |         cursor.execute(predicted_sql)
16 |         predicted_res = cursor.fetchall()
17 |         return predicted_res
18 |     except:
19 |         return None
20 |     return None
21 | 


--------------------------------------------------------------------------------
/src/model_inference/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/retriever/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/.DS_Store


--------------------------------------------------------------------------------
/src/retriever/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/src/retriever/__pycache__/BM25_retriever.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/__pycache__/BM25_retriever.cpython-39.pyc


--------------------------------------------------------------------------------
/src/retriever/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/src/retriever/passages/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/src/retriever/passages/.DS_Store


--------------------------------------------------------------------------------
/src/retriever/passages/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/verl/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
# Directory that contains this __init__.py.
version_folder = os.path.dirname(os.path.abspath(__file__))

# The version string is read from the text file "version/version" next to
# this module, with surrounding whitespace stripped.
with open(os.path.join(version_folder, 'version/version')) as f:
    __version__ = f.read().strip()

from .protocol import DataProto

from .utils.logging_utils import set_basic_config
import logging

# Configure package logging at WARNING level on import.
set_basic_config(level=logging.WARNING)
28 | 


--------------------------------------------------------------------------------
/verl/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/protocol.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/protocol.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/__pycache__/protocol.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/__pycache__/protocol.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/i.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/verl/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/verl/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/models/__pycache__/registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/registry.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/models/__pycache__/registry.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/models/__pycache__/registry.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .modeling_llama_megatron import (
16 |     # original model with megatron
17 |     ParallelLlamaModel,
18 |     ParallelLlamaForCausalLM,
19 |     # rmpad with megatron
20 |     ParallelLlamaForCausalLMRmPad,
21 |     ParallelLlamaForValueRmPad,
22 |     # rmpad with megatron and pipeline parallelism
23 |     ParallelLlamaForCausalLMRmPadPP,
24 |     ParallelLlamaForValueRmPadPP)
25 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .parallel_attention import ParallelLlamaAttention
16 | from .parallel_decoder import ParallelLlamaDecoderLayer, ParallelLlamaDecoderLayerRmPad
17 | from .parallel_mlp import ParallelLlamaMLP
18 | from .parallel_rmsnorm import ParallelLlamaRMSNorm
19 | 


--------------------------------------------------------------------------------
/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/models/weight_loader_registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
def get_weight_loader(arch: str):
    """Return the Megatron weight-loading function registered for ``arch``.

    Args:
        arch: Model architecture name, e.g. ``'LlamaForCausalLM'``.

    Returns:
        The loader callable registered under ``arch``.

    Raises:
        ValueError: ``arch`` has no registered loader.
    """
    # The loader is imported inside the function, on each call.
    from verl.models.llama.megatron.checkpoint_utils.llama_loader import load_state_dict_to_megatron_llama
    loaders = {'LlamaForCausalLM': load_state_dict_to_megatron_llama}

    if arch not in loaders:
        raise ValueError(f"Model architectures {arch} are not supported for now. "
                         f"Supported architectures: {loaders.keys()}")
    return loaders[arch]
24 | 


--------------------------------------------------------------------------------
/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
# Directory containing this __init__.py; the version file is resolved relative to it.
version_folder = os.path.dirname(os.path.abspath(__file__))

# The package version is stored as plain text in <package>/version/version.
with open(os.path.join(version_folder, 'version/version')) as f:
    __version__ = f.read().strip()
21 | 


--------------------------------------------------------------------------------
/verl/single_controller/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .worker import Worker
16 | from .worker_group import WorkerGroup, ClassWithInitArgs, ResourcePool
17 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/decorator.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/decorator.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/worker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/worker.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/__pycache__/worker_group.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/__pycache__/worker_group.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__pycache__/worker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__pycache__/worker.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__pycache__/worker_group.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker_group.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__pycache__/worker_group.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/megatron/__pycache__/worker_group.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from dataclasses import dataclass
17 | from verl.single_controller.base.worker import Worker, DistRankInfo, DistGlobalInfo
18 | 
19 | 
class MegatronWorker(Worker):
    """Worker that reports Megatron parallel-state sizes and ranks.

    Both query methods import ``megatron.core.parallel_state`` lazily, so
    Megatron is only needed when they are actually called.
    """

    def __init__(self, cuda_visible_devices=None) -> None:
        super().__init__(cuda_visible_devices)

    def get_megatron_global_info(self):
        """Return a ``DistGlobalInfo`` holding the tensor-, data- and
        pipeline-parallel world sizes reported by Megatron."""
        from megatron.core import parallel_state as mpu

        return DistGlobalInfo(
            tp_size=mpu.get_tensor_model_parallel_world_size(),
            dp_size=mpu.get_data_parallel_world_size(),
            pp_size=mpu.get_pipeline_model_parallel_world_size(),
        )

    def get_megatron_rank_info(self):
        """Return a ``DistRankInfo`` holding this process's tensor-, data- and
        pipeline-parallel ranks reported by Megatron."""
        from megatron.core import parallel_state as mpu

        return DistRankInfo(
            tp_rank=mpu.get_tensor_model_parallel_rank(),
            dp_rank=mpu.get_data_parallel_rank(),
            pp_rank=mpu.get_pipeline_model_parallel_rank(),
        )


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__pycache__/ray.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/base/register_center/__pycache__/ray.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/ray.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ray
16 | 
17 | 
@ray.remote
class WorkerGroupRegisterCenter:
    """Ray actor that stores one ``rank_zero_info`` value and hands it back on request."""

    def __init__(self, rank_zero_info):
        # Kept verbatim; this class never inspects or modifies the value.
        self.rank_zero_info = rank_zero_info

    def get_rank_zero_info(self):
        """Return the value passed to the constructor."""
        return self.rank_zero_info
26 | 
27 | 
def create_worker_group_register_center(name, info):
    """Start a ``WorkerGroupRegisterCenter`` actor registered under ``name``.

    Args:
        name: Actor name passed to ``.options(name=...)``.
        info: Value forwarded to the actor constructor as ``rank_zero_info``.

    Returns:
        The handle returned by ``.remote(...)`` for the new actor.
    """
    named_actor_cls = WorkerGroupRegisterCenter.options(name=name)
    return named_actor_cls.remote(info)
30 | 


--------------------------------------------------------------------------------
/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import RayResourcePool, RayClassWithInitArgs, RayWorkerGroup, create_colocated_worker_cls
16 | from .megatron import (MegatronRayWorkerGroup, DistRankInfo, DistGlobalInfo)


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/megatron.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/megatron.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/single_controller/ray/__pycache__/megatron.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/single_controller/ray/__pycache__/megatron.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/single_controller/version/version:
--------------------------------------------------------------------------------
1 | 0.0.2


--------------------------------------------------------------------------------
/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/arg_utils.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/config.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/dtensor_weight_loaders.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/hf_weight_loader.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/llm_engine_sp.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/megatron_weight_loaders.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_loader.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/model_runner.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/parallel_state.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/spmd_gpu_executor.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/tokenizer.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/third_party/vllm/vllm_v_0_6_3/__pycache__/worker.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/fsdp_sft_trainer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/fsdp_sft_trainer.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/main_ppo.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/main_ppo.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/__pycache__/main_ppo.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/__pycache__/main_ppo.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
1 | data:
2 |   path: /tmp/math_Qwen2-7B-Instruct.parquet
3 |   prompt_key: prompt
4 |   response_key: responses
5 |   data_source_key: data_source
6 |   reward_model_key: reward_model


--------------------------------------------------------------------------------
/verl/trainer/config/generation.yaml:
--------------------------------------------------------------------------------
 1 | trainer:
 2 |   nnodes: 1
 3 |   n_gpus_per_node: 8
 4 | 
 5 | data:
 6 |   path: ~/data/rlhf/math/test.parquet
 7 |   prompt_key: prompt
 8 |   n_samples: 5
 9 |   output_path: /opt/tiger/math_Qwen2-7B-Instruct.parquet
10 |   batch_size: 128
11 | 
12 | model:
13 |   path: ~/models/Qwen2-7B-Instruct
14 |   external_lib: null
15 | rollout:
16 |   name: vllm
17 |   temperature: 1.0
18 |   top_k: 50 # 0 for hf rollout, -1 for vllm rollout
19 |   top_p: 0.7
20 |   prompt_length: 1536
21 |   response_length: 512
22 |   # for vllm rollout
23 |   dtype: bfloat16 # should align with FSDP
24 |   gpu_memory_utilization: 0.5
25 |   ignore_eos: False
26 |   micro_batch_size: 256
27 |   enforce_eager: True
28 |   free_cache_engine: True
29 |   load_format: dummy_dtensor
30 |   tensor_model_parallel_size: 1
31 |   max_num_batched_tokens: 8192
32 |   max_num_seqs: 1024
33 |   log_prob_micro_batch_size: 8
34 |   # for hf rollout
35 |   do_sample: True


--------------------------------------------------------------------------------
/verl/trainer/config/sft_trainer.yaml:
--------------------------------------------------------------------------------
 1 | data:
 2 |   train_batch_size: 256
 3 |   micro_batch_size: 16  # this is also val batch size
 4 |   train_files: ~/data/gsm8k/train.parquet
 5 |   val_files: ~/data/gsm8k/test.parquet
 6 |   prompt_key: question
 7 |   response_key: answer
 8 |   max_length: 1024
 9 |   truncation: error
10 |   balance_dp_token: False
11 |   chat_template: null
12 | model:
13 |   partial_pretrain: ~/models/gemma-1.1-7b-it
14 |   fsdp_config:
15 |     wrap_policy:
16 |       min_num_params: 0
17 |     cpu_offload: False
18 |     offload_params: False
19 |   external_lib: null
20 |   enable_gradient_checkpointing: False
21 |   trust_remote_code: False
22 | optim:
23 |   lr: 1e-5
24 |   betas: [0.9, 0.95]
25 |   weight_decay: 0.01
26 |   warmup_steps_ratio: 0.1
27 |   clip_grad: 1.0
28 | 
29 | trainer:
30 |   default_local_dir: /tmp/sft_model
31 |   default_hdfs_dir: hdfs://tmp/experiments/gsm8k/gemma-1.1-7b-it/ # change the hdfs path here
32 |   resume_path: null
33 |   project_name: gsm8k-sft
34 |   experiment_name: test
35 |   total_epochs: 4
36 |   logger: ['console']
37 |   seed: 1
38 | 
39 | 


--------------------------------------------------------------------------------
/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/core_algos.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/core_algos.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/trainer/ppo/__pycache__/ray_trainer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/trainer/ppo/__pycache__/ray_trainer.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
1 | working_dir: ./
2 | excludes: ["/.git/"]
3 | env_vars:
4 |   TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
5 |   VLLM_ATTENTION_BACKEND: "XFORMERS"


--------------------------------------------------------------------------------
/verl/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from . import tokenizer
15 | from .tokenizer import *
16 | 
17 | __all__ = tokenizer.__all__
18 | 


--------------------------------------------------------------------------------
/verl/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/ast.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ast.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/ast.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ast.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/countdown.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/countdown.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/countdown.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/countdown.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/dentist_qa.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/dentist_qa.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/dentist_qa.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/dentist_qa.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/distributed.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/distributed.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/flops_counter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/flops_counter.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/flops_counter.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/flops_counter.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/fs.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fs.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/fs.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fs.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/fsdp_utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/fsdp_utils.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/gsm8k.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/gsm8k.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/gsm8k.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/gsm8k.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/hdfs_io.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/hdfs_io.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/hdfs_io.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/hdfs_io.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/import_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/import_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/import_utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/import_utils.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/logging_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logging_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/logging_utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logging_utils.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/logiqa.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logiqa.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/logiqa.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/logiqa.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/math.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/math.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/math.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/math.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/mednli.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/mednli.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/mednli.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/mednli.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/model.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/model.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/multiply.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/multiply.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/multiply.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/multiply.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/py_functional.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/py_functional.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/py_functional.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/py_functional.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/seqlen_balancing.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/seqlen_balancing.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/tokenizer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tokenizer.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/torch_dtypes.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_dtypes.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/torch_functional.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_functional.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/torch_functional.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/torch_functional.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/tracking.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tracking.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/tracking.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/tracking.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/ulysses.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ulysses.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/__pycache__/ulysses.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/__pycache__/ulysses.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | from omegaconf import DictConfig
18 | 
19 | 
def update_dict_with_config(dictionary: Dict, config: DictConfig):
    """Refresh ``dictionary`` in place from same-named attributes of ``config``.

    Only keys that already exist in ``dictionary`` are considered; a key is
    overwritten when ``hasattr(config, key)`` is true, and left untouched
    otherwise. No new keys are ever added.

    Args:
        dictionary: Mapping whose existing entries may be overwritten.
        config: Object queried with ``hasattr``/``getattr`` for each key.

    Returns:
        None. The update happens by mutating ``dictionary``.
    """
    for name in dictionary:
        # Keep the current value when the config has nothing under this name.
        if not hasattr(config, name):
            continue
        # Assigning to an existing key is safe during iteration because the
        # set of keys does not change.
        dictionary[name] = getattr(config, name)
24 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
 1 | # Dataset Format
 2 | ## RLHF dataset
 3 | We combine all the data sources into a single parquet file. We directly organize the prompt into the chat format so that multi-turn chats can be easily incorporated. In the prompt, we may add instruction-following text to guide the model to output the answers in a particular format so that we can extract the answers.
 4 | 
 5 | Math problems
 6 | ```json
 7 | {
 8 |     "data_source": "openai/gsm8k",
 9 |     "prompt": [{"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think step by step and output the final answer after \"####\""}],
10 |     "ability": "math",
11 |     "reward_model": {
12 |         "style": "rule",
13 |         "ground_truth": ["72"]
14 |     }
15 | }
16 | ```
17 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .rl_dataset import RLHFDataset
16 | from .rm_dataset import RMDataset
17 | from .sft_dataset import SFTDataset
18 | 


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/rl_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rl_dataset.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/rm_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/rm_dataset.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/dataset/__pycache__/sft_dataset.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/dataset/__pycache__/sft_dataset.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .performance import log_gpu_memory_usage


--------------------------------------------------------------------------------
/verl/utils/debug/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/debug/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/debug/__pycache__/performance.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/performance.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/debug/__pycache__/performance.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/debug/__pycache__/performance.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/debug/performance.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | import torch.distributed as dist
17 | import logging
18 | 
19 | 
def log_gpu_memory_usage(head: str, logger: logging.Logger = None, level=logging.DEBUG, rank: int = 0):
    """Report the CUDA memory currently allocated and reserved, in GB.

    Args:
        head: text placed at the start of the emitted message.
        logger: logger that receives the message; when ``None`` the message
            is printed instead.
        level: logging level used when ``logger`` is given.
        rank: when torch.distributed is initialized, only the process whose
            ``dist.get_rank()`` equals this value reports. ``None`` makes
            every process report.
    """
    # Skip only when running distributed, a rank filter is set, and this is another rank.
    if dist.is_initialized() and (rank is not None) and (dist.get_rank() != rank):
        return

    bytes_per_gb = 1024**3
    memory_allocated = torch.cuda.memory_allocated() / bytes_per_gb
    memory_reserved = torch.cuda.memory_reserved() / bytes_per_gb

    message = f'{head}, memory allocated (GB): {memory_allocated}, memory reserved (GB): {memory_reserved}'

    if logger is not None:
        logger.log(msg=message, level=level)
        return
    print(message)
31 | 


--------------------------------------------------------------------------------
/verl/utils/distributed.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Utilities for distributed training."""
15 | import os
16 | 
17 | 
def initialize_global_process_group(timeout_second=36000):
    """Initialize the default torch.distributed process group with the NCCL backend.

    After initialization the ``LOCAL_RANK``, ``RANK`` and ``WORLD_SIZE``
    environment variables are read, and the current CUDA device is set to
    the local rank.

    Args:
        timeout_second: process-group timeout, in seconds.

    Returns:
        Tuple ``(local_rank, rank, world_size)`` as integers.

    Raises:
        KeyError: if one of the three environment variables is missing.
    """
    from datetime import timedelta
    import torch.distributed

    group_timeout = timedelta(seconds=timeout_second)
    torch.distributed.init_process_group('nccl', timeout=group_timeout)

    # Read in the same order as before: LOCAL_RANK, RANK, WORLD_SIZE.
    local_rank, rank, world_size = (int(os.environ[var]) for var in ("LOCAL_RANK", "RANK", "WORLD_SIZE"))

    if torch.distributed.is_initialized():
        torch.cuda.set_device(local_rank)
    return local_rank, rank, world_size
29 | 


--------------------------------------------------------------------------------
/verl/utils/i.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/verl/utils/import_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Utilities to check if packages are available.
16 | We assume package availability won't change during runtime.
17 | """
18 | 
19 | from functools import cache
20 | from typing import List
21 | 
22 | 
@cache
def is_megatron_core_available():
    """Return True when ``megatron.core.parallel_state`` can be imported, else False.

    The result is memoized, so the import is attempted at most once per process.
    """
    try:
        from megatron.core import parallel_state as mpu
    except ImportError:
        available = False
    else:
        available = True
    return available
30 | 
31 | 
@cache
def is_vllm_available():
    """Return True when ``vllm`` can be imported, else False.

    The result is memoized, so the import is attempted at most once per process.
    """
    try:
        import vllm
    except ImportError:
        available = False
    else:
        available = True
    return available
39 | 
40 | 
def import_external_libs(external_libs=None):
    """Import one or more modules by name, for their import-time side effects.

    Args:
        external_libs: ``None`` (nothing is imported), a single module name
            as a string, or any iterable of module names (list, tuple, a
            config list object, ...).

    Raises:
        ImportError: if one of the named modules cannot be imported.
    """
    if external_libs is None:
        return
    # Only a bare string is a single module name. The previous
    # ``isinstance(..., List)`` test wrapped every other collection type
    # (e.g. a tuple) into a one-element list and then failed while trying
    # to import the collection object itself.
    if isinstance(external_libs, str):
        external_libs = [external_libs]
    import importlib
    for external_lib in external_libs:
        importlib.import_module(external_lib)
49 | 


--------------------------------------------------------------------------------
/verl/utils/init.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/logger/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/logger/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/logger/__pycache__/aggregate_logger.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/logger/__pycache__/aggregate_logger.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/logger/aggregate_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | A Ray logger will receive logging info from different processes.
16 | """
17 | import numbers
18 | from typing import Dict
19 | 
20 | 
def concat_dict_to_str(dict: Dict, step):
    """Render a step number and the numeric entries of a mapping as one line.

    The result starts with ``step:<step>``; every entry whose value is a
    ``numbers.Number`` follows as ``<key>:<value>`` with three decimals.
    Non-numeric values are skipped. Parts are joined with ``' - '``.
    """
    numeric_parts = (f'{name}:{value:.3f}' for name, value in dict.items() if isinstance(value, numbers.Number))
    return ' - '.join([f'step:{step}', *numeric_parts])
28 | 
29 | 
class LocalLogger:
    """Minimal logger that can echo metrics to the console."""

    def __init__(self, remote_logger=None, enable_wandb=False, print_to_console=False):
        """Store the console flag; print a deprecation notice when it is enabled.

        ``remote_logger`` and ``enable_wandb`` are accepted but not used here.
        """
        self.print_to_console = print_to_console
        if not print_to_console:
            return
        print('Using LocalLogger is deprecated. The constructor API will change ')

    def flush(self):
        """Do nothing; output is flushed on every ``log`` call."""
        return None

    def log(self, data, step):
        """Print ``data`` formatted by ``concat_dict_to_str`` when console output is on."""
        if not self.print_to_console:
            return
        line = concat_dict_to_str(data, step=step)
        print(line, flush=True)


--------------------------------------------------------------------------------
/verl/utils/logging_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | 
17 | 
def set_basic_config(level):
    """Configure the root logger's format and level via ``logging.basicConfig``.

    Messages are formatted as ``LEVEL:timestamp:message``. Intended to be
    called when verl is imported.

    Args:
        level: logging level handed to ``logging.basicConfig``.
    """
    log_format = '%(levelname)s:%(asctime)s:%(message)s'
    logging.basicConfig(level=level, format=log_format)
23 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/megatron/memory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | 
17 | 
class MemoryBuffer:
    """Flat, zero-initialized tensor on the current CUDA device that hands out views."""

    def __init__(self, numel, numel_padded, dtype):
        """Allocate ``numel_padded`` elements of ``dtype``.

        ``numel`` is stored as the upper bound that ``get`` checks against.
        """
        self.numel = numel
        self.numel_padded = numel_padded
        self.dtype = dtype
        device = torch.cuda.current_device()
        self.data = torch.zeros(self.numel_padded, dtype=self.dtype, device=device, requires_grad=False)

    def zero(self):
        """Fill the whole buffer with zeros in place."""
        self.data.zero_()

    def get(self, shape, start_index):
        """Return a view of ``shape`` over the 1-D data beginning at ``start_index``.

        The returned tensor shares storage with the buffer. An assertion
        fails when the requested range ends past ``numel``.
        """
        stop_index = start_index + shape.numel()
        assert stop_index <= self.numel, 'requested tensor is out of the buffer range.'
        return self.data[start_index:stop_index].view(shape)
42 | 


--------------------------------------------------------------------------------
/verl/utils/ray_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Contains commonly used utilities for ray
16 | """
17 | 
18 | import ray
19 | 
20 | import concurrent.futures
21 | 
22 | 
def parallel_put(data_list, max_workers=None):
    """Store every item of ``data_list`` with ``ray.put`` using a thread pool.

    Args:
        data_list: sized collection of objects to store.
        max_workers: thread-pool size. Defaults to ``min(len(data_list), 16)``.

    Returns:
        A list holding the result of ``ray.put`` for each item, in the same
        order as ``data_list``. An empty input yields an empty list.
    """
    # An empty input would otherwise give max_workers == 0, which
    # ThreadPoolExecutor rejects with ValueError.
    if len(data_list) == 0:
        return []

    if max_workers is None:
        max_workers = min(len(data_list), 16)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Executor.map yields results in input order, so no index bookkeeping
        # or post-hoc reordering is needed.
        return list(executor.map(ray.put, data_list))
44 | 


--------------------------------------------------------------------------------
/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/ast.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/ast.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/ast.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/ast.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/countdown.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/countdown.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/countdown.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/countdown.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/dentist_qa.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/dentist_qa.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/dentist_qa.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/dentist_qa.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/gsm8k.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/gsm8k.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/gsm8k.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/logiqa.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/logiqa.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/logiqa.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/logiqa.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/math.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/math.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/math.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/math.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/mednli.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/mednli.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/mednli.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/mednli.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/multiply.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/multiply.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/reward_score/__pycache__/multiply.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/reward_score/__pycache__/multiply.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/version/version:
--------------------------------------------------------------------------------
1 | 0.1


--------------------------------------------------------------------------------
/verl/utils/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/utils/workers/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/__pycache__/fsdp_workers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/fsdp_workers.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/__pycache__/fsdp_workers.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/__pycache__/fsdp_workers.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__pycache__/dp_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/dp_actor.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/actor/__pycache__/dp_actor.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/actor/__pycache__/dp_actor.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/verl/utils/workers/critic/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/critic/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/critic/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/critic/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/critic/__pycache__/dp_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/critic/__pycache__/dp_critic.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/critic/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for a critic
16 | """
17 | from abc import ABC, abstractmethod
18 | 
19 | import torch
20 | 
21 | from verl import DataProto
22 | 
23 | __all__ = ['BasePPOCritic']
24 | 
25 | 
26 | class BasePPOCritic(ABC):
27 | 
28 |     def __init__(self, config):
29 |         super().__init__()
30 |         self.config = config
31 | 
32 |     @abstractmethod
33 |     def compute_values(self, data: DataProto) -> torch.Tensor:
34 |         """Compute values"""
35 |         pass
36 | 
37 |     @abstractmethod
38 |     def update_critic(self, data: DataProto):
39 |         """Update the critic"""
40 |         pass
41 | 


--------------------------------------------------------------------------------
/verl/utils/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 


--------------------------------------------------------------------------------
/verl/utils/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .naive import NaiveRollout
17 | from .hf_rollout import HFRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/__pycache__/tokenizer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/__pycache__/tokenizer.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from typing import Iterable, Union
17 | 
18 | from verl import DataProto
19 | 
20 | __all__ = ['BaseRollout']
21 | 
22 | 
23 | class BaseRollout(ABC):
24 | 
25 |     def __init__(self):
26 |         """
27 | 
28 |         Args:
29 |             dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader
30 |             should handle when the training stops.
31 |         """
32 |         super().__init__()
33 | 
34 |     @abstractmethod
35 |     def generate_sequences(self, prompts: DataProto) -> DataProto:
36 |         """Generate sequences"""
37 |         pass
38 | 


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .vllm_rollout import vLLMRollout


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available
16 | 
17 | from .base import BaseShardingManager
18 | from .fsdp_ulysses import FSDPUlyssesShardingManager
19 | 
20 | AllGatherPPModel = None
21 | 
22 | if is_megatron_core_available() and is_vllm_available():
23 |     from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
24 | elif AllGatherPPModel is not None:
25 |     pass
26 | else:
27 |     AllGatherPPModel = None
28 |     MegatronVLLMShardingManager = None
29 | 
30 | if is_vllm_available():
31 |     from .fsdp_vllm import FSDPVLLMShardingManager
32 | else:
33 |     FSDPVLLMShardingManager = None
34 | 


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/utils/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/utils/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sharding manager to implement HybridEngine
16 | """
17 | 
18 | from verl import DataProto
19 | 
20 | 
21 | class BaseShardingManager:
22 | 
23 |     def __enter__(self):
24 |         pass
25 | 
26 |     def __exit__(self, exc_type, exc_value, traceback):
27 |         pass
28 | 
29 |     def preprocess_data(self, data: DataProto) -> DataProto:
30 |         return data
31 | 
32 |     def postprocess_data(self, data: DataProto) -> DataProto:
33 |         return data
34 | 


--------------------------------------------------------------------------------
/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.1


--------------------------------------------------------------------------------
/verl/workers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/verl/workers/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/__pycache__/fsdp_workers.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/__pycache__/fsdp_workers.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOActor
16 | from .dp_actor import DataParallelPPOActor
17 | 
18 | __all__ = ["BasePPOActor", "DataParallelPPOActor"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/actor/__pycache__/dp_actor.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/actor/__pycache__/dp_actor.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPOCritic
16 | from .dp_critic import DataParallelPPOCritic
17 | 
18 | __all__ = ["BasePPOCritic", "DataParallelPPOCritic"]
19 | 


--------------------------------------------------------------------------------
/verl/workers/critic/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/critic/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/critic/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/critic/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Base class for a critic
16 | """
17 | from abc import ABC, abstractmethod
18 | 
19 | import torch
20 | 
21 | from verl import DataProto
22 | 
23 | __all__ = ['BasePPOCritic']
24 | 
25 | 
26 | class BasePPOCritic(ABC):
27 | 
28 |     def __init__(self, config):
29 |         super().__init__()
30 |         self.config = config
31 | 
32 |     @abstractmethod
33 |     def compute_values(self, data: DataProto) -> torch.Tensor:
34 |         """Compute values"""
35 |         pass
36 | 
37 |     @abstractmethod
38 |     def update_critic(self, data: DataProto):
39 |         """Update the critic"""
40 |         pass
41 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BasePPORewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .reward_model import MegatronRewardModel
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseRollout
16 | from .naive import NaiveRollout
17 | from .hf_rollout import HFRollout
18 | 
19 | __all__ = ["BaseRollout", "NaiveRollout", "HFRollout"]
20 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/hf_rollout.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/tokenizer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/tokenizer.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/__pycache__/tokenizer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/__pycache__/tokenizer.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from typing import Iterable, Union
17 | 
18 | from verl import DataProto
19 | 
20 | __all__ = ['BaseRollout']
21 | 
22 | 
23 | class BaseRollout(ABC):
24 | 
25 |     def __init__(self):
26 |         """
27 | 
28 |         Args:
29 |             dataloader: an Iterable of TensorDict that consistently generates prompts. Note that the dataloader
30 |             should handle when the training stops.
31 |         """
32 |         super().__init__()
33 | 
34 |     @abstractmethod
35 |     def generate_sequences(self, prompts: DataProto) -> DataProto:
36 |         """Generate sequences"""
37 |         pass
38 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .naive_rollout import NaiveRollout
16 | 


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .vllm_rollout import vLLMRollout


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from verl.utils.import_utils import is_vllm_available, is_megatron_core_available
16 | 
17 | from .base import BaseShardingManager
18 | from .fsdp_ulysses import FSDPUlyssesShardingManager
19 | 
20 | AllGatherPPModel = None
21 | 
22 | if is_megatron_core_available() and is_vllm_available():
23 |     from .megatron_vllm import AllGatherPPModel, MegatronVLLMShardingManager
24 | elif AllGatherPPModel is not None:
25 |     pass
26 | else:
27 |     AllGatherPPModel = None
28 |     MegatronVLLMShardingManager = None
29 | 
30 | if is_vllm_available():
31 |     from .fsdp_vllm import FSDPVLLMShardingManager
32 | else:
33 |     FSDPVLLMShardingManager = None
34 | 


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/base.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/base.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gydpku/Data_Synthesis_RL/5d1ca8e856d166b5a9d872fae24012bd04a6fac0/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-39.pyc


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Sharding manager to implement HybridEngine
16 | """
17 | 
18 | from verl import DataProto
19 | 
20 | 
21 | class BaseShardingManager:
22 | 
23 |     def __enter__(self):
24 |         pass
25 | 
26 |     def __exit__(self, exc_type, exc_value, traceback):
27 |         pass
28 | 
29 |     def preprocess_data(self, data: DataProto) -> DataProto:
30 |         return data
31 | 
32 |     def postprocess_data(self, data: DataProto) -> DataProto:
33 |         return data
34 | 


--------------------------------------------------------------------------------