├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── README_zh.md ├── dockerfile ├── Dockerfile └── docker-entrypoint.sh ├── docs ├── lmm-r1-logo-panda.png ├── lmm-r1-logo.png ├── logo.png ├── model.jpg ├── motivation.png ├── ppo_examples.md ├── ray_architecture.png ├── sokoban_demo.gif ├── time_compare.jpg └── wandb_log_1.png ├── examples ├── data │ ├── convert_text_to_img.py │ ├── gen_sokoban_tasks.py │ ├── mathlv345_8k_chatml.json │ └── test_message.jsonl └── scripts │ ├── ckpt_ds_zero_to_universal.sh │ ├── docker_run.sh │ ├── experience_filter.py │ ├── lmm_r1 │ ├── train_direct_rl_geo.sh │ ├── train_fre_multi.sh │ ├── train_fre_text.sh │ ├── train_mgt_geo.sh │ ├── train_mgt_percereas.sh │ └── train_sokoban.sh │ ├── nvidia_docker_install.sh │ ├── reward_func.py │ ├── serve_remote_rm.sh │ ├── train_conditional_llama.sh │ ├── train_continue_pretrain_llama.sh │ ├── train_dpo_llama.sh │ ├── train_dpo_llama_34b.sh │ ├── train_dpo_ring_llama.sh │ ├── train_grpo_llama_ray.sh │ ├── train_grpo_ray_hybrid_engine.sh │ ├── train_iterative_dpo_llama.sh │ ├── train_knowledge_distillation.sh │ ├── train_kto_llama.sh │ ├── train_llama_slurm.sh │ ├── train_ppo_llama_ray.sh │ ├── train_ppo_llama_ray_70b.sh │ ├── train_ppo_llama_ray_hybrid_engine.sh │ ├── train_ppo_llama_ray_ring.sh │ ├── train_ppo_llama_ray_slurm.sh │ ├── train_ppo_llama_with_dynamic_sampling.sh │ ├── train_ppo_llama_with_remote_rm.sh │ ├── train_ppo_llama_with_reward_fn.sh │ ├── train_prm_mistral.sh │ ├── train_reinforce_baseline_llama_ray_hybrid_engine.sh │ ├── train_reinforce_llama_ray.sh │ ├── train_reinforce_llama_ray_hybrid_engine.sh │ ├── train_rejection_sampling_llama.sh │ ├── train_rm_llama.sh │ ├── train_sft_llama.sh │ └── train_sft_mixtral_lora.sh ├── openrlhf ├── __init__.py ├── cli │ ├── __init__.py │ ├── batch_inference.py │ ├── interactive_chat.py │ ├── lora_combiner.py │ ├── serve_rm.py │ ├── train_dpo.py │ ├── train_kd.py │ ├── train_kto.py │ ├── train_ppo_ray.py │ ├── train_prm.py │ ├── train_rm.py │ └── train_sft.py ├── datasets │ ├── __init__.py │ ├── process_reward_dataset.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── actor.py │ ├── lmm_kits │ │ ├── base │ │ │ ├── data_processor.py │ │ │ └── patch.py │ │ ├── gemma3 │ │ │ ├── data_processor.py │ │ │ └── patch.py │ │ ├── llm │ │ │ ├── data_processor.py │ │ │ └── patch.py │ │ ├── phi3_v │ │ │ ├── data_processor.py │ │ │ ├── patch.py │ │ │ └── src │ │ │ │ ├── configuration_phi3_v.py │ │ │ │ ├── modeling_phi3_v.py │ │ │ │ └── processing_phi3_v.py │ │ ├── phi4mm │ │ │ ├── data_processor.py │ │ │ ├── patch.py │ │ │ └── src │ │ │ │ ├── configuration_phi4mm.py │ │ │ │ ├── modeling_phi4mm.py │ │ │ │ ├── processing_phi4mm.py │ │ │ │ ├── speech_conformer_encoder.py │ │ │ │ └── vision_siglip_navit.py │ │ ├── qwen2_5_vl │ │ │ ├── data_processor.py │ │ │ └── patch.py │ │ └── utils.py │ ├── loss.py │ ├── model.py │ ├── remote_rm │ │ ├── math_verifier.py │ │ └── sokoban_verifier.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── prm_trainer.py │ ├── ray │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── deepspeed │ ├── __init__.py │ ├── deepspeed.py │ └── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ ├── remote_rm_utils.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.py └── version.txt /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/.github/workflows/python-package.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/README.md -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/README_zh.md -------------------------------------------------------------------------------- /dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/dockerfile/Dockerfile -------------------------------------------------------------------------------- /dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /docs/lmm-r1-logo-panda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/lmm-r1-logo-panda.png -------------------------------------------------------------------------------- /docs/lmm-r1-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/lmm-r1-logo.png -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/model.jpg -------------------------------------------------------------------------------- /docs/motivation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/motivation.png -------------------------------------------------------------------------------- /docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/ppo_examples.md -------------------------------------------------------------------------------- /docs/ray_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/ray_architecture.png -------------------------------------------------------------------------------- /docs/sokoban_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/sokoban_demo.gif -------------------------------------------------------------------------------- /docs/time_compare.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/time_compare.jpg -------------------------------------------------------------------------------- /docs/wandb_log_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/docs/wandb_log_1.png -------------------------------------------------------------------------------- /examples/data/convert_text_to_img.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/data/convert_text_to_img.py -------------------------------------------------------------------------------- /examples/data/gen_sokoban_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/data/gen_sokoban_tasks.py -------------------------------------------------------------------------------- /examples/data/mathlv345_8k_chatml.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/data/mathlv345_8k_chatml.json -------------------------------------------------------------------------------- /examples/data/test_message.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/data/test_message.jsonl -------------------------------------------------------------------------------- /examples/scripts/ckpt_ds_zero_to_universal.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/ckpt_ds_zero_to_universal.sh -------------------------------------------------------------------------------- /examples/scripts/docker_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/docker_run.sh -------------------------------------------------------------------------------- /examples/scripts/experience_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/experience_filter.py -------------------------------------------------------------------------------- /examples/scripts/lmm_r1/train_direct_rl_geo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/lmm_r1/train_direct_rl_geo.sh -------------------------------------------------------------------------------- /examples/scripts/lmm_r1/train_fre_multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/lmm_r1/train_fre_multi.sh -------------------------------------------------------------------------------- /examples/scripts/lmm_r1/train_fre_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/lmm_r1/train_fre_text.sh -------------------------------------------------------------------------------- /examples/scripts/lmm_r1/train_mgt_geo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/lmm_r1/train_mgt_geo.sh -------------------------------------------------------------------------------- /examples/scripts/lmm_r1/train_mgt_percereas.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/lmm_r1/train_mgt_percereas.sh -------------------------------------------------------------------------------- /examples/scripts/lmm_r1/train_sokoban.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/lmm_r1/train_sokoban.sh -------------------------------------------------------------------------------- /examples/scripts/nvidia_docker_install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/nvidia_docker_install.sh -------------------------------------------------------------------------------- /examples/scripts/reward_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/reward_func.py -------------------------------------------------------------------------------- /examples/scripts/serve_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/serve_remote_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_conditional_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_conditional_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_continue_pretrain_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_continue_pretrain_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama_34b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_dpo_llama_34b.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_ring_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_dpo_ring_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_grpo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_grpo_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_grpo_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_grpo_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_iterative_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_iterative_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_knowledge_distillation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_knowledge_distillation.sh -------------------------------------------------------------------------------- /examples/scripts/train_kto_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_kto_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_llama_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_llama_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_ring.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_ray_ring.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_dynamic_sampling.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_with_dynamic_sampling.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_with_remote_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_reward_fn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_ppo_llama_with_reward_fn.sh -------------------------------------------------------------------------------- /examples/scripts/train_prm_mistral.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_prm_mistral.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_baseline_llama_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_reinforce_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_llama_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_reinforce_llama_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_rejection_sampling_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_rejection_sampling_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_rm_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_rm_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_sft_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_mixtral_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/examples/scripts/train_sft_mixtral_lora.sh -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /openrlhf/cli/lora_combiner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/lora_combiner.py -------------------------------------------------------------------------------- /openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/base/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/base/data_processor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/base/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/base/patch.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/gemma3/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/gemma3/data_processor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/gemma3/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/gemma3/patch.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/llm/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/llm/data_processor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/llm/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/llm/patch.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi3_v/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi3_v/data_processor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi3_v/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi3_v/patch.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi3_v/src/configuration_phi3_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi3_v/src/configuration_phi3_v.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi3_v/src/modeling_phi3_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi3_v/src/modeling_phi3_v.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi3_v/src/processing_phi3_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi3_v/src/processing_phi3_v.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/data_processor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/patch.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/src/configuration_phi4mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/src/configuration_phi4mm.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/src/modeling_phi4mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/src/modeling_phi4mm.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/src/processing_phi4mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/src/processing_phi4mm.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/src/speech_conformer_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/src/speech_conformer_encoder.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/phi4mm/src/vision_siglip_navit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/phi4mm/src/vision_siglip_navit.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/qwen2_5_vl/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/qwen2_5_vl/data_processor.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/qwen2_5_vl/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/qwen2_5_vl/patch.py -------------------------------------------------------------------------------- /openrlhf/models/lmm_kits/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/lmm_kits/utils.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/math_verifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/remote_rm/math_verifier.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/sokoban_verifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/remote_rm/sokoban_verifier.py -------------------------------------------------------------------------------- /openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideDra/lmm-r1/HEAD/setup.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.7.3a 2 | --------------------------------------------------------------------------------