├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── README_old.md ├── README_old_ja.md ├── README_old_zh.md ├── backup ├── .pre-commit-config.yaml ├── CONTRIBUTING.md └── dockerfile │ ├── Dockerfile │ └── docker-entrypoint.sh ├── create_env.sh ├── data ├── aime_formatted_qwen.jsonl └── math_formatted.jsonl ├── docs ├── logo.png ├── ppo_examples.md └── ray_architecture.png ├── eval_config.yaml ├── evaluation ├── data │ ├── aime24 │ │ └── test.jsonl │ ├── aime_full │ │ └── test.jsonl │ ├── aime_full_except_24 │ │ └── train.jsonl │ ├── math │ │ ├── test.jsonl │ │ └── train.jsonl │ └── openai_math_splits │ │ ├── test.jsonl │ │ └── train.jsonl ├── data_loader.py ├── eval_math_data_parallel.py ├── evaluate.py ├── examples.py ├── gpu.py ├── grader.py ├── latex2sympy │ ├── .coveragerc │ ├── .gitignore │ ├── LICENSE.txt │ ├── PS.g4 │ ├── README.md │ ├── __init__.py │ ├── antlr-4.11.1-complete.jar │ ├── asciimath_printer.py │ ├── description.txt │ ├── dev-requirements.in │ ├── dev-requirements.txt │ ├── gen │ │ ├── PS.interp │ │ ├── PS.tokens │ │ ├── PSLexer.interp │ │ ├── PSLexer.py │ │ ├── PSLexer.tokens │ │ ├── PSListener.py │ │ ├── PSParser.py │ │ └── __init__.py │ ├── icon.png │ ├── latex2sympy2.py │ ├── requirements.in │ ├── requirements.txt │ ├── sandbox │ │ ├── linalg_equations.py │ │ ├── linalg_span.py │ │ ├── matrix.py │ │ ├── matrix_placeholders.py │ │ ├── sandbox.py │ │ ├── sandbox_equality.py │ │ ├── sectan.py │ │ └── vector.py │ ├── scripts │ │ ├── compile.sh │ │ ├── coverage-ci.sh │ │ ├── coverage.sh │ │ ├── pre-commit │ │ ├── pre-push │ │ ├── publish.sh │ │ ├── setup-hooks.sh │ │ ├── setup.sh │ │ └── test.sh │ ├── setup.cfg │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ ├── abs_test.py │ │ ├── all_bad_test.py │ │ ├── all_good_test.py │ │ ├── atom_expr_test.py │ │ ├── binomial_test.py │ │ ├── ceil_test.py │ │ ├── complex_test.py │ │ ├── context.py │ │ ├── exp_test.py │ │ ├── floor_test.py │ │ ├── gcd_test.py │ │ ├── greek_test.py │ │ ├── grouping_test.py │ │ ├── lcm_test.py │ │ ├── left_right_cdot_test.py │ │ ├── linalg_test.py │ │ ├── max_test.py │ │ ├── min_test.py │ │ ├── mod_test.py │ │ ├── overline_test.py │ │ ├── pi_test.py │ │ ├── trig_test.py │ │ └── variable_test.py ├── model_utils.py ├── parser.py ├── prepare_data.py ├── python_executor.py ├── trajectory.py └── utils.py ├── examples └── scripts │ ├── backup │ ├── docker_run.sh │ ├── nvidia_docker_install.sh │ ├── reward_func.py │ ├── train_conditional_llama.sh │ ├── train_continue_pretrain_llama.sh │ ├── train_dpo_llama.sh │ ├── train_dpo_llama_34b.sh │ ├── train_dpo_ring_llama.sh │ ├── train_iterative_dpo_llama.sh │ ├── train_knowledge_distillation.sh │ ├── train_kto_llama.sh │ ├── train_llama_slurm.sh │ ├── train_prm_mistral.sh │ ├── train_reinforce_llama_ray.sh │ ├── train_rejection_sampling_llama.sh │ ├── train_rm_llama.sh │ ├── train_sft_llama.sh │ ├── train_sft_mixtral_lora.sh │ └── train_sft_qwen32b.sh │ ├── serve_remote_rm.sh │ ├── train_ppo_Qwen_32B_ray_rm_multi_aime.sh │ ├── train_ppo_Qwen_32B_ray_rm_multi_numinamath.sh │ ├── train_ppo_Qwen_32B_ray_test.sh │ ├── train_ppo_llama.sh │ ├── train_ppo_llama_3B_reinforce_local.sh │ ├── train_ppo_llama_3B_test.sh │ ├── train_ppo_llama_ray.sh │ ├── train_ppo_llama_ray_70b.sh │ ├── train_ppo_llama_ray_8B_rm.sh │ ├── train_ppo_llama_ray_8B_rm_local.sh │ ├── train_ppo_llama_ray_8B_rm_multi.sh │ ├── train_ppo_llama_ray_8B_rm_multi_aime.sh │ ├── train_ppo_llama_ray_8B_rm_multi_numinamath.sh │ ├── train_ppo_llama_ray_8B_rm_multi_qwen.sh │ ├── train_ppo_llama_ray_mini.sh │ ├── train_ppo_llama_ray_sing.sh │ ├── train_ppo_llama_ray_slurm.sh │ ├── train_ppo_llama_ray_with_remote_rm_sing_node.sh │ ├── train_ppo_llama_sft_ray_3B_rm.sh │ ├── train_ppo_llama_sft_ray_8B_rm.sh │ ├── train_ppo_llama_with_remote_rm.sh │ ├── train_ppo_llama_with_reward_fn.sh │ ├── train_ppo_phi_ray_with_remote_rm_sing.sh │ ├── train_ppo_qwen_ray_with_remote_rm_sing.sh │ ├── train_ppo_qwen_ray_with_remote_rm_sing_node.sh │ └── train_ppo_reinforce_8B.sh ├── openrlhf ├── __init__.py ├── cli │ ├── __init__.py │ ├── batch_inference.py │ ├── code_contests_rm_server.py │ ├── gpt_reward.py │ ├── interactive_chat.py │ ├── lora_combiner.py │ ├── orm_benchmark.py │ ├── orm_server.py │ ├── orm_server_efficient.py │ ├── serve_rm.py │ ├── train_dpo.py │ ├── train_kd.py │ ├── train_kto.py │ ├── train_ppo.py │ ├── train_ppo_ray.py │ ├── train_prm.py │ ├── train_rm.py │ └── train_sft.py ├── datasets │ ├── __init__.py │ ├── process_reward_dataset.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── actor.py │ ├── loss.py │ ├── model.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── prm_trainer.py │ ├── ray │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── deepspeed │ ├── __init__.py │ ├── deepspeed.py │ └── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ ├── remote_rm_utils.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.py ├── train_ppo_llama_ray_8B_rm_multi.sh ├── train_ppo_qwen_ray_32B_rm_multi.sh └── version.txt /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/.github/workflows/python-package.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README.md -------------------------------------------------------------------------------- /README_old.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README_old.md -------------------------------------------------------------------------------- /README_old_ja.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README_old_ja.md -------------------------------------------------------------------------------- /README_old_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README_old_zh.md -------------------------------------------------------------------------------- /backup/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/.pre-commit-config.yaml -------------------------------------------------------------------------------- /backup/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/CONTRIBUTING.md -------------------------------------------------------------------------------- /backup/dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/dockerfile/Dockerfile -------------------------------------------------------------------------------- /backup/dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /create_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/create_env.sh -------------------------------------------------------------------------------- /data/aime_formatted_qwen.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/data/aime_formatted_qwen.jsonl -------------------------------------------------------------------------------- /data/math_formatted.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/data/math_formatted.jsonl -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/docs/ppo_examples.md -------------------------------------------------------------------------------- /docs/ray_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/docs/ray_architecture.png -------------------------------------------------------------------------------- /eval_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/eval_config.yaml -------------------------------------------------------------------------------- /evaluation/data/aime24/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/aime24/test.jsonl -------------------------------------------------------------------------------- /evaluation/data/aime_full/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/aime_full/test.jsonl -------------------------------------------------------------------------------- /evaluation/data/aime_full_except_24/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/aime_full_except_24/train.jsonl -------------------------------------------------------------------------------- /evaluation/data/math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/math/test.jsonl -------------------------------------------------------------------------------- /evaluation/data/math/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/math/train.jsonl -------------------------------------------------------------------------------- /evaluation/data/openai_math_splits/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/openai_math_splits/test.jsonl -------------------------------------------------------------------------------- /evaluation/data/openai_math_splits/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/openai_math_splits/train.jsonl -------------------------------------------------------------------------------- /evaluation/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data_loader.py -------------------------------------------------------------------------------- /evaluation/eval_math_data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/eval_math_data_parallel.py -------------------------------------------------------------------------------- /evaluation/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/evaluate.py -------------------------------------------------------------------------------- /evaluation/examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/examples.py -------------------------------------------------------------------------------- /evaluation/gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/gpu.py -------------------------------------------------------------------------------- /evaluation/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/grader.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/.coveragerc -------------------------------------------------------------------------------- /evaluation/latex2sympy/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/.gitignore -------------------------------------------------------------------------------- /evaluation/latex2sympy/LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/LICENSE.txt -------------------------------------------------------------------------------- /evaluation/latex2sympy/PS.g4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/PS.g4 -------------------------------------------------------------------------------- /evaluation/latex2sympy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/README.md -------------------------------------------------------------------------------- /evaluation/latex2sympy/__init__.py: -------------------------------------------------------------------------------- 1 | import latex2sympy -------------------------------------------------------------------------------- /evaluation/latex2sympy/antlr-4.11.1-complete.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/antlr-4.11.1-complete.jar -------------------------------------------------------------------------------- /evaluation/latex2sympy/asciimath_printer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/asciimath_printer.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/description.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/description.txt -------------------------------------------------------------------------------- /evaluation/latex2sympy/dev-requirements.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/dev-requirements.in -------------------------------------------------------------------------------- /evaluation/latex2sympy/dev-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/dev-requirements.txt -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PS.interp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PS.interp -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PS.tokens: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PS.tokens -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PSLexer.interp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSLexer.interp -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PSLexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSLexer.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PSLexer.tokens: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSLexer.tokens -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PSListener.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSListener.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/PSParser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSParser.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/latex2sympy/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/icon.png -------------------------------------------------------------------------------- /evaluation/latex2sympy/latex2sympy2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/latex2sympy2.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/requirements.in: -------------------------------------------------------------------------------- 1 | sympy 2 | antlr4-python3-runtime 3 | -------------------------------------------------------------------------------- /evaluation/latex2sympy/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/requirements.txt -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/linalg_equations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/linalg_equations.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/linalg_span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/linalg_span.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/matrix.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/matrix_placeholders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/matrix_placeholders.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/sandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/sandbox.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/sandbox_equality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/sandbox_equality.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/sectan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/sectan.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/sandbox/vector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/vector.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/compile.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/compile.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/coverage-ci.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/coverage-ci.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/coverage.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/coverage.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/pre-commit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/pre-commit -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/pre-push: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/pre-push -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/publish.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/publish.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/setup-hooks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/setup-hooks.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/setup.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/scripts/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/test.sh -------------------------------------------------------------------------------- /evaluation/latex2sympy/setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length = 120 3 | ignore = E501 4 | -------------------------------------------------------------------------------- /evaluation/latex2sympy/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/setup.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/abs_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/abs_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/all_bad_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/all_bad_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/all_good_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/all_good_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/atom_expr_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/atom_expr_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/binomial_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/binomial_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/ceil_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/ceil_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/complex_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/complex_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/context.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/exp_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/exp_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/floor_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/floor_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/gcd_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/gcd_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/greek_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/greek_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/grouping_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/grouping_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/lcm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/lcm_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/left_right_cdot_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/left_right_cdot_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/linalg_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/linalg_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/max_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/max_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/min_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/min_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/mod_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/mod_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/overline_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/overline_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/pi_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/pi_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/trig_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/trig_test.py -------------------------------------------------------------------------------- /evaluation/latex2sympy/tests/variable_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/variable_test.py -------------------------------------------------------------------------------- /evaluation/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/model_utils.py -------------------------------------------------------------------------------- /evaluation/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/parser.py -------------------------------------------------------------------------------- /evaluation/prepare_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/prepare_data.py -------------------------------------------------------------------------------- /evaluation/python_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/python_executor.py -------------------------------------------------------------------------------- /evaluation/trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/trajectory.py -------------------------------------------------------------------------------- /evaluation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/utils.py -------------------------------------------------------------------------------- /examples/scripts/backup/docker_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/docker_run.sh -------------------------------------------------------------------------------- /examples/scripts/backup/nvidia_docker_install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/nvidia_docker_install.sh -------------------------------------------------------------------------------- /examples/scripts/backup/reward_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/reward_func.py -------------------------------------------------------------------------------- /examples/scripts/backup/train_conditional_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_conditional_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_continue_pretrain_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_continue_pretrain_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_dpo_llama_34b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_dpo_llama_34b.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_dpo_ring_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_dpo_ring_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_iterative_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_iterative_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_knowledge_distillation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_knowledge_distillation.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_kto_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_kto_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_llama_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_llama_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_prm_mistral.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_prm_mistral.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_reinforce_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_reinforce_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_rejection_sampling_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_rejection_sampling_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_rm_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_rm_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_sft_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_sft_llama.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_sft_mixtral_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_sft_mixtral_lora.sh -------------------------------------------------------------------------------- /examples/scripts/backup/train_sft_qwen32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_sft_qwen32b.sh -------------------------------------------------------------------------------- /examples/scripts/serve_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/serve_remote_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_aime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_aime.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_numinamath.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_numinamath.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_Qwen_32B_ray_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_Qwen_32B_ray_test.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_3B_reinforce_local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_3B_reinforce_local.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_3B_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_3B_test.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_8B_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_8B_rm_local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_local.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_8B_rm_multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_8B_rm_multi_aime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi_aime.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_8B_rm_multi_numinamath.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi_numinamath.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_8B_rm_multi_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi_qwen.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_mini.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_mini.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_sing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_sing.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_with_remote_rm_sing_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_with_remote_rm_sing_node.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_sft_ray_3B_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_sft_ray_3B_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_sft_ray_8B_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_sft_ray_8B_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_with_remote_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_reward_fn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_with_reward_fn.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_phi_ray_with_remote_rm_sing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_phi_ray_with_remote_rm_sing.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing_node.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_reinforce_8B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_reinforce_8B.sh -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /openrlhf/cli/code_contests_rm_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/code_contests_rm_server.py -------------------------------------------------------------------------------- /openrlhf/cli/gpt_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/gpt_reward.py -------------------------------------------------------------------------------- /openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /openrlhf/cli/lora_combiner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/lora_combiner.py -------------------------------------------------------------------------------- /openrlhf/cli/orm_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/orm_benchmark.py -------------------------------------------------------------------------------- /openrlhf/cli/orm_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/orm_server.py -------------------------------------------------------------------------------- /openrlhf/cli/orm_server_efficient.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/orm_server_efficient.py -------------------------------------------------------------------------------- /openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_ppo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/setup.py -------------------------------------------------------------------------------- /train_ppo_llama_ray_8B_rm_multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/train_ppo_llama_ray_8B_rm_multi.sh -------------------------------------------------------------------------------- /train_ppo_qwen_ray_32B_rm_multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/train_ppo_qwen_ray_32B_rm_multi.sh -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.5.8 --------------------------------------------------------------------------------