├── .github
    └── workflows
    │   └── python-package.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── README_old.md
├── README_old_ja.md
├── README_old_zh.md
├── backup
    ├── .pre-commit-config.yaml
    ├── CONTRIBUTING.md
    └── dockerfile
    │   ├── Dockerfile
    │   └── docker-entrypoint.sh
├── create_env.sh
├── data
    ├── aime_formatted_qwen.jsonl
    └── math_formatted.jsonl
├── docs
    ├── logo.png
    ├── ppo_examples.md
    └── ray_architecture.png
├── eval_config.yaml
├── evaluation
    ├── data
    │   ├── aime24
    │   │   └── test.jsonl
    │   ├── aime_full
    │   │   └── test.jsonl
    │   ├── aime_full_except_24
    │   │   └── train.jsonl
    │   ├── math
    │   │   ├── test.jsonl
    │   │   └── train.jsonl
    │   └── openai_math_splits
    │   │   ├── test.jsonl
    │   │   └── train.jsonl
    ├── data_loader.py
    ├── eval_math_data_parallel.py
    ├── evaluate.py
    ├── examples.py
    ├── gpu.py
    ├── grader.py
    ├── latex2sympy
    │   ├── .coveragerc
    │   ├── .gitignore
    │   ├── LICENSE.txt
    │   ├── PS.g4
    │   ├── README.md
    │   ├── __init__.py
    │   ├── antlr-4.11.1-complete.jar
    │   ├── asciimath_printer.py
    │   ├── description.txt
    │   ├── dev-requirements.in
    │   ├── dev-requirements.txt
    │   ├── gen
    │   │   ├── PS.interp
    │   │   ├── PS.tokens
    │   │   ├── PSLexer.interp
    │   │   ├── PSLexer.py
    │   │   ├── PSLexer.tokens
    │   │   ├── PSListener.py
    │   │   ├── PSParser.py
    │   │   └── __init__.py
    │   ├── icon.png
    │   ├── latex2sympy2.py
    │   ├── requirements.in
    │   ├── requirements.txt
    │   ├── sandbox
    │   │   ├── linalg_equations.py
    │   │   ├── linalg_span.py
    │   │   ├── matrix.py
    │   │   ├── matrix_placeholders.py
    │   │   ├── sandbox.py
    │   │   ├── sandbox_equality.py
    │   │   ├── sectan.py
    │   │   └── vector.py
    │   ├── scripts
    │   │   ├── compile.sh
    │   │   ├── coverage-ci.sh
    │   │   ├── coverage.sh
    │   │   ├── pre-commit
    │   │   ├── pre-push
    │   │   ├── publish.sh
    │   │   ├── setup-hooks.sh
    │   │   ├── setup.sh
    │   │   └── test.sh
    │   ├── setup.cfg
    │   ├── setup.py
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── abs_test.py
    │   │   ├── all_bad_test.py
    │   │   ├── all_good_test.py
    │   │   ├── atom_expr_test.py
    │   │   ├── binomial_test.py
    │   │   ├── ceil_test.py
    │   │   ├── complex_test.py
    │   │   ├── context.py
    │   │   ├── exp_test.py
    │   │   ├── floor_test.py
    │   │   ├── gcd_test.py
    │   │   ├── greek_test.py
    │   │   ├── grouping_test.py
    │   │   ├── lcm_test.py
    │   │   ├── left_right_cdot_test.py
    │   │   ├── linalg_test.py
    │   │   ├── max_test.py
    │   │   ├── min_test.py
    │   │   ├── mod_test.py
    │   │   ├── overline_test.py
    │   │   ├── pi_test.py
    │   │   ├── trig_test.py
    │   │   └── variable_test.py
    ├── model_utils.py
    ├── parser.py
    ├── prepare_data.py
    ├── python_executor.py
    ├── trajectory.py
    └── utils.py
├── examples
    └── scripts
    │   ├── backup
    │       ├── docker_run.sh
    │       ├── nvidia_docker_install.sh
    │       ├── reward_func.py
    │       ├── train_conditional_llama.sh
    │       ├── train_continue_pretrain_llama.sh
    │       ├── train_dpo_llama.sh
    │       ├── train_dpo_llama_34b.sh
    │       ├── train_dpo_ring_llama.sh
    │       ├── train_iterative_dpo_llama.sh
    │       ├── train_knowledge_distillation.sh
    │       ├── train_kto_llama.sh
    │       ├── train_llama_slurm.sh
    │       ├── train_prm_mistral.sh
    │       ├── train_reinforce_llama_ray.sh
    │       ├── train_rejection_sampling_llama.sh
    │       ├── train_rm_llama.sh
    │       ├── train_sft_llama.sh
    │       ├── train_sft_mixtral_lora.sh
    │       └── train_sft_qwen32b.sh
    │   ├── serve_remote_rm.sh
    │   ├── train_ppo_Qwen_32B_ray_rm_multi_aime.sh
    │   ├── train_ppo_Qwen_32B_ray_rm_multi_numinamath.sh
    │   ├── train_ppo_Qwen_32B_ray_test.sh
    │   ├── train_ppo_llama.sh
    │   ├── train_ppo_llama_3B_reinforce_local.sh
    │   ├── train_ppo_llama_3B_test.sh
    │   ├── train_ppo_llama_ray.sh
    │   ├── train_ppo_llama_ray_70b.sh
    │   ├── train_ppo_llama_ray_8B_rm.sh
    │   ├── train_ppo_llama_ray_8B_rm_local.sh
    │   ├── train_ppo_llama_ray_8B_rm_multi.sh
    │   ├── train_ppo_llama_ray_8B_rm_multi_aime.sh
    │   ├── train_ppo_llama_ray_8B_rm_multi_numinamath.sh
    │   ├── train_ppo_llama_ray_8B_rm_multi_qwen.sh
    │   ├── train_ppo_llama_ray_mini.sh
    │   ├── train_ppo_llama_ray_sing.sh
    │   ├── train_ppo_llama_ray_slurm.sh
    │   ├── train_ppo_llama_ray_with_remote_rm_sing_node.sh
    │   ├── train_ppo_llama_sft_ray_3B_rm.sh
    │   ├── train_ppo_llama_sft_ray_8B_rm.sh
    │   ├── train_ppo_llama_with_remote_rm.sh
    │   ├── train_ppo_llama_with_reward_fn.sh
    │   ├── train_ppo_phi_ray_with_remote_rm_sing.sh
    │   ├── train_ppo_qwen_ray_with_remote_rm_sing.sh
    │   ├── train_ppo_qwen_ray_with_remote_rm_sing_node.sh
    │   └── train_ppo_reinforce_8B.sh
├── openrlhf
    ├── __init__.py
    ├── cli
    │   ├── __init__.py
    │   ├── batch_inference.py
    │   ├── code_contests_rm_server.py
    │   ├── gpt_reward.py
    │   ├── interactive_chat.py
    │   ├── lora_combiner.py
    │   ├── orm_benchmark.py
    │   ├── orm_server.py
    │   ├── orm_server_efficient.py
    │   ├── serve_rm.py
    │   ├── train_dpo.py
    │   ├── train_kd.py
    │   ├── train_kto.py
    │   ├── train_ppo.py
    │   ├── train_ppo_ray.py
    │   ├── train_prm.py
    │   ├── train_rm.py
    │   └── train_sft.py
    ├── datasets
    │   ├── __init__.py
    │   ├── process_reward_dataset.py
    │   ├── prompts_dataset.py
    │   ├── reward_dataset.py
    │   ├── sft_dataset.py
    │   ├── unpaired_preference_dataset.py
    │   └── utils.py
    ├── models
    │   ├── __init__.py
    │   ├── actor.py
    │   ├── loss.py
    │   ├── model.py
    │   ├── ring_attn_utils.py
    │   └── utils.py
    ├── trainer
    │   ├── __init__.py
    │   ├── dpo_trainer.py
    │   ├── kd_trainer.py
    │   ├── kto_trainer.py
    │   ├── ppo_trainer.py
    │   ├── ppo_utils
    │   │   ├── __init__.py
    │   │   ├── experience_maker.py
    │   │   ├── kl_controller.py
    │   │   └── replay_buffer.py
    │   ├── prm_trainer.py
    │   ├── ray
    │   │   ├── __init__.py
    │   │   ├── launcher.py
    │   │   ├── ppo_actor.py
    │   │   ├── ppo_critic.py
    │   │   ├── utils.py
    │   │   ├── vllm_engine.py
    │   │   └── vllm_worker_wrap.py
    │   ├── rm_trainer.py
    │   └── sft_trainer.py
    └── utils
    │   ├── __init__.py
    │   ├── deepspeed
    │       ├── __init__.py
    │       ├── deepspeed.py
    │       └── deepspeed_utils.py
    │   ├── distributed_sampler.py
    │   ├── distributed_util.py
    │   ├── logging_utils.py
    │   ├── processor.py
    │   ├── remote_rm_utils.py
    │   └── utils.py
├── pyproject.toml
├── requirements.txt
├── setup.py
├── train_ppo_llama_ray_8B_rm_multi.sh
├── train_ppo_qwen_ray_32B_rm_multi.sh
└── version.txt


/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/.github/workflows/python-package.yml


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/.gitignore


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README.md


--------------------------------------------------------------------------------
/README_old.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README_old.md


--------------------------------------------------------------------------------
/README_old_ja.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README_old_ja.md


--------------------------------------------------------------------------------
/README_old_zh.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/README_old_zh.md


--------------------------------------------------------------------------------
/backup/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/backup/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/CONTRIBUTING.md


--------------------------------------------------------------------------------
/backup/dockerfile/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/dockerfile/Dockerfile


--------------------------------------------------------------------------------
/backup/dockerfile/docker-entrypoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/backup/dockerfile/docker-entrypoint.sh


--------------------------------------------------------------------------------
/create_env.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/create_env.sh


--------------------------------------------------------------------------------
/data/aime_formatted_qwen.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/data/aime_formatted_qwen.jsonl


--------------------------------------------------------------------------------
/data/math_formatted.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/data/math_formatted.jsonl


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/docs/logo.png


--------------------------------------------------------------------------------
/docs/ppo_examples.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/docs/ppo_examples.md


--------------------------------------------------------------------------------
/docs/ray_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/docs/ray_architecture.png


--------------------------------------------------------------------------------
/eval_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/eval_config.yaml


--------------------------------------------------------------------------------
/evaluation/data/aime24/test.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/aime24/test.jsonl


--------------------------------------------------------------------------------
/evaluation/data/aime_full/test.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/aime_full/test.jsonl


--------------------------------------------------------------------------------
/evaluation/data/aime_full_except_24/train.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/aime_full_except_24/train.jsonl


--------------------------------------------------------------------------------
/evaluation/data/math/test.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/math/test.jsonl


--------------------------------------------------------------------------------
/evaluation/data/math/train.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/math/train.jsonl


--------------------------------------------------------------------------------
/evaluation/data/openai_math_splits/test.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/openai_math_splits/test.jsonl


--------------------------------------------------------------------------------
/evaluation/data/openai_math_splits/train.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data/openai_math_splits/train.jsonl


--------------------------------------------------------------------------------
/evaluation/data_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/data_loader.py


--------------------------------------------------------------------------------
/evaluation/eval_math_data_parallel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/eval_math_data_parallel.py


--------------------------------------------------------------------------------
/evaluation/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/evaluate.py


--------------------------------------------------------------------------------
/evaluation/examples.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/examples.py


--------------------------------------------------------------------------------
/evaluation/gpu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/gpu.py


--------------------------------------------------------------------------------
/evaluation/grader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/grader.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/.coveragerc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/.coveragerc


--------------------------------------------------------------------------------
/evaluation/latex2sympy/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/.gitignore


--------------------------------------------------------------------------------
/evaluation/latex2sympy/LICENSE.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/LICENSE.txt


--------------------------------------------------------------------------------
/evaluation/latex2sympy/PS.g4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/PS.g4


--------------------------------------------------------------------------------
/evaluation/latex2sympy/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/README.md


--------------------------------------------------------------------------------
/evaluation/latex2sympy/__init__.py:
--------------------------------------------------------------------------------
1 | import latex2sympy


--------------------------------------------------------------------------------
/evaluation/latex2sympy/antlr-4.11.1-complete.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/antlr-4.11.1-complete.jar


--------------------------------------------------------------------------------
/evaluation/latex2sympy/asciimath_printer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/asciimath_printer.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/description.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/description.txt


--------------------------------------------------------------------------------
/evaluation/latex2sympy/dev-requirements.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/dev-requirements.in


--------------------------------------------------------------------------------
/evaluation/latex2sympy/dev-requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/dev-requirements.txt


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PS.interp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PS.interp


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PS.tokens:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PS.tokens


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PSLexer.interp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSLexer.interp


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PSLexer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSLexer.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PSLexer.tokens:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSLexer.tokens


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PSListener.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSListener.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/PSParser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/gen/PSParser.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/gen/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluation/latex2sympy/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/icon.png


--------------------------------------------------------------------------------
/evaluation/latex2sympy/latex2sympy2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/latex2sympy2.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/requirements.in:
--------------------------------------------------------------------------------
1 | sympy
2 | antlr4-python3-runtime
3 | 


--------------------------------------------------------------------------------
/evaluation/latex2sympy/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/requirements.txt


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/linalg_equations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/linalg_equations.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/linalg_span.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/linalg_span.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/matrix.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/matrix.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/matrix_placeholders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/matrix_placeholders.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/sandbox.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/sandbox.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/sandbox_equality.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/sandbox_equality.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/sectan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/sectan.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/sandbox/vector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/sandbox/vector.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/compile.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/compile.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/coverage-ci.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/coverage-ci.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/coverage.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/coverage.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/pre-commit:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/pre-commit


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/pre-push:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/pre-push


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/publish.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/publish.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/setup-hooks.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/setup-hooks.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/setup.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/setup.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/scripts/test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/scripts/test.sh


--------------------------------------------------------------------------------
/evaluation/latex2sympy/setup.cfg:
--------------------------------------------------------------------------------
1 | [pycodestyle]
2 | max-line-length = 120
3 | ignore = E501
4 | 


--------------------------------------------------------------------------------
/evaluation/latex2sympy/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/setup.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/abs_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/abs_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/all_bad_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/all_bad_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/all_good_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/all_good_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/atom_expr_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/atom_expr_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/binomial_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/binomial_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/ceil_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/ceil_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/complex_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/complex_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/context.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/context.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/exp_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/exp_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/floor_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/floor_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/gcd_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/gcd_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/greek_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/greek_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/grouping_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/grouping_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/lcm_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/lcm_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/left_right_cdot_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/left_right_cdot_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/linalg_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/linalg_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/max_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/max_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/min_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/min_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/mod_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/mod_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/overline_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/overline_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/pi_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/pi_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/trig_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/trig_test.py


--------------------------------------------------------------------------------
/evaluation/latex2sympy/tests/variable_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/latex2sympy/tests/variable_test.py


--------------------------------------------------------------------------------
/evaluation/model_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/model_utils.py


--------------------------------------------------------------------------------
/evaluation/parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/parser.py


--------------------------------------------------------------------------------
/evaluation/prepare_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/prepare_data.py


--------------------------------------------------------------------------------
/evaluation/python_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/python_executor.py


--------------------------------------------------------------------------------
/evaluation/trajectory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/trajectory.py


--------------------------------------------------------------------------------
/evaluation/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/evaluation/utils.py


--------------------------------------------------------------------------------
/examples/scripts/backup/docker_run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/docker_run.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/nvidia_docker_install.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/nvidia_docker_install.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/reward_func.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/reward_func.py


--------------------------------------------------------------------------------
/examples/scripts/backup/train_conditional_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_conditional_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_continue_pretrain_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_continue_pretrain_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_dpo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_dpo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_dpo_llama_34b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_dpo_llama_34b.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_dpo_ring_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_dpo_ring_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_iterative_dpo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_iterative_dpo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_knowledge_distillation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_knowledge_distillation.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_kto_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_kto_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_llama_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_llama_slurm.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_prm_mistral.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_prm_mistral.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_reinforce_llama_ray.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_reinforce_llama_ray.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_rejection_sampling_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_rejection_sampling_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_rm_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_rm_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_sft_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_sft_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_sft_mixtral_lora.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_sft_mixtral_lora.sh


--------------------------------------------------------------------------------
/examples/scripts/backup/train_sft_qwen32b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/backup/train_sft_qwen32b.sh


--------------------------------------------------------------------------------
/examples/scripts/serve_remote_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/serve_remote_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_aime.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_aime.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_numinamath.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_Qwen_32B_ray_rm_multi_numinamath.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_Qwen_32B_ray_test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_Qwen_32B_ray_test.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_3B_reinforce_local.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_3B_reinforce_local.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_3B_test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_3B_test.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_8B_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_8B_rm_local.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_local.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_8B_rm_multi.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_8B_rm_multi_aime.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi_aime.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_8B_rm_multi_numinamath.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi_numinamath.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_8B_rm_multi_qwen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_8B_rm_multi_qwen.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_mini.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_mini.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_sing.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_sing.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_with_remote_rm_sing_node.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_ray_with_remote_rm_sing_node.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_sft_ray_3B_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_sft_ray_3B_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_sft_ray_8B_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_sft_ray_8B_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_with_remote_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_with_remote_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_with_reward_fn.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_llama_with_reward_fn.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_phi_ray_with_remote_rm_sing.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_phi_ray_with_remote_rm_sing.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing_node.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_qwen_ray_with_remote_rm_sing_node.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_reinforce_8B.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/examples/scripts/train_ppo_reinforce_8B.sh


--------------------------------------------------------------------------------
/openrlhf/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/openrlhf/cli/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/openrlhf/cli/batch_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/batch_inference.py


--------------------------------------------------------------------------------
/openrlhf/cli/code_contests_rm_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/code_contests_rm_server.py


--------------------------------------------------------------------------------
/openrlhf/cli/gpt_reward.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/gpt_reward.py


--------------------------------------------------------------------------------
/openrlhf/cli/interactive_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/interactive_chat.py


--------------------------------------------------------------------------------
/openrlhf/cli/lora_combiner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/lora_combiner.py


--------------------------------------------------------------------------------
/openrlhf/cli/orm_benchmark.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/orm_benchmark.py


--------------------------------------------------------------------------------
/openrlhf/cli/orm_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/orm_server.py


--------------------------------------------------------------------------------
/openrlhf/cli/orm_server_efficient.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/orm_server_efficient.py


--------------------------------------------------------------------------------
/openrlhf/cli/serve_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/serve_rm.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_dpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_dpo.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_kd.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_kd.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_kto.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_kto.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_ppo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_ppo.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_ppo_ray.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_ppo_ray.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_prm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_prm.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_rm.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/cli/train_sft.py


--------------------------------------------------------------------------------
/openrlhf/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/__init__.py


--------------------------------------------------------------------------------
/openrlhf/datasets/process_reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/process_reward_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/prompts_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/prompts_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/reward_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/sft_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/sft_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/unpaired_preference_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/unpaired_preference_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/datasets/utils.py


--------------------------------------------------------------------------------
/openrlhf/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/__init__.py


--------------------------------------------------------------------------------
/openrlhf/models/actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/actor.py


--------------------------------------------------------------------------------
/openrlhf/models/loss.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/loss.py


--------------------------------------------------------------------------------
/openrlhf/models/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/model.py


--------------------------------------------------------------------------------
/openrlhf/models/ring_attn_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/ring_attn_utils.py


--------------------------------------------------------------------------------
/openrlhf/models/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/models/utils.py


--------------------------------------------------------------------------------
/openrlhf/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/dpo_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/kd_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/kd_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/kto_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/kto_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/experience_maker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/kl_controller.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/replay_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/prm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/prm_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/launcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/launcher.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/ppo_actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/ppo_actor.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/ppo_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/ppo_critic.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/utils.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/vllm_engine.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_worker_wrap.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py


--------------------------------------------------------------------------------
/openrlhf/trainer/rm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/rm_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/trainer/sft_trainer.py


--------------------------------------------------------------------------------
/openrlhf/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/deepspeed/__init__.py


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/deepspeed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/deepspeed/deepspeed.py


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/deepspeed_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/distributed_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/distributed_sampler.py


--------------------------------------------------------------------------------
/openrlhf/utils/distributed_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/distributed_util.py


--------------------------------------------------------------------------------
/openrlhf/utils/logging_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/logging_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/processor.py


--------------------------------------------------------------------------------
/openrlhf/utils/remote_rm_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/remote_rm_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/openrlhf/utils/utils.py


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/requirements.txt


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/setup.py


--------------------------------------------------------------------------------
/train_ppo_llama_ray_8B_rm_multi.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/train_ppo_llama_ray_8B_rm_multi.sh


--------------------------------------------------------------------------------
/train_ppo_qwen_ray_32B_rm_multi.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GuanghaoYe/Emergence-of-Thinking/HEAD/train_ppo_qwen_ray_32B_rm_multi.sh


--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------
1 | 0.5.8


--------------------------------------------------------------------------------