├── .gitignore ├── README.md ├── assets ├── apr-paper.pdf └── apr.png ├── pyproject.toml ├── src ├── __init__.py ├── countdown_utils.py ├── data │ ├── countdown.py │ ├── generate_apr.py │ ├── generate_problems.py │ ├── generate_sosp.py │ ├── scripts │ │ ├── create_prob.sh │ │ └── create_search.sh │ └── search.py ├── eval │ ├── eval_apr.py │ ├── eval_sosp.py │ └── parallel_infernce_utils.py └── utils.py ├── supervised-jax ├── README.md ├── launcher.py ├── llama3_train │ ├── .gitignore │ ├── README.md │ ├── gpt2_train_script.py │ ├── llama_train │ │ ├── __init__.py │ │ ├── gpt2.py │ │ ├── llama3.py │ │ ├── optimizer.py │ │ ├── serve.py │ │ ├── splash.py │ │ └── utils.py │ ├── llama_train_script.py │ ├── requirements.txt │ └── setup.py ├── scripts │ ├── hs-v3.sh │ ├── hsp-v3.sh │ ├── sos-v3.sh │ ├── training_hs-v2_llama.sh │ ├── training_hsp-v2_llama.sh │ ├── training_sos_llama.sh │ ├── training_sos_llama_1ep.sh │ ├── training_sos_llama_gpt2tok.sh │ ├── training_sos_llama_gpt2tok_1ep.sh │ └── training_sos_xiuyu.sh ├── to_dataset.ipynb ├── training_hsp.sh ├── training_hsp_2x.sh ├── training_run.sh └── training_run_test.sh └── tinyrl ├── README.md ├── configs ├── apr.yaml ├── apr_cond10.yaml └── sosp.yaml ├── data └── .gitkeep ├── requirements.txt ├── rollout ├── apr_utils.py └── sos_utils.py ├── trainer.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/README.md -------------------------------------------------------------------------------- /assets/apr-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/assets/apr-paper.pdf -------------------------------------------------------------------------------- /assets/apr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/assets/apr.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/pyproject.toml -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/countdown_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/countdown_utils.py -------------------------------------------------------------------------------- /src/data/countdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/countdown.py -------------------------------------------------------------------------------- /src/data/generate_apr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/generate_apr.py -------------------------------------------------------------------------------- /src/data/generate_problems.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/generate_problems.py -------------------------------------------------------------------------------- /src/data/generate_sosp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/generate_sosp.py -------------------------------------------------------------------------------- /src/data/scripts/create_prob.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/scripts/create_prob.sh -------------------------------------------------------------------------------- /src/data/scripts/create_search.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/scripts/create_search.sh -------------------------------------------------------------------------------- /src/data/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/data/search.py -------------------------------------------------------------------------------- /src/eval/eval_apr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/eval/eval_apr.py -------------------------------------------------------------------------------- /src/eval/eval_sosp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/eval/eval_sosp.py -------------------------------------------------------------------------------- /src/eval/parallel_infernce_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/eval/parallel_infernce_utils.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/src/utils.py -------------------------------------------------------------------------------- /supervised-jax/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/README.md -------------------------------------------------------------------------------- /supervised-jax/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/launcher.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/.gitignore -------------------------------------------------------------------------------- /supervised-jax/llama3_train/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/README.md -------------------------------------------------------------------------------- /supervised-jax/llama3_train/gpt2_train_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/gpt2_train_script.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train/gpt2.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train/llama3.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train/optimizer.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train/serve.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/splash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train/splash.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train/utils.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/llama_train_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/llama_train_script.py -------------------------------------------------------------------------------- /supervised-jax/llama3_train/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/requirements.txt -------------------------------------------------------------------------------- /supervised-jax/llama3_train/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/llama3_train/setup.py -------------------------------------------------------------------------------- /supervised-jax/scripts/hs-v3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/hs-v3.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/hsp-v3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/hsp-v3.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/sos-v3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/sos-v3.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_hs-v2_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_hs-v2_llama.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_hsp-v2_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_hsp-v2_llama.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_sos_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_sos_llama.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_sos_llama_1ep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_sos_llama_1ep.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_sos_llama_gpt2tok.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_sos_llama_gpt2tok.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_sos_llama_gpt2tok_1ep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_sos_llama_gpt2tok_1ep.sh -------------------------------------------------------------------------------- /supervised-jax/scripts/training_sos_xiuyu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/scripts/training_sos_xiuyu.sh -------------------------------------------------------------------------------- /supervised-jax/to_dataset.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/to_dataset.ipynb -------------------------------------------------------------------------------- /supervised-jax/training_hsp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/training_hsp.sh -------------------------------------------------------------------------------- /supervised-jax/training_hsp_2x.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/training_hsp_2x.sh -------------------------------------------------------------------------------- /supervised-jax/training_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/training_run.sh -------------------------------------------------------------------------------- /supervised-jax/training_run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/supervised-jax/training_run_test.sh -------------------------------------------------------------------------------- /tinyrl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/README.md -------------------------------------------------------------------------------- /tinyrl/configs/apr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/configs/apr.yaml -------------------------------------------------------------------------------- /tinyrl/configs/apr_cond10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/configs/apr_cond10.yaml -------------------------------------------------------------------------------- /tinyrl/configs/sosp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/configs/sosp.yaml -------------------------------------------------------------------------------- /tinyrl/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tinyrl/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | sglang 3 | wandb 4 | hydra-core 5 | termcolor 6 | -------------------------------------------------------------------------------- /tinyrl/rollout/apr_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/rollout/apr_utils.py -------------------------------------------------------------------------------- /tinyrl/rollout/sos_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/rollout/sos_utils.py -------------------------------------------------------------------------------- /tinyrl/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/trainer.py -------------------------------------------------------------------------------- /tinyrl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Parallel-Reasoning/APR/HEAD/tinyrl/utils.py --------------------------------------------------------------------------------