├── README.md ├── environment.yml ├── llm_experiments ├── constants.py ├── data │ └── MATH500.json ├── eval_alpaca.py ├── eval_gpqa.py ├── eval_he.py ├── eval_math.py ├── grader_utils │ ├── gpqa_grader.py │ ├── he_check.py │ ├── he_execute.py │ ├── he_grader.py │ ├── math_grader.py │ ├── math_normalize.py │ └── parse_utils.py ├── passk_gpqa.py ├── passk_he.py ├── passk_math.py ├── power_samp_alpaca.py ├── power_samp_gpqa.py ├── power_samp_he.py ├── power_samp_math.py ├── power_samp_utils.py └── scripts │ ├── power_samp_alpaca.sh │ ├── power_samp_gpqa.sh │ ├── power_samp_he.sh │ └── power_samp_math.sh ├── teaser.png └── toy_composition.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/README.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/environment.yml -------------------------------------------------------------------------------- /llm_experiments/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/constants.py -------------------------------------------------------------------------------- /llm_experiments/data/MATH500.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/data/MATH500.json -------------------------------------------------------------------------------- /llm_experiments/eval_alpaca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/eval_alpaca.py -------------------------------------------------------------------------------- /llm_experiments/eval_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/eval_gpqa.py -------------------------------------------------------------------------------- /llm_experiments/eval_he.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/eval_he.py -------------------------------------------------------------------------------- /llm_experiments/eval_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/eval_math.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/gpqa_grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/gpqa_grader.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/he_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/he_check.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/he_execute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/he_execute.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/he_grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/he_grader.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/math_grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/math_grader.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/math_normalize.py -------------------------------------------------------------------------------- /llm_experiments/grader_utils/parse_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/grader_utils/parse_utils.py -------------------------------------------------------------------------------- /llm_experiments/passk_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/passk_gpqa.py -------------------------------------------------------------------------------- /llm_experiments/passk_he.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/passk_he.py -------------------------------------------------------------------------------- /llm_experiments/passk_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/passk_math.py -------------------------------------------------------------------------------- /llm_experiments/power_samp_alpaca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/power_samp_alpaca.py -------------------------------------------------------------------------------- /llm_experiments/power_samp_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/power_samp_gpqa.py -------------------------------------------------------------------------------- /llm_experiments/power_samp_he.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/power_samp_he.py -------------------------------------------------------------------------------- /llm_experiments/power_samp_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/power_samp_math.py -------------------------------------------------------------------------------- /llm_experiments/power_samp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/power_samp_utils.py -------------------------------------------------------------------------------- /llm_experiments/scripts/power_samp_alpaca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/scripts/power_samp_alpaca.sh -------------------------------------------------------------------------------- /llm_experiments/scripts/power_samp_gpqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/scripts/power_samp_gpqa.sh -------------------------------------------------------------------------------- /llm_experiments/scripts/power_samp_he.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/scripts/power_samp_he.sh -------------------------------------------------------------------------------- /llm_experiments/scripts/power_samp_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/llm_experiments/scripts/power_samp_math.sh -------------------------------------------------------------------------------- /teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/teaser.png -------------------------------------------------------------------------------- /toy_composition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakaran/reasoning-with-sampling/HEAD/toy_composition.py --------------------------------------------------------------------------------