├── .gitignore ├── LICENSE ├── README.md ├── assets ├── compute_matched_illustration.png └── teaser.png ├── environment.yml └── llmonk ├── evaluate ├── configs │ ├── README.md │ ├── llama70b_gpqa.yaml │ ├── llama70b_math128.yaml │ ├── llama8b_math128.yaml │ ├── qwen7b_math128.yaml │ └── qwq32b_aime25.yaml ├── eval_utils.py ├── math_datasets.py ├── math_pass_k.py └── plot_success_rate.py ├── generate ├── README.md ├── generate_solutions.py ├── prompts.py ├── vllm_server.py └── vllm_utils.py ├── utils.py └── verify ├── README.md ├── demo.ipynb ├── generate_verifications.py └── prompts ├── QwQ32B └── genrm_base.txt ├── gpqa ├── two_shot.txt └── zero_shot.txt ├── llama3.1_8b_instruct ├── finetuned.txt ├── genrm_base.txt └── genrm_base_zero_shot.txt ├── llama3.3_70b └── genrm_base.txt ├── qwen2.5_7b_instruct └── finetuned.txt └── training_data_gpt4o.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/README.md -------------------------------------------------------------------------------- /assets/compute_matched_illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/assets/compute_matched_illustration.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/environment.yml -------------------------------------------------------------------------------- /llmonk/evaluate/configs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/configs/README.md -------------------------------------------------------------------------------- /llmonk/evaluate/configs/llama70b_gpqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/configs/llama70b_gpqa.yaml -------------------------------------------------------------------------------- /llmonk/evaluate/configs/llama70b_math128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/configs/llama70b_math128.yaml -------------------------------------------------------------------------------- /llmonk/evaluate/configs/llama8b_math128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/configs/llama8b_math128.yaml -------------------------------------------------------------------------------- /llmonk/evaluate/configs/qwen7b_math128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/configs/qwen7b_math128.yaml -------------------------------------------------------------------------------- /llmonk/evaluate/configs/qwq32b_aime25.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/configs/qwq32b_aime25.yaml -------------------------------------------------------------------------------- /llmonk/evaluate/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/eval_utils.py -------------------------------------------------------------------------------- /llmonk/evaluate/math_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/math_datasets.py -------------------------------------------------------------------------------- /llmonk/evaluate/math_pass_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/math_pass_k.py -------------------------------------------------------------------------------- /llmonk/evaluate/plot_success_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/evaluate/plot_success_rate.py -------------------------------------------------------------------------------- /llmonk/generate/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/generate/README.md -------------------------------------------------------------------------------- /llmonk/generate/generate_solutions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/generate/generate_solutions.py -------------------------------------------------------------------------------- /llmonk/generate/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/generate/prompts.py -------------------------------------------------------------------------------- /llmonk/generate/vllm_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/generate/vllm_server.py -------------------------------------------------------------------------------- /llmonk/generate/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/generate/vllm_utils.py -------------------------------------------------------------------------------- /llmonk/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/utils.py -------------------------------------------------------------------------------- /llmonk/verify/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/README.md -------------------------------------------------------------------------------- /llmonk/verify/demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/demo.ipynb -------------------------------------------------------------------------------- /llmonk/verify/generate_verifications.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/generate_verifications.py -------------------------------------------------------------------------------- /llmonk/verify/prompts/QwQ32B/genrm_base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/QwQ32B/genrm_base.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/gpqa/two_shot.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/gpqa/two_shot.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/gpqa/zero_shot.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/gpqa/zero_shot.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/llama3.1_8b_instruct/finetuned.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/llama3.1_8b_instruct/finetuned.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/llama3.1_8b_instruct/genrm_base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/llama3.1_8b_instruct/genrm_base.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/llama3.1_8b_instruct/genrm_base_zero_shot.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/llama3.1_8b_instruct/genrm_base_zero_shot.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/llama3.3_70b/genrm_base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/llama3.3_70b/genrm_base.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/qwen2.5_7b_instruct/finetuned.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/qwen2.5_7b_instruct/finetuned.txt -------------------------------------------------------------------------------- /llmonk/verify/prompts/training_data_gpt4o.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nishadsinghi/sc-genrm-scaling/HEAD/llmonk/verify/prompts/training_data_gpt4o.txt --------------------------------------------------------------------------------