├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── README_euler.md ├── README_judges.md ├── configs ├── agent_scaffolds │ └── selfcheck.yaml ├── competitions │ ├── aime │ │ ├── aime_2024_I.yaml │ │ ├── aime_2024_II.yaml │ │ └── aime_2025.yaml │ ├── apex │ │ └── apex_2025.yaml │ ├── brumo │ │ └── brumo_2025.yaml │ ├── cmimc │ │ └── cmimc_2025.yaml │ ├── euler │ │ └── euler.yaml │ ├── hmmt │ │ ├── hmmt_feb_2024.yaml │ │ └── hmmt_feb_2025.yaml │ ├── imc │ │ └── imc_2025.yaml │ ├── imo │ │ └── imo_2025.yaml │ ├── kangaroo │ │ ├── kangaroo_2025_1-2.yaml │ │ ├── kangaroo_2025_11-12.yaml │ │ ├── kangaroo_2025_3-4.yaml │ │ ├── kangaroo_2025_5-6.yaml │ │ ├── kangaroo_2025_7-8.yaml │ │ └── kangaroo_2025_9-10.yaml │ ├── smt │ │ └── smt_2025.yaml │ └── usamo │ │ ├── usamo_2024.yaml │ │ └── usamo_2025.yaml └── models │ ├── anthropic │ ├── claude-35.yaml │ ├── claude-37.yaml │ ├── claude-opus-4.yaml │ ├── claude-sonnet-40.yaml │ └── claude-sonnet-45.yaml │ ├── deepseek │ ├── deepseek_distill_1.5b.yaml │ ├── deepseek_distill_14b.yaml │ ├── deepseek_distill_32b.yaml │ ├── deepseek_distill_70b.yaml │ ├── deepseek_r1.yaml │ ├── deepseek_r1_0528.yaml │ ├── deepseek_v3.yaml │ ├── deepseek_v31.yaml │ ├── deepseek_v32.yaml │ └── deepseek_v3_03_24.yaml │ ├── exclude.txt │ ├── gemini │ ├── deep-think.yaml │ ├── gemini-flash-2.0.yaml │ ├── gemini-flash-2.5.yaml │ ├── gemini-flash-thinking-2.0.yaml │ ├── gemini-pro-2.0.yaml │ ├── gemini-pro-2.5-05-06.yaml │ ├── gemini-pro-2.5-agent.yaml │ ├── gemini-pro-2.5-best-of-32.yaml │ └── gemini-pro-2.5.yaml │ ├── glm │ ├── glm-45-air.yaml │ ├── glm-45.yaml │ ├── glm-45v.yaml │ └── glm-46.yaml │ ├── llama4 │ └── maverick.yaml │ ├── moonshot │ └── k2.yaml │ ├── openai │ ├── gpt-4o.yaml │ ├── gpt-5-agent.yaml │ ├── gpt-5-mini.yaml │ ├── gpt-5-nano-agent.yaml │ ├── gpt-5-nano.yaml │ ├── gpt-5.yaml │ ├── o1-pro.yaml │ ├── o1.yaml │ ├── o3-mini--low.yaml │ ├── o3-mini--medium.yaml │ ├── o3-mini.yaml │ ├── o3.yaml │ ├── o4-mini--high.yaml │ ├── o4-mini--low.yaml │ ├── o4-mini--medium.yaml │ ├── oss-120b.yaml │ └── oss-20b.yaml │ ├── other │ ├── k2-think.yaml │ ├── limo.yaml │ ├── open-thoughts.yaml │ └── s1.yaml │ ├── phi4 │ ├── .ipynb_checkpoints │ │ └── 14b-reasoning-plus-checkpoint.yaml │ └── 14b-reasoning-plus.yaml │ ├── qwen │ ├── qwen3_235b_a22b.yaml │ ├── qwen3_235b_a22b_2507.yaml │ ├── qwen3_30b_a3b.yaml │ ├── qwen3_vl_235b_a22b_instruct.yaml │ ├── qwen3_vl_235b_a22b_thinking.yaml │ ├── qwq-preview.yaml │ └── qwq.yaml │ └── xai │ ├── grok-3-mini-high.yaml │ ├── grok-3-mini-low.yaml │ ├── grok-4-fast-reasoning.yaml │ ├── grok-4-fast-selfcheckagent.yaml │ ├── grok-4-new.yaml │ ├── grok-4.yaml │ └── grok.yaml ├── images ├── judge_overview.png └── matharena_icon.png ├── pyproject.toml ├── scripts ├── app.py ├── curation │ ├── check_latex.py │ ├── judge_parser.py │ ├── nuke_problems.py │ ├── test_parser_changes.py │ ├── upload_competition.py │ └── upload_outputs.py ├── euler │ └── project_euler_submit.py ├── extraction │ ├── comparison.py │ ├── human_score_data.py │ ├── imo_table.py │ ├── leaderboard.py │ ├── rank_correlation.py │ ├── tokens_table.py │ └── type_scoring.py ├── nuke_single_run.py ├── regrade.py ├── run.py ├── run_all_fa.sh └── run_all_kangaroo.sh ├── src └── matharena │ ├── __init__.py │ ├── api_client.py │ ├── code_execution.py │ ├── configs.py │ ├── grader.py │ ├── parse_manual.py │ ├── parser.py │ ├── request_logger.py │ ├── runner.py │ ├── runs.py │ ├── solvers │ ├── __init__.py │ ├── agent_pool.py │ ├── base_agent.py │ ├── base_solver.py │ ├── pure_model_solver.py │ ├── selfcheck_agent.py │ └── solver_response.py │ └── utils.py └── tests └── test_code_execution.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/.gitignore -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/README.md -------------------------------------------------------------------------------- /README_euler.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/README_euler.md -------------------------------------------------------------------------------- /README_judges.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/README_judges.md -------------------------------------------------------------------------------- /configs/agent_scaffolds/selfcheck.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/agent_scaffolds/selfcheck.yaml -------------------------------------------------------------------------------- /configs/competitions/aime/aime_2024_I.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/aime/aime_2024_I.yaml -------------------------------------------------------------------------------- /configs/competitions/aime/aime_2024_II.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/aime/aime_2024_II.yaml -------------------------------------------------------------------------------- /configs/competitions/aime/aime_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/aime/aime_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/apex/apex_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/apex/apex_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/brumo/brumo_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/brumo/brumo_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/cmimc/cmimc_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/cmimc/cmimc_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/euler/euler.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/euler/euler.yaml -------------------------------------------------------------------------------- /configs/competitions/hmmt/hmmt_feb_2024.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/hmmt/hmmt_feb_2024.yaml -------------------------------------------------------------------------------- /configs/competitions/hmmt/hmmt_feb_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/hmmt/hmmt_feb_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/imc/imc_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/imc/imc_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/imo/imo_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/imo/imo_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/kangaroo/kangaroo_2025_1-2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/kangaroo/kangaroo_2025_1-2.yaml -------------------------------------------------------------------------------- /configs/competitions/kangaroo/kangaroo_2025_11-12.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/kangaroo/kangaroo_2025_11-12.yaml -------------------------------------------------------------------------------- /configs/competitions/kangaroo/kangaroo_2025_3-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/kangaroo/kangaroo_2025_3-4.yaml -------------------------------------------------------------------------------- /configs/competitions/kangaroo/kangaroo_2025_5-6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/kangaroo/kangaroo_2025_5-6.yaml -------------------------------------------------------------------------------- /configs/competitions/kangaroo/kangaroo_2025_7-8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/kangaroo/kangaroo_2025_7-8.yaml -------------------------------------------------------------------------------- /configs/competitions/kangaroo/kangaroo_2025_9-10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/kangaroo/kangaroo_2025_9-10.yaml -------------------------------------------------------------------------------- /configs/competitions/smt/smt_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/smt/smt_2025.yaml -------------------------------------------------------------------------------- /configs/competitions/usamo/usamo_2024.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/usamo/usamo_2024.yaml -------------------------------------------------------------------------------- /configs/competitions/usamo/usamo_2025.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/competitions/usamo/usamo_2025.yaml -------------------------------------------------------------------------------- /configs/models/anthropic/claude-35.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/anthropic/claude-35.yaml -------------------------------------------------------------------------------- /configs/models/anthropic/claude-37.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/anthropic/claude-37.yaml -------------------------------------------------------------------------------- /configs/models/anthropic/claude-opus-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/anthropic/claude-opus-4.yaml -------------------------------------------------------------------------------- /configs/models/anthropic/claude-sonnet-40.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/anthropic/claude-sonnet-40.yaml -------------------------------------------------------------------------------- /configs/models/anthropic/claude-sonnet-45.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/anthropic/claude-sonnet-45.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_distill_1.5b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_distill_1.5b.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_distill_14b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_distill_14b.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_distill_32b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_distill_32b.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_distill_70b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_distill_70b.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_r1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_r1.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_r1_0528.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_r1_0528.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_v3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_v3.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_v31.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_v31.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_v32.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_v32.yaml -------------------------------------------------------------------------------- /configs/models/deepseek/deepseek_v3_03_24.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/deepseek/deepseek_v3_03_24.yaml -------------------------------------------------------------------------------- /configs/models/exclude.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/exclude.txt -------------------------------------------------------------------------------- /configs/models/gemini/deep-think.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/deep-think.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-flash-2.0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-flash-2.0.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-flash-2.5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-flash-2.5.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-flash-thinking-2.0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-flash-thinking-2.0.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-pro-2.0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-pro-2.0.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-pro-2.5-05-06.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-pro-2.5-05-06.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-pro-2.5-agent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-pro-2.5-agent.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-pro-2.5-best-of-32.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-pro-2.5-best-of-32.yaml -------------------------------------------------------------------------------- /configs/models/gemini/gemini-pro-2.5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/gemini/gemini-pro-2.5.yaml -------------------------------------------------------------------------------- /configs/models/glm/glm-45-air.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/glm/glm-45-air.yaml -------------------------------------------------------------------------------- /configs/models/glm/glm-45.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/glm/glm-45.yaml -------------------------------------------------------------------------------- /configs/models/glm/glm-45v.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/glm/glm-45v.yaml -------------------------------------------------------------------------------- /configs/models/glm/glm-46.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/glm/glm-46.yaml -------------------------------------------------------------------------------- /configs/models/llama4/maverick.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/llama4/maverick.yaml -------------------------------------------------------------------------------- /configs/models/moonshot/k2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/moonshot/k2.yaml -------------------------------------------------------------------------------- /configs/models/openai/gpt-4o.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/gpt-4o.yaml -------------------------------------------------------------------------------- /configs/models/openai/gpt-5-agent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/gpt-5-agent.yaml -------------------------------------------------------------------------------- /configs/models/openai/gpt-5-mini.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/gpt-5-mini.yaml -------------------------------------------------------------------------------- /configs/models/openai/gpt-5-nano-agent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/gpt-5-nano-agent.yaml -------------------------------------------------------------------------------- /configs/models/openai/gpt-5-nano.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/gpt-5-nano.yaml -------------------------------------------------------------------------------- /configs/models/openai/gpt-5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/gpt-5.yaml -------------------------------------------------------------------------------- /configs/models/openai/o1-pro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o1-pro.yaml -------------------------------------------------------------------------------- /configs/models/openai/o1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o1.yaml -------------------------------------------------------------------------------- /configs/models/openai/o3-mini--low.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o3-mini--low.yaml -------------------------------------------------------------------------------- /configs/models/openai/o3-mini--medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o3-mini--medium.yaml -------------------------------------------------------------------------------- /configs/models/openai/o3-mini.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o3-mini.yaml -------------------------------------------------------------------------------- /configs/models/openai/o3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o3.yaml -------------------------------------------------------------------------------- /configs/models/openai/o4-mini--high.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o4-mini--high.yaml -------------------------------------------------------------------------------- /configs/models/openai/o4-mini--low.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o4-mini--low.yaml -------------------------------------------------------------------------------- /configs/models/openai/o4-mini--medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/o4-mini--medium.yaml -------------------------------------------------------------------------------- /configs/models/openai/oss-120b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/oss-120b.yaml -------------------------------------------------------------------------------- /configs/models/openai/oss-20b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/openai/oss-20b.yaml -------------------------------------------------------------------------------- /configs/models/other/k2-think.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/other/k2-think.yaml -------------------------------------------------------------------------------- /configs/models/other/limo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/other/limo.yaml -------------------------------------------------------------------------------- /configs/models/other/open-thoughts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/other/open-thoughts.yaml -------------------------------------------------------------------------------- /configs/models/other/s1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/other/s1.yaml -------------------------------------------------------------------------------- /configs/models/phi4/.ipynb_checkpoints/14b-reasoning-plus-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/phi4/.ipynb_checkpoints/14b-reasoning-plus-checkpoint.yaml -------------------------------------------------------------------------------- /configs/models/phi4/14b-reasoning-plus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/phi4/14b-reasoning-plus.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwen3_235b_a22b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwen3_235b_a22b.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwen3_235b_a22b_2507.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwen3_235b_a22b_2507.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwen3_30b_a3b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwen3_30b_a3b.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwen3_vl_235b_a22b_instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwen3_vl_235b_a22b_instruct.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwen3_vl_235b_a22b_thinking.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwen3_vl_235b_a22b_thinking.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwq-preview.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwq-preview.yaml -------------------------------------------------------------------------------- /configs/models/qwen/qwq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/qwen/qwq.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok-3-mini-high.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok-3-mini-high.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok-3-mini-low.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok-3-mini-low.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok-4-fast-reasoning.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok-4-fast-reasoning.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok-4-fast-selfcheckagent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok-4-fast-selfcheckagent.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok-4-new.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok-4-new.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok-4.yaml -------------------------------------------------------------------------------- /configs/models/xai/grok.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/configs/models/xai/grok.yaml -------------------------------------------------------------------------------- /images/judge_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/images/judge_overview.png -------------------------------------------------------------------------------- /images/matharena_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/images/matharena_icon.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/app.py -------------------------------------------------------------------------------- /scripts/curation/check_latex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/curation/check_latex.py -------------------------------------------------------------------------------- /scripts/curation/judge_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/curation/judge_parser.py -------------------------------------------------------------------------------- /scripts/curation/nuke_problems.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/curation/nuke_problems.py -------------------------------------------------------------------------------- /scripts/curation/test_parser_changes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/curation/test_parser_changes.py -------------------------------------------------------------------------------- /scripts/curation/upload_competition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/curation/upload_competition.py -------------------------------------------------------------------------------- /scripts/curation/upload_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/curation/upload_outputs.py -------------------------------------------------------------------------------- /scripts/euler/project_euler_submit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/euler/project_euler_submit.py -------------------------------------------------------------------------------- /scripts/extraction/comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/comparison.py -------------------------------------------------------------------------------- /scripts/extraction/human_score_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/human_score_data.py -------------------------------------------------------------------------------- /scripts/extraction/imo_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/imo_table.py -------------------------------------------------------------------------------- /scripts/extraction/leaderboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/leaderboard.py -------------------------------------------------------------------------------- /scripts/extraction/rank_correlation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/rank_correlation.py -------------------------------------------------------------------------------- /scripts/extraction/tokens_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/tokens_table.py -------------------------------------------------------------------------------- /scripts/extraction/type_scoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/extraction/type_scoring.py -------------------------------------------------------------------------------- /scripts/nuke_single_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/nuke_single_run.py -------------------------------------------------------------------------------- /scripts/regrade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/regrade.py -------------------------------------------------------------------------------- /scripts/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/run.py -------------------------------------------------------------------------------- /scripts/run_all_fa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/run_all_fa.sh -------------------------------------------------------------------------------- /scripts/run_all_kangaroo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/scripts/run_all_kangaroo.sh -------------------------------------------------------------------------------- /src/matharena/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/__init__.py -------------------------------------------------------------------------------- /src/matharena/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/api_client.py -------------------------------------------------------------------------------- /src/matharena/code_execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/code_execution.py -------------------------------------------------------------------------------- /src/matharena/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/configs.py -------------------------------------------------------------------------------- /src/matharena/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/grader.py -------------------------------------------------------------------------------- /src/matharena/parse_manual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/parse_manual.py -------------------------------------------------------------------------------- /src/matharena/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/parser.py -------------------------------------------------------------------------------- /src/matharena/request_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/request_logger.py -------------------------------------------------------------------------------- /src/matharena/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/runner.py -------------------------------------------------------------------------------- /src/matharena/runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/runs.py -------------------------------------------------------------------------------- /src/matharena/solvers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/__init__.py -------------------------------------------------------------------------------- /src/matharena/solvers/agent_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/agent_pool.py -------------------------------------------------------------------------------- /src/matharena/solvers/base_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/base_agent.py -------------------------------------------------------------------------------- /src/matharena/solvers/base_solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/base_solver.py -------------------------------------------------------------------------------- /src/matharena/solvers/pure_model_solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/pure_model_solver.py -------------------------------------------------------------------------------- /src/matharena/solvers/selfcheck_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/selfcheck_agent.py -------------------------------------------------------------------------------- /src/matharena/solvers/solver_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/solvers/solver_response.py -------------------------------------------------------------------------------- /src/matharena/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/src/matharena/utils.py -------------------------------------------------------------------------------- /tests/test_code_execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eth-sri/matharena/HEAD/tests/test_code_execution.py --------------------------------------------------------------------------------