├── .claude ├── agents │ └── lighteval-porter.md └── settings.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── ci.yml │ ├── claude-code-review.yml │ ├── claude.yml │ ├── code-freeze-bypass.yaml │ ├── dependency-check.yml │ ├── publish-openbench-core.yml │ ├── release-please.yaml │ ├── stale.yaml │ ├── update-docs-benchmarks.yml │ └── update-docs-changelog.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── .release-please-manifest.json ├── .vscode └── settings.json ├── CHANGELOG.md ├── CITATION.cff ├── CLAUDE.md ├── CODEOWNERS ├── CONTRIBUTING.md ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── docs ├── benchmarks │ ├── catalog.mdx │ ├── coding.mdx │ ├── cybersecurity.mdx │ ├── knowledge.mdx │ ├── math.mdx │ ├── reasoning.mdx │ └── search.mdx ├── changelog.mdx ├── cli │ ├── cache.mdx │ ├── describe.mdx │ ├── eval-retry.mdx │ ├── eval.mdx │ ├── list.mdx │ ├── overview.mdx │ └── view.mdx ├── configuration.mdx ├── development │ ├── architecture.mdx │ ├── contributing.mdx │ ├── extending.mdx │ └── mcq.mdx ├── docs.json ├── evals │ ├── exercism.mdx │ ├── gpqa-diamond.mdx │ ├── graphwalks.mdx │ ├── livemcpbench.mdx │ ├── mmlu.mdx │ ├── scicode.mdx │ └── simpleqa.mdx ├── favicon.svg ├── fonts │ ├── .gitkeep │ ├── NeueRegrade-Variable.ttf │ ├── NeueRegrade-Variable.woff2 │ └── README.md ├── images │ └── bench_view_eval.png ├── index.mdx ├── installation.mdx ├── logo │ ├── dark.svg │ └── light.svg ├── providers.mdx ├── quickstart.mdx ├── release-notes.mdx ├── snippets │ └── benchmarks.data.mdx └── troubleshooting.mdx ├── packages └── openbench-core │ └── pyproject.toml ├── pyproject.toml ├── release-please-config.json ├── scripts ├── export_benchmarks.py ├── export_changelog.py ├── make_clockbench_data.py ├── run_sync_pyproject.sh └── sync_pyproject.py ├── src └── openbench │ ├── __init__.py │ ├── _cli │ ├── __init__.py │ ├── cache_command.py │ ├── describe_command.py │ ├── eval_command.py │ ├── eval_retry_command.py │ ├── export.py │ ├── export_command.py │ ├── list_command.py │ ├── utils.py │ └── view_command.py │ ├── _registry.py │ ├── agents │ ├── __init__.py │ ├── aider.py │ ├── base.py │ ├── claude.py │ ├── docker_manager.py │ ├── manager.py │ ├── opencode.py │ └── roo.py │ ├── config.py │ ├── datasets │ ├── __init__.py │ ├── arc_agi.py │ ├── boolq.py │ ├── browsecomp.py │ ├── chartqapro.py │ ├── clockbench.py │ ├── cosafe_m2s.py │ ├── deep_research_bench.py │ ├── detailbench.py │ ├── drop.py │ ├── exercism.py │ ├── factscore.py │ ├── gpt_oss │ │ ├── __init__.py │ │ └── aime.py │ ├── graphwalks.py │ ├── healthbench.py │ ├── hle.py │ ├── humaneval.py │ ├── ifbench.py │ ├── ifeval.py │ ├── jsonschemabench │ │ ├── __init__.py │ │ ├── jsonschemabench.py │ │ └── openai_compatible_ids.txt │ ├── livemcpbench.py │ ├── math.py │ ├── mathvista.py │ ├── mbpp.py │ ├── mgsm.py │ ├── mhj_m2s.py │ ├── mmmu.py │ ├── mmstar.py │ ├── mmvetv2.py │ ├── mockaime.py │ ├── mrcr.py │ ├── multichallenge.py │ ├── ocrbenchv2.py │ ├── political_evenhandedness.py │ ├── polyglotoxicity.py │ ├── rocketscience.py │ ├── rootly_terraform.py │ ├── safemt_m2s.py │ ├── scicode.py │ ├── sealqa.py │ ├── simpleqa.py │ ├── simpleqa_verified.py │ ├── smt.py │ ├── tau_bench.py │ └── tumlu.py │ ├── eval_config.py │ ├── evals │ ├── __init__.py │ ├── agentdojo │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── agentdojo.py │ │ ├── agents │ │ │ ├── agent.py │ │ │ └── ground_truth_agent.py │ │ ├── attacks │ │ │ └── attack.py │ │ ├── base_tasks.py │ │ ├── data │ │ │ └── suites │ │ │ │ ├── banking │ │ │ │ ├── environment.yaml │ │ │ │ └── injection_vectors.yaml │ │ │ │ ├── slack │ │ │ │ ├── environment.yaml │ │ │ │ └── injection_vectors.yaml │ │ │ │ ├── travel │ │ │ │ ├── environment.yaml │ │ │ │ └── injection_vectors.yaml │ │ │ │ ├── workspace │ │ │ │ ├── environment.yaml │ │ │ │ ├── gpt_prompts.txt │ │ │ │ ├── include │ │ │ │ │ ├── calendar.yaml │ │ │ │ │ ├── cloud_drive.yaml │ │ │ │ │ └── inbox.yaml │ │ │ │ └── injection_vectors.yaml │ │ │ │ └── workspace_plus │ │ │ │ ├── environment.yaml │ │ │ │ ├── injection_vectors.yaml │ │ │ │ └── terminal │ │ │ │ ├── compose.yaml │ │ │ │ ├── emma-computer │ │ │ │ ├── Dockerfile │ │ │ │ └── home-dir │ │ │ │ │ ├── Desktop │ │ │ │ │ └── todo.txt │ │ │ │ │ └── Downloads │ │ │ │ │ ├── 1706.03762v7.pdf │ │ │ │ │ ├── Basketball_through_hoop.jpg │ │ │ │ │ ├── Eopsaltria_australis_-_Mogo_Campground.jpg │ │ │ │ │ ├── Moon_right-view_(Clementine_dataset).png │ │ │ │ │ └── meeting_notes_1215.txt │ │ │ │ └── mock-gist-server │ │ │ │ ├── certs │ │ │ │ ├── gen-certs.sh │ │ │ │ ├── nginx.crt │ │ │ │ └── nginx.key │ │ │ │ ├── fix-permissions.sh │ │ │ │ ├── gist.github.com │ │ │ │ └── bsptech │ │ │ │ │ └── 389024167213dae90b5a7483f9fd81d5 │ │ │ │ │ └── raw │ │ │ │ │ └── bde28851c49777d31581f8b7078e2b96 │ │ │ │ │ ├── diagnostic.sh │ │ │ │ │ └── diagnostic2.sh │ │ │ │ └── nginx.conf │ │ ├── dataset.py │ │ ├── scorer.py │ │ ├── strenum.py │ │ ├── task_combinators.py │ │ ├── task_suite.py │ │ ├── task_suites │ │ │ ├── banking │ │ │ │ ├── environment.py │ │ │ │ ├── injection_tasks.py │ │ │ │ ├── task_suite.py │ │ │ │ └── user_tasks.py │ │ │ ├── slack │ │ │ │ ├── environment.py │ │ │ │ ├── injection_tasks.py │ │ │ │ ├── task_suite.py │ │ │ │ └── user_tasks.py │ │ │ ├── travel │ │ │ │ ├── environment.py │ │ │ │ ├── injection_tasks.py │ │ │ │ ├── task_suite.py │ │ │ │ └── user_tasks.py │ │ │ ├── workspace │ │ │ │ ├── environment.py │ │ │ │ ├── injection_tasks.py │ │ │ │ ├── task_suite.py │ │ │ │ └── user_tasks.py │ │ │ └── workspace_plus │ │ │ │ ├── environment.py │ │ │ │ ├── injection_tasks.py │ │ │ │ ├── task_suite.py │ │ │ │ └── user_tasks.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── banking_client.py │ │ │ ├── calendar_client.py │ │ │ ├── cloud_drive_client.py │ │ │ ├── email_client.py │ │ │ ├── file_reader.py │ │ │ ├── slack.py │ │ │ ├── terminal.py │ │ │ ├── travel_booking_client.py │ │ │ ├── types.py │ │ │ ├── user_account.py │ │ │ └── web.py │ │ ├── utils.py │ │ └── yaml_loader.py │ ├── agieval.py │ ├── anli.py │ ├── arabic_exams.py │ ├── arc.py │ ├── arc_agi.py │ ├── bbq.py │ ├── bigbench.py │ ├── bigbench_hard.py │ ├── blimp.py │ ├── boolq.py │ ├── browsecomp.py │ ├── chartqapro.py │ ├── clockbench.py │ ├── cosafe_m2s.py │ ├── deep_research_bench.py │ ├── detailbench.py │ ├── drop.py │ ├── ethics.py │ ├── exercism │ │ ├── Dockerfile │ │ ├── compose.yaml │ │ └── exercism.py │ ├── factscore.py │ ├── global_mmlu.py │ ├── glue.py │ ├── glue_standard.py │ ├── gpqa.py │ ├── gpqa_diamond.py │ ├── gpt_oss │ │ ├── __init__.py │ │ └── aime.py │ ├── graphwalks.py │ ├── gsm8k.py │ ├── gsm_plus.py │ ├── headqa.py │ ├── healthbench.py │ ├── hellaswag.py │ ├── hle.py │ ├── humaneval.py │ ├── ifbench.py │ ├── ifeval.py │ ├── jsonschemabench.py │ ├── legalsupport.py │ ├── livemcpbench.py │ ├── logiqa.py │ ├── math.py │ ├── matharena │ │ ├── __init__.py │ │ ├── aime_2023_I │ │ │ ├── __init__.py │ │ │ └── aime_2023_I.py │ │ ├── aime_2023_II │ │ │ ├── __init__.py │ │ │ └── aime_2023_II.py │ │ ├── aime_2024 │ │ │ ├── __init__.py │ │ │ └── aime_2024.py │ │ ├── aime_2024_I │ │ │ ├── __init__.py │ │ │ └── aime_2024_I.py │ │ ├── aime_2024_II │ │ │ ├── __init__.py │ │ │ └── aime_2024_II.py │ │ ├── aime_2025 │ │ │ ├── __init__.py │ │ │ └── aime_2025.py │ │ ├── aime_2025_II │ │ │ ├── __init__.py │ │ │ └── aime_2025_II.py │ │ ├── brumo_2025 │ │ │ ├── __init__.py │ │ │ └── brumo_2025.py │ │ ├── hmmt_feb_2023 │ │ │ ├── __init__.py │ │ │ └── hmmt_feb_2023.py │ │ ├── hmmt_feb_2024 │ │ │ ├── __init__.py │ │ │ └── hmmt_feb_2024.py │ │ ├── hmmt_feb_2025 │ │ │ ├── __init__.py │ │ │ └── hmmt_feb_2025.py │ │ └── matharena.py │ ├── mathqa.py │ ├── mathvista.py │ ├── mbpp.py │ ├── medmcqa.py │ ├── medqa.py │ ├── mgsm.py │ ├── mhj_m2s.py │ ├── mmlu.py │ ├── mmlu_pro.py │ ├── mmlu_redux.py │ ├── mmmlu.py │ ├── mmmu.py │ ├── mmmu_pro.py │ ├── mmstar.py │ ├── mmvetv2.py │ ├── mockaime.py │ ├── mrcr.py │ ├── multichallenge.py │ ├── musr.py │ ├── natural_questions.py │ ├── ocrbenchv2.py │ ├── openbookqa.py │ ├── piqa.py │ ├── political_evenhandedness.py │ ├── polyglotoxicity.py │ ├── prost.py │ ├── pubmedqa.py │ ├── qa4mre.py │ ├── qasper.py │ ├── race.py │ ├── rocketscience.py │ ├── rootly_gmcq.py │ ├── rootly_terraform.py │ ├── safemt_m2s.py │ ├── scicode.py │ ├── sciq.py │ ├── sealqa.py │ ├── simpleqa.py │ ├── simpleqa_verified.py │ ├── smt.py │ ├── social_iqa.py │ ├── squad_v2.py │ ├── supergpqa.py │ ├── swag.py │ ├── tau_bench.py │ ├── toxigen.py │ ├── triviaqa.py │ ├── truthfulqa.py │ ├── tumlu.py │ ├── winogrande.py │ ├── wsc273.py │ ├── xcopa.py │ ├── xstorycloze.py │ └── xwinograd.py │ ├── ifbench │ ├── __init__.py │ ├── instructions.py │ ├── instructions_registry.py │ └── instructions_util.py │ ├── metrics │ ├── __init__.py │ ├── chartqapro.py │ ├── clockbench.py │ ├── drop.py │ ├── factscore.py │ ├── graphwalks.py │ ├── grouped.py │ ├── healthbench.py │ ├── hle.py │ ├── ifeval.py │ ├── json_schema.py │ ├── mgsm.py │ ├── mmlu.py │ ├── mmlu_pro.py │ ├── mmstar.py │ ├── mmvetv2.py │ ├── mrcr.py │ ├── multichallenge.py │ ├── pass_hat.py │ ├── political_evenhandedness.py │ ├── polyglotoxicity.py │ ├── rocketscience.py │ ├── scicode.py │ ├── sealqa.py │ ├── simpleqa.py │ ├── simpleqa_verified.py │ └── strong_reject.py │ ├── model │ ├── __init__.py │ └── _providers │ │ ├── __init__.py │ │ ├── ai21.py │ │ ├── baseten.py │ │ ├── cerebras.py │ │ ├── cohere.py │ │ ├── crusoe.py │ │ ├── deepinfra.py │ │ ├── friendli.py │ │ ├── groq.py │ │ ├── helicone.py │ │ ├── huggingface.py │ │ ├── hyperbolic.py │ │ ├── lambda_ai.py │ │ ├── minimax.py │ │ ├── moonshot.py │ │ ├── nebius.py │ │ ├── nous.py │ │ ├── novita.py │ │ ├── openrouter.py │ │ ├── parasail.py │ │ ├── reka.py │ │ ├── sambanova.py │ │ ├── siliconflow.py │ │ ├── vercel.py │ │ ├── vllm.py │ │ └── wandb.py │ ├── monkeypatch │ ├── __init__.py │ ├── display_results_patch.py │ └── file_recorder_logfile_patch.py │ ├── prompts │ ├── __init__.py │ ├── chartqapro.py │ └── political_evenhandedness.py │ ├── provider_config.py │ ├── py.typed │ ├── scorers │ ├── __init__.py │ ├── arc_agi.py │ ├── browsecomp.py │ ├── chartqapro.py │ ├── clockbench.py │ ├── deep_research_bench.py │ ├── detailbench.py │ ├── drop.py │ ├── exercism.py │ ├── factscore.py │ ├── fallback_scorer.py │ ├── gpt_oss │ │ ├── __init__.py │ │ └── aime.py │ ├── grade_school_math.py │ ├── graphwalks.py │ ├── healthbench.py │ ├── hle.py │ ├── humaneval.py │ ├── ifbench.py │ ├── ifeval.py │ ├── json_schema.py │ ├── livemcpbench.py │ ├── math.py │ ├── mathvista.py │ ├── mbpp.py │ ├── mcq.py │ ├── mgsm.py │ ├── mmmu.py │ ├── mmstar.py │ ├── mmvetv2.py │ ├── mockaime.py │ ├── mrcr.py │ ├── multichallenge.py │ ├── ocrbenchv2.py │ ├── open_answer.py │ ├── political_evenhandedness.py │ ├── polyglotoxicity.py │ ├── qa.py │ ├── robust_boxed.py │ ├── scicode.py │ ├── score_boxed.py │ ├── score_last_number.py │ ├── sealqa.py │ ├── simpleqa.py │ ├── simpleqa_verified.py │ ├── smt.py │ ├── strong_reject.py │ ├── tau_bench.py │ └── tumlu.py │ ├── solvers │ ├── __init__.py │ ├── chartqapro.py │ ├── clockbench.py │ ├── deep_research_bench │ │ ├── __init__.py │ │ ├── deep_research_bench.py │ │ ├── deep_research_bench_fact.py │ │ └── deep_research_bench_race.py │ ├── exercism_solver.py │ ├── jsonschemabench.py │ ├── mmstar.py │ ├── ocrbenchv2.py │ ├── political_evenhandedness.py │ ├── scicode.py │ └── tau_bench.py │ ├── tools │ └── livemcpbench │ │ └── copilot │ │ ├── __init__.py │ │ ├── arg_generation.py │ │ ├── matcher.py │ │ ├── mcp_connection.py │ │ ├── prepare.py │ │ ├── router.py │ │ ├── schemas.py │ │ ├── server.py │ │ ├── toolsource.py │ │ └── upstream_cache.py │ └── utils │ ├── __init__.py │ ├── arc_parsing.py │ ├── cli_commands.py │ ├── deep_research_bench_prompts.py │ ├── docker.py │ ├── factscore_cache.py │ ├── factscore_wiki.py │ ├── image.py │ ├── imports.py │ ├── livemcpbench_cache.py │ ├── mcq.py │ ├── metadata.py │ └── text.py ├── tests ├── __init__.py ├── _cli │ ├── __init__.py │ ├── test_cache_command.py │ ├── test_cache_command_functions.py │ ├── test_eval_command.py │ ├── test_eval_command_functions.py │ └── test_export_command.py ├── conftest.py ├── integration │ ├── __init__.py │ └── test_cli.py ├── monkeypatch │ └── test_file_recorder_logfile_patch.py ├── test_cache.py ├── test_cli_epoch_reducers.py ├── test_epoch_reducer_registration.py ├── test_groq_provider.py ├── test_image_utils.py ├── test_json_schema_scorer.py ├── test_open_answer_scorer.py ├── test_pass_hat_reducer.py ├── test_registry.py ├── test_registry_imports.py ├── test_robust_scorers.py ├── test_text_utils.py ├── test_vllm_provider.py └── utils │ ├── test_factscore_download.py │ └── test_factscore_wiki.py └── uv.lock /.claude/agents/lighteval-porter.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.claude/agents/lighteval-porter.md -------------------------------------------------------------------------------- /.claude/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.claude/settings.json -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/ISSUE_TEMPLATE/bug_report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/ISSUE_TEMPLATE/feature_request.yml -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/claude-code-review.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/claude-code-review.yml -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/claude.yml -------------------------------------------------------------------------------- /.github/workflows/code-freeze-bypass.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/code-freeze-bypass.yaml -------------------------------------------------------------------------------- /.github/workflows/dependency-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/dependency-check.yml -------------------------------------------------------------------------------- /.github/workflows/publish-openbench-core.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/publish-openbench-core.yml -------------------------------------------------------------------------------- /.github/workflows/release-please.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/release-please.yaml -------------------------------------------------------------------------------- /.github/workflows/stale.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/stale.yaml -------------------------------------------------------------------------------- /.github/workflows/update-docs-benchmarks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/update-docs-benchmarks.yml -------------------------------------------------------------------------------- /.github/workflows/update-docs-changelog.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.github/workflows/update-docs-changelog.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.python-version -------------------------------------------------------------------------------- /.release-please-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | ".": "0.5.3" 3 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/CITATION.cff -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/CLAUDE.md -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @AarushSah @nmayorga7 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/LICENSE.md -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/README.md -------------------------------------------------------------------------------- /docs/benchmarks/catalog.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/catalog.mdx -------------------------------------------------------------------------------- /docs/benchmarks/coding.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/coding.mdx -------------------------------------------------------------------------------- /docs/benchmarks/cybersecurity.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/cybersecurity.mdx -------------------------------------------------------------------------------- /docs/benchmarks/knowledge.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/knowledge.mdx -------------------------------------------------------------------------------- /docs/benchmarks/math.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/math.mdx -------------------------------------------------------------------------------- /docs/benchmarks/reasoning.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/reasoning.mdx -------------------------------------------------------------------------------- /docs/benchmarks/search.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/benchmarks/search.mdx -------------------------------------------------------------------------------- /docs/changelog.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/changelog.mdx -------------------------------------------------------------------------------- /docs/cli/cache.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/cache.mdx -------------------------------------------------------------------------------- /docs/cli/describe.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/describe.mdx -------------------------------------------------------------------------------- /docs/cli/eval-retry.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/eval-retry.mdx -------------------------------------------------------------------------------- /docs/cli/eval.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/eval.mdx -------------------------------------------------------------------------------- /docs/cli/list.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/list.mdx -------------------------------------------------------------------------------- /docs/cli/overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/overview.mdx -------------------------------------------------------------------------------- /docs/cli/view.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/cli/view.mdx -------------------------------------------------------------------------------- /docs/configuration.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/configuration.mdx -------------------------------------------------------------------------------- /docs/development/architecture.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/development/architecture.mdx -------------------------------------------------------------------------------- /docs/development/contributing.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/development/contributing.mdx -------------------------------------------------------------------------------- /docs/development/extending.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/development/extending.mdx -------------------------------------------------------------------------------- /docs/development/mcq.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/development/mcq.mdx -------------------------------------------------------------------------------- /docs/docs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/docs.json -------------------------------------------------------------------------------- /docs/evals/exercism.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/exercism.mdx -------------------------------------------------------------------------------- /docs/evals/gpqa-diamond.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/gpqa-diamond.mdx -------------------------------------------------------------------------------- /docs/evals/graphwalks.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/graphwalks.mdx -------------------------------------------------------------------------------- /docs/evals/livemcpbench.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/livemcpbench.mdx -------------------------------------------------------------------------------- /docs/evals/mmlu.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/mmlu.mdx -------------------------------------------------------------------------------- /docs/evals/scicode.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/scicode.mdx -------------------------------------------------------------------------------- /docs/evals/simpleqa.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/evals/simpleqa.mdx -------------------------------------------------------------------------------- /docs/favicon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/favicon.svg -------------------------------------------------------------------------------- /docs/fonts/.gitkeep: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/fonts/NeueRegrade-Variable.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/fonts/NeueRegrade-Variable.ttf -------------------------------------------------------------------------------- /docs/fonts/NeueRegrade-Variable.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/fonts/NeueRegrade-Variable.woff2 -------------------------------------------------------------------------------- /docs/fonts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/fonts/README.md -------------------------------------------------------------------------------- /docs/images/bench_view_eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/images/bench_view_eval.png -------------------------------------------------------------------------------- /docs/index.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/index.mdx -------------------------------------------------------------------------------- /docs/installation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/installation.mdx -------------------------------------------------------------------------------- /docs/logo/dark.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/logo/dark.svg -------------------------------------------------------------------------------- /docs/logo/light.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/logo/light.svg -------------------------------------------------------------------------------- /docs/providers.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/providers.mdx -------------------------------------------------------------------------------- /docs/quickstart.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/quickstart.mdx -------------------------------------------------------------------------------- /docs/release-notes.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/release-notes.mdx -------------------------------------------------------------------------------- /docs/snippets/benchmarks.data.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/snippets/benchmarks.data.mdx -------------------------------------------------------------------------------- /docs/troubleshooting.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/docs/troubleshooting.mdx -------------------------------------------------------------------------------- /packages/openbench-core/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/packages/openbench-core/pyproject.toml -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/pyproject.toml -------------------------------------------------------------------------------- /release-please-config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/release-please-config.json -------------------------------------------------------------------------------- /scripts/export_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/scripts/export_benchmarks.py -------------------------------------------------------------------------------- /scripts/export_changelog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/scripts/export_changelog.py -------------------------------------------------------------------------------- /scripts/make_clockbench_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/scripts/make_clockbench_data.py -------------------------------------------------------------------------------- /scripts/run_sync_pyproject.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/scripts/run_sync_pyproject.sh -------------------------------------------------------------------------------- /scripts/sync_pyproject.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/scripts/sync_pyproject.py -------------------------------------------------------------------------------- /src/openbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/__init__.py -------------------------------------------------------------------------------- /src/openbench/_cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/__init__.py -------------------------------------------------------------------------------- /src/openbench/_cli/cache_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/cache_command.py -------------------------------------------------------------------------------- /src/openbench/_cli/describe_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/describe_command.py -------------------------------------------------------------------------------- /src/openbench/_cli/eval_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/eval_command.py -------------------------------------------------------------------------------- /src/openbench/_cli/eval_retry_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/eval_retry_command.py -------------------------------------------------------------------------------- /src/openbench/_cli/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/export.py -------------------------------------------------------------------------------- /src/openbench/_cli/export_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/export_command.py -------------------------------------------------------------------------------- /src/openbench/_cli/list_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/list_command.py -------------------------------------------------------------------------------- /src/openbench/_cli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/utils.py -------------------------------------------------------------------------------- /src/openbench/_cli/view_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_cli/view_command.py -------------------------------------------------------------------------------- /src/openbench/_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/_registry.py -------------------------------------------------------------------------------- /src/openbench/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/__init__.py -------------------------------------------------------------------------------- /src/openbench/agents/aider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/aider.py -------------------------------------------------------------------------------- /src/openbench/agents/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/base.py -------------------------------------------------------------------------------- /src/openbench/agents/claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/claude.py -------------------------------------------------------------------------------- /src/openbench/agents/docker_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/docker_manager.py -------------------------------------------------------------------------------- /src/openbench/agents/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/manager.py -------------------------------------------------------------------------------- /src/openbench/agents/opencode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/opencode.py -------------------------------------------------------------------------------- /src/openbench/agents/roo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/agents/roo.py -------------------------------------------------------------------------------- /src/openbench/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/config.py -------------------------------------------------------------------------------- /src/openbench/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/datasets/arc_agi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/arc_agi.py -------------------------------------------------------------------------------- /src/openbench/datasets/boolq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/boolq.py -------------------------------------------------------------------------------- /src/openbench/datasets/browsecomp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/browsecomp.py -------------------------------------------------------------------------------- /src/openbench/datasets/chartqapro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/chartqapro.py -------------------------------------------------------------------------------- /src/openbench/datasets/clockbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/clockbench.py -------------------------------------------------------------------------------- /src/openbench/datasets/cosafe_m2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/cosafe_m2s.py -------------------------------------------------------------------------------- /src/openbench/datasets/deep_research_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/deep_research_bench.py -------------------------------------------------------------------------------- /src/openbench/datasets/detailbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/detailbench.py -------------------------------------------------------------------------------- /src/openbench/datasets/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/drop.py -------------------------------------------------------------------------------- /src/openbench/datasets/exercism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/exercism.py -------------------------------------------------------------------------------- /src/openbench/datasets/factscore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/factscore.py -------------------------------------------------------------------------------- /src/openbench/datasets/gpt_oss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/datasets/gpt_oss/aime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/gpt_oss/aime.py -------------------------------------------------------------------------------- /src/openbench/datasets/graphwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/graphwalks.py -------------------------------------------------------------------------------- /src/openbench/datasets/healthbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/healthbench.py -------------------------------------------------------------------------------- /src/openbench/datasets/hle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/hle.py -------------------------------------------------------------------------------- /src/openbench/datasets/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/humaneval.py -------------------------------------------------------------------------------- /src/openbench/datasets/ifbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/ifbench.py -------------------------------------------------------------------------------- /src/openbench/datasets/ifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/ifeval.py -------------------------------------------------------------------------------- /src/openbench/datasets/jsonschemabench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/jsonschemabench/__init__.py -------------------------------------------------------------------------------- /src/openbench/datasets/jsonschemabench/jsonschemabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/jsonschemabench/jsonschemabench.py -------------------------------------------------------------------------------- /src/openbench/datasets/jsonschemabench/openai_compatible_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/jsonschemabench/openai_compatible_ids.txt -------------------------------------------------------------------------------- /src/openbench/datasets/livemcpbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/livemcpbench.py -------------------------------------------------------------------------------- /src/openbench/datasets/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/math.py -------------------------------------------------------------------------------- /src/openbench/datasets/mathvista.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mathvista.py -------------------------------------------------------------------------------- /src/openbench/datasets/mbpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mbpp.py -------------------------------------------------------------------------------- /src/openbench/datasets/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mgsm.py -------------------------------------------------------------------------------- /src/openbench/datasets/mhj_m2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mhj_m2s.py -------------------------------------------------------------------------------- /src/openbench/datasets/mmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mmmu.py -------------------------------------------------------------------------------- /src/openbench/datasets/mmstar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mmstar.py -------------------------------------------------------------------------------- /src/openbench/datasets/mmvetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mmvetv2.py -------------------------------------------------------------------------------- /src/openbench/datasets/mockaime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mockaime.py -------------------------------------------------------------------------------- /src/openbench/datasets/mrcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/mrcr.py -------------------------------------------------------------------------------- /src/openbench/datasets/multichallenge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/multichallenge.py -------------------------------------------------------------------------------- /src/openbench/datasets/ocrbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/ocrbenchv2.py -------------------------------------------------------------------------------- /src/openbench/datasets/political_evenhandedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/political_evenhandedness.py -------------------------------------------------------------------------------- /src/openbench/datasets/polyglotoxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/polyglotoxicity.py -------------------------------------------------------------------------------- /src/openbench/datasets/rocketscience.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/rocketscience.py -------------------------------------------------------------------------------- /src/openbench/datasets/rootly_terraform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/rootly_terraform.py -------------------------------------------------------------------------------- /src/openbench/datasets/safemt_m2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/safemt_m2s.py -------------------------------------------------------------------------------- /src/openbench/datasets/scicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/scicode.py -------------------------------------------------------------------------------- /src/openbench/datasets/sealqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/sealqa.py -------------------------------------------------------------------------------- /src/openbench/datasets/simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/simpleqa.py -------------------------------------------------------------------------------- /src/openbench/datasets/simpleqa_verified.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/simpleqa_verified.py -------------------------------------------------------------------------------- /src/openbench/datasets/smt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/smt.py -------------------------------------------------------------------------------- /src/openbench/datasets/tau_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/tau_bench.py -------------------------------------------------------------------------------- /src/openbench/datasets/tumlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/datasets/tumlu.py -------------------------------------------------------------------------------- /src/openbench/eval_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/eval_config.py -------------------------------------------------------------------------------- /src/openbench/evals/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/LICENSE -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/__init__.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/agentdojo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/agentdojo.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/agents/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/agents/agent.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/agents/ground_truth_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/agents/ground_truth_agent.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/attacks/attack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/attacks/attack.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/base_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/base_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/banking/environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/banking/environment.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/banking/injection_vectors.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/banking/injection_vectors.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/slack/environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/slack/environment.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/slack/injection_vectors.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/slack/injection_vectors.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/travel/environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/travel/environment.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/travel/injection_vectors.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/travel/injection_vectors.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace/environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace/environment.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace/gpt_prompts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace/gpt_prompts.txt -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace/include/calendar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace/include/calendar.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace/include/cloud_drive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace/include/cloud_drive.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace/include/inbox.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace/include/inbox.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace/injection_vectors.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace/injection_vectors.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/environment.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/injection_vectors.yaml: -------------------------------------------------------------------------------- 1 | !include ../workspace/injection_vectors.yaml 2 | -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/compose.yaml -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/Dockerfile -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Desktop/todo.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Desktop/todo.txt -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/1706.03762v7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/1706.03762v7.pdf -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/Basketball_through_hoop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/Basketball_through_hoop.jpg -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/Eopsaltria_australis_-_Mogo_Campground.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/Eopsaltria_australis_-_Mogo_Campground.jpg -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/Moon_right-view_(Clementine_dataset).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/Moon_right-view_(Clementine_dataset).png -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/meeting_notes_1215.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/meeting_notes_1215.txt -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/certs/gen-certs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/certs/gen-certs.sh -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/certs/nginx.crt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/certs/nginx.crt -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/certs/nginx.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/certs/nginx.key -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/fix-permissions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/fix-permissions.sh -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/gist.github.com/bsptech/389024167213dae90b5a7483f9fd81d5/raw/bde28851c49777d31581f8b7078e2b96/diagnostic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/gist.github.com/bsptech/389024167213dae90b5a7483f9fd81d5/raw/bde28851c49777d31581f8b7078e2b96/diagnostic.sh -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/gist.github.com/bsptech/389024167213dae90b5a7483f9fd81d5/raw/bde28851c49777d31581f8b7078e2b96/diagnostic2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/gist.github.com/bsptech/389024167213dae90b5a7483f9fd81d5/raw/bde28851c49777d31581f8b7078e2b96/diagnostic2.sh -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/nginx.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/data/suites/workspace_plus/terminal/mock-gist-server/nginx.conf -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/dataset.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/scorer.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/strenum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/strenum.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_combinators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_combinators.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suite.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/banking/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/banking/environment.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/banking/injection_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/banking/injection_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/banking/task_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/banking/task_suite.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/banking/user_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/banking/user_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/slack/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/slack/environment.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/slack/injection_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/slack/injection_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/slack/task_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/slack/task_suite.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/slack/user_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/slack/user_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/travel/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/travel/environment.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/travel/injection_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/travel/injection_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/travel/task_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/travel/task_suite.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/travel/user_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/travel/user_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace/environment.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace/injection_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace/injection_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace/task_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace/task_suite.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace/user_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace/user_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace_plus/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace_plus/environment.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace_plus/injection_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace_plus/injection_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace_plus/task_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace_plus/task_suite.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/task_suites/workspace_plus/user_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/task_suites/workspace_plus/user_tasks.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/banking_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/banking_client.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/calendar_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/calendar_client.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/cloud_drive_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/cloud_drive_client.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/email_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/email_client.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/file_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/file_reader.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/slack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/slack.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/terminal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/terminal.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/travel_booking_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/travel_booking_client.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/types.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/user_account.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/user_account.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/tools/web.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/tools/web.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/utils.py -------------------------------------------------------------------------------- /src/openbench/evals/agentdojo/yaml_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agentdojo/yaml_loader.py -------------------------------------------------------------------------------- /src/openbench/evals/agieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/agieval.py -------------------------------------------------------------------------------- /src/openbench/evals/anli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/anli.py -------------------------------------------------------------------------------- /src/openbench/evals/arabic_exams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/arabic_exams.py -------------------------------------------------------------------------------- /src/openbench/evals/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/arc.py -------------------------------------------------------------------------------- /src/openbench/evals/arc_agi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/arc_agi.py -------------------------------------------------------------------------------- /src/openbench/evals/bbq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/bbq.py -------------------------------------------------------------------------------- /src/openbench/evals/bigbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/bigbench.py -------------------------------------------------------------------------------- /src/openbench/evals/bigbench_hard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/bigbench_hard.py -------------------------------------------------------------------------------- /src/openbench/evals/blimp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/blimp.py -------------------------------------------------------------------------------- /src/openbench/evals/boolq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/boolq.py -------------------------------------------------------------------------------- /src/openbench/evals/browsecomp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/browsecomp.py -------------------------------------------------------------------------------- /src/openbench/evals/chartqapro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/chartqapro.py -------------------------------------------------------------------------------- /src/openbench/evals/clockbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/clockbench.py -------------------------------------------------------------------------------- /src/openbench/evals/cosafe_m2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/cosafe_m2s.py -------------------------------------------------------------------------------- /src/openbench/evals/deep_research_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/deep_research_bench.py -------------------------------------------------------------------------------- /src/openbench/evals/detailbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/detailbench.py -------------------------------------------------------------------------------- /src/openbench/evals/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/drop.py -------------------------------------------------------------------------------- /src/openbench/evals/ethics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/ethics.py -------------------------------------------------------------------------------- /src/openbench/evals/exercism/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/exercism/Dockerfile -------------------------------------------------------------------------------- /src/openbench/evals/exercism/compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/exercism/compose.yaml -------------------------------------------------------------------------------- /src/openbench/evals/exercism/exercism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/exercism/exercism.py -------------------------------------------------------------------------------- /src/openbench/evals/factscore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/factscore.py -------------------------------------------------------------------------------- /src/openbench/evals/global_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/global_mmlu.py -------------------------------------------------------------------------------- /src/openbench/evals/glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/glue.py -------------------------------------------------------------------------------- /src/openbench/evals/glue_standard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/glue_standard.py -------------------------------------------------------------------------------- /src/openbench/evals/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/gpqa.py -------------------------------------------------------------------------------- /src/openbench/evals/gpqa_diamond.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/gpqa_diamond.py -------------------------------------------------------------------------------- /src/openbench/evals/gpt_oss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/gpt_oss/aime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/gpt_oss/aime.py -------------------------------------------------------------------------------- /src/openbench/evals/graphwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/graphwalks.py -------------------------------------------------------------------------------- /src/openbench/evals/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/gsm8k.py -------------------------------------------------------------------------------- /src/openbench/evals/gsm_plus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/gsm_plus.py -------------------------------------------------------------------------------- /src/openbench/evals/headqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/headqa.py -------------------------------------------------------------------------------- /src/openbench/evals/healthbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/healthbench.py -------------------------------------------------------------------------------- /src/openbench/evals/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/hellaswag.py -------------------------------------------------------------------------------- /src/openbench/evals/hle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/hle.py -------------------------------------------------------------------------------- /src/openbench/evals/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/humaneval.py -------------------------------------------------------------------------------- /src/openbench/evals/ifbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/ifbench.py -------------------------------------------------------------------------------- /src/openbench/evals/ifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/ifeval.py -------------------------------------------------------------------------------- /src/openbench/evals/jsonschemabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/jsonschemabench.py -------------------------------------------------------------------------------- /src/openbench/evals/legalsupport.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/legalsupport.py -------------------------------------------------------------------------------- /src/openbench/evals/livemcpbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/livemcpbench.py -------------------------------------------------------------------------------- /src/openbench/evals/logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/logiqa.py -------------------------------------------------------------------------------- /src/openbench/evals/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/math.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2023_I/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2023_I/aime_2023_I.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2023_I/aime_2023_I.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2023_II/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2023_II/aime_2023_II.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2023_II/aime_2023_II.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2024/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2024/aime_2024.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2024/aime_2024.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2024_I/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2024_I/aime_2024_I.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2024_I/aime_2024_I.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2024_II/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2024_II/aime_2024_II.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2024_II/aime_2024_II.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2025/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2025/aime_2025.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2025/aime_2025.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2025_II/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/aime_2025_II/aime_2025_II.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/aime_2025_II/aime_2025_II.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/brumo_2025/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/brumo_2025/brumo_2025.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/brumo_2025/brumo_2025.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/hmmt_feb_2023/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/hmmt_feb_2023/hmmt_feb_2023.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/hmmt_feb_2023/hmmt_feb_2023.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/hmmt_feb_2024/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/hmmt_feb_2024/hmmt_feb_2024.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/hmmt_feb_2024/hmmt_feb_2024.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/hmmt_feb_2025/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/evals/matharena/hmmt_feb_2025/hmmt_feb_2025.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/hmmt_feb_2025/hmmt_feb_2025.py -------------------------------------------------------------------------------- /src/openbench/evals/matharena/matharena.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/matharena/matharena.py -------------------------------------------------------------------------------- /src/openbench/evals/mathqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mathqa.py -------------------------------------------------------------------------------- /src/openbench/evals/mathvista.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mathvista.py -------------------------------------------------------------------------------- /src/openbench/evals/mbpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mbpp.py -------------------------------------------------------------------------------- /src/openbench/evals/medmcqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/medmcqa.py -------------------------------------------------------------------------------- /src/openbench/evals/medqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/medqa.py -------------------------------------------------------------------------------- /src/openbench/evals/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mgsm.py -------------------------------------------------------------------------------- /src/openbench/evals/mhj_m2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mhj_m2s.py -------------------------------------------------------------------------------- /src/openbench/evals/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmlu.py -------------------------------------------------------------------------------- /src/openbench/evals/mmlu_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmlu_pro.py -------------------------------------------------------------------------------- /src/openbench/evals/mmlu_redux.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmlu_redux.py -------------------------------------------------------------------------------- /src/openbench/evals/mmmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmmlu.py -------------------------------------------------------------------------------- /src/openbench/evals/mmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmmu.py -------------------------------------------------------------------------------- /src/openbench/evals/mmmu_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmmu_pro.py -------------------------------------------------------------------------------- /src/openbench/evals/mmstar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmstar.py -------------------------------------------------------------------------------- /src/openbench/evals/mmvetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mmvetv2.py -------------------------------------------------------------------------------- /src/openbench/evals/mockaime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mockaime.py -------------------------------------------------------------------------------- /src/openbench/evals/mrcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/mrcr.py -------------------------------------------------------------------------------- /src/openbench/evals/multichallenge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/multichallenge.py -------------------------------------------------------------------------------- /src/openbench/evals/musr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/musr.py -------------------------------------------------------------------------------- /src/openbench/evals/natural_questions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/natural_questions.py -------------------------------------------------------------------------------- /src/openbench/evals/ocrbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/ocrbenchv2.py -------------------------------------------------------------------------------- /src/openbench/evals/openbookqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/openbookqa.py -------------------------------------------------------------------------------- /src/openbench/evals/piqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/piqa.py -------------------------------------------------------------------------------- /src/openbench/evals/political_evenhandedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/political_evenhandedness.py -------------------------------------------------------------------------------- /src/openbench/evals/polyglotoxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/polyglotoxicity.py -------------------------------------------------------------------------------- /src/openbench/evals/prost.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/prost.py -------------------------------------------------------------------------------- /src/openbench/evals/pubmedqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/pubmedqa.py -------------------------------------------------------------------------------- /src/openbench/evals/qa4mre.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/qa4mre.py -------------------------------------------------------------------------------- /src/openbench/evals/qasper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/qasper.py -------------------------------------------------------------------------------- /src/openbench/evals/race.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/race.py -------------------------------------------------------------------------------- /src/openbench/evals/rocketscience.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/rocketscience.py -------------------------------------------------------------------------------- /src/openbench/evals/rootly_gmcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/rootly_gmcq.py -------------------------------------------------------------------------------- /src/openbench/evals/rootly_terraform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/rootly_terraform.py -------------------------------------------------------------------------------- /src/openbench/evals/safemt_m2s.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/safemt_m2s.py -------------------------------------------------------------------------------- /src/openbench/evals/scicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/scicode.py -------------------------------------------------------------------------------- /src/openbench/evals/sciq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/sciq.py -------------------------------------------------------------------------------- /src/openbench/evals/sealqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/sealqa.py -------------------------------------------------------------------------------- /src/openbench/evals/simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/simpleqa.py -------------------------------------------------------------------------------- /src/openbench/evals/simpleqa_verified.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/simpleqa_verified.py -------------------------------------------------------------------------------- /src/openbench/evals/smt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/smt.py -------------------------------------------------------------------------------- /src/openbench/evals/social_iqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/social_iqa.py -------------------------------------------------------------------------------- /src/openbench/evals/squad_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/squad_v2.py -------------------------------------------------------------------------------- /src/openbench/evals/supergpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/supergpqa.py -------------------------------------------------------------------------------- /src/openbench/evals/swag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/swag.py -------------------------------------------------------------------------------- /src/openbench/evals/tau_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/tau_bench.py -------------------------------------------------------------------------------- /src/openbench/evals/toxigen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/toxigen.py -------------------------------------------------------------------------------- /src/openbench/evals/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/triviaqa.py -------------------------------------------------------------------------------- /src/openbench/evals/truthfulqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/truthfulqa.py -------------------------------------------------------------------------------- /src/openbench/evals/tumlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/tumlu.py -------------------------------------------------------------------------------- /src/openbench/evals/winogrande.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/winogrande.py -------------------------------------------------------------------------------- /src/openbench/evals/wsc273.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/wsc273.py -------------------------------------------------------------------------------- /src/openbench/evals/xcopa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/xcopa.py -------------------------------------------------------------------------------- /src/openbench/evals/xstorycloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/xstorycloze.py -------------------------------------------------------------------------------- /src/openbench/evals/xwinograd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/evals/xwinograd.py -------------------------------------------------------------------------------- /src/openbench/ifbench/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/ifbench/instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/ifbench/instructions.py -------------------------------------------------------------------------------- /src/openbench/ifbench/instructions_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/ifbench/instructions_registry.py -------------------------------------------------------------------------------- /src/openbench/ifbench/instructions_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/ifbench/instructions_util.py -------------------------------------------------------------------------------- /src/openbench/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/__init__.py -------------------------------------------------------------------------------- /src/openbench/metrics/chartqapro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/chartqapro.py -------------------------------------------------------------------------------- /src/openbench/metrics/clockbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/clockbench.py -------------------------------------------------------------------------------- /src/openbench/metrics/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/drop.py -------------------------------------------------------------------------------- /src/openbench/metrics/factscore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/factscore.py -------------------------------------------------------------------------------- /src/openbench/metrics/graphwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/graphwalks.py -------------------------------------------------------------------------------- /src/openbench/metrics/grouped.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/grouped.py -------------------------------------------------------------------------------- /src/openbench/metrics/healthbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/healthbench.py -------------------------------------------------------------------------------- /src/openbench/metrics/hle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/hle.py -------------------------------------------------------------------------------- /src/openbench/metrics/ifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/ifeval.py -------------------------------------------------------------------------------- /src/openbench/metrics/json_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/json_schema.py -------------------------------------------------------------------------------- /src/openbench/metrics/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/mgsm.py -------------------------------------------------------------------------------- /src/openbench/metrics/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/mmlu.py -------------------------------------------------------------------------------- /src/openbench/metrics/mmlu_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/mmlu_pro.py -------------------------------------------------------------------------------- /src/openbench/metrics/mmstar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/mmstar.py -------------------------------------------------------------------------------- /src/openbench/metrics/mmvetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/mmvetv2.py -------------------------------------------------------------------------------- /src/openbench/metrics/mrcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/mrcr.py -------------------------------------------------------------------------------- /src/openbench/metrics/multichallenge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/multichallenge.py -------------------------------------------------------------------------------- /src/openbench/metrics/pass_hat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/pass_hat.py -------------------------------------------------------------------------------- /src/openbench/metrics/political_evenhandedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/political_evenhandedness.py -------------------------------------------------------------------------------- /src/openbench/metrics/polyglotoxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/polyglotoxicity.py -------------------------------------------------------------------------------- /src/openbench/metrics/rocketscience.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/rocketscience.py -------------------------------------------------------------------------------- /src/openbench/metrics/scicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/scicode.py -------------------------------------------------------------------------------- /src/openbench/metrics/sealqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/sealqa.py -------------------------------------------------------------------------------- /src/openbench/metrics/simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/simpleqa.py -------------------------------------------------------------------------------- /src/openbench/metrics/simpleqa_verified.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/simpleqa_verified.py -------------------------------------------------------------------------------- /src/openbench/metrics/strong_reject.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/metrics/strong_reject.py -------------------------------------------------------------------------------- /src/openbench/model/__init__.py: -------------------------------------------------------------------------------- 1 | """openbench model providers.""" 2 | -------------------------------------------------------------------------------- /src/openbench/model/_providers/__init__.py: -------------------------------------------------------------------------------- 1 | """openbench custom model providers.""" 2 | -------------------------------------------------------------------------------- /src/openbench/model/_providers/ai21.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/ai21.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/baseten.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/baseten.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/cerebras.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/cerebras.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/cohere.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/cohere.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/crusoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/crusoe.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/deepinfra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/deepinfra.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/friendli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/friendli.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/groq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/groq.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/helicone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/helicone.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/huggingface.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/hyperbolic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/hyperbolic.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/lambda_ai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/lambda_ai.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/minimax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/minimax.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/moonshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/moonshot.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/nebius.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/nebius.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/nous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/nous.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/novita.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/novita.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/openrouter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/openrouter.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/parasail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/parasail.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/reka.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/reka.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/sambanova.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/sambanova.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/siliconflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/siliconflow.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/vercel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/vercel.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/vllm.py -------------------------------------------------------------------------------- /src/openbench/model/_providers/wandb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/model/_providers/wandb.py -------------------------------------------------------------------------------- /src/openbench/monkeypatch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/monkeypatch/display_results_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/monkeypatch/display_results_patch.py -------------------------------------------------------------------------------- /src/openbench/monkeypatch/file_recorder_logfile_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/monkeypatch/file_recorder_logfile_patch.py -------------------------------------------------------------------------------- /src/openbench/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | """Prompt templates for OpenBench tasks.""" 2 | -------------------------------------------------------------------------------- /src/openbench/prompts/chartqapro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/prompts/chartqapro.py -------------------------------------------------------------------------------- /src/openbench/prompts/political_evenhandedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/prompts/political_evenhandedness.py -------------------------------------------------------------------------------- /src/openbench/provider_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/provider_config.py -------------------------------------------------------------------------------- /src/openbench/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/scorers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/__init__.py -------------------------------------------------------------------------------- /src/openbench/scorers/arc_agi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/arc_agi.py -------------------------------------------------------------------------------- /src/openbench/scorers/browsecomp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/browsecomp.py -------------------------------------------------------------------------------- /src/openbench/scorers/chartqapro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/chartqapro.py -------------------------------------------------------------------------------- /src/openbench/scorers/clockbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/clockbench.py -------------------------------------------------------------------------------- /src/openbench/scorers/deep_research_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/deep_research_bench.py -------------------------------------------------------------------------------- /src/openbench/scorers/detailbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/detailbench.py -------------------------------------------------------------------------------- /src/openbench/scorers/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/drop.py -------------------------------------------------------------------------------- /src/openbench/scorers/exercism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/exercism.py -------------------------------------------------------------------------------- /src/openbench/scorers/factscore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/factscore.py -------------------------------------------------------------------------------- /src/openbench/scorers/fallback_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/fallback_scorer.py -------------------------------------------------------------------------------- /src/openbench/scorers/gpt_oss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/openbench/scorers/gpt_oss/aime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/gpt_oss/aime.py -------------------------------------------------------------------------------- /src/openbench/scorers/grade_school_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/grade_school_math.py -------------------------------------------------------------------------------- /src/openbench/scorers/graphwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/graphwalks.py -------------------------------------------------------------------------------- /src/openbench/scorers/healthbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/healthbench.py -------------------------------------------------------------------------------- /src/openbench/scorers/hle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/hle.py -------------------------------------------------------------------------------- /src/openbench/scorers/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/humaneval.py -------------------------------------------------------------------------------- /src/openbench/scorers/ifbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/ifbench.py -------------------------------------------------------------------------------- /src/openbench/scorers/ifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/ifeval.py -------------------------------------------------------------------------------- /src/openbench/scorers/json_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/json_schema.py -------------------------------------------------------------------------------- /src/openbench/scorers/livemcpbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/livemcpbench.py -------------------------------------------------------------------------------- /src/openbench/scorers/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/math.py -------------------------------------------------------------------------------- /src/openbench/scorers/mathvista.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mathvista.py -------------------------------------------------------------------------------- /src/openbench/scorers/mbpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mbpp.py -------------------------------------------------------------------------------- /src/openbench/scorers/mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mcq.py -------------------------------------------------------------------------------- /src/openbench/scorers/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mgsm.py -------------------------------------------------------------------------------- /src/openbench/scorers/mmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mmmu.py -------------------------------------------------------------------------------- /src/openbench/scorers/mmstar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mmstar.py -------------------------------------------------------------------------------- /src/openbench/scorers/mmvetv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mmvetv2.py -------------------------------------------------------------------------------- /src/openbench/scorers/mockaime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mockaime.py -------------------------------------------------------------------------------- /src/openbench/scorers/mrcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/mrcr.py -------------------------------------------------------------------------------- /src/openbench/scorers/multichallenge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/multichallenge.py -------------------------------------------------------------------------------- /src/openbench/scorers/ocrbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/ocrbenchv2.py -------------------------------------------------------------------------------- /src/openbench/scorers/open_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/open_answer.py -------------------------------------------------------------------------------- /src/openbench/scorers/political_evenhandedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/political_evenhandedness.py -------------------------------------------------------------------------------- /src/openbench/scorers/polyglotoxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/polyglotoxicity.py -------------------------------------------------------------------------------- /src/openbench/scorers/qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/qa.py -------------------------------------------------------------------------------- /src/openbench/scorers/robust_boxed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/robust_boxed.py -------------------------------------------------------------------------------- /src/openbench/scorers/scicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/scicode.py -------------------------------------------------------------------------------- /src/openbench/scorers/score_boxed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/score_boxed.py -------------------------------------------------------------------------------- /src/openbench/scorers/score_last_number.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/score_last_number.py -------------------------------------------------------------------------------- /src/openbench/scorers/sealqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/sealqa.py -------------------------------------------------------------------------------- /src/openbench/scorers/simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/simpleqa.py -------------------------------------------------------------------------------- /src/openbench/scorers/simpleqa_verified.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/simpleqa_verified.py -------------------------------------------------------------------------------- /src/openbench/scorers/smt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/smt.py -------------------------------------------------------------------------------- /src/openbench/scorers/strong_reject.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/strong_reject.py -------------------------------------------------------------------------------- /src/openbench/scorers/tau_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/tau_bench.py -------------------------------------------------------------------------------- /src/openbench/scorers/tumlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/scorers/tumlu.py -------------------------------------------------------------------------------- /src/openbench/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | """Solver for exercism evaluation.""" 2 | -------------------------------------------------------------------------------- /src/openbench/solvers/chartqapro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/chartqapro.py -------------------------------------------------------------------------------- /src/openbench/solvers/clockbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/clockbench.py -------------------------------------------------------------------------------- /src/openbench/solvers/deep_research_bench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/deep_research_bench/__init__.py -------------------------------------------------------------------------------- /src/openbench/solvers/deep_research_bench/deep_research_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/deep_research_bench/deep_research_bench.py -------------------------------------------------------------------------------- /src/openbench/solvers/deep_research_bench/deep_research_bench_fact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/deep_research_bench/deep_research_bench_fact.py -------------------------------------------------------------------------------- /src/openbench/solvers/deep_research_bench/deep_research_bench_race.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/deep_research_bench/deep_research_bench_race.py -------------------------------------------------------------------------------- /src/openbench/solvers/exercism_solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/exercism_solver.py -------------------------------------------------------------------------------- /src/openbench/solvers/jsonschemabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/jsonschemabench.py -------------------------------------------------------------------------------- /src/openbench/solvers/mmstar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/mmstar.py -------------------------------------------------------------------------------- /src/openbench/solvers/ocrbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/ocrbenchv2.py -------------------------------------------------------------------------------- /src/openbench/solvers/political_evenhandedness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/political_evenhandedness.py -------------------------------------------------------------------------------- /src/openbench/solvers/scicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/scicode.py -------------------------------------------------------------------------------- /src/openbench/solvers/tau_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/solvers/tau_bench.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/__init__.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/arg_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/arg_generation.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/matcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/matcher.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/mcp_connection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/mcp_connection.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/prepare.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/router.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/schemas.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/server.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/toolsource.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/toolsource.py -------------------------------------------------------------------------------- /src/openbench/tools/livemcpbench/copilot/upstream_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/tools/livemcpbench/copilot/upstream_cache.py -------------------------------------------------------------------------------- /src/openbench/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/__init__.py -------------------------------------------------------------------------------- /src/openbench/utils/arc_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/arc_parsing.py -------------------------------------------------------------------------------- /src/openbench/utils/cli_commands.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/cli_commands.py -------------------------------------------------------------------------------- /src/openbench/utils/deep_research_bench_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/deep_research_bench_prompts.py -------------------------------------------------------------------------------- /src/openbench/utils/docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/docker.py -------------------------------------------------------------------------------- /src/openbench/utils/factscore_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/factscore_cache.py -------------------------------------------------------------------------------- /src/openbench/utils/factscore_wiki.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/factscore_wiki.py -------------------------------------------------------------------------------- /src/openbench/utils/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/image.py -------------------------------------------------------------------------------- /src/openbench/utils/imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/imports.py -------------------------------------------------------------------------------- /src/openbench/utils/livemcpbench_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/livemcpbench_cache.py -------------------------------------------------------------------------------- /src/openbench/utils/mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/mcq.py -------------------------------------------------------------------------------- /src/openbench/utils/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/metadata.py -------------------------------------------------------------------------------- /src/openbench/utils/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/src/openbench/utils/text.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the bench package.""" 2 | -------------------------------------------------------------------------------- /tests/_cli/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for the CLI module.""" 2 | -------------------------------------------------------------------------------- /tests/_cli/test_cache_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/_cli/test_cache_command.py -------------------------------------------------------------------------------- /tests/_cli/test_cache_command_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/_cli/test_cache_command_functions.py -------------------------------------------------------------------------------- /tests/_cli/test_eval_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/_cli/test_eval_command.py -------------------------------------------------------------------------------- /tests/_cli/test_eval_command_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/_cli/test_eval_command_functions.py -------------------------------------------------------------------------------- /tests/_cli/test_export_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/_cli/test_export_command.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/integration/test_cli.py -------------------------------------------------------------------------------- /tests/monkeypatch/test_file_recorder_logfile_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/monkeypatch/test_file_recorder_logfile_patch.py -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_cache.py -------------------------------------------------------------------------------- /tests/test_cli_epoch_reducers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_cli_epoch_reducers.py -------------------------------------------------------------------------------- /tests/test_epoch_reducer_registration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_epoch_reducer_registration.py -------------------------------------------------------------------------------- /tests/test_groq_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_groq_provider.py -------------------------------------------------------------------------------- /tests/test_image_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_image_utils.py -------------------------------------------------------------------------------- /tests/test_json_schema_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_json_schema_scorer.py -------------------------------------------------------------------------------- /tests/test_open_answer_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_open_answer_scorer.py -------------------------------------------------------------------------------- /tests/test_pass_hat_reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_pass_hat_reducer.py -------------------------------------------------------------------------------- /tests/test_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_registry.py -------------------------------------------------------------------------------- /tests/test_registry_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_registry_imports.py -------------------------------------------------------------------------------- /tests/test_robust_scorers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_robust_scorers.py -------------------------------------------------------------------------------- /tests/test_text_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_text_utils.py -------------------------------------------------------------------------------- /tests/test_vllm_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/test_vllm_provider.py -------------------------------------------------------------------------------- /tests/utils/test_factscore_download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/utils/test_factscore_download.py -------------------------------------------------------------------------------- /tests/utils/test_factscore_wiki.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/tests/utils/test_factscore_wiki.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/openbench/HEAD/uv.lock --------------------------------------------------------------------------------