├── .github └── workflows │ └── publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── app ├── commits.py ├── probs.py ├── static │ └── styles.css ├── submissions │ ├── app.py │ └── templates │ │ ├── base.html │ │ ├── conversation.html │ │ ├── index.html │ │ ├── matrix.html │ │ └── patch_view.html ├── templates │ ├── base.html │ └── commits.html └── utils.py ├── pyproject.toml ├── src └── gso │ ├── __init__.py │ ├── analysis │ ├── __init__.py │ ├── qualitative │ │ ├── __init__.py │ │ ├── compare_patches.py │ │ ├── hack_detector.py │ │ ├── plot_hack_rates.py │ │ ├── run_detector.py │ │ └── utils.py │ └── quantitative │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── plot_compute_matrix.py │ │ ├── plot_models.py │ │ ├── plot_opt1_threshold.py │ │ ├── plot_opt_k.py │ │ └── plot_speedups.py │ ├── collect │ ├── README.md │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── apis.py │ │ ├── commits.py │ │ ├── parser.py │ │ ├── prompt.py │ │ ├── retriever.py │ │ └── utils.py │ ├── build_dataset.py │ ├── execute │ │ ├── __init__.py │ │ ├── evaluate.py │ │ ├── execute.py │ │ ├── helpers.py │ │ ├── phase1.txt │ │ ├── phase2.txt │ │ ├── skymgr.py │ │ └── template.yaml │ ├── generate │ │ ├── __init__.py │ │ ├── args.py │ │ ├── context.py │ │ ├── generate.py │ │ ├── harness.py │ │ ├── helpers.py │ │ ├── oversample.py │ │ └── prompt.py │ ├── pids.py │ ├── scripts │ │ ├── augment_dataset.py │ │ ├── build_manual.py │ │ └── merge_runs.py │ └── utils.py │ ├── constants.py │ ├── data │ ├── __init__.py │ ├── commit.py │ ├── dataset.py │ ├── entities.py │ ├── parsing.py │ ├── perf.py │ ├── problem.py │ └── repo.py │ ├── harness │ ├── README.md │ ├── __init__.py │ ├── environment │ │ ├── __init__.py │ │ ├── docker_build.py │ │ ├── docker_utils.py │ │ ├── dockerfile.py │ │ └── patches.py │ ├── grading │ │ ├── __init__.py │ │ ├── evalscript.py │ │ ├── grade.py │ │ ├── metrics.py │ │ ├── report.py │ │ └── utils.py │ ├── opt_at_k.py │ ├── prepare_images.py │ ├── run_evaluation.py │ ├── scripts │ │ ├── all-gso-instance-images.txt │ │ └── pull_images.sh │ └── utils.py │ ├── logger.py │ └── utils │ ├── io.py │ ├── multiprocess.py │ ├── patch_parser.py │ └── suggest_deps.py └── uv.lock /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/README.md -------------------------------------------------------------------------------- /app/commits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/commits.py -------------------------------------------------------------------------------- /app/probs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/probs.py -------------------------------------------------------------------------------- /app/static/styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/static/styles.css -------------------------------------------------------------------------------- /app/submissions/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/submissions/app.py -------------------------------------------------------------------------------- /app/submissions/templates/base.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/submissions/templates/base.html -------------------------------------------------------------------------------- /app/submissions/templates/conversation.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/submissions/templates/conversation.html -------------------------------------------------------------------------------- /app/submissions/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/submissions/templates/index.html -------------------------------------------------------------------------------- /app/submissions/templates/matrix.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/submissions/templates/matrix.html -------------------------------------------------------------------------------- /app/submissions/templates/patch_view.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/submissions/templates/patch_view.html -------------------------------------------------------------------------------- /app/templates/base.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/templates/base.html -------------------------------------------------------------------------------- /app/templates/commits.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/templates/commits.html -------------------------------------------------------------------------------- /app/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/app/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/pyproject.toml -------------------------------------------------------------------------------- /src/gso/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/__init__.py -------------------------------------------------------------------------------- /src/gso/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/analysis/qualitative/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/analysis/qualitative/compare_patches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/qualitative/compare_patches.py -------------------------------------------------------------------------------- /src/gso/analysis/qualitative/hack_detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/qualitative/hack_detector.py -------------------------------------------------------------------------------- /src/gso/analysis/qualitative/plot_hack_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/qualitative/plot_hack_rates.py -------------------------------------------------------------------------------- /src/gso/analysis/qualitative/run_detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/qualitative/run_detector.py -------------------------------------------------------------------------------- /src/gso/analysis/qualitative/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/qualitative/utils.py -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/quantitative/helpers.py -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/plot_compute_matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/quantitative/plot_compute_matrix.py -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/plot_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/quantitative/plot_models.py -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/plot_opt1_threshold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/quantitative/plot_opt1_threshold.py -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/plot_opt_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/quantitative/plot_opt_k.py -------------------------------------------------------------------------------- /src/gso/analysis/quantitative/plot_speedups.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/analysis/quantitative/plot_speedups.py -------------------------------------------------------------------------------- /src/gso/collect/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/README.md -------------------------------------------------------------------------------- /src/gso/collect/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/collect/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/collect/analysis/apis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/analysis/apis.py -------------------------------------------------------------------------------- /src/gso/collect/analysis/commits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/analysis/commits.py -------------------------------------------------------------------------------- /src/gso/collect/analysis/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/analysis/parser.py -------------------------------------------------------------------------------- /src/gso/collect/analysis/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/analysis/prompt.py -------------------------------------------------------------------------------- /src/gso/collect/analysis/retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/analysis/retriever.py -------------------------------------------------------------------------------- /src/gso/collect/analysis/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/analysis/utils.py -------------------------------------------------------------------------------- /src/gso/collect/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/build_dataset.py -------------------------------------------------------------------------------- /src/gso/collect/execute/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/collect/execute/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/evaluate.py -------------------------------------------------------------------------------- /src/gso/collect/execute/execute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/execute.py -------------------------------------------------------------------------------- /src/gso/collect/execute/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/helpers.py -------------------------------------------------------------------------------- /src/gso/collect/execute/phase1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/phase1.txt -------------------------------------------------------------------------------- /src/gso/collect/execute/phase2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/phase2.txt -------------------------------------------------------------------------------- /src/gso/collect/execute/skymgr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/skymgr.py -------------------------------------------------------------------------------- /src/gso/collect/execute/template.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/execute/template.yaml -------------------------------------------------------------------------------- /src/gso/collect/generate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/collect/generate/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/args.py -------------------------------------------------------------------------------- /src/gso/collect/generate/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/context.py -------------------------------------------------------------------------------- /src/gso/collect/generate/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/generate.py -------------------------------------------------------------------------------- /src/gso/collect/generate/harness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/harness.py -------------------------------------------------------------------------------- /src/gso/collect/generate/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/helpers.py -------------------------------------------------------------------------------- /src/gso/collect/generate/oversample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/oversample.py -------------------------------------------------------------------------------- /src/gso/collect/generate/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/generate/prompt.py -------------------------------------------------------------------------------- /src/gso/collect/pids.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/pids.py -------------------------------------------------------------------------------- /src/gso/collect/scripts/augment_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/scripts/augment_dataset.py -------------------------------------------------------------------------------- /src/gso/collect/scripts/build_manual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/scripts/build_manual.py -------------------------------------------------------------------------------- /src/gso/collect/scripts/merge_runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/scripts/merge_runs.py -------------------------------------------------------------------------------- /src/gso/collect/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/collect/utils.py -------------------------------------------------------------------------------- /src/gso/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/constants.py -------------------------------------------------------------------------------- /src/gso/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/__init__.py -------------------------------------------------------------------------------- /src/gso/data/commit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/commit.py -------------------------------------------------------------------------------- /src/gso/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/dataset.py -------------------------------------------------------------------------------- /src/gso/data/entities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/entities.py -------------------------------------------------------------------------------- /src/gso/data/parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/parsing.py -------------------------------------------------------------------------------- /src/gso/data/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/perf.py -------------------------------------------------------------------------------- /src/gso/data/problem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/problem.py -------------------------------------------------------------------------------- /src/gso/data/repo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/data/repo.py -------------------------------------------------------------------------------- /src/gso/harness/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/README.md -------------------------------------------------------------------------------- /src/gso/harness/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/harness/environment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/harness/environment/docker_build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/environment/docker_build.py -------------------------------------------------------------------------------- /src/gso/harness/environment/docker_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/environment/docker_utils.py -------------------------------------------------------------------------------- /src/gso/harness/environment/dockerfile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/environment/dockerfile.py -------------------------------------------------------------------------------- /src/gso/harness/environment/patches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/environment/patches.py -------------------------------------------------------------------------------- /src/gso/harness/grading/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gso/harness/grading/evalscript.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/grading/evalscript.py -------------------------------------------------------------------------------- /src/gso/harness/grading/grade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/grading/grade.py -------------------------------------------------------------------------------- /src/gso/harness/grading/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/grading/metrics.py -------------------------------------------------------------------------------- /src/gso/harness/grading/report.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/grading/report.py -------------------------------------------------------------------------------- /src/gso/harness/grading/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/grading/utils.py -------------------------------------------------------------------------------- /src/gso/harness/opt_at_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/opt_at_k.py -------------------------------------------------------------------------------- /src/gso/harness/prepare_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/prepare_images.py -------------------------------------------------------------------------------- /src/gso/harness/run_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/run_evaluation.py -------------------------------------------------------------------------------- /src/gso/harness/scripts/all-gso-instance-images.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/scripts/all-gso-instance-images.txt -------------------------------------------------------------------------------- /src/gso/harness/scripts/pull_images.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/scripts/pull_images.sh -------------------------------------------------------------------------------- /src/gso/harness/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/harness/utils.py -------------------------------------------------------------------------------- /src/gso/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/logger.py -------------------------------------------------------------------------------- /src/gso/utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/utils/io.py -------------------------------------------------------------------------------- /src/gso/utils/multiprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/utils/multiprocess.py -------------------------------------------------------------------------------- /src/gso/utils/patch_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/utils/patch_parser.py -------------------------------------------------------------------------------- /src/gso/utils/suggest_deps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/src/gso/utils/suggest_deps.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gso-bench/gso/HEAD/uv.lock --------------------------------------------------------------------------------