23 |
24 |
25 |
26 | SWE-smith is a toolkit for training software engineering (SWE) agents. With SWE-smith, you can:
27 | * Create an *unlimited* number of [SWE-bench](https://github.com/SWE-bench/SWE-bench) style task instances for any Python repository.
28 | * *Generate trajectories* of [SWE-agent](https://github.com/SWE-agent/SWE-agent) solving those task instances.
29 | * *Train local LMs* on these trajectories to improve their software engineering capabilities ([SWE-agent-LM-32B](https://huggingface.co/SWE-bench/SWE-agent-LM-32B)).
30 |
31 | ## 🚀 Get Started
32 | Check out the [documentation](https://swesmith.com/getting_started/) for a complete guide on how to use SWE-smith, including how to
33 | * [Install](https://swesmith.com/getting_started/installation/) the repository locally or as a PyPI package.
34 | * [Create Task Instances](https://swesmith.com/guides/create_instances/) for any Python repository with SWE-smith.
35 | * Use your task instances to [train your own SWE-agents](https://swesmith.com/guides/train_swe_agent/).
36 |
37 | ## 🏎️ Quick Start
38 | Install the repo:
39 | ```bash
40 | git clone https://github.com/SWE-bench/SWE-smith
41 | cd SWE-smith
42 | conda create -n smith python=3.10
43 | conda activate smith
44 | pip install -e .
45 | ```
46 |
47 | Then, check out `scripts/cheatsheet.sh` for scripts to (1) create execution environments, (2) create task instances, and (3) train SWE-agents.
48 |
49 | > [!TIP]
50 | > SWE-smith requires Docker to create execution environments. SWE-smith was developed and tested on Ubuntu 22.04.4 LTS.
> We do *not* plan on supporting Windows or macOS.
52 |
53 | ## 💿 Resources
54 | In addition to this toolkit, we've also provided several artifacts on the [SWE-bench HuggingFace](https://huggingface.co/SWE-bench), including:
55 | * [50k Python Task Instances](https://huggingface.co/datasets/SWE-bench/SWE-smith), created using SWE-smith.
56 | * [SWE-agent-LM-32B](https://huggingface.co/SWE-bench/SWE-agent-LM-32B), trained using SWE-smith. Achieves **41.6%** pass@1 on [SWE-bench Verified](https://huggingface.co/datasets/SWE-bench/SWE-bench_Verified)!
57 | * [5k Trajectories](https://huggingface.co/datasets/SWE-bench/SWE-smith-trajectories) that SWE-agent-LM-32B was trained on.
58 |
59 | And there's more coming!
60 |
61 | ## 💫 Contributions
62 | Excited about SWE-smith? We're actively working on several follow-ups, and we'd love meaningful collaborations! What we're thinking about...
63 | * Make SWE-smith work for non-Python languages
64 | * Develop new bug generation techniques
65 | * Train SWE-agents with more trajectories and new methods
66 |
67 | Check out the [Contributing Guide](CONTRIBUTING.md) for more.
68 |
69 | Contact Person: [John Yang](https://john-b-yang.github.io/), [Kilian Lieret](https://lieret.net)
70 | (Email: [johnby@stanford.edu](mailto:johnby@stanford.edu))
71 |
72 | ## 🪪 License
73 | MIT. Check `LICENSE` for more information.
74 |
75 | ## ✍️ Citation
76 |
77 | ```bibtex
78 | @misc{yang2025swesmith,
79 | title={SWE-smith: Scaling Data for Software Engineering Agents},
80 | author={John Yang and Kilian Lieret and Carlos E. Jimenez and Alexander Wettig and Kabir Khandpur and Yanzhe Zhang and Binyuan Hui and Ofir Press and Ludwig Schmidt and Diyi Yang},
81 | year={2025},
82 | eprint={2504.21798},
83 | archivePrefix={arXiv},
84 | primaryClass={cs.SE},
85 | url={https://arxiv.org/abs/2504.21798},
86 | }
87 | ```
88 |
89 | ## 📕 Related Works
90 |
--------------------------------------------------------------------------------
/agent/_gen_trajs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | sweagent run-batch --num_workers 20 \
4 | --instances.deployment.docker_args=--memory=10g \
5 | --config agent/swesmith_gen_claude.yaml \
6 | --instances.path /home/john-b-yang/swe-smith/logs/experiments/exp8__ig_orig.json \
7 | --output_dir trajectories/john-b-yang/swesmith_gen__claude-3.5__t-0.00_p-1.00__c.2.00__exp8__ig_orig_run2 \
8 | --random_delay_multiplier=1 \
9 | --agent.model.temperature 0.0
10 |
11 | # Remember to set CLAUDE_API_KEY_ROTATION=key1:::key2:::key3
12 |
--------------------------------------------------------------------------------
/agent/_infer_model.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | sweagent run-batch --config agent/swesmith_infer.yaml \
4 | --instances.deployment.docker_args=--memory=10g \
5 | --agent.model.api_base https://svt25nwvnpipwz.r20.modal.host/v1 \
6 | --random_delay_multiplier=1 \
7 | --output_dir trajectories/john-b-yang/swesmith.ablation.bug.lm_reimplement_500
8 |
--------------------------------------------------------------------------------
/agent/_traj_mgr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python -m swesmith.train.traj_mgr.clean_trajs trajectories/
4 |
5 | python -m swesmith.train.traj_mgr.combine_trajs
6 |
7 | python -m swesmith.train.traj_mgr.transform_to_ft
--------------------------------------------------------------------------------
/agent/swesmith_gen_claude.yaml:
--------------------------------------------------------------------------------
1 | # Heavily based on https://github.com/SWE-agent/SWE-agent/blob/main/config/anthropic_filemap.yaml
2 | instances:
3 | type: swesmith
4 | shuffle: true
5 | agent:
6 | templates:
7 | system_template: |-
8 | You are a helpful assistant that can interact with a computer to solve tasks.
9 | instance_template: |-
10 | <uploaded_files>
11 | {{working_dir}}
12 | </uploaded_files>
13 | I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
14 |
15 | <pr_description>
16 | {{problem_statement}}
17 | </pr_description>
18 |
19 | Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
20 | I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
21 | Your task is to make the minimal changes to non-test files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
22 | Follow these steps to resolve the issue:
23 | 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
24 | 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
25 | 3. Edit the source code of the repo to resolve the issue
26 | 4. Rerun your reproduce script and confirm that the error is fixed!
27 | 5. Think about edge cases and make sure your fix handles them as well
28 | Your thinking should be thorough and so it's fine if it's very long.
29 | next_step_template: |-
30 | OBSERVATION:
31 | {{observation}}
32 | next_step_no_output_template: |-
33 | Your command ran successfully and did not produce any output.
34 | tools:
35 | bundles:
36 | - path: tools/registry
37 | - path: tools/edit_anthropic
38 | - path: tools/review_on_submit_m
39 | registry_variables:
40 | USE_FILEMAP: 'true'
41 | SUBMIT_REVIEW_MESSAGES:
42 | - |
43 | Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
44 |
45 | 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
46 | If the reproduction script is failing, please revisit your changes and make sure they are correct.
47 | If you have already removed your reproduction script, please ignore this step.
48 | 2. Remove your reproduction script (if you haven't done so already).
49 | 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
50 | You can do this with `git checkout -- /path/to/test/file.py`. Use the <diff> output below to find the files you need to revert.
51 | 4. Run the submit command again to confirm.
52 |
53 | Here is a list of all of your changes:
54 |
55 | <diff>
56 | {{diff}}
57 | </diff>
58 | enable_bash_tool: true
59 | parse_function:
60 | type: function_calling
61 | execution_timeout: 300
62 | history_processors:
63 | - type: cache_control
64 | last_n_messages: 2
65 | model:
66 | # name: claude-3-5-sonnet-20241022
67 | name: claude-3-7-sonnet-20250219
68 | max_output_tokens: 64000
69 | api_key: $CLAUDE_API_KEY_ROTATION
70 | per_instance_cost_limit: 2.
71 | per_instance_call_limit: 75
72 | # delay: 1
73 |
--------------------------------------------------------------------------------
/agent/swesmith_gen_gpt.yaml:
--------------------------------------------------------------------------------
1 | # Heavily based on https://github.com/SWE-agent/SWE-agent/blob/main/config/anthropic_filemap.yaml
2 | instances:
3 | type: swesmith
4 | shuffle: true
5 | agent:
6 | templates:
7 | system_template: |-
8 | You are a helpful assistant that can interact with a computer to solve tasks.
9 | instance_template: |-
10 | <uploaded_files>
11 | {{working_dir}}
12 | </uploaded_files>
13 | I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
14 |
15 | <pr_description>
16 | {{problem_statement}}
17 | </pr_description>
18 |
19 | Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
20 | I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
21 | Your task is to make the minimal changes to non-test files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
22 | Follow these steps to resolve the issue:
23 | 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
24 | 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
25 | 3. Edit the source code of the repo to resolve the issue
26 | 4. Rerun your reproduce script and confirm that the error is fixed!
27 | 5. Think about edge cases and make sure your fix handles them as well
28 | Your thinking should be thorough and so it's fine if it's very long.
29 | next_step_template: |-
30 | OBSERVATION:
31 | {{observation}}
32 | next_step_no_output_template: |-
33 | Your command ran successfully and did not produce any output.
34 | tools:
35 | execution_timeout: 300
36 | bundles:
37 | - path: tools/registry
38 | - path: tools/edit_anthropic
39 | - path: tools/submit
40 | env_variables:
41 | USE_FILEMAP: 'true'
42 | enable_bash_tool: true
43 | parse_function:
44 | type: function_calling
45 | model:
46 | name: gpt-4o-2024-08-06
47 | per_instance_cost_limit: 2.
48 | per_instance_call_limit: 75
49 | # delay: 1
50 |
--------------------------------------------------------------------------------
/agent/swesmith_install_repo.yaml:
--------------------------------------------------------------------------------
1 | agent:
2 | templates:
3 | system_template: |-
4 | You are a helpful assistant that can interact with a computer to solve tasks.
5 | instance_template: |-
6 | <uploaded_files>
7 | {{working_dir}}
8 | </uploaded_files>
9 | I've uploaded a python code repository in the directory {{working_dir}}.
10 |
11 | Can you please help me install this repository?
12 | Your goal should be to configure the repository's development environment such that existing tests pass.
13 | You are currently in the root directory of the repository, and nothing has been installed yet.
14 | You are in an Ubuntu 22.04 environment.
15 |
16 | The repository is predominantly written in Python. Here are several tips for installing it:
17 | 1. A good place to start is to look for a `CONTRIBUTING.[md|rst]` file, which will often contain instructions on how to install the repository and any dependencies it may have. Occasionally, the `README.md` file may also contain installation instructions.
18 | 2. Usually, a repository may have `setup.py` or `pyproject.toml` files which can be used to install the package. `pip install -e .` is commonly used, although many packages will also require an additional specifier that installs development packages as well (e.g. `pip install -e .[dev]`).
19 | 3. To check whether the repository was installed successfully, run tests and see if they pass. You can usually find tests in a `tests/` or `test/` directory. You can run tests using `pytest` or `unittest`, depending on the framework used by the repository.
20 | 4. Sometimes, you will need to install additional packages, often listed in a `requirements.txt` or `environment.yml` file. Also, be mindful of Ubuntu system dependencies that may need to be installed via `apt-get` (e.g. `sudo apt-get install <package>`).
21 |
22 | Once you are finished with installing the repository, run the `submit` command to submit your changes for review.
23 | next_step_template: |-
24 | OBSERVATION:
25 | {{observation}}
26 | next_step_no_output_template: |-
27 | Your command ran successfully and did not produce any output.
28 | tools:
29 | bundles:
30 | - path: tools/registry
31 | - path: tools/edit_anthropic
32 | - path: tools/submit
33 | registry_variables:
34 | USE_FILEMAP: 'true'
35 | enable_bash_tool: true
36 | parse_function:
37 | type: function_calling
38 | execution_timeout: 300
39 | history_processors:
40 | - type: cache_control
41 | last_n_messages: 2
42 | model:
43 | name: claude-3-7-sonnet-20250219
44 | api_key: $CLAUDE_API_KEY_ROTATION
45 | per_instance_cost_limit: 2.
46 | per_instance_call_limit: 150
47 | delay: 1
48 |
--------------------------------------------------------------------------------
/configs/bug_gen/README.md:
--------------------------------------------------------------------------------
1 | # Writing Config Files for Bug Generation
2 |
3 | To create bugs using `swesmith.bug_gen.llm.modify`, the script takes in a configuration file
4 | that allows one to (1) define what kind of bug(s) the LLM should generate, and (2) identify
5 | what functions to run this generation for.
6 |
7 | Here are the steps to create a config file for creating a specific kind of bug.
8 |
9 | 1. Create a `configs/bug_gen/*.yaml` file. Typically, the naming convention is `func_<bug_type>.yaml`.
10 | 2. Within the `.yaml` file, define the following prompts / fields:
11 | ```yaml
12 | name:
13 | criteria: reference to criteria in swesmith/bug_gen/llm/criteria.py
14 | parameters: any additional information you'd like to include + can be referenced in the prompts
15 | system: |-
16 | prompt
17 | demonstration: |-
18 | prompt
19 | instance: |-
20 | prompt
21 | ```
22 | 3. (Optional) You can use one of the existing criteria, or create a new one in `swesmith/bug_gen/llm/criteria.py`.
23 | * The purpose of defining a criterion is to only consider functions where it would be possible to introduce such a bug.
24 | * For example, if you write a prompt for off-by-one bugs, but the function doesn't have loops or list indexing, then it's unlikely the LLM can generate a reasonably effective and difficult bug.
25 |
26 | > A criterion function usually follows the form below:
27 | ```python
28 | def filter_<criteria_name>(code_entity: CodeEntity) -> bool:
29 | """
30 | `code_entity` is an object representing a function. It includes several
31 | pieces of information, most notably:
32 | * `src_code`: The raw string repr. of a function
33 | * `src_node`: An AST node representation of a function.
34 | """
35 | node = code_entity.src_node
36 | # Whether a function has a given property is typically checked by
37 | # inspecting node properties (of course, you're not limited to this)
38 | if satisfies_criteria:
39 | return True
40 | return False
41 | ```
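
For example, a minimal criterion for an off-by-one bug prompt might look like the sketch below. The function name is hypothetical, and it assumes `src_node` is a standard-library `ast` node; following the motivation above, it keeps only functions that contain a loop or index-based access:

```python
import ast

def filter_off_by_one(code_entity) -> bool:
    """Keep only functions where an off-by-one bug is plausible,
    i.e. ones that contain a loop or index-based access."""
    node = code_entity.src_node
    return any(
        isinstance(child, (ast.For, ast.While, ast.Subscript))
        for child in ast.walk(node)
    )
```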
42 |
43 | Once you've created the `.yaml` file with a specified criterion, run the following from this repo:
44 | ```bash
45 | python -m swesmith.bug_gen.llm.modify \
46 | --repo datamade/usaddress \
47 | --model openai/gpt-4o \
48 | --entity_type func \
49 | --prompt_config configs/bug_gen/func_<bug_type>.yml \
50 | --n_workers 4 # 4 parallel queries to LM etc.
51 | ```
52 | where `--repo` should point to one of the repositories [here](https://github.com/orgs/swesmith/repositories). (Note: this should just be `<owner>/<repo>`, without the `.<commit>` suffix.)
53 |
--------------------------------------------------------------------------------
/configs/bug_gen/func_fun.yml:
--------------------------------------------------------------------------------
1 | version: 1
2 | name: func_fun
3 | criteria: all
4 | parameters:
5 | tips:
6 | system: |-
7 | You are a simulation of a tired, deadline-pressured developer who has just worked 14 consecutive hours.
8 |
9 | Your task was to improve the provided code.
10 | Despite your best intentions, your exhausted state causes you to introduce subtle, real-world bugs that would pass code review but cause issues in production.
11 |
12 | Rewrite a function such that it introduces a logical bug that will subtly break existing unit tests in a codebase.
13 |
14 | Here's how to proceed:
15 |
16 | 1. First understand what the code is trying to achieve
17 | 2. Consider how a well-intentioned but fatigued developer might misunderstand it
18 | 3. Implement changes based on that flawed understanding
19 | 4. Ensure the bug represents a genuine cognitive error, not a contrived modification
20 | 5. The code should look like a good-faith attempt at solving the problem
21 | 6. The bug should be something that could genuinely ship to production
22 |
23 | Tips about the bug-introducing task:
24 |
25 | - It should not cause compilation errors.
26 | - It should not be a syntax error.
27 | - It should be subtle and challenging to detect.
28 | - It should not modify the function signature.
29 | - It should not modify the documentation significantly.
30 | - For longer functions, if there is an opportunity to introduce multiple bugs, please do!
31 | - Please DO NOT INCLUDE COMMENTS IN THE CODE indicating the bug location or the bug itself.
32 | - Your code must be included in triple backticks.
33 |
34 | Your answer should be formatted as follows:
35 |
36 | Explanation:
37 |
38 |
39 | Bugged Code:
40 | ```
41 |
42 | ```
43 | demonstration: ""
44 | instance: |-
45 |
46 | {{src_code}}
47 |
48 |
49 | As a reminder, please DO NOT INCLUDE ANY COMMENTS IN THE CODE OR POINT OUT THE BUG IN ANY WAY.
50 |
51 | OUTPUT:
--------------------------------------------------------------------------------
/configs/bug_gen/lm_modify.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | name: lm_modify
3 | criteria: simple_complexity10
4 | parameters:
5 | bug_examples:
6 | - "Alter calculation order for incorrect results: Rearrange the sequence of operations in a calculation to subtly change the output (e.g., change (a + b) * c to a + (b * c))."
7 | - "Introduce subtle data transformation errors: Modify data processing logic, such as flipping a sign, truncating a value, or applying the wrong transformation function."
8 | - "Change variable assignments to alter computation state: Assign a wrong or outdated value to a variable that affects subsequent logic."
9 | - "Mishandle edge cases for specific inputs: Change handling logic to ignore or improperly handle boundary cases, like an empty array or a null input."
10 | - "Modify logic in conditionals or loops: Adjust conditions or loop boundaries (e.g., replace <= with <) to change the control flow."
11 | - "Introduce off-by-one errors in indices or loop boundaries: Shift an index or iteration boundary by one, such as starting a loop at 1 instead of 0."
12 | - "Adjust default values or constants to affect behavior: Change a hardcoded value or default parameter that alters how the function behaves under normal use."
13 | - "Reorder operations while maintaining syntax: Rearrange steps in a process so the function produces incorrect intermediate results without breaking the code."
14 | - "Swallow exceptions or return defaults silently: Introduce logic that catches an error but doesn't log or handle it properly, leading to silent failures."
15 | tips:
16 | - "It should not cause compilation errors."
17 | - "It should not be a syntax error."
18 | - "It should be subtle and challenging to detect."
19 | - "It should not modify the function signature."
20 | - "It should not modify the documentation significantly."
21 | - "For longer functions, if there is an opportunity to introduce multiple bugs, please do!"
22 | - "Please DO NOT INCLUDE COMMENTS IN THE CODE indicating the bug location or the bug itself."
23 | system: |-
24 | You are a software developer doing chaos monkey testing.
25 | Your job is to rewrite a function such that it introduces a logical bug that will break existing unit test(s) in a codebase.
26 |
27 | To this end, some kinds of bugs you might introduce include:
28 | {% for bug in (bug_examples | shuffle)[:3] %}
29 | - {{ bug -}}
30 | {% endfor %}
31 |
32 | Tips about the bug-introducing task:
33 | {% for tip in tips | shuffle %}
34 | - {{ tip -}}
35 | {% endfor %}
36 |
37 | Your answer should be formatted as follows:
38 |
39 | Explanation:
40 |
41 |
42 | Bugged Code:
43 | ```
44 |
45 | ```
46 | demonstration: ""
47 | instance: |-
48 |
49 | {{src_code}}
50 |
51 |
52 | As a reminder, please DO NOT INCLUDE ANY COMMENTS IN THE CODE OR POINT OUT THE BUG IN ANY WAY.
53 |
54 | OUTPUT:
--------------------------------------------------------------------------------
/configs/bug_gen/lm_rewrite.yml:
--------------------------------------------------------------------------------
1 | name: lm_rewrite
2 | system: |-
3 | You are a software developer and you have been asked to implement a function.
4 |
5 | You will be given the contents of an entire file, with one or more functions defined in it.
6 | Please implement the function(s) that are missing.
7 | Do NOT modify the function signature, including the function name, parameters, return types, or docstring if provided.
8 | Do NOT change any other code in the file.
9 | You should not use any external libraries.
10 | instance: |-
11 | Please implement the function `{func_signature}` in the following code:
12 |
13 | ```
14 | {file_src_code}
15 | ```
16 |
17 | Remember, you should not modify the function signature, including the function name, parameters, return types, or docstring if provided.
18 | Do NOT change any other code in the file.
19 | Format your output as:
20 |
21 |
22 |
23 | ```
24 | {func_to_write}
25 | ```
--------------------------------------------------------------------------------
/configs/install_repo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | . /opt/miniconda3/bin/activate
4 | conda create -n testbed python=3.10 -yq
5 | conda activate testbed
6 | pip install -e .
7 | pip install pytest
8 |
--------------------------------------------------------------------------------
/configs/issue_gen/ig_tests.yaml:
--------------------------------------------------------------------------------
1 | system: |-
2 | You are a software engineer and you have been asked to write an issue report.
3 |
4 | You will be given the following input:
5 | 1. Test Source Code: The source code for a test in a GitHub repository that is currently failing.
6 | 2. Test Execution Output: The execution output of running the test.
7 |
8 | Given this input, please write a GitHub issue report.
9 |
10 | Guidelines:
11 | - Use a natural tone, as if reported by a developer.
12 | - DO NOT mention the test that failed.
13 | - Include information about how to reproduce the issue. You can use the test source code to write reproduction code. Use the test execution output to convey the expected behavior and what the actual current behavior is.
14 | demonstration: |-
15 | Here is an example of a well written GitHub issue. Mimic the style and information of this issue in your response.
16 | -----------------------------------
17 | {demo}
18 | instance: |-
19 | Now, write a GitHub issue that conveys the problem reflected in the failing test.
20 |
21 | Remember,
22 | - DO NOT GIVE AWAY THE TEST THAT FAILED.
23 | - DO NOT SAY THAT EXISTING TEST(s) FAILED.
24 | - DO NOT SUGGEST RUNNING ANY TESTING COMMANDS (e.g., pytest).
25 | - Mimic the style and information of the issue text from the demonstration.
26 | - Keep the length of the issue text reasonable and similar to the demonstration.
27 | - Use the test source code to write reproduction code.
28 | - Use the test execution output to convey the expected behavior and what the actual current behavior is.
29 |
30 | {input}
31 |
32 | **Issue Text**
33 |
--------------------------------------------------------------------------------
/configs/issue_gen/ig_v1.yaml:
--------------------------------------------------------------------------------
1 | settings:
2 | n_instructions: 1 # number of instructions to generate
3 | repro_code_n: 1 # number of repo tests to include in prompt
4 | repro_code_rate: 0 # % of task instances to generate repro code for
5 | add_test_output: True # whether to include test output (from validation step)
6 | system: |-
7 | **Task:**
8 | Write a realistic GitHub issue for the following **patch (diff output)** that introduces a bug. The issue should:
9 | - Clearly describe the problem observed in the original (buggy) code.
10 | - Include relevant details like which function or part of the code is affected.
11 | - Explain expected vs. actual behavior.
12 | - Suggest possible causes without explicitly stating the correct fix.
13 | - Use a natural tone, as if reported by a developer.
14 |
15 | Additional Context:
16 | - The diff shows changes to a file, where - lines represent the original (working) code that was removed.
17 | - + lines represent the new (fixed) code that was added.
18 | - The bug existed in the removed (-) lines, and the fix is in the added (+) lines.
19 | - Focus on describing the issue in the removed lines, not explaining the new fix verbatim.
20 | demonstration: |-
21 | Here is an example of a well formed GitHub issue:
22 |
23 | **Issue Text**
24 | {{problem_statement}}
25 | instance: |-
26 | Now, write a GitHub issue for the following patch (diff output).
27 |
28 | Remember to:
29 | - Clearly describe the problem observed in the original (buggy) code.
30 | - Include some relevant details like which function or part of the code is affected, BUT don't be too specific.
31 | - DO NOT GIVE AWAY THE FIX! THE SOLUTION CODE SHOULD NEVER APPEAR IN YOUR RESPONSE.
32 | - DO NOT SAY THAT EXISTING TEST(s) FAILED.
33 | - DO NOT SUGGEST RUNNING ANY TESTING COMMANDS (e.g., pytest).
34 | - Mimic the style of the issue text from the demonstration.
35 | - Keep the length of the issue text reasonable and similar to the demonstration.
36 |
37 | **Bug Patch (Diff Output):**
38 | {{patch}}
39 |
40 | **Issue Text**
41 |
--------------------------------------------------------------------------------
/configs/issue_gen/ig_v2.yaml:
--------------------------------------------------------------------------------
1 | settings: {}
2 | system: |-
3 | You are a software engineer helping to create a realistic dataset of synthetic GitHub issues.
4 |
5 | You will be given the following input:
6 |
7 | 1. Demonstration: A realistic GitHub issue to mimic (included in its corresponding tag).
8 | 2. Patch: A git diff output/pull request changes that introduces a bug (included in its corresponding tag).
9 | 3. Test output: The output of running the tests after the patch is applied (included in its corresponding tag).
10 | 4. Test source code: Source code for one or more tests that failed (included in its corresponding tag).
11 |
12 | Output: A realistic GitHub issue for the patch.
13 |
14 | Guidelines:
15 |
16 | - Mimic the style and structure of the demonstration issues.
17 | If the demonstration issues are not well structured, your output should also be not well structured.
18 | If the demonstrations use improper or no markdown, your output should also use improper or no markdown.
19 | If the demonstrations are short/long, your output should also be short/long (if possible).
20 | If the demonstrations include human "flavor text" or "fluff", your output should also include human "flavor text" or "fluff".
21 | Do this even if it conflicts with your default behavior of trying to be extremely concise and helpful.
22 | - DO NOT explain the fix/what caused the bug itself, focus on how to reproduce the issue it introduces
23 | - Do not mention pytest or what exact test failed. Instead, generate a realistic issue.
24 | - If possible, include information about how to reproduce the issue. An ideal reproduction script should raise an error
25 | or print an unexpected output together with the expected output.
26 | However, still include this information in a style very similar to the demonstration issues.
27 | demonstration: |-
28 | Here are a few realistic GitHub issues that you can mimic.
29 |
30 | {% for problem_statement in demo_problem_statements[:2] %}
31 |
32 | {{problem_statement}}
33 |
34 | {% endfor %}
35 | instance: |-
36 | Now, write a GitHub issue for the following patch (diff output).
37 |
38 |
39 | - DO NOT GIVE AWAY THE FIX! THE SOLUTION CODE SHOULD NEVER APPEAR IN YOUR RESPONSE.
40 | - DO NOT SAY THAT EXISTING TEST(s) FAILED.
41 | - DO NOT SUGGEST RUNNING ANY TESTING COMMANDS (e.g., pytest).
42 | - Mimic the style and information of the issue text from the demonstration.
43 | - Keep the length of the issue text reasonable and similar to the demonstration.
44 |
45 |
46 |
47 | {{patch}}
48 |
49 |
50 |
51 | {{test_output}}
52 |
53 |
54 |
55 | {% for test in test_funcs[:5] %}
56 | {{test}}
57 | {% endfor %}
58 |
59 |
60 | **Issue Text**
61 |
--------------------------------------------------------------------------------
/configs/train/dpo_qwen_32b.yml:
--------------------------------------------------------------------------------
1 | exp_name: qwen2p5-coder-32b-dpo-lr1e-5-warmup5___ft_xml_all_250413
2 | output_dir: /llm-weights/final/${exp_name}
3 |
4 | # Model Arguments
5 | model:
6 | _component_: torchtune.models.qwen2_5.qwen2_5_32b_instruct
7 |
8 | tokenizer:
9 | _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
10 | path: /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct/vocab.json
11 | merges_file: /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct/merges.txt
12 | max_seq_len: 32768
13 |
14 | checkpointer:
15 | _component_: torchtune.training.FullModelHFCheckpointer
16 | checkpoint_dir: /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct
17 | checkpoint_files: [
18 | model-00001-of-00014.safetensors,
19 | model-00002-of-00014.safetensors,
20 | model-00003-of-00014.safetensors,
21 | model-00004-of-00014.safetensors,
22 | model-00005-of-00014.safetensors,
23 | model-00006-of-00014.safetensors,
24 | model-00007-of-00014.safetensors,
25 | model-00008-of-00014.safetensors,
26 | model-00009-of-00014.safetensors,
27 | model-00010-of-00014.safetensors,
28 | model-00011-of-00014.safetensors,
29 | model-00012-of-00014.safetensors,
30 | model-00013-of-00014.safetensors,
31 | model-00014-of-00014.safetensors,
32 | ]
33 | recipe_checkpoint: null
34 | output_dir: ${output_dir}
35 | model_type: QWEN2
36 | safe_serialization: True
37 | resume_from_checkpoint: False
38 |
39 | # Dataset and Sampler
40 | dataset:
41 | _component_: torchtune.datasets.preference_dataset
42 | source: json
43 | data_files: /datasets/trajectories_dpo/dpo_250413.json
44 | conversation_column: messages
45 | conversation_style: openai
46 | new_system_prompt: null
47 | packed: False # True increases speed
48 | column_map:
49 | chosen: chosen_conversations
50 | rejected: rejected_conversations
51 | train_on_input: False
52 | split: train
53 | seed: 42
54 | shuffle: True
55 | batch_size: 1
56 |
57 | # Optimizer and Scheduler
58 | optimizer:
59 | _component_: torch.optim.AdamW
60 | fused: True
61 | weight_decay: 0.01
62 | lr: 1e-5
63 | lr_scheduler:
64 | _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
65 | num_warmup_steps: 5
66 | optimizer_in_bwd: True
67 | loss:
68 | _component_: torchtune.rlhf.loss.DPOLoss
69 | beta: 0.05
70 | label_smoothing: 0
71 |
72 | # Training
73 | epochs: 3
74 | max_steps_per_epoch: null
75 | gradient_accumulation_steps: 1 # Use to increase virtual batch size
76 | compile: False # pytorch compile, set to true for better perf/memory
77 |
78 | # Logging
79 | metric_logger:
80 | _component_: torchtune.training.metric_logging.WandBLogger
81 | project: devrl-sft
82 | group: ${exp_name}
83 | job_type: full_dpo_distributed
84 | log_every_n_steps: 1
85 | log_peak_memory_stats: True
86 |
87 | # Environment
88 | device: cuda
89 | dtype: bf16
90 | enable_activation_checkpointing: True # True reduces memory
91 | enable_activation_offloading: False # True reduces memory
92 |
93 | # Showcase the usage of the pytorch profiler
94 | # Set enabled to False as it's only needed for debugging training
95 | profiler:
96 | _component_: torchtune.training.setup_torch_profiler
97 |
98 | enabled: False
99 |
100 | #Output directory of trace artifacts
101 | output_dir: ${output_dir}/profiling_outputs
102 |
103 | #`torch.profiler.ProfilerActivity` types to trace
104 | cpu: True
105 | cuda: True
106 |
107 | #trace options passed to `torch.profiler.profile`
108 | profile_memory: False
109 | with_stack: False
110 | record_shapes: True
111 | with_flops: False
112 |
113 | # `torch.profiler.schedule` options:
114 | # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
115 | wait_steps: 5
116 | warmup_steps: 5
117 | active_steps: 2
118 | num_cycles: 1
--------------------------------------------------------------------------------
/configs/train/dpo_qwen_7b.yml:
--------------------------------------------------------------------------------
1 | exp_name: qwen2p5-coder-7b-dpo-lr1e-5-warmup5___ft_xml_all_250414
2 | output_dir: /llm-weights/dpo/${exp_name}
3 |
4 | # Model Arguments
5 | model:
6 | _component_: torchtune.models.qwen2_5.qwen2_5_7b_instruct
7 |
8 | tokenizer:
9 | _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
10 | path: /llm-weights/Qwen/Qwen2.5-Coder-7B-Instruct/vocab.json
11 | merges_file: /llm-weights/Qwen/Qwen2.5-Coder-7B-Instruct/merges.txt
12 | max_seq_len: 32768
13 |
14 | checkpointer:
15 | _component_: torchtune.training.FullModelHFCheckpointer
16 | checkpoint_dir: /llm-weights/outputs/qwen2p5-coder-7b-full-lr1e-4-warmup5___all_250331.jsonl/epoch_4
17 | checkpoint_files: [
18 | ft-model-00001-of-00004.safetensors,
19 | ft-model-00002-of-00004.safetensors,
20 | ft-model-00003-of-00004.safetensors,
21 | ft-model-00004-of-00004.safetensors,
22 | ]
23 | recipe_checkpoint: null
24 | output_dir: ${output_dir}
25 | model_type: QWEN2
26 | safe_serialization: True
27 | resume_from_checkpoint: False
28 |
29 | # The ref_checkpointer should always point to the original weights.
30 | ref_checkpointer:
31 | _component_: torchtune.training.FullModelHFCheckpointer
32 | checkpoint_dir: /llm-weights/Qwen/Qwen2.5-Coder-7B-Instruct
33 | checkpoint_files: [
34 | model-00001-of-00004.safetensors,
35 | model-00002-of-00004.safetensors,
36 | model-00003-of-00004.safetensors,
37 | model-00004-of-00004.safetensors,
38 | ]
39 | recipe_checkpoint: null
40 | output_dir: ${output_dir}
41 | model_type: QWEN2
42 | safe_serialization: True
43 |
44 | # Dataset and Sampler
45 | dataset:
46 | _component_: torchtune.datasets.preference_dataset
47 | source: json
48 | data_files: /datasets/trajectories_dpo/swesmith_dpo_250414.json
49 | column_map:
50 | chosen: chosen_conversations
51 | rejected: rejected_conversations
52 | train_on_input: False
53 | seed: 42
54 | shuffle: True
55 | batch_size: 1
56 |
57 | # Optimizer and Scheduler
58 | optimizer:
59 | _component_: torch.optim.AdamW
60 | fused: True
61 | weight_decay: 0.05
62 | lr: 2e-5
63 | lr_scheduler:
64 | _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
65 | num_warmup_steps: 5
66 | optimizer_in_bwd: False
67 | loss:
68 | _component_: torchtune.rlhf.loss.DPOLoss
69 | beta: 0.05
70 | label_smoothing: 0
71 |
72 | # Training
73 | epochs: 2
74 | max_steps_per_epoch: null
75 | gradient_accumulation_steps: 4 # Use to increase effective batch size
76 | compile: False # torch.compile the model + loss, True increases speed + decreases memory
77 |
78 | # Logging
79 | metric_logger:
80 | _component_: torchtune.training.metric_logging.WandBLogger
81 | project: devrl-sft
82 | group: ${exp_name}
83 | job_type: full_dpo_distributed
84 | log_every_n_steps: 1
85 | log_peak_memory_stats: True
86 |
87 | # Environment
88 | device: cuda
89 | dtype: bf16
90 | enable_activation_checkpointing: True # True reduces memory
91 | enable_activation_offloading: False # True reduces memory
92 |
93 | # Showcase the usage of the pytorch profiler
94 | # Set enabled to False as it's only needed for debugging training
95 | profiler:
96 | _component_: torchtune.training.setup_torch_profiler
97 |
98 | enabled: False
99 |
100 | #Output directory of trace artifacts
101 | output_dir: ${output_dir}/profiling_outputs
102 |
103 | #`torch.profiler.ProfilerActivity` types to trace
104 | cpu: True
105 | cuda: True
106 |
107 | #trace options passed to `torch.profiler.profile`
108 | profile_memory: False
109 | with_stack: False
110 | record_shapes: True
111 | with_flops: False
112 |
113 | # `torch.profiler.schedule` options:
114 | # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
115 | wait_steps: 5
116 | warmup_steps: 5
117 | active_steps: 2
118 | num_cycles: 1
--------------------------------------------------------------------------------
/configs/train/full_ft_qwen_32b.yml:
--------------------------------------------------------------------------------
1 | # Config for multi-device full finetuning in full_finetune_distributed.py
2 | # using a Qwen2.5 Coder 32B model
3 | #
4 | # This config assumes that you've run the following command before launching
5 | # this run:
6 | # tune download Qwen/Qwen2.5-Coder-32B-Instruct --output-dir /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct
7 | #
8 | # To launch on 2 devices, run the following command from root:
9 | # tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config configs/train/full_ft_qwen_32b.yml
10 | #
11 | # You can add specific overrides through the command line. For example
12 | # to override the checkpointer directory while launching training
13 | # you can run:
14 | # tune run --nnodes 1 --nproc_per_node 2 full_finetune_distributed --config configs/train/full_ft_qwen_32b.yml checkpointer.checkpoint_dir=
15 | #
16 | # This config works best when the model is being fine-tuned on 2+ GPUs.
17 | # Single device full finetuning requires more memory optimizations. It's
18 | # best to use a single-device config for those cases.
19 |
20 | exp_name: qwen2p5-coder-32b-full-lr5e-5-warmup5___ft_xml_all_250413
21 | output_dir: /llm-weights/final/${exp_name}
22 | # Model Arguments
23 | model:
24 | _component_: torchtune.models.qwen2_5.qwen2_5_32b_instruct
25 |
26 | tokenizer:
27 | _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
28 | path: /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct/vocab.json
29 | merges_file: /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct/merges.txt
30 | max_seq_len: 32768
31 |
32 | checkpointer:
33 | _component_: torchtune.training.FullModelHFCheckpointer
34 | checkpoint_dir: /llm-weights/Qwen/Qwen2.5-Coder-32B-Instruct
35 | checkpoint_files: [
36 | model-00001-of-00014.safetensors,
37 | model-00002-of-00014.safetensors,
38 | model-00003-of-00014.safetensors,
39 | model-00004-of-00014.safetensors,
40 | model-00005-of-00014.safetensors,
41 | model-00006-of-00014.safetensors,
42 | model-00007-of-00014.safetensors,
43 | model-00008-of-00014.safetensors,
44 | model-00009-of-00014.safetensors,
45 | model-00010-of-00014.safetensors,
46 | model-00011-of-00014.safetensors,
47 | model-00012-of-00014.safetensors,
48 | model-00013-of-00014.safetensors,
49 | model-00014-of-00014.safetensors,
50 | ]
51 | recipe_checkpoint: null
52 | output_dir: ${output_dir}
53 | model_type: QWEN2
54 | safe_serialization: True
55 | resume_from_checkpoint: False
56 |
57 | # Dataset and Sampler
58 | dataset:
59 | _component_: torchtune.datasets.chat_dataset
60 | source: json
61 | data_files: /datasets/trajectories_sft/ft_xml_all_250413.jsonl
62 | split: train
63 | conversation_column: messages
64 | conversation_style: openai
65 | train_on_input: False
66 | new_system_prompt: null
67 | packed: False # True increases speed
68 | seed: 42
69 | shuffle: True
70 | batch_size: 1
71 |
72 | # Optimizer and Scheduler
73 | optimizer:
74 | _component_: torch.optim.AdamW
75 | fused: True
76 | weight_decay: 0.01
77 | lr: 5e-5
78 | lr_scheduler:
79 | _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
80 | num_warmup_steps: 5
81 | optimizer_in_bwd: True
82 | loss:
83 | _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
84 |
85 | # Training
86 | epochs: 3
87 | max_steps_per_epoch: null
88 | gradient_accumulation_steps: 1 # Use to increase virtual batch size
89 | compile: True # pytorch compile, set to true for better perf/memory
90 |
91 | # Logging
92 | metric_logger:
93 | _component_: torchtune.training.metric_logging.WandBLogger
94 | project: devrl-sft
95 | group: ${exp_name}
96 | job_type: full_finetune_distributed
97 | log_every_n_steps: 1
98 | log_peak_memory_stats: True
99 |
100 | # Environment
101 | device: cuda
102 | dtype: bf16
103 | enable_activation_checkpointing: True # True reduces memory
104 | enable_activation_offloading: False # True reduces memory
105 | # custom_sharded_layers: ['tok_embeddings'] # Layers to shard separately (useful for large vocab size models). Lower Memory, but lower speed.
106 |
107 | # Showcase the usage of the pytorch profiler
108 | # Set enabled to False as it's only needed for debugging training
109 | profiler:
110 | _component_: torchtune.training.setup_torch_profiler
111 |
112 | enabled: False
113 |
114 | #Output directory of trace artifacts
115 | output_dir: ${output_dir}/profiling_outputs
116 |
117 | #`torch.profiler.ProfilerActivity` types to trace
118 | cpu: True
119 | cuda: True
120 |
121 | #trace options passed to `torch.profiler.profile`
122 | profile_memory: False
123 | with_stack: False
124 | record_shapes: True
125 | with_flops: False
126 |
127 | # `torch.profiler.schedule` options:
128 | # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
129 | wait_steps: 5
130 | warmup_steps: 5
131 | active_steps: 2
132 | num_cycles: 1
--------------------------------------------------------------------------------
/configs/train/full_ft_qwen_7b.yml:
--------------------------------------------------------------------------------
1 | exp_name: qwen2p5-coder-7b-full-lr5e-5-warmup5___ft_xml_all_250331
2 | output_dir: /llm-weights/outputs/${exp_name}
3 |
4 | # Model Arguments
5 | model:
6 | _component_: torchtune.models.qwen2_5.qwen2_5_7b_instruct
7 |
8 | tokenizer:
9 | _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
10 | path: /llm-weights/Qwen/Qwen2.5-Coder-7B-Instruct/vocab.json
11 | merges_file: /llm-weights/Qwen/Qwen2.5-Coder-7B-Instruct/merges.txt
12 | max_seq_len: 32768
13 |
14 | checkpointer:
15 | _component_: torchtune.training.FullModelHFCheckpointer
16 | checkpoint_dir: /llm-weights/Qwen/Qwen2.5-Coder-7B-Instruct
17 | checkpoint_files: [
18 | model-00001-of-00004.safetensors,
19 | model-00002-of-00004.safetensors,
20 | model-00003-of-00004.safetensors,
21 | model-00004-of-00004.safetensors,
22 | ]
23 | recipe_checkpoint: null
24 | output_dir: ${output_dir}
25 | model_type: QWEN2
26 | safe_serialization: True
27 | resume_from_checkpoint: False
28 |
29 | # Dataset and Sampler
30 | dataset:
31 | _component_: torchtune.datasets.chat_dataset
32 | source: json
33 | data_files: /datasets/trajectories_sft/ft_xml_all_250331.jsonl
34 | split: train
35 | conversation_column: messages
36 | conversation_style: openai
37 | train_on_input: False
38 | new_system_prompt: null
39 | packed: False # True increases speed
40 | seed: 42
41 | shuffle: True
42 | batch_size: 1
43 |
44 | # Optimizer and Scheduler
45 | optimizer:
46 | _component_: torch.optim.AdamW
47 | fused: True
48 | weight_decay: 0.01
49 | lr: 5e-5
50 | lr_scheduler:
51 | _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
52 | num_warmup_steps: 5
53 | optimizer_in_bwd: False
54 | loss:
55 | _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
56 |
57 | # Training
58 | epochs: 3
59 | max_steps_per_epoch: null
60 | gradient_accumulation_steps: 4 # Use to increase virtual batch size
61 | compile: True # pytorch compile, set to true for better perf/memory
62 |
63 | # Logging
64 | metric_logger:
65 | _component_: torchtune.training.metric_logging.WandBLogger
66 | project: devrl-sft
67 | group: ${exp_name}
68 | job_type: full_finetune_distributed
69 | log_every_n_steps: 1
70 | log_peak_memory_stats: True
71 |
72 | # Environment
73 | device: cuda
74 | dtype: bf16
75 | enable_activation_checkpointing: True # True reduces memory
76 | enable_activation_offloading: False # True reduces memory
77 | # custom_sharded_layers: ['tok_embeddings'] # Layers to shard separately (useful for large vocab size models). Lower Memory, but lower speed.
78 |
79 | # Showcase the usage of the pytorch profiler
80 | # Set enabled to False as it's only needed for debugging training
81 | profiler:
82 | _component_: torchtune.training.setup_torch_profiler
83 |
84 | enabled: False
85 |
86 | #Output directory of trace artifacts
87 | output_dir: ${output_dir}/profiling_outputs
88 |
89 | #`torch.profiler.ProfilerActivity` types to trace
90 | cpu: True
91 | cuda: True
92 |
93 | #trace options passed to `torch.profiler.profile`
94 | profile_memory: False
95 | with_stack: False
96 | record_shapes: True
97 | with_flops: False
98 |
99 | # `torch.profiler.schedule` options:
100 | # wait_steps -> wait, warmup_steps -> warmup, active_steps -> active, num_cycles -> repeat
101 | wait_steps: 5
102 | warmup_steps: 5
103 | active_steps: 2
104 | num_cycles: 1
--------------------------------------------------------------------------------
/docs/CNAME:
--------------------------------------------------------------------------------
1 | swesmith.com
2 |
--------------------------------------------------------------------------------
/docs/assets/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/banner.png
--------------------------------------------------------------------------------
/docs/assets/bug_gen_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/bug_gen_overview.png
--------------------------------------------------------------------------------
/docs/assets/combine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/combine.png
--------------------------------------------------------------------------------
/docs/assets/home/collection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/home/collection.png
--------------------------------------------------------------------------------
/docs/assets/home/leaderboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/home/leaderboard.png
--------------------------------------------------------------------------------
/docs/assets/home/swesmith.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/home/swesmith.png
--------------------------------------------------------------------------------
/docs/assets/lm_generate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/lm_generate.png
--------------------------------------------------------------------------------
/docs/assets/overview-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/overview-light.png
--------------------------------------------------------------------------------
/docs/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SWE-bench/SWE-smith/cfb6cf29568d0841ce15620c96ec795243b229fa/docs/assets/overview.png
--------------------------------------------------------------------------------
/docs/assets/paper.pdf.html:
--------------------------------------------------------------------------------
1 | Redirecting...
2 | If you are not redirected automatically, follow this link.
--------------------------------------------------------------------------------
/docs/getting_started/index.md:
--------------------------------------------------------------------------------
6 |
7 | SWE-smith is a toolkit for training software engineering (SWE) agents. With SWE-smith, you can:
8 |
9 | * Create an *unlimited* number of [SWE-bench](https://github.com/SWE-bench/SWE-bench) style task instances for any Python repository.
10 | * *Generate trajectories* of [SWE-agent](https://github.com/SWE-agent/SWE-agent) solving those task instances.
11 | * *Train local LMs* on these trajectories to improve their software engineering capabilities ([SWE-agent-LM-32B](https://huggingface.co/SWE-bench/SWE-agent-LM-32B)).
12 |
14 | Check out the [installation](installation.md) guide to get started, then head over to the [tutorials](../guides/index.md) to learn
15 | how to use SWE-smith.
16 |
17 | If you use SWE-smith in your work, we'd greatly appreciate a citation:
18 |
19 | ```bibtex
20 | @misc{yang2025swesmith,
21 | title={SWE-smith: Scaling Data for Software Engineering Agents},
22 | author={John Yang and Kilian Lieret and Carlos E. Jimenez and Alexander Wettig and Kabir Khandpur and Yanzhe Zhang and Binyuan Hui and Ofir Press and Ludwig Schmidt and Diyi Yang},
23 | year={2025},
24 | eprint={2504.21798},
25 | archivePrefix={arXiv},
26 | primaryClass={cs.SE},
27 | url={https://arxiv.org/abs/2504.21798},
28 | }
29 | ```
30 |
--------------------------------------------------------------------------------
/docs/getting_started/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | For the latest stable release:
4 |
5 | ```bash
6 | pip install swesmith
7 | ```
8 |
9 | For the latest development version:
10 |
11 | ```bash
12 | git clone https://github.com/SWE-bench/SWE-smith
13 | cd SWE-smith
14 | conda create -n swesmith python=3.10; conda activate swesmith
15 | pip install -e .
16 | ```
17 |
18 | If you plan to contribute to SWE-smith, please also perform:
19 |
20 | ```bash
21 | pre-commit install
22 | ```
--------------------------------------------------------------------------------
/docs/getting_started/quickstart.md:
--------------------------------------------------------------------------------
1 | We recommend checking out the [tutorials](../guides/index.md) for comprehensive guidance on SWE-smith usage.
2 |
3 | However, if you learn more easily by playing with the code, here are sequences of scripts corresponding to different SWE-smith workflows.
4 | If you run into issues, please consult the [tutorials](../guides/index.md) first, then open an [issue](https://github.com/SWE-bench/SWE-smith/issues/new/choose) if you can't find a solution.
5 |
6 | ### Creating Task Instances
7 | ```bash
8 | # Run LM rewrite strategy to produce bugs
9 | python -m swesmith.bug_gen.llm.modify pandas-dev__pandas.95280573 \
10 | --config_file configs/bug_gen/lm_modify.yml \
11 | --model claude-3-7-sonnet-20250219 \
12 | --n_bugs 1 \
13 | --n_workers=20
14 |
15 | # Collect all task instances into a single file for validation
16 | python -m swesmith.bug_gen.collect_patches logs/bug_gen/pandas-dev__pandas.95280573/
17 |
18 | # Run validation on the collected task instances
19 | python -m swesmith.harness.valid logs/bug_gen/pandas-dev__pandas.95280573_all_patches.json \
20 | --run_id pandas_test \
21 | --max_workers=8
22 |
23 | # Gather valid task instances
24 | python -m swesmith.harness.gather logs/run_validation/pandas_test
25 |
26 | # Generate issues for the valid task instances
27 | python -m swesmith.issue_gen.generate \
28 | --dataset_path logs/run_validation/basic/pandas_test.json \
29 | --model claude-3-7-sonnet-20250219 \
30 | --n_workers=1 \
31 | --config_file configs/issue_gen/ig_v2.yaml \
32 | --experiment_id ig_v2
33 | ```
34 |
35 | !!! tip "Next steps"
36 |
37 | We provide [detailed tutorials](../guides/index.md) on each of these steps.
--------------------------------------------------------------------------------
/docs/guides/difficulty_rating.md:
--------------------------------------------------------------------------------
1 | To see how SWE-smith compares against real-world tasks (e.g. SWE-bench), we LoRA fine-tuned a [Qwen 2.5 32B Coder Instruct](https://github.com/QwenLM/Qwen2.5-Coder) model on 1.5k human ratings of the difficulty of real-world bugs.
2 |
3 | Given the issue text and patch associated with a task instance, the model will rate the task as "easy" (< 15 min), "medium" (15 min - 1 hour), or "hard" (1+ hours).
4 |
5 | ## Inference
6 |
7 | You can rate the difficulty of your own task instances by following these steps:
8 |
9 | 1. Download the [HuggingFace checkpoint]().
10 |
11 | 2. Use `sglang` to serve the checkpoint. The training scripts available in the SWE-smith repository use [Modal](https://modal.com/) as a compute service for hosting inference.
12 |
13 | ```bash
14 | N_HOURS=4 N_GPUS=4 modal run --detach swesmith/train/serve_sglang.py \
15 | --model-path /path/to/checkpoint \
16 | --served-model-name gpt-4o \
17 | --tokenizer-path /path/to/Qwen2.5-Coder-32B-Instruct
18 | ```
19 |
20 | 3. Run the following script:
21 |
22 | ```bash
23 | python swesmith/train/difficulty_rater/get_difficulties.py \
24 | --base_url \
25 | --dataset_path path/to/dataset.json
26 | ```
27 |
28 | The script will generate a `.json` file containing a mapping from each task instance to a difficulty score.
29 | You can then compute the dataset's difficulty score as the average of all task instance scores.
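
For instance, assuming the output file is a flat `{instance_id: difficulty}` JSON mapping (the filename here is hypothetical), the average could be computed with a short sketch like:

```python
import json

# Hypothetical path; get_difficulties.py writes a {task instance: difficulty} mapping.
with open("difficulties.json") as f:
    ratings = json.load(f)

# Map easy/medium/hard to 1/5/9 (the scoring used for the table below);
# values that are already numeric pass through unchanged.
score_map = {"easy": 1, "medium": 5, "hard": 9}
scores = [score_map.get(v, v) for v in ratings.values()]
print(f"Dataset difficulty: {sum(scores) / len(scores):.3f}")
```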
30 |
31 | ## Prior Datasets
32 |
33 | Using our model, we've assessed the difficulty of existing datasets, assigning scores of 1/5/9 to easy/medium/hard tasks.
34 |
35 | | Dataset | # Instances | Score | `easy` | `med` | `hard` |
36 | |------------------------|-------------|--------|--------|-------|--------|
37 | | SWE-bench | 2294 | 5.014 | 438 | 1408 | 446 |
38 | | └── Lite | 300 | 3.893 | 93 | 197 | 10 |
39 | | └── Verified | 500 | 3.960 | 173 | 284 | 43 |
40 | | SWE-bench Multimodal | 510 | 6.036 | 55 | 265 | 186 |
41 | | SWE-gym | 2438 | 5.625 | 288 | 1456 | 664 |
42 | | └── Lite | 230 | 3.890 | 67 | 156 | 4 |
43 | | SWE-smith (LM Modify) | 1000 | 3.304 | 441 | 542 | 17 |
44 | | SWE-smith (LM Rewrite) | 1000 | 5.272 | 68 | 796 | 136 |
45 | | SWE-smith (Procedural) | 1000 | 3.596 | 374 | 603 | 23 |
46 | | SWE-smith (PR Mirror) | 1000 | 4.876 | 206 | 619 | 175 |
47 | | SWE-smith (Combine) | 1000 | 5.720 | 52 | 716 | 232 |
48 |
49 | From the table, we demonstrate that SWE-smith task instances are comparable to real-world tasks, and that our bug generation techniques allow for a wide range of task difficulties.
--------------------------------------------------------------------------------
/docs/guides/env_construction.md:
--------------------------------------------------------------------------------
1 | SWE-smith enables automatic construction of execution environments for repositories.
2 | We'll review the two steps of this process:
3 |
4 | 1. SWE-agent + LM attempts to install a repository + run the testing suite.
5 | 2. Construct an execution environment (Docker image).
6 |
7 | For this section, we'll use the [Instagram/MonkeyType](https://github.com/Instagram/MonkeyType/) repository as a running example,
8 | specifically at commit [`70c3acf`](https://github.com/Instagram/MonkeyType/tree/70c3acf62950be5dfb28743c7a719bfdecebcd84).
9 |
10 | ## Automatically Install Repos with SWE-agent
11 |
12 | Coming soon!
13 |
14 | ## Create an Execution Environment
15 | First, create the conda environment for the target repository.
16 | ```bash
17 | python -m swesmith.build_repo.try_install Instagram/MonkeyType install_repo.sh \
18 | --commit 70c3acf62950be5dfb28743c7a719bfdecebcd84
19 | ```
20 | where `install_repo.sh` is the script that installs the repository.
21 | ([Example](https://github.com/SWE-bench/SWE-smith/blob/main/configs/install_repo.sh))
22 |
23 | If successful, two artifacts will be produced under `logs/build_repo/records/`:
24 | * `sweenv_[repo + commit].yml`: A dump of the conda environment that was created.
25 | * `sweenv_[repo + commit].sh`: A log of the installation process.
26 |
27 | Next, run the following command to create a Docker image for the repository.
28 |
29 | ```bash
30 | python -m swesmith.build_repo.create_images --repos Instagram/MonkeyType
31 | ```
32 |
33 | This command will create two artifacts:
34 | 1. A mirror of the original repository at the specified commit, created under [`swesmith`](https://github.com/orgs/swesmith/repositories). To change the organization, you can...
35 | * Pass in an `--org` argument, or
36 | * (If built from source) Change `ORG_NAME` in `swesmith/constants.py`
37 | 2. A Docker image (`swesmith.x86_64.<repo>.<commit>`) which contains the installed codebase.
38 |
39 | It's good practice to check that your Docker image works as expected.
40 | ```bash
41 | docker run -it --rm swesmith.x86_64.instagram__monkeytype.70c3acf6
42 | ```
43 | Within the container, run the testing suite (e.g. `pytest`) to ensure that the codebase is functioning as expected.
44 |
45 | !!! note "Get existing Docker images"
46 |
47 | All repositories represented in the SWE-smith [dataset](https://huggingface.co/datasets/SWE-bench/SWE-smith) are available to download. Simply run:
48 | ```bash
49 | python -m swesmith.build_repo.download_images
50 | ```
51 |
--------------------------------------------------------------------------------
/docs/guides/harnesses.md:
--------------------------------------------------------------------------------
1 | # Validation & Evaluation
2 |
3 | Great! You now have an execution environment + a bunch of candidate task instances. How do we determine which ones can be used for training?
4 |
5 | We provide two harnesses for the purposes of:
6 |
7 | * Validation: To check if a candidate task instance is usable (breaks 1+ existing tests).
8 | * Evaluation: To check if the proposed solution for a task instance is correct.
9 |
10 | These harnesses serve the same purposes as their counterparts in [SWE-bench](https://swe-bench.github.io).
11 |
12 | ## Validation
13 | The validation harness is used to check if a candidate task instance is usable (breaks 1+ existing tests).
14 |
15 | Once you've generated task instance candidates, follow these steps to validate them:
16 |
17 | 1. Collect the candidates
18 |
19 | ```bash
20 | python -m swesmith.bug_gen.collect_patches logs/bug_gen/
21 | ```
22 |
23 | This produces a `logs/bug_gen/_all_patches.json` file with all the candidate task instances.
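
As a quick check, you can confirm how many candidates were collected (a sketch, assuming the file is a JSON list):

```python
import json

with open("logs/bug_gen/_all_patches.json") as f:
    candidates = json.load(f)
print(f"{len(candidates)} candidate task instances")
```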
24 |
25 | 2. Run validation
26 |
27 | ```bash
28 | python -m swesmith.harness.valid \
29 | logs/bug_gen/_all_patches.json \
30 | --run_id <run_id>
31 | ```
32 |
33 | The validation harness works in two steps.
34 | First, it runs the original repository's test suite to get the passing statuses of the existing tests.
35 | Then, it applies each candidate task instance to the repository and runs the test suite again.
36 | If the candidate task instance breaks 1+ existing tests, it is considered a usable task instance.
37 |
38 | For each task instance, the validation harness produces a `logs/run_validation/<run_id>/<instance_id>/` folder containing the following information:
39 |
40 | * `eval.sh`: The sequence of test command(s) run
41 | * `patch.diff`: The candidate task instance
42 | * `report.json`: `FAIL_TO_PASS` and `PASS_TO_PASS` test cases
43 | * `run_instance.log`: The full trace of running validation
44 | * `test_output.txt`: The standard output of the test command(s)
45 |
46 | 3. Collect validated task instances
47 |
48 | ```bash
49 | python -m swesmith.harness.gather logs/run_validation/<run_id>
50 | ```
51 |
52 | Task instances with 1+ `FAIL_TO_PASS` test cases and 1+ `PASS_TO_PASS` test cases are considered valid.
53 |
54 | This script performs two actions:
55 |
56 | * It collects all valid task instances into a `logs/task_insts/<repo>.json` file. Each instance contains the following information:
57 | ```json
58 | {
59 | "instance_id": ,
60 | "repo": ,
61 | "patch": ,
62 | "FAIL_TO_PASS": ,
63 | "PASS_TO_PASS": ,
64 | "created_at": ,
65 | "image_name": ,
66 | "base_commit": ,
67 | }
68 | ```
69 | * For each valid task instance, a branch called `<instance_id>` is created in the repository. The branch corresponds to the repository with the task instance's bug patch applied.
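
The validity criterion above is easy to check programmatically. A minimal sketch, assuming a `report.json` with the `FAIL_TO_PASS` and `PASS_TO_PASS` lists described earlier:

```python
import json

def is_valid(report_path: str) -> bool:
    # Usable bugs break at least one existing test (FAIL_TO_PASS)
    # while leaving at least one other test passing (PASS_TO_PASS).
    with open(report_path) as f:
        report = json.load(f)
    return (len(report.get("FAIL_TO_PASS", [])) >= 1
            and len(report.get("PASS_TO_PASS", [])) >= 1)
```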
70 |
71 | ## Evaluation
72 |
73 | The evaluation harness is used to check if the proposed solution for a task instance is correct.
74 |
75 | You can run this script to sanity check that testing for validated task instances works as expected:
76 |
77 | ```bash
78 | python -m swesmith.harness.eval \
79 |     --dataset_path logs/task_insts/{repo}.json \
80 | --predictions_path gold \
81 | --run_id sanity
82 | ```
83 |
84 | If you want to run on real predictions, simply replace `gold` with the path to your predictions, which should look like:
85 |
86 | ```json
87 | {
88 | "instance_id": ,
89 | "patch": ,
90 | "model_name_or_path": ,
91 | }
92 | ```
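
For instance, you might assemble that file from your model's outputs like so (a sketch; the instance ID, patch path, and the assumption that the harness accepts a JSON list of such records are all illustrative):

```python
import json

# Illustrative values; swap in your own instance IDs and model patches.
predictions = [
    {
        "instance_id": "instagram__monkeytype.70c3acf6.pr_1234",  # hypothetical
        "patch": open("model_patch.diff").read(),
        "model_name_or_path": "my-swe-agent-lm",
    }
]
with open("preds.json", "w") as f:
    json.dump(predictions, f, indent=2)
```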
93 |
--------------------------------------------------------------------------------
/docs/guides/index.md:
--------------------------------------------------------------------------------
1 | # Tutorials
--------------------------------------------------------------------------------
/docs/guides/issue_gen.md:
--------------------------------------------------------------------------------
1 | You have a bunch of task instances with executable environments.
2 | You're very close to training SWE-agents on this data.
3 | There's one last step - let's generate issue text.
4 |
5 | We primarily use LMs to generate issue text.
6 |
7 | ```bash
8 | python swesmith/issue_gen/generate.py logs/task_insts/<repo>.json \
9 | --config_file configs/issue_gen/ig_v2.yaml \
10 | --model anthropic/claude-3-7-sonnet-20250219 \
11 | --n_workers 4 \
12 | --experiment_id ig_v2 \
13 | --use_existing
14 | ```
15 |
16 | This will generate issue text for each task instance, producing several artifacts along the way:
17 |
18 | * Under `logs/issue_gen/ig_v2/`, there will be a folder for each task instance, containing:
19 | * `messages.json`: The messages fed to the LM to generate the issue text.
20 | * `metadata.json`: Contains the issue text + inference cost.
21 | * A `logs/issue_gen/<repo>__ig_v2_n1.json` file will also be created: a copy of the original `logs/task_insts/<repo>.json` with issue text added to each task instance (as the `problem_statement` field).
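
A quick way to spot-check the generated issues (a sketch; the file name follows the pattern above, with the MonkeyType repository as a stand-in):

```python
import json

path = "logs/issue_gen/instagram__monkeytype.70c3acf6__ig_v2_n1.json"  # example
with open(path) as f:
    insts = json.load(f)
for inst in insts[:3]:
    print(inst["instance_id"])
    print(inst["problem_statement"][:200], "...\n")
```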
22 |
23 | ## Alternatives
24 |
25 | In our paper, we discuss several alternatives for generating issue text.
26 | While our experiments suggest that LM-generated issue text is the best proxy for real issue text, we provide instructions for the alternatives below.
27 |
28 | **Static Issue Text**
29 |
30 | The problem statement is generated by randomly selecting one of 7 static issue text templates.
31 |
32 | ```bash
33 | python swesmith/issue_gen/get_static.py logs/task_insts/<repo>.json
34 | ```
35 |
36 | Produces a `logs/issue_gen/<repo>__ig_static.json` file.
37 |
38 | **Random F2P Test Case**
39 |
40 | The problem statement shows a randomly selected Fail-to-Pass test case from the task instance.
41 |
42 | ```bash
43 | python swesmith/issue_gen/get_from_tests.py logs/task_insts/<repo>.json
44 | ```
45 |
46 | **Original Issue Text**
47 |
48 | !!! note
49 | This strategy only works for PR Mirror task instances whose underlying pull request has one or more associated issues.
50 |
51 | ```bash
52 | python swesmith/issue_gen/get_from_pr.py logs/task_insts/<repo>.json
53 | ```
54 |
55 | Produces a `logs/issue_gen/<repo>__ig_orig.json` file.
56 |
--------------------------------------------------------------------------------
/docs/guides/train_swe_agent.md:
--------------------------------------------------------------------------------
1 | # Training SWE-agents
2 |
3 | Now the fun part - we provide details on how to operationalize SWE-smith for training SWE-agents!
4 |
5 | Specifically, we'll cover the workflow for Rejection Sampling Fine Tuning.
6 |
7 | !!! note "SWE-agent"
8 |
9 | The documentation in this section is heavily grounded in the [SWE-agent](https://github.com/SWE-agent/SWE-agent) library.
10 | We do *not* plan to explicitly support non-SWE-agent scaffolds, but adapting one should not be difficult; the main changes would be how you generate expert trajectories and predictions for evaluation.
11 |
12 | There are several steps we'll cover:
13 |
14 | 1. Creating a subset of SWE-smith task instances.
15 | 2. Generating expert trajectories for those task instances.
16 | 3. Training a model on the expert trajectories.
17 | 4. Evaluating the model on SWE-bench (Lite/Verified/Multimodal).
18 |
19 | ## Creating SWE-smith Subset
20 |
21 | The full [SWE-smith](https://huggingface.co/datasets/SWE-bench/SWE-smith) dataset is quite large.
22 | Usually, we recommend training on a subset.
23 | To curate a subset, you might use logic like the following.
24 |
25 | ```python
26 | import json
27 | 
28 | from datasets import load_dataset
29 | swesmith = load_dataset("SWE-bench/SWE-smith", split="train")
30 | 
31 | subset_name = "subset0"
32 | def criteria(task_instance):
33 |     # Keep PR-mirror bugs that break between 2 and 5 tests.
34 |     return ".pr_" in task_instance["instance_id"] and \
35 |         2 <= len(task_instance["FAIL_TO_PASS"]) <= 5
36 | bugs = [x for x in swesmith if criteria(x)]
37 | print(f"Found {len(bugs)} bugs that match criteria")
38 | with open(f"logs/experiments/{subset_name}.json", "w") as f:
39 |     json.dump(bugs, f, indent=2)
40 | ```
41 |
42 | ## Generate Expert Trajectories
43 |
44 | 1. Clone [SWE-agent](https://github.com/SWE-agent/SWE-agent). Make sure to follow the installation instructions [here](https://swe-agent.com/latest/installation/source/).
45 |
46 | 2. Create a symbolic link to SWE-smith's `agent/` folder inside SWE-agent. That is, from the SWE-agent repository root, run:
47 | ```bash
48 | ln -s path/to/SWE-smith/agent/ .
49 | ```
50 |
51 | 3. In SWE-agent, run expert trajectory generation:
52 | ```bash
53 | ./agent/_gen_trajs.sh
54 | ```
55 | Check the file to see how the script works. You'll need to adjust the `--instances.path` argument to point to the subset you created in the previous step.
56 |
57 | ## Train Model
58 |
59 | The previous step will generate individual trajectories per task instance under the `SWE-agent/trajectories/<user>/<run_id>/` folder.
60 |
61 | We'll now determine which trajectories correspond to resolved instances, convert them to a format that can be used for SFT, and then train a model with them.
62 |
63 | 1. (From SWE-smith) Run evaluation on training task instances.
64 | ```bash
65 | python -m swesmith.harness.eval \
66 | --dataset_path path/to/subset0.json \
67 | --predictions_path path/to/trajectories/<user>/<run_id>/preds.json \
68 | --run_id <run_id> \
69 | --max_workers 10 \
70 | --timeout 240
71 | ```
72 |
73 | !!! tip "`preds.json`"
74 | If there is no `preds.json`, run `sweagent merge-preds trajectories/<user>/<run_id>/`.
75 |
76 | This evaluation will generate a `logs/run_evaluation/<run_id>/`
77 | folder with a `report.json` file indicating which instance IDs were successfully resolved.
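
To gauge your resolve rate before converting trajectories, you can peek at that report (a sketch; the exact keys may vary, so this just summarizes whatever is there):

```python
import json

report = json.load(open("logs/run_evaluation/my_run/report.json"))  # hypothetical run_id
for key, value in report.items():
    print(key, len(value) if isinstance(value, (list, dict)) else value)
```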
78 |
79 | 2. (From SWE-smith) Convert trajectories into SFT format.
80 |
81 | ```bash
82 | python -m swesmith.train.traj_mgr.transform_to_ft \
83 | --traj_dir path/to/trajectories/<user>/<run_id>/ \
84 | --eval_dir logs/run_evaluation/<run_id>/ \
85 | --only_resolved
86 | ```
87 |
88 | This will produce an `ft_xml_*.jsonl` file under the `trajectories_sft/` folder.
89 | This dataset can be used directly for SFT.
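
Before training, it's worth peeking at one record to confirm the schema (a sketch; the file name is a placeholder):

```python
import json

with open("trajectories_sft/ft_xml_example.jsonl") as f:  # placeholder name
    first = json.loads(next(f))
print(list(first.keys()))
```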
90 |
91 | 3. Run training. First, upload the file to Modal
92 | ```bash
93 | modal volume put <volume> trajectories_sft/ft_xml_*.jsonl
94 | ```
95 |
96 | Then, modify `config/train/full_ft_qwen_7b.yml` to point to the file in Modal.
97 |
98 | Finally, run the training script:
99 | ```bash
100 | ./scripts/train.run_ft_torchtune.py
101 | ```
102 |
103 | ## Evaluation
104 | Run inference with SWE-agent + your SFT'ed model on SWE-bench (Lite/Verified/Multimodal).
105 |
106 | 1. (From SWE-smith) Update `scripts/train.serve_sglang.sh` to point at your SFT'ed model, then run it.
107 |
108 | 2. (From SWE-agent) Run inference:
109 | ```bash
110 | ./agent/_infer_model.sh
111 | ```
112 | Make sure the Modal URL is correct and change the evaluation dataset as desired.
113 |
114 | 3. When inference finishes, run evaluation on the model's predictions. (Check out [sb-cli](https://github.com/SWE-bench/sb-cli/tree/main) for more information on how to conveniently run evaluation for SWE-bench-* datasets.)
115 | ```bash
116 | sb-cli submit swe-bench_verified test \
117 | --predictions_path trajectories/<user>/<run_id>/preds.json \
118 | --run_id <run_id>
119 | ```
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 | {{ super() }}
5 |
6 |
7 |