├── SWE-agent ├── tests │ ├── __init__.py │ ├── test_packaging.py │ ├── test_data │ │ ├── trajectories │ │ │ ├── gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1 │ │ │ │ └── solution_missing_colon.py │ │ │ └── gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1 │ │ │ │ ├── patches │ │ │ │ └── pydicom__pydicom-1458.patch │ │ │ │ └── all_preds.jsonl │ │ └── data_sources │ │ │ └── human_eval.json │ ├── conftest.py │ ├── test_models.py │ ├── test_parsing.py │ ├── test_env.py │ ├── test_replay.py │ └── test_run.py ├── inspector │ ├── __init__.py │ ├── trajectories │ ├── favicon.ico │ ├── icons │ │ ├── computer.png │ │ ├── swellama_tan.png │ │ ├── swellama_blue.png │ │ ├── swellama_brown.png │ │ ├── swellama_grey.png │ │ ├── swe-agent-logo-50.png │ │ └── edit_icon.svg │ ├── index.html │ ├── README.md │ └── fileViewer.js ├── sweagent │ ├── agent │ │ ├── __init__.py │ │ ├── README.md │ │ └── history_processors.py │ ├── api │ │ ├── __init__.py │ │ ├── requirements.txt │ │ ├── utils.py │ │ └── hooks.py │ ├── environment │ │ ├── __init__.py │ │ └── retrieve_graph.py │ ├── frontend │ │ ├── src │ │ │ ├── static │ │ │ │ ├── envFeed.css │ │ │ │ ├── agentFeed.css │ │ │ │ ├── agentMessage.css │ │ │ │ ├── footer.css │ │ │ │ ├── logPanel.css │ │ │ │ ├── message.css │ │ │ │ ├── envMessage.css │ │ │ │ ├── runControl.css │ │ │ │ ├── index.css │ │ │ │ ├── macbar.css │ │ │ │ ├── header.css │ │ │ │ ├── run.css │ │ │ │ └── font.css │ │ │ ├── assets │ │ │ │ ├── copy.png │ │ │ │ ├── logo.png │ │ │ │ ├── pli.png │ │ │ │ ├── link_icons │ │ │ │ │ ├── code.png │ │ │ │ │ ├── data.png │ │ │ │ │ └── paper.png │ │ │ │ ├── panel_icons │ │ │ │ │ ├── issue.png │ │ │ │ │ ├── editor.png │ │ │ │ │ ├── terminal.png │ │ │ │ │ └── workspace.png │ │ │ │ └── repo_icons │ │ │ │ │ ├── pvlib.png │ │ │ │ │ ├── sympy.png │ │ │ │ │ ├── pydicom.png │ │ │ │ │ ├── pyvista.png │ │ │ │ │ └── marshmallow-code.png │ │ │ ├── setupTests.js │ │ │ ├── App.test.js │ │ │ 
├── components │ │ │ │ ├── Footer.js │ │ │ │ ├── MacBar.js │ │ │ │ ├── AgentMessage.js │ │ │ │ ├── Header.js │ │ │ │ ├── panels │ │ │ │ │ ├── AgentFeed.js │ │ │ │ │ ├── EnvFeed.js │ │ │ │ │ └── LogPanel.js │ │ │ │ ├── utils │ │ │ │ │ └── icons │ │ │ │ │ │ └── ExpandIcon.js │ │ │ │ └── EnvMessage.js │ │ │ ├── reportWebVitals.js │ │ │ ├── App.js │ │ │ ├── index.js │ │ │ ├── index.css │ │ │ └── logo.svg │ │ ├── public │ │ │ ├── robots.txt │ │ │ ├── favicon.ico │ │ │ ├── logo192.png │ │ │ ├── logo512.png │ │ │ ├── manifest.json │ │ │ └── index.html │ │ ├── .gitignore │ │ ├── package.json │ │ └── README.md │ └── __init__.py ├── docker │ ├── requirements.txt │ ├── eval.Dockerfile │ ├── getconda.sh │ ├── swe.Dockerfile │ └── README.md ├── assets │ ├── inspector.png │ ├── results+preview.png │ ├── swe-agent-banner.png │ └── template_workflow.png ├── environment.yml ├── scripts │ ├── remove_all_containers.sh │ ├── run.sh │ ├── run_replay.sh │ ├── run_from_url.sh │ ├── run_jsonl.sh │ ├── run_and_eval.sh │ └── README.md ├── docs │ ├── assets │ │ ├── open_port_default.png │ │ └── open_port_in_browser.png │ └── github_codespace.md ├── .devcontainer │ ├── README.md │ ├── postcreate.sh │ ├── oncreate.sh │ ├── bashrc_epilog.sh │ ├── devcontainer.json │ └── sample_keys.cfg ├── build_deploy.sh ├── mlc_config.json ├── Dockerfile ├── codecov.yml ├── requirements.txt ├── setup.sh ├── config │ ├── commands │ │ ├── _split_string.py │ │ ├── README.md │ │ ├── cursors_edit_linting.sh │ │ ├── edit_linting.sh │ │ └── search.sh │ ├── README.md │ └── configs │ │ ├── xml_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml │ │ └── xml_sys-env_window100-detailed_cmd_format-full_history-1_demos.yaml ├── analysis │ └── analysis.py ├── keys.cfg ├── .pre-commit-config.yaml ├── evaluation │ └── run_eval.sh ├── make_demos │ ├── README.md │ └── convert_traj_to_demo.py ├── trajectories │ └── README.md ├── release_dockerhub.sh ├── .dockerignore ├── pyproject.toml └── run_replay.py ├── 
run_repograph_sweagent.sh ├── requirements.txt ├── run_repograph_agentless.sh ├── repograph ├── graph_searcher.py └── utils.py ├── agentless ├── util │ ├── utils.py │ ├── parse_global_var.py │ ├── compress_file.py │ └── api_requests.py └── get_repo_structure │ └── get_patch_info.py ├── .gitignore └── README.md /SWE-agent/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SWE-agent/inspector/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/environment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SWE-agent/inspector/trajectories: -------------------------------------------------------------------------------- 1 | ../trajectories -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/envFeed.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SWE-agent/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic[bedrock] 2 | config 3 | openai 
-------------------------------------------------------------------------------- /SWE-agent/assets/inspector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/assets/inspector.png -------------------------------------------------------------------------------- /SWE-agent/inspector/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/favicon.ico -------------------------------------------------------------------------------- /SWE-agent/assets/results+preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/assets/results+preview.png -------------------------------------------------------------------------------- /SWE-agent/assets/swe-agent-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/assets/swe-agent-banner.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/agentFeed.css: -------------------------------------------------------------------------------- 1 | .agentFeed { 2 | grid-column: 1 / 3; 3 | grid-row: 1 / 5; 4 | } 5 | -------------------------------------------------------------------------------- /SWE-agent/assets/template_workflow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/assets/template_workflow.png -------------------------------------------------------------------------------- /SWE-agent/inspector/icons/computer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/icons/computer.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/agentMessage.css: -------------------------------------------------------------------------------- 1 | .agentMessageTitle { 2 | font-weight: bold; 3 | color: black; 4 | } 5 | -------------------------------------------------------------------------------- /SWE-agent/environment.yml: -------------------------------------------------------------------------------- 1 | name: swe-agent 2 | dependencies: 3 | - python=3.9 4 | - pip 5 | - pip: 6 | - -r requirements.txt 7 | -------------------------------------------------------------------------------- /SWE-agent/inspector/icons/swellama_tan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/icons/swellama_tan.png -------------------------------------------------------------------------------- /SWE-agent/scripts/remove_all_containers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove all docker containers 4 | 5 | docker rm -f $(docker ps -aq) 6 | -------------------------------------------------------------------------------- /SWE-agent/docs/assets/open_port_default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/docs/assets/open_port_default.png -------------------------------------------------------------------------------- 
/SWE-agent/inspector/icons/swellama_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/icons/swellama_blue.png -------------------------------------------------------------------------------- /SWE-agent/inspector/icons/swellama_brown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/icons/swellama_brown.png -------------------------------------------------------------------------------- /SWE-agent/inspector/icons/swellama_grey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/icons/swellama_grey.png -------------------------------------------------------------------------------- /SWE-agent/tests/test_packaging.py: -------------------------------------------------------------------------------- 1 | from sweagent import __version__ 2 | 3 | 4 | def test_version(): 5 | assert __version__.count(".") == 2 -------------------------------------------------------------------------------- /SWE-agent/docs/assets/open_port_in_browser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/docs/assets/open_port_in_browser.png -------------------------------------------------------------------------------- /SWE-agent/inspector/icons/swe-agent-logo-50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/inspector/icons/swe-agent-logo-50.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/public/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/public/favicon.ico -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/public/logo192.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/public/logo512.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/copy.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/logo.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/pli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/pli.png -------------------------------------------------------------------------------- /SWE-agent/.devcontainer/README.md: -------------------------------------------------------------------------------- 1 | # .devcontainer 2 | 3 | The files in this directory configure a VSCode environment like 4 | GitHub codespaces. 
5 | -------------------------------------------------------------------------------- /SWE-agent/.devcontainer/postcreate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | set -x 5 | 6 | pip install -e '.' 7 | docker pull sweagent/swe-agent 8 | -------------------------------------------------------------------------------- /run_repograph_sweagent.sh: -------------------------------------------------------------------------------- 1 | cd SWE-agent 2 | 3 | python run.py --model_name gpt4 \ 4 | --per_instance_cost_limit 2.00 \ 5 | --config_file ./config/default.yaml -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/link_icons/code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/link_icons/code.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/link_icons/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/link_icons/data.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/link_icons/paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/link_icons/paper.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/panel_icons/issue.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/panel_icons/issue.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/repo_icons/pvlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/repo_icons/pvlib.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/repo_icons/sympy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/repo_icons/sympy.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/panel_icons/editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/panel_icons/editor.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/repo_icons/pydicom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/repo_icons/pydicom.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/repo_icons/pyvista.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/repo_icons/pyvista.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/panel_icons/terminal.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/panel_icons/terminal.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/panel_icons/workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/panel_icons/workspace.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/assets/repo_icons/marshmallow-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ozyyshr/RepoGraph/HEAD/SWE-agent/sweagent/frontend/src/assets/repo_icons/marshmallow-code.png -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/footer.css: -------------------------------------------------------------------------------- 1 | footer { 2 | flex: 0 0 auto; 3 | } 4 | 5 | footer { 6 | align-items: center; 7 | justify-content: center; 8 | display: flex; 9 | } 10 | -------------------------------------------------------------------------------- /SWE-agent/build_deploy.sh: -------------------------------------------------------------------------------- 1 | # !bin/bash 2 | 3 | python3 -m build 4 | 5 | python3 -m twine upload --skip-existing --repository pypi dist/* 6 | # python3 -m twine upload --skip-existing --repository testpypi dist/* 7 | -------------------------------------------------------------------------------- /SWE-agent/scripts/run.sh: -------------------------------------------------------------------------------- 1 | python run.py \ 2 | --model_name human \ 3 | --data_path ./data/dev-easy/swe-bench-dev-easy.json \ 4 | --per_instance_cost_limit 3.00 \ 5 | --config_file 
./config/default.yaml 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tree-sitter==0.21.3 2 | tree-sitter-languages==1.10.2 3 | grep-ast==0.3.2 4 | networkx==3.2.1 5 | pygments==2.18.0 6 | tqdm 7 | datasets 8 | openai==1.42.0 9 | tiktoken==0.7.0 10 | libcst==1.4.0 -------------------------------------------------------------------------------- /SWE-agent/docker/eval.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM sweagent/swe-agent:latest 2 | 3 | COPY ../evaluation/evaluation.py /evaluation.py 4 | RUN pip install git+https://github.com/princeton-nlp/SWE-bench.git 5 | RUN pip install unidiff 6 | CMD ["python", "/evaluation.py"] 7 | -------------------------------------------------------------------------------- /SWE-agent/mlc_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [ 3 | { 4 | "pattern": "https://github.com/issues?.*" 5 | }, 6 | { 7 | "pattern": ".*localhost.*" 8 | }, 9 | { 10 | "pattern": "https://platform.openai.com/docs/.*" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /SWE-agent/scripts/run_replay.sh: -------------------------------------------------------------------------------- 1 | python run_replay.py \ 2 | --traj_path ./trajectories/johnbyang/-demonstrations/pvlib__pvlib-python-1216.default_test.traj \ 3 | --data_path ./trajectories/johnbyang/-demonstrations/pvlib__pvlib-python-1216.json \ 4 | --config_file config/e2e/default_test.yaml -------------------------------------------------------------------------------- /SWE-agent/scripts/run_from_url.sh: -------------------------------------------------------------------------------- 1 | python run.py \ 2 | --model_name human \ 3 | --data_path 
https://github.com/psf/requests/issues/6254 \ 4 | --base_commit ac3be98b19f4d09c6a970b271a3ae30f3d0858f7 \ 5 | --per_instance_cost_limit 3.00 \ 6 | --config_file ./config/default_from_url.yaml 7 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/setupTests.js: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import "@testing-library/jest-dom"; 6 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/App.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from "@testing-library/react"; 2 | import App from "./App"; 3 | 4 | test("renders learn react link", () => { 5 | render(); 6 | const linkElement = screen.getByText(/learn react/i); 7 | expect(linkElement).toBeInTheDocument(); 8 | }); 9 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/logPanel.css: -------------------------------------------------------------------------------- 1 | .logPanel { 2 | grid-column: 3 / 6; 3 | grid-row: 3 / 5; 4 | color: white; 5 | } 6 | 7 | .logPanel .innerDiv { 8 | padding-left: 1em; 9 | font-size: 75%; 10 | } 11 | 12 | .logPanel .scrollableDiv { 13 | background-color: #1e1e1e !important; 14 | } 15 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/message.css: -------------------------------------------------------------------------------- 1 | .message { 2 | background-color: #eee; 3 | border-radius: 1em 1em 1em 1em; 4 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); 5 | padding: 0.25em 1em; 6 | margin: 
1em; 7 | overflow-x: hidden; 8 | } 9 | 10 | .highlight { 11 | background-color: #c0d6ff; 12 | } 13 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/envMessage.css: -------------------------------------------------------------------------------- 1 | .envMessageCommand { 2 | border-radius: 1em 1em 0em 0em; 3 | margin: 1em 1em 0em 1em; 4 | /* border-bottom: 1px dashed black; */ 5 | } 6 | 7 | .envMessageOutput, 8 | .envMessageDiff { 9 | border-radius: 0em 0em 1em 1em; 10 | margin: 0em 1em 1em 1em; 11 | } 12 | -------------------------------------------------------------------------------- /SWE-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | # Set the working directory 4 | WORKDIR /app 5 | 6 | # Install Docker CLI using the official Docker installation script 7 | RUN curl -fsSL https://get.docker.com -o get-docker.sh && \ 8 | sh get-docker.sh 9 | 10 | # Copy the application code 11 | # Do this last to take advantage of the docker layer mechanism 12 | COPY . /app 13 | 14 | # Install Python dependencies 15 | RUN pip install . 16 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules/** 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/Footer.js: -------------------------------------------------------------------------------- 1 | import "../static/footer.css"; 2 | 3 | const Footer = () => { 4 | return ( 5 | 13 | ); 14 | }; 15 | 16 | export default Footer; 17 | -------------------------------------------------------------------------------- /SWE-agent/.devcontainer/oncreate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | set -x 5 | 6 | # Run from repo root 7 | 8 | sudo usermod -aG docker vscode 9 | sudo chmod 666 /var/run/docker.sock 10 | pip install -e '.' 
11 | cp .devcontainer/sample_keys.cfg keys.cfg 12 | cat .devcontainer/bashrc_epilog.sh >> ~/.bashrc 13 | 14 | # Install nodejs 15 | curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - &&\ 16 | sudo apt-get install -y nodejs 17 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/reportWebVitals.js: -------------------------------------------------------------------------------- 1 | const reportWebVitals = (onPerfEntry) => { 2 | if (onPerfEntry && onPerfEntry instanceof Function) { 3 | import("web-vitals").then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 4 | getCLS(onPerfEntry); 5 | getFID(onPerfEntry); 6 | getFCP(onPerfEntry); 7 | getLCP(onPerfEntry); 8 | getTTFB(onPerfEntry); 9 | }); 10 | } 11 | }; 12 | 13 | export default reportWebVitals; 14 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_data/trajectories/gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1/solution_missing_colon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | def division(a: float, b: float) -> float: 5 | if b == 0: 6 | raise ValueError("Cannot divide by zero") 7 | return a / b 8 | 9 | 10 | if __name__ == "__main__": 11 | try: 12 | print(division(23, 0)) 13 | except ValueError as e: 14 | print(e) 15 | 16 | -------------------------------------------------------------------------------- /SWE-agent/codecov.yml: -------------------------------------------------------------------------------- 1 | # Configuration for codecov 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | # If we get < 50% coverage, codecov is gonna mark it a failure 7 | target: 50% 8 | threshold: null 9 | patch: 10 | default: 11 | # Codecov won't mark it as a failure if a patch is not covered well 12 | informational: true 13 | github_checks: 14 | # Don't mark lines that aren't covered 
15 | annotations: false 16 | 17 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/MacBar.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | import "../static/macbar.css"; 4 | 5 | const MacBar = ({ title, logo, dark = false }) => { 6 | const darkClass = dark ? "dark" : ""; 7 | return ( 8 |
9 |
10 | title 11 | {title} 12 |
13 |
14 | ); 15 | }; 16 | 17 | export default MacBar; 18 | -------------------------------------------------------------------------------- /SWE-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic[bedrock] 2 | config 3 | datasets 4 | docker >= 7.1.0 # https://github.com/princeton-nlp/SWE-agent/issues/379 5 | gymnasium 6 | numpy 7 | openai>=1.0 8 | pandas 9 | rich 10 | ruamel.yaml 11 | swebench>=2.0.0 12 | tenacity 13 | unidiff 14 | simple-parsing 15 | # Versions of together below 1.1.0 are not compatible. see https://github.com/princeton-nlp/SWE-agent/issues/135 16 | together>=1.1.0 17 | ollama 18 | rich-argparse 19 | flask 20 | flask-cors 21 | flask-socketio 22 | groq -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/App.js: -------------------------------------------------------------------------------- 1 | import Footer from "./components/Footer"; 2 | import { Routes, Route } from "react-router-dom"; 3 | import Run from "./Run"; 4 | import Header from "./components/Header"; 5 | import "./static/font.css"; 6 | import "./static/index.css"; 7 | 8 | function App() { 9 | return ( 10 |
11 |
12 | 13 | } /> 14 | 15 |
16 |
17 | ); 18 | } 19 | 20 | export default App; 21 | -------------------------------------------------------------------------------- /SWE-agent/.devcontainer/bashrc_epilog.sh: -------------------------------------------------------------------------------- 1 | 2 | if [ -z "$(docker images -q sweagent/swe-agent 2> /dev/null)" ]; then ─╯ 3 | echo "⚠️ Please wait for the postCreateCommand to start and finish (a new window will appear shortly) ⚠️" 4 | fi 5 | 6 | echo "Here's an example SWE-agent command to try out:" 7 | echo "python run.py --model_name gpt4 --data_path https://github.com/pvlib/pvlib-python/issues/1603 --config_file config/default_from_url.yaml" 8 | echo "Alternatively, start the web UI with " 9 | echo "./start_web_ui.sh" 10 | -------------------------------------------------------------------------------- /SWE-agent/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # bash strict mode 4 | set -euo pipefail 5 | 6 | echo "Setting up docker image for swe-agent..." 7 | # TARGETARCH should be set automatically on most (but not all) systems, see 8 | # https://github.com/princeton-nlp/SWE-agent/issues/245 9 | docker build -t sweagent/swe-agent:latest -f docker/swe.Dockerfile --build-arg TARGETARCH=$(uname -m) . 10 | 11 | echo "Setting up docker image for evaluation..." 12 | docker build -t sweagent/swe-eval:latest -f docker/eval.Dockerfile . 13 | 14 | echo "Done with setup!" 
15 | -------------------------------------------------------------------------------- /SWE-agent/config/commands/_split_string.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | def print_flake8_output(input_string, show_line_numbers=False): 5 | for value in input_string.split("\n"): 6 | parts = value.split() 7 | if not show_line_numbers: 8 | print(f"- {' '.join(parts[1:])}") 9 | else: 10 | line_nums = ":".join(parts[0].split(":")[1:]) 11 | print(f"- {line_nums} {' '.join(parts[1:])}") 12 | 13 | if __name__ == "__main__": 14 | lint_output = sys.argv[1] 15 | print_flake8_output(lint_output) 16 | -------------------------------------------------------------------------------- /SWE-agent/analysis/analysis.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | with open('/shared/data3/siruo2/SWE-agent/trajectories/siruo2/gpt4__SWE-bench_Lite__default__t-0.00__p-0.95__c-3.00__install-1/pvlib__pvlib-python-1072.traj', 'r') as f: 5 | data = json.load(f) 6 | 7 | pattern = r"'fname': '([^']+)',\s" 8 | files = [] 9 | 10 | for item in data['codegraph']: 11 | if 'fname' in item['codegraph_cxt']: 12 | for f in re.findall(pattern, item['codegraph_cxt']): 13 | if 'test' not in f: 14 | files.append(f) 15 | 16 | print(len(list(set(files)))) 17 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/runControl.css: -------------------------------------------------------------------------------- 1 | .tab-content > .tab-pane { 2 | border-left: 1px solid #ddd; 3 | border-right: 1px solid #ddd; 4 | border-bottom: 1px solid #ddd; 5 | padding: 10px; 6 | } 7 | 8 | .nav-tabs { 9 | margin-bottom: 0 !important; 10 | } 11 | 12 | .tab-pane { 13 | background-color: white; 14 | } 15 | 16 | div .runControl { 17 | background-color: white; 18 | padding: 10px; 19 | border-radius: 0 0 
0.5em 0.5em; 20 | border: 1px solid #ddd; 21 | border-top: none; 22 | display: flex; 23 | justify-content: space-between; 24 | } 25 | -------------------------------------------------------------------------------- /SWE-agent/inspector/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Trajectory Viewer 4 | 5 | 6 | 7 | 8 |
9 |

Trajectory File Viewer

10 | 11 |

Conversation History

12 |
No file selected.
13 |
14 | 15 |
16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /SWE-agent/docs/github_codespace.md: -------------------------------------------------------------------------------- 1 | # SWE-agent on GitHub Codespaces 2 | 3 | ## Running the web UI 4 | 5 | Go to the terminal and enter 6 | 7 | ```bash 8 | ./start_web_ui.sh 9 | ``` 10 | 11 | After a while, you should see a popup offering you to forward port `3000`. Click `Open in Browser`. 12 | 13 | ![port 3000 forwarding popup](assets/open_port_default.png) 14 | 15 | 16 | If you instead only see the offer to forward port `8000`, do not click it (this is the port that's being used by the backend). 17 | 18 | Instead, click on the `Ports` tab, and click on the globe next to port `3000`: 19 | 20 | ![port 3000 forwarding manual](assets/open_port_in_browser.png) -------------------------------------------------------------------------------- /SWE-agent/keys.cfg: -------------------------------------------------------------------------------- 1 | All keys are commented out by default. 
Make sure to remove the leading '#' of the relevant lines 2 | GITHUB_TOKEN: 'GitHub Token to clone private repos' 3 | OPENAI_API_KEY: 'OpenAI API Key Here if using OpenAI Model' 4 | ANTHROPIC_API_KEY: 'Anthropic API Key Here if using Anthropic Model' 5 | TOGETHER_API_KEY: 'Together API Key Here if using Together Model' 6 | AZURE_OPENAI_API_KEY: '[REDACTED]' 7 | AZURE_OPENAI_ENDPOINT: 'Azure OpenAI Endpoint Here if using Azure OpenAI Model' 8 | AZURE_OPENAI_DEPLOYMENT: 'Azure OpenAI Deployment Here if using Azure OpenAI Model' 9 | AZURE_OPENAI_API_VERSION: 'Azure OpenAI API Version Here if using Azure OpenAI Model' 10 | OPENAI_API_BASE_URL: '[REDACTED]' -------------------------------------------------------------------------------- /SWE-agent/sweagent/api/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic==0.25.3 2 | blinker==1.7.0 3 | certifi==2024.2.2 4 | cloudpickle==3.0.0 5 | config==0.5.1 6 | decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work 7 | docker==7.0.0 8 | Flask==3.0.3 9 | Flask-Cors==4.0.0 10 | Flask-SocketIO==5.3.6 11 | ghapi==1.0.5 12 | GitPython==3.1.43 13 | gymnasium==0.29.1 14 | multidict==6.0.5 15 | multiprocess==0.70.16 16 | numpy==1.26.4 17 | ollama==0.1.8 18 | openai==1.20.0 19 | pandas==2.2.2 20 | pyarrow==15.0.2 21 | python-dateutil==2.9.0.post0 22 | python-dotenv==1.0.1 23 | python-engineio==4.9.0 24 | python-socketio==5.11.2 25 | pytz==2024.1 26 | PyYAML==6.0.1 27 | requests==2.31.0 28 | swebench==1.1.0 29 | tokenizers==0.15.2 -------------------------------------------------------------------------------- /SWE-agent/.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "SWE-Agent Codespace", 3 | "image": "mcr.microsoft.com/vscode/devcontainers/miniconda:0-3", 4 | "customizations":{ 5 | "vscode":{ 6 | "extensions": [ 7 | "ms-python.python", 8 | 
"ms-azuretools.vscode-docker", 9 | "ms-toolsai.jupyter" 10 | ] 11 | } 12 | }, 13 | "onCreateCommand": "./.devcontainer/oncreate.sh", 14 | "postCreateCommand": "./.devcontainer/postcreate.sh", 15 | "features": { 16 | "docker-in-docker": "latest" 17 | }, 18 | "mounts": [ 19 | "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" 20 | ], 21 | "remoteUser": "vscode" 22 | } 23 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/index.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { createRoot } from "react-dom/client"; 3 | import { BrowserRouter } from "react-router-dom"; 4 | import "bootstrap/dist/css/bootstrap.css"; 5 | import App from "./App"; 6 | import reportWebVitals from "./reportWebVitals"; 7 | 8 | const container = document.getElementById("root"); 9 | const root = createRoot(container); // Create a root. 10 | root.render( 11 | 12 | 13 | , 14 | ); 15 | 16 | // If you want to start measuring performance in your app, pass a function 17 | // to log results (for example: reportWebVitals(console.log)) 18 | // or send to an analytics endpoint. 
Learn more: https://bit.ly/CRA-vitals 19 | reportWebVitals(); 20 | -------------------------------------------------------------------------------- /SWE-agent/docker/getconda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Helper script to get the right conda version inside of the container 4 | # This logic is put inside of the container rather than in the build script 5 | # so that we can easily do multi-platform builds 6 | 7 | arch=$1 8 | echo "arch", $arch 9 | if [[ "$arch" == "x86_64" || "$arch" == "amd64" ]]; then 10 | echo "Building the x86 Docker image" 11 | wget https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-1-Linux-x86_64.sh -O miniconda.sh 12 | elif [[ "$arch" == "aarch64" || "$arch" == "arm64" ]]; then 13 | echo "Ayy, arm64 in the house!" 14 | wget https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-1-Linux-aarch64.sh -O miniconda.sh 15 | else 16 | echo "unknown architecture detected?" 17 | echo $arch 18 | exit 1 19 | fi 20 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | 3 | from pathlib import Path 4 | from sweagent.agent.agents import ( 5 | Agent, 6 | AgentArguments, 7 | ) 8 | 9 | from sweagent.agent.models import ( 10 | ModelArguments, 11 | ) 12 | 13 | from sweagent.environment.swe_env import ( 14 | EnvironmentArguments, 15 | SWEEnv, 16 | ) 17 | 18 | from sweagent.environment.utils import ( 19 | get_data_path_name, 20 | ) 21 | 22 | PACKAGE_DIR = Path(__file__).resolve().parent 23 | assert PACKAGE_DIR.is_dir() 24 | CONFIG_DIR = PACKAGE_DIR.parent / "config" 25 | assert CONFIG_DIR.is_dir() 26 | 27 | 28 | __all__ = [ 29 | "Agent", 30 | "AgentArguments", 31 | "ModelArguments", 32 | "EnvironmentArguments", 33 | "SWEEnv", 34 | "get_data_path_name", 35 | "PACKAGE_DIR", 36 | ] 37 | 
-------------------------------------------------------------------------------- /SWE-agent/.devcontainer/sample_keys.cfg: -------------------------------------------------------------------------------- 1 | # All keys are commented out by default. Make sure to remove the leading '#' of the relevant lines 2 | # GITHUB_TOKEN: 'GitHub Token to clone private repos' 3 | # OPENAI_API_KEY: 'OpenAI API Key Here if using OpenAI Model' 4 | # ANTHROPIC_API_KEY: 'Anthropic API Key Here if using Anthropic Model' 5 | # TOGETHER_API_KEY: 'Together API Key Here if using Together Model' 6 | # AZURE_OPENAI_API_KEY: 'Azure OpenAI API Key Here if using Azure OpenAI Model' 7 | # AZURE_OPENAI_ENDPOINT: 'Azure OpenAI Endpoint Here if using Azure OpenAI Model' 8 | # AZURE_OPENAI_DEPLOYMENT: 'Azure OpenAI Deployment Here if using Azure OpenAI Model' 9 | # AZURE_OPENAI_API_VERSION: 'Azure OpenAI API Version Here if using Azure OpenAI Model' 10 | # OPENAI_API_BASE_URL: 'LM base URL here if using Local or alternative api Endpoint' 11 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "basic-sans"; 3 | background-color: #fff7ec; 4 | } 5 | 6 | hr { 7 | border: none; 8 | border-top: 1px solid #bbb; 9 | margin: 0 auto; 10 | margin-top: 1em; 11 | } 12 | 13 | .container-demo { 14 | margin: 0 auto; 15 | width: 54%; 16 | } 17 | 18 | .container-about { 19 | margin: 0 auto; 20 | width: 50%; 21 | } 22 | 23 | @media only screen and (max-width: 768px) { 24 | hr { 25 | width: 95%; 26 | } 27 | .container-demo { 28 | width: 95%; 29 | } 30 | .container-about { 31 | width: 95%; 32 | } 33 | } 34 | 35 | @media only screen and (max-width: 1440px) { 36 | .container-about { 37 | width: 75%; 38 | } 39 | } 40 | 41 | @media only screen and (max-width: 2400px) { 42 | .container-demo { 43 | width: 80%; 44 | } 45 | } 46 | 
-------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "basic-sans"; 3 | background-color: #fff7ec; 4 | } 5 | 6 | hr { 7 | border: none; 8 | border-top: 1px solid #bbb; 9 | margin: 0 auto; 10 | margin-top: 1em; 11 | } 12 | 13 | .container-demo { 14 | margin: 0 auto; 15 | width: 54%; 16 | } 17 | 18 | .container-about { 19 | margin: 0 auto; 20 | width: 50%; 21 | } 22 | 23 | @media only screen and (max-width: 768px) { 24 | hr { 25 | width: 95%; 26 | } 27 | .container-demo { 28 | width: 95%; 29 | } 30 | .container-about { 31 | width: 95%; 32 | } 33 | } 34 | 35 | @media only screen and (max-width: 1440px) { 36 | .container-about { 37 | width: 75%; 38 | } 39 | } 40 | 41 | @media only screen and (max-width: 2400px) { 42 | .container-demo { 43 | width: 80%; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /run_repograph_agentless.sh: -------------------------------------------------------------------------------- 1 | PYTHONPATH=".:agentless/" python agentless/fl/localize.py \ 2 | --file_level \ 3 | --related_level \ 4 | --fine_grain_line_level \ 5 | --output_folder=results/location \ 6 | --top_n=3 \ 7 | --compress \ 8 | --context_window=10 \ 9 | --repo_graph 10 | 11 | PYTHONPATH=".:agentless/" python agentless/repair/repair.py \ 12 | --loc_file=results/location/loc_outputs_codegraph.jsonl \ 13 | --output_folder=results/repair \ 14 | --loc_interval \ 15 | --top_n=3 \ 16 | --context_window=10 \ 17 | --max_samples=10 \ 18 | --cot \ 19 | --diff_format \ 20 | --gen_and_process \ 21 | --repo_graph 22 | 23 | PYTHONPATH=".:agentless/" python agentless/repair/rerank.py \ 24 | --patch_folder=results/repair \ 25 | --num_samples=10 \ 26 | --deduplicate \ 27 | --plausible 28 | 
-------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/AgentMessage.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | import "../static/message.css"; 4 | import "../static/agentMessage.css"; 5 | import { Gear } from "react-bootstrap-icons"; 6 | 7 | const AgentMessage = ({ 8 | item, 9 | handleMouseEnter, 10 | handleMouseLeave, 11 | isHighlighted, 12 | feedRef, 13 | }) => { 14 | const stepClass = item.step !== null ? `step${item.step}` : ""; 15 | const highlightClass = isHighlighted ? "highlight" : ""; 16 | 17 | return ( 18 |
handleMouseEnter(item, feedRef)} 21 | onMouseLeave={handleMouseLeave} 22 | > 23 | {item.type !== "thought" && } 24 | {item.message} 25 |
26 | ); 27 | }; 28 | 29 | export default AgentMessage; 30 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/macbar.css: -------------------------------------------------------------------------------- 1 | .mac-window-top-bar { 2 | background-color: #f0f0f0; 3 | background-image: linear-gradient(to bottom, #f0f0f0, #e0e0e0); 4 | border-top-left-radius: 0.5em; 5 | border-top-right-radius: 0.5em; 6 | height: 2em; 7 | position: relative; 8 | display: flex; 9 | align-items: center; 10 | } 11 | 12 | .mac-window-top-bar.dark { 13 | background-color: #f0f0f0; 14 | background-image: linear-gradient( 15 | to bottom, 16 | rgba(0, 0, 0, 0.6), 17 | rgba(0, 0, 0, 0.9) 18 | ); 19 | /* messageStyle={{ 20 | color: "white", 21 | fontSize: "smaller", 22 | marginBottom: "0.1em", 23 | }} */ 24 | } 25 | 26 | .label { 27 | align-items: center; 28 | display: flex; 29 | margin: 1em 1em 1em 0.5em; 30 | 31 | img { 32 | height: 1em; 33 | margin-right: 0.4em; 34 | } 35 | 36 | span { 37 | color: #888; 38 | font-size: smaller; 39 | } 40 | } 41 | 42 | .dark .label span { 43 | color: #eae5e5; 44 | } 45 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/Header.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import "../static/header.css"; 3 | import logo from "../assets/logo.png"; 4 | import { Link } from "react-router-dom"; 5 | 6 | const Header = () => { 7 | return ( 8 | <> 9 |
10 | 15 | swe agent logo 20 | 21 | {/* 22 | 23 | 24 | 29 | 30 | */} 31 |
32 | 33 | ); 34 | }; 35 | 36 | export default Header; 37 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/patches/pydicom__pydicom-1458.patch: -------------------------------------------------------------------------------- 1 | 2 | diff --git a/pydicom/pixel_data_handlers/numpy_handler.py b/pydicom/pixel_data_handlers/numpy_handler.py 3 | index 8e8d319ae..6a1221b72 100644 4 | --- a/pydicom/pixel_data_handlers/numpy_handler.py 5 | +++ b/pydicom/pixel_data_handlers/numpy_handler.py 6 | @@ -285,9 +285,10 @@ def get_pixeldata(ds: "Dataset", read_only: bool = False) -> "np.ndarray": 7 | ) 8 | 9 | required_elements = [ 10 | - 'BitsAllocated', 'Rows', 'Columns', 'PixelRepresentation', 11 | - 'SamplesPerPixel', 'PhotometricInterpretation' 12 | + 'BitsAllocated', 'Rows', 'Columns', 'SamplesPerPixel', 'PhotometricInterpretation' 13 | ] 14 | + if 'PixelData' in ds: 15 | + required_elements.append('PixelRepresentation') 16 | missing = [elem for elem in required_elements if elem not in ds] 17 | if missing: 18 | raise AttributeError( 19 | -------------------------------------------------------------------------------- /SWE-agent/inspector/README.md: -------------------------------------------------------------------------------- 1 | # Inspector 2 | We provide a web interface for visualizing `.traj` files from the `trajectories` folder more easily. 3 | 4 | **Set Up** 5 | * Run `python server.py trajectories` 6 | * Open http://localhost:8000 in your browser to use the inspector. 7 | 8 | **Additional flags** 9 | - `--data_path`: Path to SWE-bench style dataset that trajectories were generated for (Optional) 10 | - `--directory`: Directory of trajectories to inspect (Defaults to `./trajectories` folder) 11 | - `--port`: Port to host web app (Defaults to `8000`). 
12 | 13 | **Example Usage** 14 | 15 | From running the command: 16 | ``` 17 | python server.py --directory trajectories/carlosejimenez/gpt-4-1106-preview__swe-bench-dev-40-seed24__default_sys-env_window100-detailed_cmd_format-full_history-1_demos__t-0.20__p-0.95__c-4.00__install-1__sweep-01-run-4 18 | ``` 19 | The inspector will then be launched in the browser: 20 | 21 |

22 | swe-agent.com 23 |

24 | -------------------------------------------------------------------------------- /SWE-agent/scripts/run_jsonl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if an argument was provided 4 | if [ "$#" -ne 1 ]; then 5 | echo "Usage: $0 " 6 | exit 1 7 | fi 8 | 9 | FILE="$1" 10 | 11 | # Check if the file exists and is readable 12 | if [ ! -f "$FILE" ] || [ ! -r "$FILE" ]; then 13 | echo "Error: File '$FILE' does not exist or is not readable." 14 | exit 2 15 | fi 16 | 17 | # Iterate over each line of the JSONL file 18 | while IFS= read -r line; do 19 | # Construct command arguments from the JSON map 20 | # jq -r '. | to_entries | .[] | "--\(.key) \(.value)"' converts each key-value pair in the JSON object 21 | # into a format suitable for passing to the Python script 22 | # xargs -n 2 groups them back into pairs to handle as arguments correctly 23 | ARGS=$(echo "$line" | jq -r '. | to_entries | .[] | "--\(.key) \(.value)"' | xargs -n 2 echo) 24 | 25 | # Execute the Python script with the constructed arguments 26 | echo $ARGS 27 | python run.py $ARGS 28 | done < "$FILE" 29 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/header.css: -------------------------------------------------------------------------------- 1 | header { 2 | margin-bottom: 1em; 3 | margin-top: 1.5em; 4 | margin-left: auto; 5 | margin-right: auto; 6 | align-items: left; 7 | justify-content: left; 8 | display: flex; 9 | width: 54%; 10 | } 11 | 12 | @media only screen and (max-width: 2400px) { 13 | header { 14 | width: 80%; 15 | } 16 | } 17 | 18 | header button { 19 | background: none; 20 | border: none; 21 | padding: 0; 22 | font-family: inherit; 23 | font-size: inherit; 24 | color: inherit; 25 | text-decoration: none; 26 | cursor: pointer; 27 | position: relative; 28 | } 29 | 30 | header button::before { 31 | content: ""; 32 | position: absolute; 33 | 
bottom: 0; 34 | left: 0; 35 | width: 0; 36 | height: 2px; 37 | background-color: #000; /* Change color as needed */ 38 | transition: width 0.3s; 39 | } 40 | 41 | header button:hover::before { 42 | width: 100%; 43 | } 44 | 45 | /* to revert Link colors to default */ 46 | header a { 47 | text-decoration: none; 48 | color: inherit; 49 | margin: 0 0.75em; 50 | } 51 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_data/trajectories/gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1/all_preds.jsonl: -------------------------------------------------------------------------------- 1 | {"model_name_or_path": "gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1", "instance_id": "pydicom__pydicom-1458", "model_patch": "\ndiff --git a/pydicom/pixel_data_handlers/numpy_handler.py b/pydicom/pixel_data_handlers/numpy_handler.py\nindex 8e8d319ae..6a1221b72 100644\n--- a/pydicom/pixel_data_handlers/numpy_handler.py\n+++ b/pydicom/pixel_data_handlers/numpy_handler.py\n@@ -285,9 +285,10 @@ def get_pixeldata(ds: \"Dataset\", read_only: bool = False) -> \"np.ndarray\":\n )\n \n required_elements = [\n- 'BitsAllocated', 'Rows', 'Columns', 'PixelRepresentation',\n- 'SamplesPerPixel', 'PhotometricInterpretation'\n+ 'BitsAllocated', 'Rows', 'Columns', 'SamplesPerPixel', 'PhotometricInterpretation'\n ]\n+ if 'PixelData' in ds:\n+ required_elements.append('PixelRepresentation')\n missing = [elem for elem in required_elements if elem not in ds]\n if missing:\n raise AttributeError(\n"} 2 | -------------------------------------------------------------------------------- /SWE-agent/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_commit_msg: "chore: update pre-commit hooks" 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: "v4.6.0" 7 | hooks: 8 | - id: 
check-added-large-files 9 | - id: check-case-conflict 10 | - id: check-merge-conflict 11 | - id: check-symlinks 12 | - id: mixed-line-ending 13 | - id: detect-private-key 14 | - id: check-ast 15 | 16 | - repo: https://github.com/crate-ci/typos 17 | rev: v1.20.7 18 | hooks: 19 | - id: typos 20 | files: \.(py|md|rst|yaml|toml) 21 | # empty args so that fixes are not written automatically 22 | args: [] 23 | exclude: pyproject.toml 24 | 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | rev: v0.4.3 27 | hooks: 28 | # Run the linter. 29 | - id: ruff 30 | # Run the formatter. 31 | # - id: ruff-format 32 | 33 | - repo: https://github.com/pre-commit/mirrors-prettier 34 | rev: "" # Use the sha or tag you want to point at 35 | hooks: 36 | - id: prettier 37 | types_or: ["javascript", "css"] 38 | -------------------------------------------------------------------------------- /SWE-agent/evaluation/run_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # The first positional argument 4 | predictions_path=$1 5 | 6 | # Check if predictions_path is not provided 7 | if [ -z "$predictions_path" ]; then 8 | echo "Usage: $0 [dataset_name_or_path] [results_dir] [testbed_dir]" 9 | exit 1 10 | fi 11 | 12 | # Default values for the optional arguments 13 | dataset_name_or_path="${2:-princeton-nlp/SWE-bench}" 14 | results_dir="${3:-results}" 15 | testbed_dir="${4:-testbed}" 16 | 17 | # If results or testbed directories do not exist, create them 18 | if [ ! -d "$results_dir" ]; then 19 | mkdir -p "$results_dir" 20 | echo "Created results directory at $results_dir" 21 | fi 22 | 23 | if [ !
-d "$testbed_dir" ]; then 24 | mkdir -p "$testbed_dir" 25 | echo "Created testbed directory at $testbed_dir" 26 | fi 27 | 28 | # Run the Python script with the specified arguments 29 | python evaluation.py \ 30 | --predictions_path "$predictions_path" \ 31 | --swe_bench_tasks "$dataset_name_or_path" \ 32 | --log_dir "$results_dir" \ 33 | --testbed "$testbed_dir" \ 34 | --skip_existing \ 35 | --timeout 900 \ 36 | --verbose 37 | -------------------------------------------------------------------------------- /SWE-agent/scripts/run_and_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #this script runs and evaluates the agent N times. 3 | #to run: 4 | #bash run_and_eval.sh '' default_with_inclusive_edit_demo_v2 data/dev-easy/swe-bench-dev-easy-med.json 3 5 | # vars: suffix template data number of runs 6 | 7 | # define user variables 8 | suffix=${1:-''} 9 | template=$2 10 | dataset_path=$3 11 | num_runs=$4 12 | 13 | # extract filename from the dataset path 14 | dataset_name=`basename $dataset_path` 15 | 16 | for((i=1; i<=num_runs; i++)); do 17 | # command 1 18 | python run.py --model_name gpt4 --data_path $dataset_path --config_file config/configs/$template.yaml --suffix ${suffix}run${i} --temperature 0.2 --top_p 0.95 --per_instance_cost_limit 3.00 --install_environment 1 19 | 20 | # command 2 21 | python evaluation/evaluation.py \ 22 | --predictions_path trajectories/$USER/gpt4__${dataset_name}__$template__t-0.20__p-0.95__c-3.00__install-1__${suffix}run${i}/all_preds.jsonl \ 23 | --swe_bench_tasks $dataset_path \ 24 | --log_dir ./results \ 25 | --testbed ./testbed \ 26 | --skip_existing \ 27 | --timeout 900 \ 28 | --verbose 29 | done 30 | -------------------------------------------------------------------------------- /SWE-agent/inspector/icons/edit_icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 
-------------------------------------------------------------------------------- /SWE-agent/sweagent/environment/retrieve_graph.py: -------------------------------------------------------------------------------- 1 | # retrieve code graph scripts 2 | 3 | import pickle 4 | import sys 5 | import json 6 | 7 | def main(func_name): 8 | with open('/graph.pkl', 'rb') as f: 9 | G = pickle.load(f) 10 | with open('/tags.json', 'r') as f: 11 | tags = f.readlines() 12 | tags = [json.loads(tag) for tag in tags] 13 | 14 | try: 15 | successors = list(G.successors(func_name)) 16 | predecessors = list(G.predecessors(func_name)) 17 | tags2names = {tag['name']: tag for tag in tags} 18 | returned_files = [] 19 | for item in successors+[func_name]+predecessors: 20 | if 'test' in tags2names[item]['fname']: 21 | continue 22 | returned_files.append({ 23 | "fname": tags2names[item]['fname'], 24 | 'line': tags2names[item]['line'], 25 | 'name': tags2names[item]['name'], 26 | 'kind': tags2names[item]['kind'], 27 | 'category': tags2names[item]['category'], 28 | 'info': tags2names[item]['info'], 29 | }) 30 | print(returned_files) 31 | except: 32 | print("None") 33 | 34 | if __name__ == '__main__': 35 | func_name = sys.argv[1] 36 | main(func_name) -------------------------------------------------------------------------------- /SWE-agent/docker/swe.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:jammy 2 | 3 | ARG TARGETARCH 4 | 5 | # Install third party tools 6 | RUN apt-get update && \ 7 | apt-get install -y bash gcc git jq wget g++ make && \ 8 | apt-get clean && \ 9 | rm -rf /var/lib/apt/lists/* 10 | 11 | # Initialize git 12 | RUN git config --global user.email "sweagent@pnlp.org" 13 | RUN git config --global user.name "sweagent" 14 | 15 | # Environment variables 16 | ENV ROOT='/dev/' 17 | RUN prompt() { echo " > "; }; 18 | ENV PS1="> " 19 | 20 | # Create file for tracking edits, test patch 21 | RUN touch /root/files_to_edit.txt 22 | 
RUN touch /root/test.patch 23 | 24 | # add ls file indicator 25 | RUN echo "alias ls='ls -F'" >> /root/.bashrc 26 | 27 | # Install miniconda 28 | ENV PATH="/root/miniconda3/bin:${PATH}" 29 | ARG PATH="/root/miniconda3/bin:${PATH}" 30 | COPY docker/getconda.sh . 31 | RUN bash getconda.sh ${TARGETARCH} \ 32 | && rm getconda.sh \ 33 | && mkdir /root/.conda \ 34 | && bash miniconda.sh -b \ 35 | && rm -f miniconda.sh 36 | RUN conda --version \ 37 | && conda init bash \ 38 | && conda config --append channels conda-forge 39 | 40 | # Install python packages 41 | COPY docker/requirements.txt /root/requirements.txt 42 | RUN pip install -r /root/requirements.txt 43 | 44 | WORKDIR / 45 | 46 | CMD ["/bin/bash"] 47 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@testing-library/jest-dom": "^5.17.0", 7 | "@testing-library/react": "^13.4.0", 8 | "@testing-library/user-event": "^13.5.0", 9 | "axios": "^1.6.8", 10 | "bootstrap": "^5.3.3", 11 | "pm2": "^5.3.1", 12 | "react": "^18.2.0", 13 | "react-bootstrap": "^2.10.2", 14 | "react-bootstrap-icons": "^1.11.4", 15 | "react-dom": "^18.2.0", 16 | "react-markdown": "^9.0.1", 17 | "react-router-dom": "^6.22.3", 18 | "react-scripts": "5.0.1", 19 | "react-syntax-highlighter": "^15.5.0", 20 | "socket.io-client": "^4.7.5", 21 | "web-vitals": "^2.1.4" 22 | }, 23 | "scripts": { 24 | "start": "react-scripts start", 25 | "build": "react-scripts build", 26 | "test": "react-scripts test", 27 | "eject": "react-scripts eject" 28 | }, 29 | "eslintConfig": { 30 | "extends": [ 31 | "react-app", 32 | "react-app/jest" 33 | ] 34 | }, 35 | "browserslist": { 36 | "production": [ 37 | ">0.2%", 38 | "not dead", 39 | "not op_mini all" 40 | ], 41 | "development": [ 42 | "last 1 chrome version", 43 | "last 
1 firefox version", 44 | "last 1 safari version" 45 | ] 46 | }, 47 | "proxy": "http://localhost:8000" 48 | } 49 | -------------------------------------------------------------------------------- /SWE-agent/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from pytest import fixture 7 | import sweagent.environment.utils as env_utils 8 | 9 | # this is a hack and should be removed when we have a better solution 10 | _this_dir = Path(__file__).resolve().parent 11 | root_dir = _this_dir.parent 12 | package_dir = root_dir / "sweagent" 13 | sys.path.insert(0, str(root_dir)) 14 | sys.path.insert(1, str(package_dir)) 15 | os.environ["SWE_AGENT_EXPERIMENTAL_COMMUNICATE"] = "1" 16 | env_utils.START_UP_DELAY = 1 17 | 18 | 19 | @fixture 20 | def test_data_path() -> Path: 21 | p = _this_dir / "test_data" 22 | assert p.is_dir() 23 | return p 24 | 25 | @fixture 26 | def test_trajectories_path(test_data_path) -> Path: 27 | p = test_data_path / "trajectories" 28 | assert p.is_dir() 29 | return p 30 | 31 | 32 | @fixture 33 | def test_data_sources_path(test_data_path) -> Path: 34 | p = test_data_path / "data_sources" 35 | assert p.is_dir() 36 | return p 37 | 38 | @fixture 39 | def test_trajectory_path(test_trajectories_path) -> Path: 40 | traj = test_trajectories_path / "gpt4__klieret__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1" / "klieret__swe-agent-test-repo-i1.traj" 41 | assert traj.exists() 42 | return traj 43 | 44 | @fixture 45 | def test_trajectory(test_trajectory_path): 46 | return json.loads(test_trajectory_path.read_text()) 47 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/panels/AgentFeed.js: -------------------------------------------------------------------------------- 1 | import React, { useEffect } from "react"; 2 | import Message 
from "../AgentMessage"; 3 | import MacBar from "../MacBar"; 4 | import editorLogo from "../../assets/panel_icons/editor.png"; 5 | import "../../static/agentFeed.css"; 6 | 7 | function useScrollToBottom(feed, ref) { 8 | useEffect(() => { 9 | if (ref.current) { 10 | ref.current.scrollTop = ref.current.scrollHeight; 11 | } 12 | }, [feed, ref]); 13 | } 14 | 15 | const AgentFeed = ({ 16 | feed, 17 | highlightedStep, 18 | handleMouseEnter, 19 | handleMouseLeave, 20 | selfRef, 21 | }) => { 22 | useScrollToBottom(feed, selfRef); 23 | 24 | return ( 25 |
26 | 27 |
28 |
29 | {feed.map((item, index) => ( 30 | 40 | ))} 41 |
42 |
43 |
44 |
45 | ); 46 | }; 47 | 48 | export default AgentFeed; 49 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/panels/EnvFeed.js: -------------------------------------------------------------------------------- 1 | import React, { useEffect } from "react"; 2 | import EnvMessage from "../EnvMessage"; 3 | 4 | import MacBar from "../MacBar"; 5 | import terminalLogo from "../../assets/panel_icons/terminal.png"; 6 | import "../../static/envFeed.css"; 7 | 8 | function useScrollToBottom(feed, ref) { 9 | useEffect(() => { 10 | if (ref.current) { 11 | ref.current.scrollTop = ref.current.scrollHeight; 12 | } 13 | }, [feed, ref]); 14 | } 15 | 16 | const EnvFeed = ({ 17 | feed, 18 | highlightedStep, 19 | handleMouseEnter, 20 | handleMouseLeave, 21 | selfRef, 22 | }) => { 23 | useScrollToBottom(feed, selfRef); 24 | 25 | return ( 26 |
27 | 28 |
29 |
30 | {feed.map((item, index) => ( 31 | 41 | ))} 42 |
43 |
44 |
45 |
46 | ); 47 | }; 48 | 49 | export default EnvFeed; 50 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/utils/icons/ExpandIcon.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | const ExpandIcon = ({ fillColor, height, style }) => ( 4 | 11 | 12 | 19 | 26 | 27 | 28 | ); 29 | 30 | // Set default props 31 | ExpandIcon.defaultProps = { 32 | fillColor: "#000000", 33 | height: "24px", 34 | style: {}, 35 | }; 36 | 37 | export default ExpandIcon; 38 | -------------------------------------------------------------------------------- /repograph/graph_searcher.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | class RepoSearcher: 4 | def __init__(self, graph): 5 | self.graph = graph 6 | 7 | def one_hop_neighbors(self, query): 8 | # get one-hop neighbors from networkx graph 9 | return list(self.graph.neighbors(query)) 10 | 11 | def two_hop_neighbors(self, query): 12 | # get two-hop neighbors from networkx graph 13 | one_hop = self.one_hop_neighbors(query) 14 | two_hop = [] 15 | for node in one_hop: 16 | two_hop.extend(self.one_hop_neighbors(node)) 17 | return list(set(two_hop)) 18 | 19 | def dfs(self, query, depth): 20 | # perform depth-first search on networkx graph 21 | visited = [] 22 | stack = [(query, 0)] 23 | while stack: 24 | node, level = stack.pop() 25 | if node not in visited: 26 | visited.append(node) 27 | if level < depth: 28 | stack.extend( 29 | [(n, level + 1) for n in self.one_hop_neighbors(node)] 30 | ) 31 | return visited 32 | 33 | def bfs(self, query, depth): 34 | # perform breadth-first search on networkx graph 35 | visited = [] 36 | queue = [(query, 0)] 37 | while queue: 38 | node, level = queue.pop(0) 39 | if node not in visited: 40 | visited.append(node) 41 | if level < depth: 42 | queue.extend( 43 | [(n, level + 1) for n in 
def combine_by_instance_id(data):
    """
    Combine data entries by their instance ID.

    Every non-``instance_id`` field of every entry is accumulated into a
    per-instance list: list values are concatenated, any other value is
    appended as a single element.

    Arguments:
    data -- a list of dictionaries with instance IDs and other information

    Returns:
    A list of combined dictionaries by instance ID with all associated data,
    in order of first appearance. Entries whose ``instance_id`` is missing or
    falsy are skipped, and an instance that never contributes any other field
    does not appear in the result.
    """
    # Imported locally because this module's top-level imports do not provide
    # ``defaultdict``; without this the function raised NameError when called.
    from collections import defaultdict

    combined_data = defaultdict(lambda: defaultdict(list))
    for item in data:
        instance_id = item.get("instance_id")
        if not instance_id:
            continue
        for key, value in item.items():
            if key == "instance_id":
                continue
            # Normalize scalars to one-element lists so everything can be extended.
            combined_data[instance_id][key].extend(
                value if isinstance(value, list) else [value]
            )
    return [
        {"instance_id": iid, **details} for iid, details in combined_data.items()
    ]
Edit the demo by hand to make it work for your particular use case and configuration. 14 | 4. Run `python run_replay.py --traj_path --config_file ` to execute the actions of the demo, have the system generate the execution output, and ensure that it works as expected. 15 | - Protip: you can use `sweagent.run` with `--model_name human` to manually execute actions in the environment - useful for debugging. 16 | 5. Inspect the resulting trajectory to ensure it was executed correctly. -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/components/panels/LogPanel.js: -------------------------------------------------------------------------------- 1 | import MacBar from "../MacBar"; 2 | import workspaceLogo from "../../assets/panel_icons/workspace.png"; 3 | import "../../static/logPanel.css"; 4 | import { Button } from "react-bootstrap"; 5 | import { Clipboard } from "react-bootstrap-icons"; 6 | 7 | const LogPanel = ({ logs, logsRef, isComputing }) => { 8 | const copyToClipboard = (text) => { 9 | // Create a temporary textarea element 10 | const textarea = document.createElement("textarea"); 11 | textarea.value = text; 12 | document.body.appendChild(textarea); 13 | 14 | // Select and copy the text 15 | textarea.select(); 16 | document.execCommand("copy"); 17 | 18 | // Clean up 19 | document.body.removeChild(textarea); 20 | }; 21 | 22 | const handleCopy = () => { 23 | const contentToCopy = document.getElementById("logContent").innerText; 24 | copyToClipboard(contentToCopy); 25 | }; 26 | 27 | return ( 28 |
29 | 30 |
31 |
32 |
{logs}
33 |
34 |
35 | {!isComputing && logs && ( 36 |
39 | 46 |
47 | )} 48 |
49 |
50 | ); 51 | }; 52 | 53 | export default LogPanel; 54 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | React App 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_models.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, Mock, patch 2 | from sweagent.agent.models import OpenAIModel, ModelArguments, TogetherModel 3 | import pytest 4 | 5 | 6 | @pytest.fixture 7 | def openai_mock_client(): 8 | model = Mock() 9 | response = Mock() 10 | choice = Mock() 11 | choice.message.content = "test" 12 | response.choices = [choice] 13 | response.usage.prompt_tokens = 10 14 | response.usage.completion_tokens = 10 15 | model.chat.completions.create = MagicMock(return_value=response) 16 | 17 | return model 18 | 19 | @pytest.fixture 20 | def mock_together_response(): 21 | return { 22 | "choices": [{"text": "Hello"}], 23 | "usage": {"prompt_tokens": 10, "completion_tokens": 10}, 24 | } 25 | 26 | 27 | TEST_HISTORY = [ 28 | { 29 | "role": "system", 30 | "content": "Hello, how are you?" 
31 | } 32 | ] 33 | 34 | 35 | def test_openai_model(openai_mock_client): 36 | for model_name in list(OpenAIModel.MODELS) + list(OpenAIModel.SHORTCUTS): 37 | TEST_MODEL_ARGUMENTS = ModelArguments(model_name) 38 | with patch("sweagent.agent.models.config.Config"), patch("sweagent.agent.models.OpenAI"): 39 | model = OpenAIModel(TEST_MODEL_ARGUMENTS, []) 40 | model.client = openai_mock_client 41 | model.query(TEST_HISTORY) 42 | 43 | 44 | @pytest.mark.parametrize("model_name", list(TogetherModel.MODELS) + list(TogetherModel.SHORTCUTS)) 45 | def test_together_model(mock_together_response, model_name): 46 | with patch("sweagent.agent.models.config.Config"), \ 47 | patch("sweagent.agent.models.together") as mock_together: 48 | mock_together.version = '1.1.0' 49 | mock_together.Complete.create.return_value = mock_together_response 50 | 51 | model_args = ModelArguments(model_name) 52 | model = TogetherModel(model_args, []) 53 | model.query(TEST_HISTORY) 54 | -------------------------------------------------------------------------------- /SWE-agent/trajectories/README.md: -------------------------------------------------------------------------------- 1 | # Trajectories 2 | 3 | The `trajectories/` folder is the default location that experiment results (invocations of `run.py`) will be written to. 4 | 5 | At a high level, the experiments folder is organized in the following manner: 6 | ``` 7 | trajectories 8 | ├── 👩‍💻 9 | │ ├── 🧪 10 | │ │ ├── all_preds.jsonl 11 | │ │ ├── args.yaml 12 | │ │ ├── *.html (Webpage Files) 13 | │ │ └── *.traj (Trajectories) 14 | │ └── 🧪 15 | │ ├── all_preds.jsonl 16 | │ ├── args.yaml 17 | │ ├── *.html (Webpage Files) 18 | │ └── *.traj (Trajectories) 19 | ├── 👨‍💻 20 | │ ├── 🧪 21 | │ │ └── ... 22 | │ └── 🧪 23 | │ └── ... 24 | ... 25 | ``` 26 | Where every experiment follows the pattern `trajectories//`. The `` is automatically inferred from your system, and the `experiment name` is inferred from the arguments of the `run.py`. 
27 | 28 | ## How an Experiment Folder is Generated 29 | 30 | Each call to `run.py` produces a single `trajectories//` folder containing the following assets: 31 | * `all_preds.jsonl`: A single file containing all of the predictions generated for the experiment (1 prediction per task instance), where each line is formatted as: 32 | ``` 33 | { 34 | "instance_id": "", 35 | "model_patch": "<.patch file content string>", 36 | "model_name_or_path": "", 37 | } 38 | ``` 39 | * `args.yaml`: A summary of the configurations for the experiment run. 40 | * `.traj`: A `.json` formatted file containing the (thought, action, observation) turns generated by SWE-agent towards solving ``. 41 | * `.html`: An `.html` single webpage render of the trajectory, which can be directly opened in the browser for easier viewing of the trajectory. 42 | 43 | > ⚠️ Notes 44 | > * Evaluation is not completed by `run.py`, it is a separate step. 45 | > * `all_preds.jsonl` can be referenced directly into `evaluation/run_eval.sh` to run evaluation. 
-------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/run.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-logo { 6 | height: 40vmin; 7 | pointer-events: none; 8 | } 9 | 10 | @media (prefers-reduced-motion: no-preference) { 11 | .App-logo { 12 | animation: App-logo-spin infinite 20s linear; 13 | } 14 | } 15 | 16 | .App-header { 17 | background-color: #282c34; 18 | min-height: 100vh; 19 | display: flex; 20 | flex-direction: column; 21 | align-items: center; 22 | justify-content: center; 23 | font-size: calc(10px + 2vmin); 24 | color: white; 25 | } 26 | 27 | .App-link { 28 | color: #61dafb; 29 | } 30 | 31 | @keyframes App-logo-spin { 32 | from { 33 | transform: rotate(0deg); 34 | } 35 | to { 36 | transform: rotate(360deg); 37 | } 38 | } 39 | 40 | /* .message { 41 | border: 1px solid #ccc; 42 | padding: 10px; 43 | margin-bottom: 10px; 44 | border-radius: 5px; 45 | } 46 | 47 | #container { 48 | display: flex; 49 | justify-content: space-between; 50 | padding: 10px; 51 | } 52 | 53 | #agentfeed, #environmentfeed { 54 | flex: 1; 55 | margin: 10px; 56 | max-width: 50%; 57 | overflow-y: auto; 58 | height: 500px; 59 | border: 1px solid #ccc; 60 | padding: 10px; 61 | border-radius: 5px; 62 | } */ 63 | 64 | #demo hr { 65 | border-top: 1px dotted #bbb; 66 | margin: 0.5em 0; 67 | width: 100%; 68 | } 69 | 70 | #demo .panels { 71 | display: grid; 72 | height: 65vh; 73 | grid-template-columns: 1fr 1fr 1fr 1fr 1fr; 74 | grid-template-rows: 1fr 2fr 1fr 1fr; 75 | 76 | .agentFeed, 77 | .envFeed, 78 | .logPanel { 79 | background-color: transparent; 80 | display: flex; 81 | flex-direction: column; 82 | height: 100%; 83 | margin: 0.2em; 84 | overflow: auto; 85 | } 86 | 87 | .envFeed { 88 | grid-column: 3 / 6; 89 | grid-row: 1 / 3; 90 | height: 95%; 91 | } 92 | 93 | .innerDiv { 94 | flex-grow: 1; 95 | } 96 | 97 | .scrollableDiv { 98 | 
height: 100%; 99 | overflow-y: hidden; 100 | background-color: white; 101 | border-radius: 0 0 0.5em 0.5em; 102 | } 103 | 104 | .scrollableDiv:hover { 105 | overflow: auto; 106 | } 107 | 108 | pre { 109 | white-space: pre-wrap; 110 | overflow-x: auto; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # macOS files 2 | .DS_Store 3 | results/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | *.xml 135 | *.gif -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RepoGraph: Enhancing AI Software Engineering with Repository-level Code Graph 2 | 3 | ## 📜 Overview 4 | 5 | We introduce RepoGraph, an effective plug-in repo-level module that offers the desired context and substantially boosts the LLMs' AI software engineering capability. 6 | 7 | ## 🆕 News 8 | 9 | We released the first version of RepoGraph and its integration with [SWE-bench](https://www.swebench.com/) methods! 10 | 11 | ## 🤖 Code Setup 12 | 13 | ### Folder and files 14 | 15 | `repograph` contains the code for constructing the graph and retrieving related context from it. 16 | 17 | `agentless` and `SWE-agent` incorporate the integrated version of RepoGraph with the two methods. 18 | 19 | Currently, this version may take a fairly long time to run for a repo.
We provide a cached version for all repos in SWE-bench; download it from [huggingface datasets](https://huggingface.co/datasets/MrZilinXiao/RepoGraph) or [Google Drive](https://drive.google.com/file/d/1-0d-OgGoOf3i54bWcf8H0egjQyTSZ8dG/view?usp=sharing) and put it under `repo_structures`. 20 | 21 | ### How to run? 22 | 23 | To generate the repograph for a given repository, simply run: 24 | 25 | ```bash 26 | python ./repograph/construct_graph.py 27 | ``` 28 | 29 | This will produce two files: `tags_{instance_id}.jsonl`, which stores the line-level information, and `{instance_id}.pkl`, which is the graph constructed using networkx. 30 | 31 | ## Integration with models on SWE-bench 32 | 33 | ### Procedural framework 34 | 35 | For a procedural framework, RepoGraph could be integrated into every step of the pipeline. Refer to the `--repo_graph` hyperparameter for controllability in different stages. 36 | 37 | To run RepoGraph with Agentless, use the command: 38 | 39 | ```bash 40 | bash run_repograph_agentless.sh 41 | ``` 42 | 43 | ### Agent framework 44 | 45 | To integrate RepoGraph with an agent framework such as SWE-agent, we simply add an extra action to its initial action space. Specifically, you can look up `search_repo()` in the corresponding directory. The signature is defined as: 46 | 47 | ```python 48 | search_repo: 49 | docstring: searches in the current repository with a specific function or class, and returns the def and ref relations for the search term. 50 | signature: search_repo 51 | arguments: 52 | - search_term (string) [required]: function or class to look for in the repository. 53 | ``` 54 | 55 | To run RepoGraph with SWE-agent, use the command: 56 | 57 | ```bash 58 | bash run_repograph_sweagent.sh 59 | ``` 60 | 61 | We are working on a preprint with details on RepoGraph and a more comprehensive/easy integration with existing models. Stay tuned!!
def parse_patch(patch):
    """
    Parse a git patch into a structured format.

    Parameters:
    patch (str): The git patch as a string.

    Returns:
    list: A list of dictionaries representing the file changes and hunks.
        Each file entry is ``{"file": <path taken from the "+++ b/" line>,
        "hunks": [...]}``; each hunk is ``{"start_line": int, "changes": [...]}``
        and each change is ``{"type": "add" | "delete", "content": <stripped
        line text>, "line": int}``.

        For an added line, ``line`` is the running position minus the
        deletions seen so far in the hunk; for a deleted line it is the
        running position minus the additions seen so far.

        NOTE: ``start_line`` doubles as the running position while the hunk is
        read (it is advanced once per hunk line), so after parsing it no
        longer holds the hunk's first line.
    """
    file_changes = []
    current_file = None
    current_hunk = None
    deleted_lines = 0
    added_lines = 0

    for line in patch.split("\n"):
        if line.startswith("diff --git"):
            # Reset for new files
            if current_file is not None:
                file_changes.append(current_file)
            current_file = {"file": "", "hunks": []}
            # A new file header closes the previous file's last hunk. Without
            # this, header lines of the next file (e.g. "--- /dev/null" for a
            # newly created file) were recorded as deletions in that hunk.
            current_hunk = None
        elif line.startswith("--- a/"):
            pass
        elif line.startswith("+++ b/"):
            if current_file is not None:
                # Drop the 6-character "+++ b/" prefix to get the path.
                current_file["file"] = line[6:]
        elif line.startswith("@@ "):
            if current_file is not None:
                match = re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@", line)
                if match:
                    # Group 2 is the hunk's starting line on the "+" side.
                    current_hunk = {"start_line": int(match.group(2)), "changes": []}
                    current_file["hunks"].append(current_hunk)
                    deleted_lines = 0
                    added_lines = 0
        elif line.startswith(("+", "-")):
            if current_hunk is not None:
                if line.startswith("+"):
                    change_type = "add"
                    added_lines += 1
                    offset = deleted_lines
                else:
                    change_type = "delete"
                    deleted_lines += 1
                    offset = added_lines
                current_hunk["changes"].append(
                    {
                        "type": change_type,
                        "content": line[1:].strip(),
                        "line": current_hunk["start_line"] - offset,
                    }
                )
                current_hunk["start_line"] += 1
        else:
            # Context (or any other) line inside a hunk: just advance the position.
            if current_hunk is not None:
                current_hunk["start_line"] += 1

    if current_file is not None:
        file_changes.append(current_file)

    return file_changes
`step${item.step}` : ""; 24 | const highlightClass = isHighlighted ? "highlight" : ""; 25 | const messageTypeClass = "envMessage" + capitalizeFirstLetter(item.type); 26 | 27 | const paddingBottom = item.type === "command" ? "0" : "0.5em"; 28 | const paddingTop = ["output", "diff"].includes(item.type) ? "0" : "0.5em"; 29 | 30 | const customStyle = { 31 | margin: 0, 32 | padding: `${paddingTop} 0.5em ${paddingBottom} 0.5em`, 33 | overflowX: "hidden", 34 | overflowY: "hidden", 35 | lineHeight: "100%", 36 | backgroundColor: "transparent", 37 | fontSize: "93%", 38 | }; 39 | 40 | const codeTagProps = { 41 | style: { 42 | boxShadow: "none", 43 | margin: "0", 44 | overflowY: "hidden", 45 | overflowX: "hidden", 46 | padding: "0", 47 | lineHeight: "inherit", 48 | fontSize: "93%", 49 | }, 50 | }; 51 | 52 | const typeToLanguage = { 53 | command: "bash", 54 | output: "markdown", 55 | diff: "diff", 56 | }; 57 | 58 | if (item.format !== "text") { 59 | return ( 60 |
handleMouseEnter(item, feedRef)} 63 | onMouseLeave={handleMouseLeave} 64 | > 65 | 74 | {item.message} 75 | 76 |
77 | ); 78 | } else { 79 | return ( 80 |
handleMouseEnter(item, feedRef)} 83 | > 84 | {item.message} 85 |
def parse_global_var_from_code(file_content: str) -> dict[str, dict]:
    """Parse global variables.

    Parses ``file_content`` with libcst, collects the module-level assignment
    statements gathered by ``GlobalVariableVisitor`` and maps each assigned
    name to ``{"start_line": ..., "end_line": ...}`` of its statement.

    Only the first target of each assignment is inspected: either a single
    name or an unpacking target such as ``a, b = 1, 2`` (each element is
    recorded with the same line span). Statements whose target cannot be
    read this way contribute nothing.

    NOTE: if ``file_content`` cannot be parsed, the input string itself is
    returned unchanged (not a dict). This is kept for backward compatibility;
    callers should be prepared for it.
    """
    try:
        tree = cst.parse_module(file_content)
    except Exception:
        # Best-effort: hand unparsable source back as-is instead of raising.
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit propagate.
        return file_content

    wrapper = cst.metadata.MetadataWrapper(tree)
    visitor = GlobalVariableVisitor()
    wrapper.visit(visitor)

    global_assigns = {}
    for assign_stmt, start_pos, end_pos in visitor.global_assigns:
        for t in assign_stmt.body:
            try:
                # Plain ``name = value`` assignment.
                targets = [t.targets[0].target.value]
            except Exception:
                try:
                    # Unpacking assignment: one entry per element of the target.
                    targets = [x.value.value for x in t.targets[0].target.elements]
                except Exception:
                    # Not an assignment shape handled here; skip it.
                    targets = []
            for target_var in targets:
                global_assigns[target_var] = {
                    "start_line": start_pos.line,
                    "end_line": end_pos.line,
                }
    return global_assigns
59 | ... 60 | \"\"\" 61 | const_var = {1,2,3} 62 | const_dict = { 63 | 'day': 'Monday', 64 | 'month': 'January', 65 | } 66 | a, b = 1, 2 67 | import os 68 | 69 | class fooClass: 70 | '''this is a class''' 71 | 72 | def __init__(self, x): 73 | '''initialization.''' 74 | self.x = x 75 | 76 | def print(self): 77 | print(self.x) 78 | 79 | def test(): 80 | a = fooClass(3) 81 | a.print() 82 | 83 | """ 84 | res = parse_global_var_from_code(code) 85 | assert res == { 86 | "const_var": {"start_line": 6, "end_line": 6}, 87 | "const_dict": {"start_line": 7, "end_line": 10}, 88 | "a": {"start_line": 11, "end_line": 11}, 89 | "b": {"start_line": 11, "end_line": 11}, 90 | } 91 | 92 | 93 | if __name__ == "__main__": 94 | test_parse_global_var_from_file() 95 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/src/static/font.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: "basic-sans"; 3 | src: 4 | url("https://use.typekit.net/af/8883dd/00000000000000007735abe1/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i6&v=3") 5 | format("woff2"), 6 | url("https://use.typekit.net/af/8883dd/00000000000000007735abe1/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i6&v=3") 7 | format("woff"), 8 | url("https://use.typekit.net/af/8883dd/00000000000000007735abe1/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i6&v=3") 9 | format("opentype"); 10 | font-display: auto; 11 | font-style: italic; 12 | font-weight: 600; 13 | font-stretch: normal; 14 | } 15 | 16 | @font-face { 17 | font-family: "basic-sans"; 18 | src: 19 | url("https://use.typekit.net/af/c68f64/00000000000000007735abe6/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n6&v=3") 20 | format("woff2"), 21 | 
url("https://use.typekit.net/af/c68f64/00000000000000007735abe6/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n6&v=3") 22 | format("woff"), 23 | url("https://use.typekit.net/af/c68f64/00000000000000007735abe6/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n6&v=3") 24 | format("opentype"); 25 | font-display: auto; 26 | font-style: normal; 27 | font-weight: 600; 28 | font-stretch: normal; 29 | } 30 | 31 | @font-face { 32 | font-family: "basic-sans"; 33 | src: 34 | url("https://use.typekit.net/af/49252d/00000000000000007735abed/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i4&v=3") 35 | format("woff2"), 36 | url("https://use.typekit.net/af/49252d/00000000000000007735abed/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i4&v=3") 37 | format("woff"), 38 | url("https://use.typekit.net/af/49252d/00000000000000007735abed/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i4&v=3") 39 | format("opentype"); 40 | font-display: auto; 41 | font-style: italic; 42 | font-weight: 400; 43 | font-stretch: normal; 44 | } 45 | 46 | @font-face { 47 | font-family: "basic-sans"; 48 | src: 49 | url("https://use.typekit.net/af/721f9c/00000000000000007735abf2/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n4&v=3") 50 | format("woff2"), 51 | url("https://use.typekit.net/af/721f9c/00000000000000007735abf2/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n4&v=3") 52 | format("woff"), 53 | url("https://use.typekit.net/af/721f9c/00000000000000007735abf2/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n4&v=3") 54 | format("opentype"); 55 | font-display: auto; 56 | font-style: normal; 57 | font-weight: 400; 58 | font-stretch: normal; 59 | } 60 | -------------------------------------------------------------------------------- 
class CompressTransformer(cst.CSTTransformer):
    """Reduce a module to a skeleton of its top-level definitions.

    * Module level: keeps only class definitions, function definitions and,
      when ``keep_constant`` is true, statement lines whose first statement is
      an assignment. Everything else is dropped.
    * Classes: statement lines consisting of a bare simple string (such as a
      docstring) are removed from the class body.
    * Functions: the whole body is replaced by a single placeholder string
      expression (``replacement_string``) that callers substitute afterwards.
    """

    # Was ``DESCRIPTION = str = "..."``: a chained assignment that also bound a
    # class attribute named ``str``. The annotation form is what was intended.
    DESCRIPTION: str = "Replaces function body with ..."
    replacement_string = '"$$FUNC_BODY_REPLACEMENT_STRING$$"'

    def __init__(self, keep_constant=True):
        # keep_constant: whether module-level assignments survive compression.
        self.keep_constant = keep_constant

    def leave_Module(
        self, original_node: cst.Module, updated_node: cst.Module
    ) -> cst.Module:
        """Keep only classes, functions and (optionally) top-level assignments."""
        new_body = [
            stmt
            for stmt in updated_node.body
            if m.matches(stmt, m.ClassDef())
            or m.matches(stmt, m.FunctionDef())
            or (
                self.keep_constant
                and m.matches(stmt, m.SimpleStatementLine())
                and m.matches(stmt.body[0], m.Assign())
            )
        ]
        return updated_node.with_changes(body=new_body)

    def leave_ClassDef(
        self, original_node: cst.ClassDef, updated_node: cst.ClassDef
    ) -> cst.ClassDef:
        """Drop bare simple-string statements (docstrings) from the class body."""
        # Remove docstring in the class body
        new_body = [
            stmt
            for stmt in updated_node.body.body
            if not (
                m.matches(stmt, m.SimpleStatementLine())
                and m.matches(stmt.body[0], m.Expr())
                and m.matches(stmt.body[0].value, m.SimpleString())
            )
        ]
        return updated_node.with_changes(body=cst.IndentedBlock(body=new_body))

    def leave_FunctionDef(
        self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
    ) -> cst.CSTNode:
        """Replace the function body with the placeholder string expression."""
        new_expr = cst.Expr(value=cst.SimpleString(value=self.replacement_string))
        new_body = cst.IndentedBlock((new_expr,))
        # another way: replace with pass?
        return updated_node.with_changes(body=new_body)
def get_skeleton(raw_code, keep_constant: bool = True):
    """Return a compressed skeleton of ``raw_code``.

    The source is parsed with libcst and run through ``CompressTransformer``;
    each function-body placeholder it leaves behind is then rewritten to
    ``...``.

    Arguments:
    raw_code -- Python source text to compress
    keep_constant -- forwarded to ``CompressTransformer``; when true,
        module-level assignments are kept in the skeleton

    Returns:
    The skeleton source as a string. If ``raw_code`` cannot be parsed it is
    returned unchanged (best-effort behavior).
    """
    try:
        tree = cst.parse_module(raw_code)
    except Exception:
        # Unparsable input is passed through as-is. ``Exception`` rather than a
        # bare ``except`` so KeyboardInterrupt / SystemExit still propagate.
        return raw_code

    transformer = CompressTransformer(keep_constant=keep_constant)
    modified_tree = tree.visit(transformer)
    # Named ``skeleton`` so it does not shadow the module-level sample ``code``.
    skeleton = modified_tree.code
    # Replace placeholders that already end their line first, so that the
    # catch-all replacement below does not add a second newline after them.
    skeleton = skeleton.replace(
        CompressTransformer.replacement_string + "\n", "...\n"
    )
    skeleton = skeleton.replace(CompressTransformer.replacement_string, "...\n")
    return skeleton
**Build Argument**: Define a build argument `MINICONDA_URL` that will be used to specify the Miniconda installer URL during the build process. 16 | ```bash 17 | ARG MINICONDA_URL 18 | ``` 19 | 3. **Install Third-Party Tools**: Update the package lists for the Ubuntu package manager and install several essential development tools. Clean up after the installation. 20 | ```bash 21 | RUN apt-get update && \ 22 | apt-get install -y bash gcc git jq wget g++ make && \ 23 | apt-get clean && \ 24 | rm -rf /var/lib/apt/lists/* 25 | ``` 26 | 4. **Initialize Git**: Configure global Git settings with a user email and name. 27 | ```bash 28 | RUN git config --global user.email "sweagent@pnlp.org" 29 | RUN git config --global user.name "sweagent" 30 | ``` 31 | 5. **Environment Variables**: Set the `ROOT` environment variable and customize the shell prompt. 32 | ```bash 33 | ENV ROOT='/dev/' 34 | RUN prompt() { echo " > "; }; 35 | ENV PS1="> " 36 | ``` 37 | 6. **Create Assets for Inference**: Create two files that are used to track metadata during an episode. 38 | ```bash 39 | RUN touch /root/files_to_edit.txt 40 | RUN touch /root/test.patch 41 | ``` 42 | 7. **Enhance `ls` Command**: Modify the `.bashrc` file to alias the `ls` command. 43 | ```bash 44 | RUN echo "alias ls='ls -F'" >> /root/.bashrc 45 | ``` 46 | 8. Install Miniconda: Download and install Miniconda, then initialize conda with Bash support and add `conda-forge` to the channels list. 47 | ```bash 48 | ENV PATH="/root/miniconda3/bin:${PATH}" 49 | ARG PATH="/root/miniconda3/bin:${PATH}" 50 | RUN wget ${MINICONDA_URL} -O miniconda.sh \ 51 | && mkdir /root/.conda \ 52 | && bash miniconda.sh -b \ 53 | && rm -f miniconda.sh 54 | RUN conda --version \ 55 | && conda init bash \ 56 | && conda config --append channels conda-forge 57 | ``` 58 | 9. **Install Python Packages**: Copy the `requirements.txt` file into the image and install the specified Python packages. 
#!/usr/bin/env bash

# This script builds the official docker images and pushes them to dockerhub
# after checking with the user.
#
# Usage: release_dockerhub.sh <user> <version>
#   <user>     dockerhub namespace used in the image tags (<user>/swe-agent, ...)
#   <version>  'latest' or a version number of the form x.x.x
#
# Exit codes: 1 wrong argument count, 2 invalid version, 3 empty user,
#             4 aborted by the user, 5 git tag failed, 6 git push failed.

# NOTE: To clear the buildx cache, run the following command:
# docker buildx prune --all or more specifically docker buildx rm

# bash strict mode
set -euo pipefail

# Check that exactly two arguments (user and version) are supplied
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <user> <version>" >&2
    exit 1
fi

USER=${1}
VERSION_STR=${2}

if [[ -z "$USER" ]]; then
    echo "User name cannot be empty" >&2
    exit 3
fi
if [[ "$USER" != "sweagent" ]]; then
    echo "Careful here! Even if the username isn't sweagent, swe-eval will still be built on top of the sweagent/swe-agent image." >&2
    # -r: take the answer literally (no backslash processing)
    read -r -p "Do you want to proceed? (yes) " response
    if [[ "${response}" != "yes" ]]; then
        echo "Exiting..." >&2
        exit 4
    fi
fi


# The version must be 'latest' or in the form x.x.x where each x can be one or more digits
if [[ $VERSION_STR =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || [ "$VERSION_STR" = "latest" ]; then
    echo "Validated version number"
else
    echo "Argument must be 'latest' or in the form x.x.x, where x is one or more numbers." >&2
    exit 2
fi


# Reuse the multi-platform buildx builder if it exists, otherwise create it
DOCKER_CONTEXT_NAME="sweagent-multiplatform"
docker buildx use "$DOCKER_CONTEXT_NAME" || docker buildx create --use --name "$DOCKER_CONTEXT_NAME"

# From here on a failure may leave a partial release behind (some images or the
# tag already pushed), so remind the operator to check.
on_error() {
    echo "====> ERROR!!! IMPORTANT: Make sure if you've already pushed something to dockerhub or pushed the tag to github!" >&2
}
trap on_error ERR

echo "------------------------------------------"
echo "Building swe-agent"
echo "------------------------------------------"
docker buildx build --platform=linux/amd64,linux/arm64 -t "${USER}/swe-agent:${VERSION_STR}" -f docker/swe.Dockerfile --push .
echo "🔥 swe-agent pushed to dockerhub"
echo "------------------------------------------"
echo "Building swe-eval"
echo "------------------------------------------"
docker buildx build --platform=linux/amd64,linux/arm64 -t "${USER}/swe-eval:${VERSION_STR}" -f docker/eval.Dockerfile --push .
echo "🔥 swe-eval pushed to dockerhub"
echo "------------------------------------------"
echo "Building swe-agent-run"
echo "------------------------------------------"
# No -f here: docker picks up the Dockerfile at the root of the build context
docker buildx build --platform=linux/amd64,linux/arm64 -t "${USER}/swe-agent-run:${VERSION_STR}" --push .
echo "🔥 swe-agent-run pushed to dockerhub"
echo "------------------------------------------"
echo "Building of all images done"
echo "------------------------------------------"


# Numbered releases are additionally tagged in git; 'latest' is not
if [ "$VERSION_STR" != "latest" ]; then
    git tag "v${VERSION_STR}" || {
        echo "Failed to create a tag in git" >&2
        exit 5
    }
    echo "🔥 Tag v${VERSION_STR} created in git (local)!"

    git push origin "v${VERSION_STR}" || {
        echo "Failed to push the tag to github" >&2
        exit 6
    }
    echo "🔥 Tag v${VERSION_STR} pushed to github"
fi
import pytest
from sweagent.agent.commands import Command
from sweagent.agent.parsing import (
    FormatError, ParseFunction, ActionParser, ThoughtActionParser,
    XMLThoughtActionParser, EditFormat, Identity, JsonParser
)


def test_parse_function_registry():
    """``ParseFunction.get`` returns an instance of the named parser and rejects unknown names."""
    assert isinstance(ParseFunction.get("ActionParser"), ActionParser)
    assert isinstance(ParseFunction.get("ThoughtActionParser"), ThoughtActionParser)
    assert isinstance(ParseFunction.get("XMLThoughtActionParser"), XMLThoughtActionParser)
    assert isinstance(ParseFunction.get("EditFormat"), EditFormat)
    assert isinstance(ParseFunction.get("Identity"), Identity)
    assert isinstance(ParseFunction.get("JsonParser"), JsonParser)
    with pytest.raises(ValueError):
        ParseFunction.get("InvalidParser")


def test_action_parser():
    """A response starting with a known command is returned as both thought and action."""
    parser = ActionParser()
    command = Command(code='ls', name='ls')
    thought, action = parser("ls -l", [command])
    assert thought == "ls -l"
    assert action == "ls -l"
    with pytest.raises(FormatError):
        parser("invalid command", [command])


def test_thought_action_parser():
    """Text before the fenced block is the thought, the block content is the action."""
    parser = ThoughtActionParser()
    model_response = "Let's look at the files in the current directory.\n```\nls -l\n```"
    thought, action = parser(model_response, [])
    assert thought == "Let's look at the files in the current directory.\n"
    assert action == "ls -l\n"
    with pytest.raises(FormatError):
        parser("No code block", [])


def test_xml_thought_action_parser():
    """The action is taken from inside the ``<command>`` tags; responses without tags are rejected."""
    parser = XMLThoughtActionParser()
    # The action must be wrapped in <command>...</command>; without the tags this
    # input would be the same kind of response the FormatError check below rejects.
    model_response = "Let's look at the files in the current directory.\n<command>\nls -l\n</command>"
    thought, action = parser(model_response, [])
    assert thought == "Let's look at the files in the current directory."
    assert action == "ls -l"
    with pytest.raises(FormatError):
        parser("No command tags", [])


def test_edit_format_parser():
    """Text before the fenced block is the thought, the block content is the replacement text."""
    parser = EditFormat()
    model_response = "Let's replace the contents.\n```\nimport os\nos.listdir()\n```"
    thought, action = parser(model_response, [])
    assert thought == "Let's replace the contents.\n"
    assert action == "import os\nos.listdir()\n"
    with pytest.raises(FormatError):
        parser("No code block", [])


def test_identity_parser():
    """The response is passed through unchanged as both thought and action."""
    parser = Identity()
    model_response = "Return as is"
    thought, action = parser(model_response, [])
    assert thought == model_response
    assert action == model_response


def test_json_parser():
    """A JSON response is turned into (thought, command line); bad or incomplete JSON is rejected."""
    parser = JsonParser()
    model_response = '{"thought": "List files", "command": {"name": "ls", "arguments": {"path": "."}}}'
    thought, action = parser(model_response, [])
    assert thought == "List files"
    assert action == "ls ."

    invalid_json = "Not a JSON"
    with pytest.raises(FormatError):
        parser(invalid_json, [])

    missing_keys = '{"thought": "Missing command key"}'
    with pytest.raises(FormatError):
        parser(missing_keys, [])
7 | ```shell 8 | # @yaml 9 | # signature: [command] [argument(s)] 10 | # docstring: [Brief description of what your command does.] 11 | # arguments: 12 | # [argument 1 name]: 13 | # type: [type (i.e. integer, string)] 14 | # description: [Brief description of this argument] 15 | # required: [true|false] 16 | # [argument 2 name]: 17 | # ... 18 | [command]() { 19 | # Implementation here 20 | } 21 | ``` 22 | * If a command takes in arguments, reference them via positional parameters notation (i.e. `$1`). 23 | * If there are no arguments, omit the `arguments` section. 24 | * The implementation for your command is unconstrained. There are no limitations on the form of the underlying command code. 25 | * The minimal documentation requirements are `signature` and `docstring`. 26 | * If you'd like multiple commands to make modifications to a similar body of functions, we recommend using global variables. 27 | * For instance, in `config/commands/default.sh`, you'll see we define the `CURRENT_LINE` variable for the file viewer. This variable is modified across multiple commands, including `open`, `goto`, `scroll_up`, `scroll_down`, and `edit`. 28 | * You can also leverage third party libraries (check out how we do linting enabled `edit` in `config/commands/edit_linting.sh`). 29 | * To show effects of the command, print to standard output (i.e. `echo`). SWE-agent is implemented such that it does not look for a return value from these commands. 30 | 31 | ## Displaying the Command to SWE-agent 32 | After you define a command, there are a small set of additional steps to making it available for the agent to use. 33 | 34 | First, within your config file... 35 | * Add `config/commands/.sh` file to the `command_files` field. 36 | * Set the `parse_command` field to `ParseCommandBash` or `ParseCommandDetailed`. This key points to the functionality that generates how command documentation is shown to the agent. 37 | * Decide which template(s) you want to show the `{command_docs}` in. 
38 | * We strongly recommend including `{command_docs}` in the `system_template`, which is the first message shown to the agent for every task instance episode. 39 | * You might also consider adding `{command_docs}` to the `format_error_template`, which is shown if the response provided by a model is malformed. 40 | * (Optional) Including a demonstration that uses a command is helpful to showcase proper use + increases the frequency with which the agent uses the command. If you'd like to add a demonstration... 41 | * Create a demonstration manually (e.g. `python run.py --model_name human_thought ...`) or automatically (e.g. `python run_replay.py --traj_path ...`) 42 | * Add/Update the demonstration to the `demonstrations` argument. 43 | * Update `demonstration_template` to control how the demonstration is displayed to the agent. 44 | 45 | > If you're not familiar with how SWE-agent configuration files work, we recommend checking out the [`config` README](https://github.com/princeton-nlp/SWE-agent/tree/main/config). 46 | 47 | Next, run your configuration and see how your agent uses the commands! 48 | ```bash 49 | python run.py --config_file config/[your config].yaml ... 50 | ``` 51 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/frontend/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Create React App 2 | 3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm start` 10 | 11 | Runs the app in the development mode.\ 12 | Open [http://localhost:3000](http://localhost:3000) to view it in your browser. 13 | 14 | The page will reload when you make changes.\ 15 | You may also see any lint errors in the console.
16 | 17 | ### `npm test` 18 | 19 | Launches the test runner in the interactive watch mode.\ 20 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 21 | 22 | ### `npm run build` 23 | 24 | Builds the app for production to the `build` folder.\ 25 | It correctly bundles React in production mode and optimizes the build for the best performance. 26 | 27 | The build is minified and the filenames include the hashes.\ 28 | Your app is ready to be deployed! 29 | 30 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 31 | 32 | ### `npm run eject` 33 | 34 | **Note: this is a one-way operation. Once you `eject`, you can't go back!** 35 | 36 | If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 37 | 38 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own. 39 | 40 | You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it. 41 | 42 | ## Learn More 43 | 44 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 45 | 46 | To learn React, check out the [React documentation](https://reactjs.org/). 
47 | 48 | ### Code Splitting 49 | 50 | This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting) 51 | 52 | ### Analyzing the Bundle Size 53 | 54 | This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size) 55 | 56 | ### Making a Progressive Web App 57 | 58 | This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app) 59 | 60 | ### Advanced Configuration 61 | 62 | This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration) 63 | 64 | ### Deployment 65 | 66 | This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment) 67 | 68 | ### `npm run build` fails to minify 69 | 70 | This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify) 71 | -------------------------------------------------------------------------------- /SWE-agent/.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # 
PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Mac files 132 | *.DS_Store 133 | 134 | # Custom 135 | keys.cfg 136 | 137 | # iPython Notebooks 138 | *.ipynb 139 | 140 | # Evaluation folders 141 | results/ 142 | testbed/ 143 | temp/ 144 | 145 | # Ignore all YAML files in data/ 146 | data/*/ic-* 147 | data/*/single-issues 148 | 149 | # Fine tuning data 150 | fine_tune/*.ipynb 151 | fine_tune/subtasks/*.jsonl 152 | temp*.jsonl 153 | 154 | # Inspector 155 | inspector/*.json 156 | 157 | # Ignore all files in the private folder 158 | private/ 159 | 160 | ### Website 161 | 162 | # dependencies 163 | website/frontend/node_modules 164 | website/frontend/package-lock.json 165 | website/frontend/.pnp 166 | *.pnp.js 167 | 168 | # testing 169 | website/frontend/coverage 170 | 171 | # production 172 | website/frontend/build 173 | 174 | # misc 175 | *.env.local 176 | *.env.development.local 177 | *.env.test.local 178 | *.env.production.local 179 | .api_key 180 | *npm-debug.log* 181 | *yarn-debug.log* 182 | *yarn-error.log* 183 | 184 | 185 | # demo yamls (for editing) 186 | *.demo.yaml 187 | 188 | # trajectory files 189 | trajectories/** 190 | !trajectories/demonstrations/** 191 | 192 | .vscode/** 193 | 194 | # PyCharm 195 | .idea/ 196 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/api/utils.py: -------------------------------------------------------------------------------- 1 | 
import ctypes 2 | import inspect 3 | import re 4 | import threading 5 | 6 | 7 | def _async_raise(tid, exctype): 8 | """Raises an exception in the threads with id tid 9 | 10 | This code is modified from the following SO answer: 11 | Author: Philippe F 12 | Posted: Nov 28, 2008 13 | URL: https://stackoverflow.com/a/325528/ 14 | """ 15 | if not inspect.isclass(exctype): 16 | raise TypeError("Only types can be raised (not instances)") 17 | res = ctypes.pythonapi.PyThreadState_SetAsyncExc( 18 | ctypes.c_long(tid), ctypes.py_object(exctype) 19 | ) 20 | if res == 0: 21 | raise ValueError("invalid thread id") 22 | elif res != 1: 23 | # "if it returns a number greater than one, you're in trouble, 24 | # and you should call it again with exc=NULL to revert the effect" 25 | ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(tid), None) 26 | raise SystemError("PyThreadState_SetAsyncExc failed") 27 | 28 | 29 | class ThreadWithExc(threading.Thread): 30 | """A thread class that supports raising an exception in the thread from 31 | another thread. 32 | 33 | This code is modified from the following SO answer: 34 | Author: Philippe F 35 | Posted: Nov 28, 2008 36 | URL: https://stackoverflow.com/a/325528/ 37 | """ 38 | 39 | def _get_my_tid(self): 40 | """determines this (self's) thread id 41 | 42 | CAREFUL: this function is executed in the context of the caller 43 | thread, to get the identity of the thread represented by this 44 | instance. 45 | """ 46 | if not self.is_alive(): 47 | raise threading.ThreadError("the thread is not active") 48 | 49 | # do we have it cached? 50 | if hasattr(self, "_thread_id"): 51 | return self._thread_id 52 | 53 | # no, look for it in the _active dict 54 | for tid, tobj in threading._active.items(): 55 | if tobj is self: 56 | self._thread_id = tid 57 | return tid 58 | 59 | raise RuntimeError("could not determine the thread's id") 60 | 61 | def raise_exc(self, exctype): 62 | """Raises the given exception type in the context of this thread. 
63 | 64 | If the thread is busy in a system call (time.sleep(), 65 | socket.accept(), ...), the exception is simply ignored. 66 | 67 | If you are sure that your exception should terminate the thread, 68 | one way to ensure that it works is: 69 | 70 | t = ThreadWithExc( ... ) 71 | ... 72 | t.raise_exc( SomeException ) 73 | while t.isAlive(): 74 | time.sleep( 0.1 ) 75 | t.raise_exc( SomeException ) 76 | 77 | If the exception is to be caught by the thread, you need a way to 78 | check that your thread has caught it. 79 | 80 | CAREFUL: this function is executed in the context of the 81 | caller thread, to raise an exception in the context of the 82 | thread represented by this instance. 83 | """ 84 | _async_raise(self._get_my_tid(), exctype) 85 | 86 | 87 | # From Martijn Pieters at https://stackoverflow.com/a/14693789 88 | # 7-bit C1 ANSI sequences 89 | _ANSI_ESCAPE = re.compile( 90 | r""" 91 | \x1B # ESC 92 | (?: # 7-bit C1 Fe (except CSI) 93 | [@-Z\\-_] 94 | | # or [ for CSI, followed by a control sequence 95 | \[ 96 | [0-?]* # Parameter bytes 97 | [ -/]* # Intermediate bytes 98 | [@-~] # Final byte 99 | ) 100 | """, 101 | re.VERBOSE, 102 | ) 103 | 104 | 105 | def strip_ansi_sequences(string: str) -> str: 106 | return _ANSI_ESCAPE.sub("", string) 107 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_env.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import os 3 | from pathlib import Path 4 | import subprocess 5 | import pytest 6 | import yaml 7 | from sweagent.environment.swe_env import EnvHook, EnvironmentArguments, SWEEnv 8 | from contextlib import contextmanager 9 | import docker 10 | 11 | 12 | @pytest.fixture(scope="module") 13 | def test_env_args(tmpdir_factory, ): 14 | """This will use a persistent container""" 15 | local_repo_path = tmpdir_factory.getbasetemp() / "swe-agent-test-repo" 16 | clone_cmd = ["git", "clone", 
import dataclasses
import os
from pathlib import Path
import subprocess
import pytest
import yaml
from sweagent.environment.swe_env import EnvHook, EnvironmentArguments, SWEEnv
from contextlib import contextmanager
import docker


@pytest.fixture(scope="module")
def test_env_args(tmpdir_factory):
    """Environment arguments backed by a fresh clone of the test repository.

    This will use a persistent container (``container_name`` is set); the
    container is force-removed after the tests that use the fixture have run.
    """
    local_repo_path = tmpdir_factory.getbasetemp() / "swe-agent-test-repo"
    clone_cmd = ["git", "clone", "https://github.com/klieret/swe-agent-test-repo", local_repo_path]
    subprocess.run(clone_cmd, check=True)
    data_path = local_repo_path / "problem_statements" / "1.md"
    env_args = EnvironmentArguments(
        data_path=str(data_path),
        repo_path=str(local_repo_path),
        image_name="sweagent/swe-agent:latest",
        container_name="test-container-134245890345098",
    )
    yield env_args
    # Cleanup (module-scoped fixture: runs once the module's tests are done)
    client = docker.from_env()
    container = client.containers.get(env_args.container_name)
    container.remove(force=True)


@contextmanager
def swe_env_context(env_args):
    """Context manager to make sure we close the shell on the container
    so that we can reuse it.
    """

    env = SWEEnv(env_args)
    try:
        yield env
    finally:
        env.close()


@pytest.mark.slow
def test_init_swe_env(test_env_args):
    """The environment can be created and reset with the persistent container."""
    with swe_env_context(test_env_args) as env:
        env.reset()


@pytest.mark.slow
def test_init_swe_env_non_persistent(test_env_args):
    """Same as above, but without a container name."""
    test_env_args = dataclasses.replace(test_env_args, container_name=None)
    with swe_env_context(test_env_args) as env:
        env.reset()


@pytest.mark.slow
def test_execute_setup_script(tmp_path, test_env_args):
    """``environment_setup`` may point to a shell script."""
    test_script = "echo 'hello world'"
    script_path = Path(tmp_path / "test_script.sh")
    script_path.write_text(test_script)
    test_env_args = dataclasses.replace(test_env_args, environment_setup=script_path)
    with swe_env_context(test_env_args) as env:
        env.reset()


@pytest.mark.slow
def test_execute_environment(tmp_path, test_env_args):
    """``environment_setup`` may point to a YAML environment description."""
    test_env = {
        "python": "3.6",
        "packages": "pytest",
        "pip_packages": ["tox"],
        "install": "echo 'installing'",
    }
    env_config_path = Path(tmp_path / "env_config.yml")
    env_config_path.write_text(yaml.dump(test_env))
    test_env_args = dataclasses.replace(test_env_args, environment_setup=env_config_path)
    with swe_env_context(test_env_args) as env:
        env.reset()


@pytest.mark.slow
def test_open_pr(test_env_args):
    """``open_pr`` can be called in dry-run mode for an instance given as a GitHub issue URL."""
    test_env_args = dataclasses.replace(
        test_env_args,
        data_path="https://github.com/klieret/swe-agent-test-repo/issues/1",
        repo_path="",
    )
    with swe_env_context(test_env_args) as env:
        env.reset()
        env.open_pr(_dry_run=True, trajectory=[])


@pytest.mark.slow
def test_interrupt_close(test_env_args):
    """``interrupt`` followed by ``close`` (via the context manager) works after a reset."""
    with swe_env_context(test_env_args) as env:
        env.reset()
        env.interrupt()


@pytest.mark.slow
def test_communicate_old(test_env_args):
    """``reset`` works with ``SWE_AGENT_EXPERIMENTAL_COMMUNICATE`` removed from the environment."""
    env_var = "SWE_AGENT_EXPERIMENTAL_COMMUNICATE"
    # ``pop`` with a default: a plain ``del`` raises KeyError when the variable
    # is not set, which would fail the test before it exercises anything.
    previous = os.environ.pop(env_var, None)
    try:
        with swe_env_context(test_env_args) as env:
            env.reset()
    finally:
        # Restore exactly what was there before so later tests are unaffected.
        if previous is not None:
            os.environ[env_var] = previous


@pytest.mark.slow
def test_env_with_hook(test_env_args):
    """A plain ``EnvHook`` can be registered before the environment is reset."""
    with swe_env_context(test_env_args) as env:
        env.add_hook(EnvHook())
        env.reset()
12 | 13 | The following is a comprehensive guide to using the provided `run.py` script, detailing available command-line arguments, their purposes, and default values. Flags that you might find helpful have been marked with a 💡. 14 | 15 | The code and explanation of the implementations for *configuration based workflows* are explained in [`agent/`](../sweagent/agent/README.md). 16 | 17 | > [!TIP] 18 | > Run `python run.py --help` to view the most up-to-date documentation of the arguments. 19 | 20 | #### Optional Arguments 21 | * `-h, --help`: Show the help message and exit. 22 | 23 | #### Script Arguments 24 | These arguments configure the script's behavior: 25 | * `--instance_filter ` 💡: Run instances that match this regex pattern. Default is .*. 26 | * `--noskip_existing, --skip_existing,`: [Do not] skip instances that have been completed before. 27 | * `--suffix `: Appends a suffix to the name of the folder containing the trajectories for an experiment run. 28 | 29 | #### Environment Arguments 30 | These arguments are related to the environment configuration: 31 | * `--data_path ` 💡: Path to the data file -or- a Hugging Face dataset -or- a GitHub issue URL. 32 | * `--base_commit `: You can specify the base commit sha to checkout. This is determined automatically for instances in SWE-bench. 33 | * `--image_name `: Name of the Docker image to use. Default is swe-agent. 34 | * `--noinstall_environment, --install_environment`: [Do not] install the environment. Default is True. 35 | * `--noverbose, --verbose`: Enable verbose output. Default is False. 36 | * `--timeout `: Timeout in seconds. Default is 35. 37 | * `--container_name ` 💡: Name of the Docker container if you would like to create a persistent container. Optional. 38 | 39 | > [!WARNING] 40 | > If you specify a container name, do not run multiple instances of `run.py` with the same container name! 
41 | 42 | #### AgentArguments 43 | Configure agent behavior: 44 | * `--config_file ` 💡: Path to the configuration YAML file. Default is config/default.yaml. 45 | 46 | #### ModelArguments 47 | Configure model parameters: 48 | * `--model_name ` 💡: Name of the model. Default is `gpt4`. 49 | * `--per_instance_cost_limit ` 💡: Per-instance cost limit (interactive loop will automatically terminate when cost limit is hit). Default is 3.0. 50 | * `--temperature ` 💡: Model temperature. Default is 0.0. 51 | * `--top_p ` 💡: Top p filtering. Default is 0.95. 52 | * `--total_cost_limit `: Total cost limit. Default is 0.0 (unlimited). 53 | 54 | ### 📙 Example Usage 55 | Run with custom data path and verbose mode: 56 | ```bash 57 | python run.py --data_path /path/to/data.json --verbose 58 | ``` 59 | 60 | Specify a model and adjust the temperature and top_p parameters: 61 | ```bash 62 | python run.py --model_name gpt4 --temperature 0.2 --top_p 0.9 63 | ``` 64 | 65 | ## 🛠️ Miscellaneous Scripts 66 | - `remove_all_containers.sh`: Forcibly removes all Docker containers currently present on the system. 67 | - `run_and_eval.sh`: Runs SWE-agent inference and evaluation on a specified dataset N times. You can specify the `dataset_path`, `num_runs`, `template`, and `suffix` arguments. 68 | - `run_jsonl.sh`: Run SWE-agent inference from a `.jsonl` file that contains a SWE-bench style task instance. 69 | - `run_replay.sh`: Run SWE-agent inference from a `.traj` file. This is useful for automatically creating a new demonstration for a new config from an existing sequence of actions. 70 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/agent/README.md: -------------------------------------------------------------------------------- 1 | # Agents 2 | The `agent` folder contains the logic for handling model inference and facilitating their interaction with `SWEEnv`. 3 | The following documentation describes the purpose and classes of each file.
4 | 5 | #### `agents.py` 6 | This file defines the `Agent` class, which facilitates the interaction between an agent and the environment. The `AgentConfig` and `AgentArguments` data classes compile all arguments into a single file. 7 | - `Agent`: Main class for handling model behavior + interaction with environment 8 | - `__init__`: Sets up model, assistant, configurations, and arguments 9 | - `state_command`: Getter for bash command for extracting env. state 10 | - `setup`: Resets cost stats, initializes system message (+ demonstrations), and returns full list of bash commands to define within environment. 11 | - `forward`: Main inference call to model. 12 | - `forward_model`: Determines appropriate observation template, then makes inference call to model 13 | - `forward_with_format_check`: Invokes `forward_model`, with retry calls to handle blocked or malformed actions. 14 | - `forward_with_error_check`: Wraps `forward_with_format_check` with exception handling. 15 | 16 | #### `commands.py` 17 | This file defines the abstraction for custom commands (non-native functions that are implemented in bash) that agents can invoke in `swe-agent` environment. On top of the abstraction, helper functions to extract commands' documentation and compile `.sh` files into separate `Command` objects are provided. There are also fields for establishing the input/output of each action and control flow of actions via templates. 18 | - `AssistantMetadata`: Defines templates for formatting input/output to sub-assistant calls 19 | - `Command`: Defines fields of a custom command 20 | - `ControlMetadata` (WIP): Defines template fields that format the observations for the next agent `forward` inference call 21 | - `generate_command_docs`: Extracts docstrings from each command to form comprehensive documentation. 
22 | - `parse_command_file`: Converts bash file content to separate `Command` objects 23 | 24 | #### `models.py` 25 | This file defines the abstraction for running inference on API models. In addition, the `BaseModel` abstraction also defines a set of cost-related fields for tracking instance-level and total expenses accumulated across a single model run. 26 | - `AnthropicModel`: Handles inference + cost logging for Anthropic Models 27 | - `BedrockModel`: handles inference + cost logging for Amazon Bedrock-provided models (Anthropic Claude only) 28 | - `APIStats`: Cost tracking fields that are updated per model inference 29 | - `BaseModel`: Abstract class that defines the common logic for updating cost stats 30 | - `get_model`: Returns initialized `[Anthropic|Bedrock|Human|OpenAI]Model` based on given arguments + commands 31 | - `HumanModel`: Handles inference for human task worker 32 | - `ModelArguments`: Model name, hyperparameter, and cost limit arguments 33 | - `OpenAIModel`: Handles inference + cost logging for OpenAI models 34 | 35 | #### `parsing.py` 36 | This file defines the abstraction for parsing the output of the model inference. The `Parsing` class is used to extract the relevant information from the model's output and format it into a response that can be used by the `Agent` class. 37 | - `Parsing`: Abstract class that defines the common logic for parsing model output 38 | 39 | #### `history_processors.py` 40 | This file defines the abstraction for processing the history of the environment. The `HistoryProcessor` class is used to extract the relevant information from the history of the environment and format it into a response that can be used by the `Agent` class. 
def create_structure(directory_path):
    """Create the structure of the repository directory by parsing Python files.

    The files that live directly in ``directory_path`` are stored under the
    directory's base name.  Every sub-directory is stored under its path
    relative to ``directory_path``, one nested dictionary per path component,
    starting at the top level of the returned dictionary.  ``.py`` files map
    to ``{"classes": ..., "functions": ..., "text": ...}`` as produced by
    ``parse_python_file``; any other file maps to an empty dictionary.

    :param directory_path: Path to the repository directory.
    :return: A dictionary representing the structure.
    """
    structure = {}
    # normpath strips a trailing separator, so "repo/" still yields "repo"
    # as the root key instead of an empty string.  Computed once: it does not
    # change while walking.
    repo_name = os.path.basename(os.path.normpath(directory_path))

    for root, _, files in os.walk(directory_path):
        relative_root = os.path.relpath(root, directory_path)
        if relative_root == ".":
            relative_root = repo_name
        curr_struct = structure
        for part in relative_root.split(os.sep):
            curr_struct = curr_struct.setdefault(part, {})
        for file_name in files:
            if file_name.endswith(".py"):
                file_path = os.path.join(root, file_name)
                class_info, function_names, file_lines = parse_python_file(file_path)
                curr_struct[file_name] = {
                    "classes": class_info,
                    "functions": function_names,
                    "text": file_lines,
                }
            else:
                curr_struct[file_name] = {}

    return structure


def parse_python_file(file_path, file_content=None):
    """Parse a Python file to extract class and function definitions with their line numbers.

    Classes are reported with their directly contained (synchronous) methods.
    Every other synchronous ``def`` found anywhere in the module, including
    nested functions, is reported as a function.  ``async def`` definitions
    are not reported.

    :param file_path: Path to the Python file; only used for reading when
        ``file_content`` is None, and for the error message.
    :param file_content: Optional source text to parse instead of reading
        ``file_path``.
    :return: Class names, function names, and file contents, as a tuple
        ``(class_info, function_names, lines)``.  Each entry of the first two
        lists is a dict with ``name``, ``start_line``, ``end_line`` and
        ``text`` (class entries also carry ``methods``).  If the file cannot
        be read or parsed, the error is printed and ``([], [], "")`` is
        returned.
    """
    try:
        if file_content is None:
            with open(file_path, "r") as file:
                file_content = file.read()
        parsed_data = ast.parse(file_content)
    except Exception as e:  # Best effort: report the file and skip it.
        print(f"Error in file {file_path}: {e}")
        return [], [], ""

    # Split once instead of once per definition.
    source_lines = file_content.splitlines()

    def describe(node):
        """Return the name/line-span/text record for a class or function node."""
        return {
            "name": node.name,
            "start_line": node.lineno,
            "end_line": node.end_lineno,
            "text": source_lines[node.lineno - 1 : node.end_lineno],
        }

    class_info = []
    function_names = []
    # Methods are tracked by node identity, not by name: a module-level
    # function that happens to share its name with some method must still be
    # reported, regardless of whether it is defined before or after the class.
    method_node_ids = set()

    for node in ast.walk(parsed_data):
        if isinstance(node, ast.ClassDef):
            methods = []
            for child in node.body:
                if isinstance(child, ast.FunctionDef):
                    methods.append(describe(child))
                    method_node_ids.add(id(child))
            record = describe(node)
            record["methods"] = methods
            class_info.append(record)
        elif isinstance(node, ast.FunctionDef) and id(node) not in method_node_ids:
            # ast.walk yields a ClassDef before any of its children, so every
            # method is already registered by the time it is visited here.
            function_names.append(describe(node))

    return class_info, function_names, source_lines
https://packaging.python.org/en/latest/guides/writing-pyproject-toml/ 3 | # Specification (technical, formal): 4 | # https://packaging.python.org/en/latest/specifications/pyproject-toml/ 5 | 6 | 7 | # Choosing a build backend: 8 | [build-system] 9 | requires = ["setuptools"] # REQUIRED if [build-system] table is used 10 | build-backend = "setuptools.build_meta" # If not defined, then legacy behavior can happen. 11 | 12 | 13 | [project] 14 | name = "sweagent" 15 | dynamic = ["version", "dependencies"] 16 | description = "The official SWE-agent package - an open source Agent Computer Interface for running language models as software engineers." 17 | readme = "README.md" 18 | requires-python = ">=3.9" 19 | license = {file = "LICENSE"} 20 | keywords = ["nlp", "agents", "code"] 21 | authors = [ 22 | {name = "Carlos E. Jimenez", email = "carlosej@princeton.edu" }, 23 | {name = "John Yang", email = "byjohnyang@gmail.com" } 24 | ] 25 | 26 | # Classifiers help users find your project by categorizing it. 27 | classifiers = [ 28 | # How mature is this project? 
Common values are 29 | # 3 - Alpha, 4 - Beta, 5 - Production/Stable 30 | "Operating System :: OS Independent", 31 | # Indicate who your project is intended for 32 | "Intended Audience :: Developers", 33 | # Pick your license as you wish 34 | "License :: OSI Approved :: MIT License", 35 | "Programming Language :: Python :: 3.9", 36 | "Programming Language :: Python :: 3 :: Only", 37 | ] 38 | 39 | [tool.setuptools] 40 | include-package-data = true 41 | 42 | [tool.setuptools.dynamic] 43 | version = {attr = "sweagent.__version__"} 44 | dependencies = {file = ["requirements.txt"]} 45 | 46 | [tool.setuptools.packages.find] 47 | where = ["."] 48 | namespaces = false 49 | 50 | [project.urls] 51 | "Homepage" = "https://swe-agent.com" 52 | "Bug Reports" = "http://github.com/princeton-nlp/SWE-agent/issues" 53 | "Documentation" = "https://github.com/princeton-nlp/SWE-agent" 54 | "Source" = "http://github.com/princeton-nlp/SWE-agent" 55 | 56 | 57 | [tool.pytest.ini_options] 58 | markers = [ 59 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 60 | ] 61 | testpaths = [ 62 | "tests" 63 | ] 64 | xfail_strict = true 65 | 66 | [tool.ruff] 67 | # Exclude a variety of commonly ignored directories. 68 | exclude = [ 69 | ".bzr", 70 | ".direnv", 71 | ".eggs", 72 | ".git", 73 | ".git-rewrite", 74 | ".hg", 75 | ".ipynb_checkpoints", 76 | ".mypy_cache", 77 | ".nox", 78 | ".pants.d", 79 | ".pyenv", 80 | ".pytest_cache", 81 | ".pytype", 82 | ".ruff_cache", 83 | ".svn", 84 | ".tox", 85 | ".venv", 86 | ".vscode", 87 | "__pypackages__", 88 | "_build", 89 | "buck-out", 90 | "build", 91 | "dist", 92 | "node_modules", 93 | "site-packages", 94 | "venv", 95 | ] 96 | 97 | # Same as Black. 98 | line-length = 88 99 | indent-width = 4 100 | 101 | # Assume Python 3.8 102 | target-version = "py38" 103 | 104 | [tool.ruff.lint] 105 | # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 
106 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or 107 | # McCabe complexity (`C901`) by default. 108 | select = ["F821", "F822", "E999", "E902", "F401", "F841"] 109 | ignore = [] 110 | 111 | # Allow fix for all enabled rules (when `--fix`) is provided. 112 | fixable = ["ALL"] 113 | unfixable = [] 114 | 115 | # Allow unused variables when underscore-prefixed. 116 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 117 | 118 | [tool.ruff.format] 119 | # Like Black, use double quotes for strings. 120 | quote-style = "double" 121 | 122 | # Like Black, indent with spaces, rather than tabs. 123 | indent-style = "space" 124 | 125 | # Like Black, respect magic trailing commas. 126 | skip-magic-trailing-comma = false 127 | 128 | # Like Black, automatically detect the appropriate line ending. 129 | line-ending = "auto" 130 | 131 | [tool.typos.default.extend-identifiers] 132 | # *sigh* this just isn't worth the cost of fixing 133 | ACI = "ACI" 134 | 135 | [tool.typos.default.extend-words] 136 | # Don't correct the surname "Teh" 137 | aci = "aci" 138 | -------------------------------------------------------------------------------- /SWE-agent/make_demos/convert_traj_to_demo.py: -------------------------------------------------------------------------------- 1 | import json 2 | import io 3 | from ruamel.yaml import YAML 4 | from ruamel.yaml.scalarstring import LiteralScalarString as LSS 5 | from pathlib import Path 6 | from argparse import ArgumentParser 7 | 8 | 9 | DEMO_COMMENT = """# This is a demo file generated from trajectory file: 10 | # {traj_path} 11 | # You can use this demo file to replay the actions in the trajectory with run_replay.py. 12 | # You can edit the content of the actions in this file to modify the replay behavior. 13 | # NOTICE: 14 | # Only the actions of the assistant will be replayed. 15 | # You do not need to modify the observation's contents or any other fields. 
def convert_to_literal_string(d):
    """
    Convert any multi-line strings to LiteralScalarString

    Dictionaries and lists are walked recursively and updated in place, so
    multi-line strings at any nesting depth are converted.  In converted
    strings, ``\\r\\n`` and ``\\r`` line endings are normalised to ``\\n``.
    Strings without a ``\\n`` and non-string scalars found inside a container
    are left untouched.

    Args:
        d: A dict, list or str.

    Returns:
        The same container object (mutated in place), or for a string input
        either the string itself or its LiteralScalarString replacement.

    Raises:
        ValueError: If ``d`` itself is not a dict, list or str.
    """
    def _convert(value):
        # Return the replacement for ``value``; containers are mutated in place.
        if isinstance(value, str):
            if '\n' in value:
                return LSS(value.replace('\r\n', '\n').replace('\r', '\n'))
            return value
        if isinstance(value, dict):
            # Only existing keys are reassigned, so the dict may be updated
            # while iterating over its items.
            for key, item in value.items():
                value[key] = _convert(item)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                value[i] = _convert(item)
        return value

    if not isinstance(d, (dict, list, str)):
        raise ValueError(f"Unsupported type: {type(d)}")
    return _convert(d)
def string2bool(s):
    """Translate a textual flag into a bool.

    The comparison is case-insensitive: "true"/"1" give True and
    "false"/"0" give False.

    Raises:
        ValueError: For any other string.
    """
    verdict = {"true": True, "1": True, "false": False, "0": False}.get(s.lower())
    if verdict is None:
        raise ValueError(f"Invalid boolean string: {s}")
    return verdict
def create_chatgpt_config(
    message: Union[str, list],
    max_tokens: int,
    temperature: float = 1,
    batch_size: int = 1,
    system_message: str = "You are a helpful assistant.",
    model: str = "gpt-3.5-turbo",
) -> Dict:
    """Assemble the keyword arguments for a chat-completion request.

    Args:
        message: Either a single user prompt, or a list of message dicts that
            is appended after the system message.
        max_tokens: Value stored under "max_tokens".
        temperature: Value stored under "temperature".
        batch_size: Value stored under "n".
        system_message: Content of the leading "system" message.
        model: Value stored under "model".

    Returns:
        A dict with the keys "model", "max_tokens", "temperature", "n" and
        "messages".
    """
    system_turn = {"role": "system", "content": system_message}
    if isinstance(message, list):
        # Concatenation builds a new list; the caller's list is not modified.
        conversation = [system_turn] + message
    else:
        conversation = [system_turn, {"role": "user", "content": message}]
    return {
        "model": model,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "n": batch_size,
        "messages": conversation,
    }
def create_anthropic_config(
    message: str,
    prefill_message: str,
    max_tokens: int,
    temperature: float = 1,
    batch_size: int = 1,
    system_message: str = "You are a helpful assistant.",
    model: str = "claude-2.1",
) -> Dict:
    """Assemble the keyword arguments for an Anthropic messages request.

    Args:
        message: A single user prompt.  A list is also accepted and is then
            used as the "messages" value as-is (``prefill_message`` is ignored
            in that case).
        prefill_message: Content of the assistant turn placed after the user
            prompt when ``message`` is not a list.
        max_tokens: Value stored under "max_tokens".
        temperature: Value stored under "temperature".
        batch_size: Accepted but not used by this function.
        system_message: Value stored under "system".
        model: Value stored under "model".

    Returns:
        A dict with the keys "model", "temperature", "max_tokens", "system"
        and "messages".
    """
    if isinstance(message, list):
        turns = message
    else:
        turns = [
            {"role": "user", "content": message},
            {"role": "assistant", "content": prefill_message},
        ]
    return {
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "system": system_message,
        "messages": turns,
    }
Waiting...") 129 | print(e) 130 | signal.alarm(0) 131 | time.sleep(10) 132 | return ret 133 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_data/data_sources/human_eval.json: -------------------------------------------------------------------------------- 1 | [{"instance_id": "swe-bench__humaneval-30", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..52ecda2\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import get_positive\n+\n+\n+METADATA = {}\n+\n+\n+def check(candidate):\n+ assert candidate([-1, -2, 4, 5, 6]) == [4, 5, 6]\n+ assert candidate([5, 3, -5, 2, 3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 3, 9, 123, 1]\n+ assert candidate([-1, -2]) == []\n+ assert candidate([]) == []\n+\n+check(get_positive)\n", "base_commit": "0880311", "base_commit_with_tests": "b2e380b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-85", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..13d6e1f\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,12 @@\n+from main import add\n+def check(candidate):\n+\n+ # Check some simple cases\n+ assert candidate([4, 88]) == 88\n+ assert candidate([4, 5, 6, 7, 2, 122]) == 122\n+ assert candidate([4, 0, 6, 7]) == 0\n+ assert candidate([4, 4, 6, 8]) == 12\n+\n+ # Check some edge cases that are easy to work out by hand.\n+ \n+check(add)\n", "base_commit": "2de55bc", "base_commit_with_tests": "c8c997b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": 
"swe-bench__humaneval-22", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..d881459\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,14 @@\n+from main import filter_integers\n+\n+\n+METADATA = {\n+ 'author': 'jt',\n+ 'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+ assert candidate([]) == []\n+ assert candidate([4, {}, [], 23.2, 9, 'adasd']) == [4, 9]\n+ assert candidate([3, 'c', 3, 3, 'a', 'b']) == [3, 3, 3]\n+check(filter_integers)\n", "base_commit": "f0dbe5e", "base_commit_with_tests": "55cc474", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-104", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..617da5a\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import unique_digits\n+def check(candidate):\n+\n+ # Check some simple cases\n+ assert candidate([15, 33, 1422, 1]) == [1, 15, 33]\n+ assert candidate([152, 323, 1422, 10]) == []\n+ assert candidate([12345, 2033, 111, 151]) == [111, 151]\n+ assert candidate([135, 103, 31]) == [31, 135]\n+\n+ # Check some edge cases that are easy to work out by hand.\n+ assert True\n+\n+check(unique_digits)\n", "base_commit": "b52ee85", "base_commit_with_tests": "4a92a50", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-0", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..2d57340\n--- /dev/null\n+++ b/test.py\n@@ 
class HistoryProcessorMeta(type):
    """Metaclass that registers every history processor class by its name.

    Each class created with this metaclass, except the base class named
    "HistoryProcessor", is stored in ``_registry`` under its class name.
    """

    # Shared mapping: class name -> class.
    _registry = {}

    def __new__(cls, name, bases, attrs):
        new_cls = super().__new__(cls, name, bases, attrs)
        if name != "HistoryProcessor":
            cls._registry[name] = new_cls
        return new_cls


@dataclass
class HistoryProcessor(metaclass=HistoryProcessorMeta):
    """Base class for callables that transform the message history.

    Subclasses override ``__call__``.  They are registered automatically by
    the metaclass and can be instantiated by name through ``get``.
    """

    def __init__(self, *args, **kwargs):
        pass

    @abstractmethod
    def __call__(self, history: list[str]) -> list[str]:
        # The metaclass is not ABCMeta, so @abstractmethod is not enforced at
        # instantiation time; calling the base implementation raises instead.
        raise NotImplementedError

    @classmethod
    def get(cls, name, *args, **kwargs):
        """Instantiate the registered history processor called ``name``.

        Args:
            name: Class name of a registered processor.
            *args: Positional arguments forwarded to the processor constructor.
            **kwargs: Keyword arguments forwarded to the processor constructor.

        Returns:
            A new instance of the requested processor class.

        Raises:
            ValueError: If no processor is registered under ``name``.
        """
        # Only the registry lookup is guarded: a KeyError raised inside a
        # processor's own constructor must not be reported as "not found".
        try:
            processor_cls = cls._registry[name]
        except KeyError:
            raise ValueError(f"History processor ({name}) not found.") from None
        return processor_cls(*args, **kwargs)
def last_n_history(history, n):
    """Keep only the first and the last ``n`` user observations in full.

    Entries whose role is not 'user', and user entries flagged ``is_demo``,
    are passed through unchanged.  Among the remaining user entries, the
    first one and the final ``n`` are kept as they are; every other one is
    replaced by a copy whose content is a one-line summary giving the number
    of omitted lines.  The input entries themselves are never modified.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError('n must be a positive integer')

    def is_observation(item):
        return item['role'] == 'user' and not item.get('is_demo', False)

    total = sum(1 for item in history if is_observation(item))
    first_kept = total - n + 1  # 1-based index of the oldest observation kept in full
    trimmed = []
    seen = 0
    for item in history:
        if not is_observation(item):
            trimmed.append(item)
            continue
        seen += 1
        if seen == 1 or first_kept <= seen <= total:
            trimmed.append(item)
        else:
            shortened = item.copy()
            shortened['content'] = f'Old output omitted ({len(item["content"].splitlines())} lines)'
            trimmed.append(shortened)
    return trimmed
def process_single_traj(traj_path: str, config_file: str, data_path: str, suffix: str, *, forward_args: List[str]):
    """Replay the assistant actions stored in a trajectory or demo file.

    The assistant messages of the trajectory are written to a temporary
    replay file, the matching task instance is extracted from ``data_path``
    into a second temporary file (for ``.json``/``.jsonl`` data), and
    ``run.py`` is invoked in-process with the "replay" model.  Both temporary
    files are created in the current working directory and removed
    afterwards, even if the run fails.

    Args:
        traj_path (str): Path to the trajectory. A ``.yaml`` file is read as
            the history list itself; anything else is read as JSON with a
            "history" key.
        config_file (str): Passed to run.py as ``--config_file``.
        data_path (str): Task instance source. If None, it is read from
            ``environment.data_path`` in the ``args.yaml`` next to the
            trajectory. Paths not ending in ``.json``/``.jsonl`` are passed
            to run.py unchanged.
        suffix (str): If not None, passed to run.py as ``--suffix``.
        forward_args (List[str]): Passed to run.py

    Raises:
        ValueError: Incorrect paths or other config issue

    Returns:
        None
    """
    replay_action_trajs_path = "temp_replay.jsonl"

    # Open trajectory file, extract responses as actions
    if traj_path.endswith(".yaml"):
        with open(traj_path, "r") as f:
            traj_data = {"history": yaml.safe_load(f)}
    else:
        with open(traj_path, "r") as f:
            traj_data = json.load(f)
    actions = [x["content"] for x in traj_data["history"] if x["role"] == "assistant"]
    instance_id = traj_path.split("/")[-1].split(".")[0]

    # Identify the relevant task instance and create it
    def create_task_instances_tmp_file(data: List[Dict[str, Any]]) -> str:
        """Helper function to create a temporary file to write task instances to.
        Returns path to the temporary file.
        """
        data = [d for d in data if d["instance_id"] == instance_id]
        tmp_path = instance_id + ".jsonl"
        # Registered before writing so a partially written file is cleaned up too.
        temp_files.append(tmp_path)
        with open(tmp_path, "w") as f:
            for d in data:
                print(json.dumps(d), file=f, end="\n", flush=True)
        return tmp_path

    # Every temporary file created below; removed in the ``finally`` clause.
    temp_files = [replay_action_trajs_path]
    try:
        with open(replay_action_trajs_path, "w") as f:
            print(
                json.dumps({instance_id: actions}),
                file=f,
                end="\n",
                flush=True
            )

        # Get data_path from args.yaml
        if data_path is None:
            args_path = os.path.join(
                os.path.dirname(traj_path),
                "args.yaml"
            )
            with open(args_path) as f:
                args = yaml.safe_load(f)
            data_path = args['environment']['data_path']

        is_other = False
        if data_path.endswith(".jsonl"):
            with open(data_path, "r") as f:
                instances = [json.loads(x) for x in f.readlines()]
            replay_task_instances_path = create_task_instances_tmp_file(instances)
        elif data_path.endswith(".json"):
            with open(data_path) as f:
                instances = json.load(f)
            replay_task_instances_path = create_task_instances_tmp_file(instances)
        else:
            # Assume data_path is a github url or local url
            is_other = True
            replay_task_instances_path = data_path

        # Call run.py in-process (no subprocess is spawned)
        run_args = [
            "--config_file", config_file,
            "--data_path", replay_task_instances_path,
            "--install_environment", "True",
            "--model_name", "replay",
            "--replay_path", replay_action_trajs_path,
            *forward_args,
        ]
        if is_other:
            # Not sure if this only applies to github urls for data_path
            run_args.extend(["--skip_existing", "False"])
        if suffix is not None:
            run_args.extend(["--suffix", suffix])
        script_args = runscript.get_args(run_args)
        runscript.main(script_args)
    finally:
        for path in temp_files:
            if os.path.exists(path):
                os.remove(path)
7 | # end_name: end_of_edit 8 | # arguments: 9 | # replacement_text: 10 | # type: string 11 | # description: the text to replace the current selection with 12 | # required: true 13 | edit() { 14 | if [ -z "$CURRENT_FILE" ] 15 | then 16 | echo 'No file open. Use the `open` command first.' 17 | return 18 | fi 19 | local start_line=$((START_CURSOR - 1)) 20 | start_line=$((start_line < 0 ? 0 : start_line)) 21 | local end_line=$((END_CURSOR)) 22 | end_line=$((end_line < 0 ? 0 : end_line)) 23 | 24 | local replacement=() 25 | while IFS= read -r line 26 | do 27 | replacement+=("$line") 28 | done 29 | 30 | local num_lines=${#replacement[@]} 31 | # Create a backup of the current file 32 | cp "$CURRENT_FILE" "/root/$(basename "$CURRENT_FILE")_backup" 33 | # Read the file line by line into an array 34 | mapfile -t lines < "$CURRENT_FILE" 35 | local new_lines=("${lines[@]:0:$start_line}" "${replacement[@]}" "${lines[@]:$((end_line))}") 36 | # Write the new stuff directly back into the original file 37 | printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE" 38 | # Run linter 39 | if [[ $CURRENT_FILE == *.py ]]; then 40 | lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1) 41 | else 42 | # do nothing 43 | lint_output="" 44 | fi 45 | # if there is no output, then the file is good 46 | if [ -z "$lint_output" ]; then 47 | _constrain_line 48 | # set to START + num_lines - 1, unless num_lines is 0, then set to START 49 | export END_CURSOR=$((num_lines == 0 ? START_CURSOR : START_CURSOR + num_lines - 1)) 50 | export START_CURSOR=$START_CURSOR 51 | _print 52 | echo "File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary." 53 | else 54 | echo "Your proposed edit has introduced new syntax error(s). Please read this error message carefully and then retry editing the file." 
55 | echo "" 56 | echo "ERRORS:" 57 | _split_string "$lint_output" 58 | echo "" 59 | 60 | # Save original values 61 | original_current_line=$CURRENT_LINE 62 | original_window=$WINDOW 63 | original_end_cursor=$END_CURSOR 64 | 65 | # Update values 66 | export CURRENT_LINE=$(( (num_lines / 2) + start_line )) # Set to "center" of edit 67 | export WINDOW=$((num_lines + 10)) # Show +/- 5 lines around edit 68 | export END_CURSOR=$((num_lines == 0 ? START_CURSOR : START_CURSOR + num_lines - 1)) 69 | 70 | echo "This is how your edit would have looked if applied" 71 | echo "-------------------------------------------------" 72 | _constrain_line 73 | _print 74 | echo "-------------------------------------------------" 75 | echo "" 76 | 77 | # Restoring CURRENT_FILE to original contents. 78 | cp "/root/$(basename "$CURRENT_FILE")_backup" "$CURRENT_FILE" 79 | 80 | export CURRENT_LINE=$(( ((end_line - start_line) / 2) + start_line )) # Set to "center" of edit 81 | export WINDOW=$((end_line - start_line + 10)) 82 | export END_CURSOR=$original_end_cursor 83 | 84 | echo "This is the original code before your edit" 85 | echo "-------------------------------------------------" 86 | _constrain_line 87 | _print 88 | echo "-------------------------------------------------" 89 | 90 | # Restore original values 91 | export CURRENT_LINE=$original_current_line 92 | export WINDOW=$original_window 93 | export END_CURSOR=$original_end_cursor 94 | 95 | echo "Your changes have NOT been applied. Please fix your edit command and try again." 96 | echo "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code." 97 | echo "DO NOT re-run the same failed edit command. Running it again will lead to the same error." 
98 | fi 99 | # Remove backup file 100 | rm -f "/root/$(basename "$CURRENT_FILE")_backup" 101 | } 102 | -------------------------------------------------------------------------------- /SWE-agent/config/README.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | This folder contains details describing how to write your own configurations to control how agents can interact with the `SWEEnv` environment. 4 | A configuration is represented as a single `.yaml` file, allowing you to... 5 | * Define the **commands** that agents may use to traverse + modify a codebase. 6 | * Write **prompts** that are deterministically/conditionally shown to the agent over the course of a single trajectory. 7 | * Control the **input/output interface** that sits between the agent and `SWEEnv`. 8 | 9 | ## Configuration File Fields 10 | The configuration is a `.yaml` file that consists of several fields. They are fully represented in the following outline: 11 | 12 | ```yaml 13 | # Prompt Templates: Control how observations of environment are shown to agent 14 | system_template: | # .yaml syntax for multi-line string value 15 | First `system` message shown to agent 16 | instance_template: |- # .yaml syntax for multi-line string value w/ no new line 17 | Instance prompt, contains task instance-specific content 18 | next_step_template: |- 19 | Format template of per-turn observation (Contains standard output from agent's action) 20 | next_step_no_output_template: |- 21 | Format template of observation when there is no standard output from the agent's action 22 | format_error_template: |- 23 | Format template of error message (Used when agent's action causes an error) 24 | demonstration_template: | 25 | Format template for showing a demonstration to the agent 26 | demonstrations: 27 | - `trajectories///*.traj` 28 | - File is a demonstration of how to solve a task. This could be an agent-generated trajectory. 
29 | - You can include 1+ demonstrations 30 | 31 | # Environment States: Define features of the SWEEnv environment 32 | env_variables: 33 | # Default variables for SWEEnv at the beginning of each instance 34 | CURRENT_FILE: 0 35 | CURRENT_LINE: 36 | OVERLAP: 37 | SEARCH_FILES: 38 | SEARCH_INDEX: 39 | SEARCH_RESULTS: 40 | WINDOW_SIZE: 41 | START_INDEX: 42 | END_INDEX: 43 | START_CURSOR: 44 | END_CURSOR: 45 | START_CURSOR_MARK: 46 | END_CURSOR_MARK: 47 | state_command: | 48 | # `state_command` allows you to update state variables to reflect any aspect of the environment (e.g. current working directory) 49 | name: state 50 | code: | 51 | state() { echo '{"pwd": "'$PWD'"}'; } 52 | 53 | # Action Interface: Define how an agent interacts with the SWEEnv environment 54 | command_files: 55 | - path/to/bash_file.sh 56 | - Each file contains a list of commands implemented in bash 57 | - You can include 1+ command files 58 | parse_command: Reference to functionality for defining command documentation 59 | history_processor: Reference to functionality for controlling agent's message history 60 | parse_function: Parser run on agent output 61 | ``` 62 | 63 | In this directory, we recommend looking at... 64 | * `configs/` for examples of properly formatted configuration files. Each configuration differs in its set of commands, input/output format, demonstrations, etc. 65 | * `commands/` for the bash implementations of the custom commands that SWE-agent uses to navigate + edit the codebase. 66 | 67 | ## How a Configuration File is Processed 68 | Some notes on processing that occurs on config fields when SWE-agent is run: 69 | * Commands specified in `command_files` will be parsed into a single block of documentation text that can be referenced as `{command_docs}`. 70 | * `env_variables` are the default variables for the bash environment at the beginning of each instance. 
71 | * `state_command` is used to extract state information from the bash environment (formatted as json) to be used in the templates given to the agent. 72 | 73 | Possible variables that can be used in templates are: 74 | - `{command_docs}` (an automatically compiled collection of available commands + their docstrings) 75 | - any variable given in `env_variables` (same spelling), e.g., `{WINDOW_SIZE}` 76 | - any variable extracted as json as part of the `state_command` function 77 | - the last observation `{observation}` 78 | - ... this list will grow as we implement more features! 79 | 80 | ## Template Workflow 81 | The following diagram illustrates where each template is shown within a single episode of solving one task instance. 82 | 83 |

84 | Template Workflow 85 |

86 | 87 | One of three templates can be shown per turn: 88 | * "Next Step" (`next_step_template`): Displayed if the model's action successfully runs. The output and a prompt for the next action are shown. 89 | * "Next Step (No Output)" (`next_step_no_output_template`): Displayed if the model's action successfully runs, but does not produce any standard output (e.g. `rm`, `cd`) 90 | * "Format Error" (`format_error_template`): Displayed if the model's response is malformed. Over the next two turns... 91 | * If either of the model's next two responses is correct, the message history is updated such that the "Format Error" turn is not kept. The episode continues. 92 | * If the model's next two responses are both malformed, the episode terminates. 93 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_replay.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import subprocess 3 | from run_replay import get_args, main 4 | import pytest 5 | 6 | 7 | @pytest.fixture 8 | def swe_agent_test_repo_clone(tmp_path): 9 | local_repo_path = tmp_path / "swe-agent-test-repo" 10 | clone_cmd = ["git", "clone", "https://github.com/klieret/swe-agent-test-repo", local_repo_path] 11 | subprocess.run(clone_cmd, check=True) 12 | return local_repo_path 13 | 14 | 15 | @pytest.fixture 16 | def swe_agent_test_repo_traj(test_trajectories_path) -> Path: 17 | p = test_trajectories_path / "gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1" / "6e44b9__sweagenttestrepo-1c2844.traj" 18 | assert p.is_file() 19 | return p 20 | 21 | 22 | @pytest.fixture 23 | def swe_agent_test_repo_local_problem_stmt(swe_agent_test_repo_clone) -> Path: 24 | problem_stmt = swe_agent_test_repo_clone / "problem_statements" / "1.md" 25 | assert problem_stmt.is_file() 26 | return problem_stmt 27 | 28 | 29 | @pytest.mark.slow 30 | @pytest.mark.parametrize("problem_statement_source", 
["github", "local"]) 31 | def test_model_replay_github_repo(swe_agent_test_repo_traj, problem_statement_source, swe_agent_test_repo_local_problem_stmt): 32 | if problem_statement_source == "github": 33 | data_path = "https://github.com/klieret/swe-agent-test-repo/issues/1" 34 | elif problem_statement_source == "local": 35 | data_path = str(swe_agent_test_repo_local_problem_stmt) 36 | args = [ 37 | "--traj_path", 38 | str(swe_agent_test_repo_traj), 39 | "--data_path", 40 | data_path, 41 | "--config_file", 42 | "config/default_from_url.yaml", 43 | "--raise_exceptions", 44 | ] 45 | if problem_statement_source == "local": 46 | args.extend(["--repo_path", str("https://github.com/klieret/swe-agent-test-repo/")]) 47 | args, remaining_args = get_args(args) 48 | main(**vars(args), forward_args=remaining_args) 49 | 50 | 51 | @pytest.mark.slow 52 | def test_model_replay_from_json(test_trajectories_path, test_data_sources_path): 53 | traj_path = test_trajectories_path / "gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1" / "pydicom__pydicom-1458.traj" 54 | assert traj_path.is_file() 55 | data_path = test_data_sources_path / "swe-bench-dev-easy_first_only.json" 56 | assert data_path.is_file() 57 | args = [ 58 | "--traj_path", 59 | str(traj_path), 60 | "--data_path", 61 | str(data_path), 62 | "--config_file", 63 | "config/default.yaml", 64 | "--raise_exceptions", 65 | ] 66 | args, remaining_args = get_args(args) 67 | main(**vars(args), forward_args=remaining_args) 68 | 69 | 70 | 71 | 72 | def test_run_cli_help(): 73 | args = [ 74 | "python", 75 | "run_replay.py", 76 | "--help", 77 | ] 78 | subprocess.run(args, check=True) 79 | 80 | 81 | @pytest.mark.slow 82 | @pytest.mark.parametrize("problem_statement_source", ["github", "local"]) 83 | def test_model_replay_local_repo(swe_agent_test_repo_clone, swe_agent_test_repo_traj, problem_statement_source): 84 | local_repo_path = swe_agent_test_repo_clone 85 | if problem_statement_source == "github": 86 | 
problem_statement_path = "https://github.com/klieret/swe-agent-test-repo/issues/1" 87 | elif problem_statement_source == "local": 88 | problem_statement_path = local_repo_path / "problem_statements" / "1.md" 89 | assert problem_statement_path.is_file() 90 | else: 91 | raise ValueError(problem_statement_source) 92 | run_cmd = [ 93 | "--traj_path", 94 | str(swe_agent_test_repo_traj), 95 | "--repo_path", 96 | str(local_repo_path), 97 | "--config_file", 98 | "config/default_from_url.yaml", 99 | "--data_path", 100 | str(problem_statement_path), 101 | "--apply_patch", 102 | "--raise_exceptions", 103 | ] 104 | print(run_cmd) 105 | args, remaining_args = get_args(run_cmd) 106 | main(**vars(args), forward_args=remaining_args) 107 | solution = (swe_agent_test_repo_traj.parent / "solution_missing_colon.py").read_text().strip() 108 | solution_retrieved = (local_repo_path / "tests" / "missing_colon.py").read_text().strip() 109 | assert solution == solution_retrieved 110 | 111 | 112 | def test_exception_replay_local_dirty(swe_agent_test_repo_clone, swe_agent_test_repo_traj): 113 | """Test that swe-agent refuses to work if the local repo is dirty""" 114 | problem_statement_path = swe_agent_test_repo_clone / "problem_statements" / "1.md" 115 | test_file = swe_agent_test_repo_clone / "tests" / "missing_colon.py" 116 | assert test_file.is_file() 117 | test_file.write_text(test_file.read_text().replace("division", "division_function")) 118 | run_cmd = [ 119 | "--traj_path", 120 | str(swe_agent_test_repo_traj), 121 | "--repo_path", 122 | str(swe_agent_test_repo_clone), 123 | "--config_file", 124 | "config/default_from_url.yaml", 125 | "--data_path", 126 | str(problem_statement_path), 127 | "--apply_patch", 128 | "--raise_exceptions", 129 | ] 130 | args, remaining_args = get_args(run_cmd) 131 | with pytest.raises(ValueError, match=".*dirty.*"): 132 | main(**vars(args), forward_args=remaining_args) -------------------------------------------------------------------------------- 
/SWE-agent/config/commands/edit_linting.sh: -------------------------------------------------------------------------------- 1 | # @yaml 2 | # signature: |- 3 | # edit <start_line>:<end_line> 4 | # <replacement_text> 5 | # end_of_edit 6 | # docstring: replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement_text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. 7 | # end_name: end_of_edit 8 | # arguments: 9 | # start_line: 10 | # type: integer 11 | # description: the line number to start the edit at 12 | # required: true 13 | # end_line: 14 | # type: integer 15 | # description: the line number to end the edit at (inclusive) 16 | # required: true 17 | # replacement_text: 18 | # type: string 19 | # description: the text to replace the current selection with 20 | # required: true 21 | edit() { 22 | if [ -z "$CURRENT_FILE" ] 23 | then 24 | echo 'No file open. Use the `open` command first.' 25 | return 26 | fi 27 | 28 | local start_line="$(echo $1: | cut -d: -f1)" 29 | local end_line="$(echo $1: | cut -d: -f2)" 30 | 31 | if [ -z "$start_line" ] || [ -z "$end_line" ] 32 | then 33 | echo "Usage: edit :" 34 | return 35 | fi 36 | 37 | local re='^[0-9]+$' 38 | if ! [[ $start_line =~ $re ]]; then 39 | echo "Usage: edit :" 40 | echo "Error: start_line must be a number" 41 | return 42 | fi 43 | if ! 
[[ $end_line =~ $re ]]; then 44 | echo "Usage: edit :" 45 | echo "Error: end_line must be a number" 46 | return 47 | fi 48 | 49 | # Bash array starts at 0, so let's adjust 50 | local start_line=$((start_line - 1)) 51 | local end_line=$((end_line)) 52 | 53 | local line_count=0 54 | local replacement=() 55 | while IFS= read -r line 56 | do 57 | replacement+=("$line") 58 | ((line_count++)) 59 | done 60 | 61 | # Create a backup of the current file 62 | cp "$CURRENT_FILE" "/root/$(basename "$CURRENT_FILE")_backup" 63 | 64 | # Read the file line by line into an array 65 | mapfile -t lines < "$CURRENT_FILE" 66 | local new_lines=("${lines[@]:0:$start_line}" "${replacement[@]}" "${lines[@]:$((end_line))}") 67 | # Write the new stuff directly back into the original file 68 | printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE" 69 | 70 | # Run linter 71 | if [[ $CURRENT_FILE == *.py ]]; then 72 | lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1) 73 | else 74 | # do nothing 75 | lint_output="" 76 | fi 77 | 78 | # if there is no output, then the file is good 79 | if [ -z "$lint_output" ]; then 80 | export CURRENT_LINE=$start_line 81 | _constrain_line 82 | _print 83 | 84 | echo "File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary." 85 | else 86 | echo "Your proposed edit has introduced new syntax error(s). Please read this error message carefully and then retry editing the file." 
87 | echo "" 88 | echo "ERRORS:" 89 | _split_string "$lint_output" 90 | echo "" 91 | 92 | # Save original values 93 | original_current_line=$CURRENT_LINE 94 | original_window=$WINDOW 95 | 96 | # Update values 97 | export CURRENT_LINE=$(( (line_count / 2) + start_line )) # Set to "center" of edit 98 | export WINDOW=$((line_count + 10)) # Show +/- 5 lines around edit 99 | 100 | echo "This is how your edit would have looked if applied" 101 | echo "-------------------------------------------------" 102 | _constrain_line 103 | _print 104 | echo "-------------------------------------------------" 105 | echo "" 106 | 107 | # Restoring CURRENT_FILE to original contents. 108 | cp "/root/$(basename "$CURRENT_FILE")_backup" "$CURRENT_FILE" 109 | 110 | export CURRENT_LINE=$(( ((end_line - start_line + 1) / 2) + start_line )) 111 | export WINDOW=$((end_line - start_line + 10)) 112 | 113 | echo "This is the original code before your edit" 114 | echo "-------------------------------------------------" 115 | _constrain_line 116 | _print 117 | echo "-------------------------------------------------" 118 | 119 | # Restore original values 120 | export CURRENT_LINE=$original_current_line 121 | export WINDOW=$original_window 122 | 123 | echo "Your changes have NOT been applied. Please fix your edit command and try again." 124 | echo "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code." 125 | echo "DO NOT re-run the same failed edit command. Running it again will lead to the same error." 
126 | fi 127 | 128 | # Remove backup file 129 | rm -f "/root/$(basename "$CURRENT_FILE")_backup" 130 | } 131 | -------------------------------------------------------------------------------- /SWE-agent/tests/test_run.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import subprocess 4 | from typing import Any, Dict 5 | import pytest 6 | 7 | from run import ActionsArguments, Main, MainHook, OpenPRHook, ScriptArguments 8 | from sweagent.agent.agents import Agent, AgentArguments, AgentHook 9 | from sweagent.agent.models import ModelArguments 10 | from sweagent.environment.swe_env import EnvironmentArguments, SWEEnv 11 | 12 | def test_run_cli_help(): 13 | args = [ 14 | "python", 15 | "run.py", 16 | "--help", 17 | ] 18 | subprocess.run(args, check=True) 19 | 20 | 21 | 22 | @pytest.fixture 23 | def open_pr_hook_init_for_sop(): 24 | hook = OpenPRHook() 25 | hook._token = "" 26 | hook._env = None 27 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/1" 28 | hook._open_pr = True 29 | hook._skip_if_commits_reference_issue = True 30 | return hook 31 | 32 | 33 | @pytest.fixture 34 | def info_dict(): 35 | return { 36 | "submission": "asdf", 37 | "exit_status": "submitted", 38 | } 39 | 40 | 41 | def test_should_open_pr_fail_submission(open_pr_hook_init_for_sop, info_dict): 42 | hook = open_pr_hook_init_for_sop 43 | info_dict["submission"] = None 44 | assert not hook.should_open_pr(info_dict) 45 | 46 | 47 | def test_should_open_pr_fail_exit(open_pr_hook_init_for_sop, info_dict): 48 | hook = open_pr_hook_init_for_sop 49 | info_dict["exit_status"] = "fail" 50 | assert not hook.should_open_pr(info_dict) 51 | 52 | 53 | def test_should_open_pr_fail_invalid_url(open_pr_hook_init_for_sop, info_dict): 54 | hook = open_pr_hook_init_for_sop 55 | hook._data_path = "asdf" 56 | assert not hook.should_open_pr(info_dict) 57 | 58 | 59 | def 
test_should_open_pr_fail_closed(open_pr_hook_init_for_sop, info_dict): 60 | hook = open_pr_hook_init_for_sop 61 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/16" 62 | assert not hook.should_open_pr(info_dict) 63 | 64 | 65 | def test_should_open_pr_fail_assigned(open_pr_hook_init_for_sop, info_dict): 66 | hook = open_pr_hook_init_for_sop 67 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/17" 68 | assert not hook.should_open_pr(info_dict) 69 | 70 | 71 | def test_should_open_pr_fail_locked(open_pr_hook_init_for_sop, info_dict): 72 | hook = open_pr_hook_init_for_sop 73 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/18" 74 | assert not hook.should_open_pr(info_dict) 75 | 76 | def test_should_open_pr_fail_has_pr(open_pr_hook_init_for_sop, info_dict): 77 | hook = open_pr_hook_init_for_sop 78 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/19" 79 | assert not hook.should_open_pr(info_dict) 80 | 81 | 82 | def test_should_open_pr_success_has_pr_override(open_pr_hook_init_for_sop, info_dict): 83 | hook = open_pr_hook_init_for_sop 84 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/19" 85 | hook._skip_if_commits_reference_issue = False 86 | assert hook.should_open_pr(info_dict) 87 | 88 | 89 | class RaisesExceptionHook(MainHook): 90 | def on_instance_start(self, *, index: int, instance: Dict[str, Any]): 91 | raise ValueError("test exception") 92 | 93 | @pytest.fixture 94 | def test_script_args(): 95 | return ScriptArguments( 96 | suffix="", 97 | environment=EnvironmentArguments( 98 | image_name="sweagent/swe-agent:latest", 99 | data_path="https://github.com/klieret/swe-agent-test-repo/issues/1", 100 | split="dev", 101 | verbose=True, 102 | install_environment=True, 103 | ), 104 | skip_existing=True, 105 | agent=AgentArguments( 106 | model=ModelArguments( 107 | model_name="instant_empty_submit", 108 | total_cost_limit=0.0, 109 | 
per_instance_cost_limit=3.0, 110 | temperature=0.0, 111 | top_p=0.95, 112 | ), 113 | config_file=Path("config/default.yaml"), 114 | ), 115 | actions=ActionsArguments(open_pr=False, skip_if_commits_reference_issue=True), 116 | raise_exceptions=True, 117 | ) 118 | 119 | 120 | def test_exception_raised(test_script_args): 121 | assert test_script_args.raise_exceptions 122 | main = Main(test_script_args) 123 | main.add_hook(RaisesExceptionHook()) 124 | with pytest.raises(ValueError, match="test exception"): 125 | main.main() 126 | 127 | 128 | class CreateFakeLogFile(MainHook): 129 | """Testing the skip functionality""" 130 | def on_init(self, *, args: ScriptArguments, agent: Agent, env: SWEEnv, traj_dir: Path): 131 | self._traj_dir = traj_dir 132 | (traj_dir / "args.yaml").write_text("asdf") 133 | 134 | def on_instance_start(self, *, index: int, instance: Dict[str, Any]): 135 | instance_id = instance["instance_id"] 136 | dct = { 137 | "info": {"exit_status": "submitted"}, 138 | } 139 | (self._traj_dir / f"{instance_id}.traj").write_text(json.dumps(dct)) 140 | 141 | 142 | 143 | def test_existing_corrupted_args(test_script_args): 144 | main = Main(test_script_args) 145 | main.add_hook(CreateFakeLogFile()) 146 | main.main() 147 | 148 | 149 | 150 | def test_main_hook(test_script_args): 151 | main = Main(test_script_args) 152 | main.add_hook(MainHook()) 153 | main.main() 154 | 155 | 156 | def test_agent_with_hook(test_script_args): 157 | main = Main(test_script_args) 158 | main.agent.add_hook(AgentHook()) 159 | main.main() -------------------------------------------------------------------------------- /SWE-agent/inspector/fileViewer.js: -------------------------------------------------------------------------------- 1 | let currentFileName = null; // Store the current file name 2 | let trajectoryDirectory = ""; // Global variable to store the directory 3 | let timeoutIds = []; // Store timeout IDs for pending operations 4 | 5 | function getBaseUrl() { 6 | const protocol = 
window.location.protocol; 7 | const host = window.location.hostname; 8 | const port = window.location.port; 9 | 10 | // Use the default port if the port number is empty (for standard HTTP/HTTPS) 11 | const defaultPort = 12 | protocol === "http:" && !port 13 | ? "80" 14 | : protocol === "https:" && !port 15 | ? "443" 16 | : port; 17 | 18 | return `${protocol}//${host}:${defaultPort}`; 19 | } 20 | 21 | function fetchFiles() { 22 | const baseUrl = getBaseUrl(); 23 | fetch(`${baseUrl}/files`) 24 | .then((response) => response.json()) 25 | .then((files) => { 26 | const fileList = document.getElementById("fileList"); 27 | fileList.innerHTML = ""; 28 | files.forEach((file) => { 29 | const fileElement = document.createElement("li"); 30 | fileElement.textContent = file; 31 | fileElement.onclick = () => viewFile(file.split(" ")[0]); 32 | fileList.appendChild(fileElement); 33 | }); 34 | }); 35 | } 36 | 37 | function viewFile(fileName) { 38 | // Clear any pending message loading from previous files 39 | timeoutIds.forEach((timeoutId) => clearTimeout(timeoutId)); 40 | timeoutIds = []; // Reset the list of timeout IDs 41 | 42 | const baseUrl = getBaseUrl(); 43 | fetch(`${baseUrl}/trajectory/${fileName}`) 44 | .then((response) => { 45 | if (!response.ok) { 46 | throw new Error("Network response was not ok"); 47 | } 48 | return response.json(); 49 | }) 50 | .then((content) => { 51 | const container = document.getElementById("fileContent"); 52 | container.innerHTML = ""; // Clear existing content 53 | 54 | if (content.history && Array.isArray(content.history)) { 55 | let delay = 200; // Initial delay 56 | const delayIncrement = 50; // Delay between each message, in milliseconds 57 | 58 | content.history.forEach((item, index) => { 59 | const timeoutId = setTimeout(() => { 60 | const contentText = item.content 61 | ? item.content.replace(//g, ">") 62 | : ""; 63 | let roleClass = 64 | item.agent && item.agent !== "primary" 65 | ? "subroutine" 66 | : item.role 67 | ? 
item.role.toLowerCase().replaceAll(" ", "-") 68 | : "default"; 69 | const elementId = "historyItem" + index; 70 | const historyItem = document.createElement("div"); 71 | historyItem.className = `history-item ${roleClass} fade-in`; 72 | historyItem.id = elementId; 73 | if (contentText.includes("--- DEMONSTRATION ---")) { 74 | item.role = "demo"; 75 | } else if ("is_demo" in item && item.is_demo === true) { 76 | item.role += "[demo]"; 77 | } 78 | historyItem.innerHTML = ` 79 |
80 | 81 | ${item.role} 82 | 83 |
84 |
85 |
${contentText}
86 |
87 |
88 | `; 89 | container.appendChild(historyItem); 90 | }, delay); 91 | 92 | delay += delayIncrement; // Increment delay for the next message 93 | timeoutIds.push(timeoutId); // Store the timeout ID 94 | }); 95 | } else { 96 | container.textContent = "No history content found."; 97 | } 98 | }) 99 | .catch((error) => { 100 | console.error("Error fetching file:", error); 101 | document.getElementById("fileContent").textContent = 102 | "Error loading content. " + error; 103 | }); 104 | 105 | // Highlight the selected file in the list 106 | document.querySelectorAll("#fileList li").forEach((li) => { 107 | li.classList.remove("selected"); 108 | if (li.textContent.split(" ")[0] === fileName) { 109 | li.classList.add("selected"); 110 | } 111 | }); 112 | } 113 | 114 | function refreshCurrentFile() { 115 | if (currentFileName) { 116 | const currentScrollPosition = 117 | document.documentElement.scrollTop || document.body.scrollTop; 118 | viewFile(currentFileName.split(" ")[0]); // Reload the current file 119 | // Restore the scroll position after the content is loaded 120 | setTimeout(() => { 121 | window.scrollTo(0, currentScrollPosition); 122 | }, 100); 123 | } 124 | } 125 | 126 | function fetchDirectoryInfo() { 127 | const baseUrl = getBaseUrl(); 128 | fetch(`${baseUrl}/directory_info`) 129 | .then((response) => response.json()) 130 | .then((data) => { 131 | if (data.directory) { 132 | trajectoryDirectory = data.directory; // Store the directory 133 | document.title = `Trajectory Viewer: ${data.directory}`; 134 | document.querySelector("h1").textContent = 135 | `Trajectory Viewer: ${data.directory}`; 136 | } 137 | }) 138 | .catch((error) => console.error("Error fetching directory info:", error)); 139 | } 140 | 141 | window.onload = function () { 142 | fetchFiles(); 143 | fetchDirectoryInfo(); 144 | }; 145 | -------------------------------------------------------------------------------- /SWE-agent/sweagent/api/hooks.py: 
-------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import sys 3 | import io 4 | 5 | from sweagent import PACKAGE_DIR 6 | from sweagent.agent.agents import AgentHook 7 | from flask_socketio import SocketIO 8 | 9 | from sweagent.api.utils import strip_ansi_sequences 10 | from sweagent.environment.swe_env import EnvHook 11 | 12 | # baaaaaaad 13 | sys.path.append(str(PACKAGE_DIR.parent)) 14 | from run import MainHook 15 | 16 | 17 | class StreamToSocketIO(io.StringIO): 18 | def __init__( 19 | self, 20 | wu: "WebUpdate", 21 | ): 22 | super().__init__() 23 | self._wu = wu 24 | 25 | def write(self, message): 26 | message = strip_ansi_sequences(message) 27 | self._wu.up_log(message) 28 | 29 | def flush(self): 30 | pass 31 | 32 | 33 | class WebUpdate: 34 | """This class talks to socketio. It's pretty much a wrapper around socketio.emit.""" 35 | 36 | def __init__(self, socketio: SocketIO): 37 | self._socketio = socketio 38 | self.log_stream = StreamToSocketIO(self) 39 | 40 | def _emit(self, event, data): 41 | """Directly wrap around socketio.emit""" 42 | self._socketio.emit(event, data) 43 | 44 | def up_log(self, message: str, level="info"): 45 | """Update the log""" 46 | self._emit("log_message", {"message": message, "level": level}) 47 | 48 | def up_agent( 49 | self, 50 | message: str, 51 | *, 52 | format: str = "markdown", 53 | thought_idx: Optional[int] = None, 54 | type_: str = "info", 55 | ): 56 | """Update the agent feed""" 57 | self._emit( 58 | "update", 59 | { 60 | "feed": "agent", 61 | "message": message, 62 | "format": format, 63 | "thought_idx": thought_idx, 64 | "type": type_, 65 | }, 66 | ) 67 | 68 | def up_env( 69 | self, 70 | message: str, 71 | *, 72 | type_: str, 73 | format: str = "markdown", 74 | thought_idx: Optional[int] = None, 75 | ): 76 | """Update the environment feed""" 77 | self._emit( 78 | "update", 79 | { 80 | "feed": "env", 81 | "message": message, 82 | "format": format, 83 | 
"thought_idx": thought_idx, 84 | "type": type_, 85 | }, 86 | ) 87 | 88 | def finish_run(self): 89 | """Finish the run. We use that to control which buttons are active.""" 90 | self._emit("finish_run", {}) 91 | 92 | 93 | class MainUpdateHook(MainHook): 94 | def __init__(self, wu: WebUpdate): 95 | """This hooks into the Main class to update the web interface""" 96 | self._wu = wu 97 | 98 | def on_start(self): 99 | self._wu.up_env( 100 | message="Environment container initialized", format="text", type_="info" 101 | ) 102 | 103 | def on_end(self): 104 | self._wu.up_agent(message="The run has ended", format="text") 105 | self._wu.finish_run() 106 | 107 | def on_instance_completed(self, *, info, trajectory): 108 | print(info.get("submission")) 109 | if info.get("submission") and info["exit_status"] == "submitted": 110 | msg = ( 111 | "The submission was successful. You can find the patch (diff) in the right panel. " 112 | "To apply it to your code, run `git apply /path/to/patch/file.patch`. " 113 | ) 114 | self._wu.up_agent(msg, type_="success") 115 | 116 | 117 | class AgentUpdateHook(AgentHook): 118 | def __init__(self, wu: WebUpdate): 119 | """This hooks into the Agent class to update the web interface""" 120 | self._wu = wu 121 | self._sub_action = None 122 | self._thought_idx = 0 123 | 124 | def on_actions_generated(self, *, thought: str, action: str, output: str): 125 | self._thought_idx += 1 126 | for prefix in ["DISCUSSION\n", "THOUGHT\n", "DISCUSSION", "THOUGHT"]: 127 | thought = thought.replace(prefix, "") 128 | self._wu.up_agent( 129 | message=thought, 130 | format="markdown", 131 | thought_idx=self._thought_idx, 132 | type_="thought", 133 | ) 134 | 135 | def on_sub_action_started(self, *, sub_action: dict): 136 | # msg = f"```bash\n{sub_action['action']}\n```" 137 | msg = "$ " + sub_action["action"].strip() 138 | self._sub_action = sub_action["action"].strip() 139 | self._wu.up_env(message=msg, thought_idx=self._thought_idx, type_="command") 140 | 141 | def 
on_sub_action_executed(self, *, obs: str, done: bool): 142 | type_ = "output" 143 | if self._sub_action == "submit": 144 | type_ = "diff" 145 | if obs is None: 146 | # This can happen for empty patch submissions 147 | obs = "" 148 | msg = obs.strip() 149 | self._wu.up_env(message=msg, thought_idx=self._thought_idx, type_=type_) 150 | 151 | 152 | class EnvUpdateHook(EnvHook): 153 | def __init__(self, wu: WebUpdate): 154 | """This hooks into the environment class to update the web interface""" 155 | self._wu = wu 156 | 157 | def on_close(self): 158 | self._wu.up_env(message="Environment closed", format="text", type_="info") 159 | 160 | # def on_query_message_added( 161 | # self, 162 | # *, 163 | # role: str, 164 | # content: str, 165 | # agent: str, 166 | # is_demo: bool = False, 167 | # thought: str = "", 168 | # action: str = "" 169 | # ): 170 | # if role == "assistant": 171 | # return 172 | # if thought or action: 173 | # return 174 | # if is_demo: 175 | # return self._wu.up_agent(title="Demo", message=content, thought_idx=self._thought_idx + 1) 176 | # self._wu.up_agent(title="Query", message=content, thought_idx=self._thought_idx + 1) 177 | -------------------------------------------------------------------------------- /SWE-agent/config/commands/search.sh: -------------------------------------------------------------------------------- 1 | # @yaml 2 | # signature: search_dir [] 3 | # docstring: searches for search_term in all files in dir. 
#   If dir is not provided, searches in the current directory
# arguments:
#   search_term:
#     type: string
#     description: the term to search for
#     required: true
#   dir:
#     type: string
#     description: the directory to search in (if not provided, searches in the current directory)
#     required: false
search_dir() {
    # Exactly one or two arguments are accepted.
    if [ $# -lt 1 ] || [ $# -gt 2 ]; then
        echo "Usage: search_dir <search_term> [<dir>]"
        return
    fi
    local search_term="$1"
    local dir="./"
    if [ $# -eq 2 ]; then
        if [ ! -d "$2" ]; then
            echo "Directory $2 not found"
            return
        fi
        dir="$2"
    fi
    dir=$(realpath "$dir")

    # Let grep count the matching lines of every non-hidden, non-binary file
    # (-c prints "path:count", -I treats binary files as non-matching). The
    # count is whatever follows the LAST colon, so paths that contain spaces
    # or colons survive intact. Result: one "count<TAB>path" line per file
    # with at least one match, sorted by path.
    local tab
    tab=$(printf '\t')
    local matches
    matches=$(find "$dir" -type f ! -path '*/.*' -exec grep -cIH -- "$search_term" {} + \
        | awk '{
            count = $0
            sub(/^.*:/, "", count)
            if (count + 0 > 0) {
                path = $0
                sub(/:[0-9]+$/, "", path)
                print count "\t" path
            }
        }' \
        | sort -t "$tab" -k2)
    # if no matches, return
    if [ -z "$matches" ]; then
        echo "No matches found for \"$search_term\" in $dir"
        return
    fi
    # Calculate total number of matches (sum of the per-file counts)
    local num_matches
    num_matches=$(printf '%s\n' "$matches" | awk '{ sum += $1 } END { print sum }')
    # calculate total number of files matched (wc may pad its output, so trim it)
    local num_files
    num_files=$(printf '%s\n' "$matches" | wc -l | tr -d '[:space:]')
    # if num_files is > 100, print an error
    if [ "$num_files" -gt 100 ]; then
        echo "More than $num_files files matched for \"$search_term\" in $dir. Please narrow your search."
        return
    fi

    echo "Found $num_matches matches for \"$search_term\" in $dir:"
    # Print "<path> (<count> matches)"; everything after the first tab is the path.
    printf '%s\n' "$matches" | awk '{
        count = $1
        sub(/^[0-9]+\t/, "")
        print $0 " (" count " matches)"
    }'
    echo "End of matches for \"$search_term\" in $dir"
}

# @yaml
# signature: search_file <search_term> [<file>]
# docstring: searches for search_term in file.
#   If file is not provided, searches in the current open file
# arguments:
#   search_term:
#     type: string
#     description: the term to search for
#     required: true
#   file:
#     type: string
#     description: the file to search in (if not provided, searches in the current open file)
#     required: false
search_file() {
    # Check if the first argument is provided
    if [ -z "$1" ]; then
        echo "Usage: search_file <search_term> [<file>]"
        return
    fi
    local search_term="$1"
    local file
    # Check if the second argument is provided
    if [ -n "$2" ]; then
        # Check if the provided argument is a valid file
        if [ ! -f "$2" ]; then
            echo "Usage: search_file <search_term> [<file>]"
            echo "Error: File name $2 not found. Please provide a valid file name."
            return # Exit if the file is not valid
        fi
        file="$2"
    else
        # Fall back to the file that is currently open
        if [ -z "$CURRENT_FILE" ]; then
            echo "No file open. Use the open command first."
            return # Exit if no file is open
        fi
        file="$CURRENT_FILE"
    fi
    file=$(realpath "$file")

    # One "N:content" line per matching line, in ascending line order. The
    # file name is deliberately not printed (-H is not used): counting on the
    # file-name column always yields 1, which disabled the limit check below,
    # and splitting on ':' breaks for paths that contain a colon.
    local matches
    matches=$(grep -n -- "$search_term" "$file")
    # Check if no matches were found
    if [ -z "$matches" ]; then
        echo "No matches found for \"$search_term\" in $file"
        return
    fi
    # Number of matching lines (wc may pad its output, so trim it)
    local num_matches
    num_matches=$(printf '%s\n' "$matches" | wc -l | tr -d '[:space:]')
    # if more than 100 lines matched, print an error
    if [ "$num_matches" -gt 100 ]; then
        echo "More than $num_matches lines matched for \"$search_term\" in $file. Please narrow your search."
        return
    fi

    # Print the total number of matches and the matches themselves
    echo "Found $num_matches matches for \"$search_term\" in $file:"
    # grep already emitted the line content, so only the prefix is rewritten
    # instead of re-reading the file once per match.
    printf '%s\n' "$matches" | sed 's/^\([0-9][0-9]*\):/Line \1:/'
    echo "End of matches for \"$search_term\" in $file"
}

# @yaml
# signature: find_file <file_name> [<dir>]
# docstring: finds all files with the given name in dir. If dir is not provided, searches in the current directory
# arguments:
#   file_name:
#     type: string
#     description: the name of the file to search for
#     required: true
#   dir:
#     type: string
#     description: the directory to search in (if not provided, searches in the current directory)
#     required: false
find_file() {
    # Exactly one or two arguments are accepted.
    if [ $# -lt 1 ] || [ $# -gt 2 ]; then
        echo "Usage: find_file <file_name> [<dir>]"
        return
    fi
    local file_name="$1"
    local dir="./"
    if [ $# -eq 2 ]; then
        if [ ! -d "$2" ]; then
            echo "Directory $2 not found"
            return
        fi
        dir="$2"
    fi

    dir=$(realpath "$dir")
    local matches
    matches=$(find "$dir" -type f -name "$file_name")
    # if no matches, return
    if [ -z "$matches" ]; then
        echo "No matches found for \"$file_name\" in $dir"
        return
    fi
    # Calculate total number of matches (wc may pad its output, so trim it)
    local num_matches
    num_matches=$(printf '%s\n' "$matches" | wc -l | tr -d '[:space:]')
    echo "Found $num_matches matches for \"$file_name\" in $dir:"
    printf '%s\n' "$matches"
}
SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface. 3 | 4 | The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. 5 | In addition to typical bash commands, you can also use the following commands to help you navigate and edit files. 6 | 7 | COMMANDS: 8 | {command_docs} 9 | 10 | Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. 11 | If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. 12 | 13 | RESPONSE FORMAT: 14 | Your shell prompt is formatted as follows: 15 | (Open file: <path>) <cwd> $ 16 | 17 | You need to format your output using two fields; discussion and command. 18 | Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example: 19 | DISCUSSION 20 | First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. 21 | <command> 22 | ls -a 23 | </command> 24 | 25 | You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. 26 | If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. 27 | You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. 28 | However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. 29 | instance_template: |- 30 | We're currently solving the following issue within our repository. 
Here's the issue text: 31 | ISSUE: 32 | {issue} 33 | 34 | INSTRUCTIONS: 35 | Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want. 36 | Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. 37 | When you're satisfied with all of the changes you've made, you can submit your changes to the code base by simply running the submit command. 38 | Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`. 39 | 40 | NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! 41 | 42 | IMPORTANT TIPS: 43 | 1. Always start by trying to replicate the bug that the issue discusses. 44 | If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. 45 | Then start trying to fix it. 46 | When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed. 47 | 48 | If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file, 49 | so that you can be sure that the script indeed ran fine all the way through. 50 | 51 | 2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! 52 | 53 | 3. 
If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. 54 | 55 | 4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command. 56 | 57 | 5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. 58 | 59 | 6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. 60 | 61 | 62 | (Open file: {open_file}) 63 | (Current directory: {working_dir}) 64 | bash-$ 65 | next_step_template: |- 66 | {observation} 67 | (Open file: {open_file}) 68 | (Current directory: {working_dir}) 69 | bash-$ 70 | next_step_no_output_template: |- 71 | Your command ran successfully and did not produce any output. 
72 | (Open file: {open_file}) 73 | (Current directory: {working_dir}) 74 | bash-$ 75 | put_demos_in_history: true 76 | state_command: 77 | name: state 78 | code: | 79 | state() { 80 | local working_dir="$PWD"; 81 | if [ -z $CURRENT_FILE ]; then 82 | echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}'; 83 | else 84 | echo '{"open_file": "'$(realpath $CURRENT_FILE)'", "working_dir": "'$working_dir'"}'; 85 | fi 86 | }; 87 | parse_function: XMLThoughtActionParser 88 | env_variables: 89 | WINDOW: 100 90 | OVERLAP: 2 91 | CURRENT_LINE: 0 92 | CURRENT_FILE: '' 93 | SEARCH_RESULTS: () 94 | SEARCH_FILES: () 95 | SEARCH_INDEX: 0 96 | command_files: 97 | - config/commands/defaults.sh 98 | - config/commands/search.sh 99 | - config/commands/edit_linting.sh 100 | - config/commands/_split_string.py 101 | parse_command: ParseCommandDetailed 102 | history_processor: Last5Observations 103 | demonstrations: 104 | - trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj 105 | -------------------------------------------------------------------------------- /SWE-agent/config/configs/xml_sys-env_window100-detailed_cmd_format-full_history-1_demos.yaml: -------------------------------------------------------------------------------- 1 | system_template: |- 2 | SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface. 3 | 4 | The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time. 5 | In addition to typical bash commands, you can also use the following commands to help you navigate and edit files. 6 | 7 | COMMANDS: 8 | {command_docs} 9 | 10 | Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. 11 | If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! 
Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. 12 | 13 | RESPONSE FORMAT: 14 | Your shell prompt is formatted as follows: 15 | (Open file: <path>) <cwd> $ 16 | 17 | You need to format your output using two fields; discussion and command. 18 | Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example: 19 | DISCUSSION 20 | First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. 21 | <command> 22 | ls -a 23 | </command> 24 | 25 | You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. 26 | If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. 27 | You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above. 28 | However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. 29 | instance_template: |- 30 | We're currently solving the following issue within our repository. Here's the issue text: 31 | ISSUE: 32 | {issue} 33 | 34 | INSTRUCTIONS: 35 | Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want. 36 | Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. 
37 | When you're satisfied with all of the changes you've made, you can submit your changes to the code base by simply running the submit command. 38 | Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`. 39 | 40 | NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line! 41 | 42 | IMPORTANT TIPS: 43 | 1. Always start by trying to replicate the bug that the issue discusses. 44 | If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug. 45 | Then start trying to fix it. 46 | When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed. 47 | 48 | If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file, 49 | so that you can be sure that the script indeed ran fine all the way through. 50 | 51 | 2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it! 52 | 53 | 3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker. 54 | 55 | 4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. 
Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command. 56 | 57 | 5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file. 58 | 59 | 6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it. 60 | 61 | 62 | (Open file: {open_file}) 63 | (Current directory: {working_dir}) 64 | bash-$ 65 | next_step_template: |- 66 | {observation} 67 | (Open file: {open_file}) 68 | (Current directory: {working_dir}) 69 | bash-$ 70 | next_step_no_output_template: |- 71 | Your command ran successfully and did not produce any output. 
72 | (Open file: {open_file}) 73 | (Current directory: {working_dir}) 74 | bash-$ 75 | put_demos_in_history: true 76 | state_command: 77 | name: state 78 | code: | 79 | state() { 80 | local working_dir="$PWD"; 81 | if [ -z $CURRENT_FILE ]; then 82 | echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}'; 83 | else 84 | echo '{"open_file": "'$(realpath $CURRENT_FILE)'", "working_dir": "'$working_dir'"}'; 85 | fi 86 | }; 87 | parse_function: XMLThoughtActionParser 88 | env_variables: 89 | WINDOW: 100 90 | OVERLAP: 2 91 | CURRENT_LINE: 0 92 | CURRENT_FILE: '' 93 | SEARCH_RESULTS: () 94 | SEARCH_FILES: () 95 | SEARCH_INDEX: 0 96 | command_files: 97 | - config/commands/defaults.sh 98 | - config/commands/search.sh 99 | - config/commands/edit_linting.sh 100 | - config/commands/_split_string.py 101 | parse_command: ParseCommandDetailed 102 | history_processor: DefaultHistoryProcessor 103 | demonstrations: 104 | - trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj 105 | --------------------------------------------------------------------------------