46 | );
47 | };
48 |
49 | export default EnvFeed;
50 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/src/components/utils/icons/ExpandIcon.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | const ExpandIcon = ({ fillColor, height, style }) => (
4 |
28 | );
29 |
30 | // Set default props
31 | ExpandIcon.defaultProps = {
32 | fillColor: "#000000",
33 | height: "24px",
34 | style: {},
35 | };
36 |
37 | export default ExpandIcon;
38 |
--------------------------------------------------------------------------------
/repograph/graph_searcher.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 |
3 | class RepoSearcher:
4 | def __init__(self, graph):
5 | self.graph = graph
6 |
7 | def one_hop_neighbors(self, query):
8 | # get one-hop neighbors from networkx graph
9 | return list(self.graph.neighbors(query))
10 |
11 | def two_hop_neighbors(self, query):
12 | # get two-hop neighbors from networkx graph
13 | one_hop = self.one_hop_neighbors(query)
14 | two_hop = []
15 | for node in one_hop:
16 | two_hop.extend(self.one_hop_neighbors(node))
17 | return list(set(two_hop))
18 |
19 | def dfs(self, query, depth):
20 | # perform depth-first search on networkx graph
21 | visited = []
22 | stack = [(query, 0)]
23 | while stack:
24 | node, level = stack.pop()
25 | if node not in visited:
26 | visited.append(node)
27 | if level < depth:
28 | stack.extend(
29 | [(n, level + 1) for n in self.one_hop_neighbors(node)]
30 | )
31 | return visited
32 |
33 | def bfs(self, query, depth):
34 | # perform breadth-first search on networkx graph
35 | visited = []
36 | queue = [(query, 0)]
37 | while queue:
38 | node, level = queue.pop(0)
39 | if node not in visited:
40 | visited.append(node)
41 | if level < depth:
42 | queue.extend(
43 | [(n, level + 1) for n in self.one_hop_neighbors(node)]
44 | )
45 | return visited
--------------------------------------------------------------------------------
/agentless/util/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import defaultdict
3 |
4 | import pandas as pd
5 |
6 | def load_jsonl(filepath):
7 | """
8 | Load a JSONL file from the given filepath.
9 |
10 | Arguments:
11 | filepath -- the path to the JSONL file to load
12 |
13 | Returns:
14 | A list of dictionaries representing the data in each line of the JSONL file.
15 | """
16 | with open(filepath, "r") as file:
17 | return [json.loads(line) for line in file]
18 |
19 |
20 | def write_jsonl(data, filepath):
21 | """
22 | Write data to a JSONL file at the given filepath.
23 |
24 | Arguments:
25 | data -- a list of dictionaries to write to the JSONL file
26 | filepath -- the path to the JSONL file to write
27 | """
28 | with open(filepath, "w") as file:
29 | for entry in data:
30 | file.write(json.dumps(entry) + "\n")
31 |
32 |
33 | def load_json(filepath):
34 | return json.load(open(filepath, "r"))
35 |
36 |
37 | def combine_by_instance_id(data):
38 | """
39 | Combine data entries by their instance ID.
40 |
41 | Arguments:
42 | data -- a list of dictionaries with instance IDs and other information
43 |
44 | Returns:
45 | A list of combined dictionaries by instance ID with all associated data.
46 | """
47 | combined_data = defaultdict(lambda: defaultdict(list))
48 | for item in data:
49 | instance_id = item.get("instance_id")
50 | if not instance_id:
51 | continue
52 | for key, value in item.items():
53 | if key != "instance_id":
54 | combined_data[instance_id][key].extend(
55 | value if isinstance(value, list) else [value]
56 | )
57 | return [
58 | {**{"instance_id": iid}, **details} for iid, details in combined_data.items()
59 | ]
60 |
--------------------------------------------------------------------------------
/SWE-agent/make_demos/README.md:
--------------------------------------------------------------------------------
1 | # Make demos
2 | An important way to show LMs how to use commands and interact with the environment is to provide a demonstration - a completed trajectory that the LM can learn from.
3 |
4 | For simplicity, we ingest demonstrations in the form of a trajectory file. However, since trajectory files are usually JSON, you can convert them to yaml using the `convert_traj_to_demo.py` script to make them more human-readable and easier to edit.
5 |
6 | Demo (yaml) files are stored in the `make_demos/demos` directory by default and consist primarily of the sequence of actions that an LM would need to take to complete a task. It's important that your demo have the proper format to be parsed by SWE-agent and your config.
7 |
8 | Here's how you can make a demo:
9 | 1. Find a basic trajectory that you already like and want to use as the basis for your demo.
10 | - For instance, consider the `trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj` trajectory for reference.
11 | 2. Run `python convert_traj_to_demo.py <path to trajectory file>` to convert the trajectory to a demo.
12 | - This demo will be saved as a readable yaml file in the `make_demos/demos` directory.
13 | 3. Edit the demo by hand to make it work for your particular use case and configuration.
14 | 4. Run `python run_replay.py --traj_path <path to demo yaml> --config_file <path to config>` to execute the actions of the demo, have the system generate the execution output, and ensure that it works as expected (see the end-to-end example below).
15 | - Protip: you can use `sweagent.run` with `--model_name human` to manually execute actions in the environment - useful for debugging.
16 | 5. Inspect the resulting trajectory to ensure it was executed correctly.
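
Putting the steps together, a minimal end-to-end sketch looks like this. The trajectory path is the reference example above; the demo and config file names are placeholders, not files shipped with the repo:

```bash
# Step 2: convert an existing trajectory into an editable yaml demo
python convert_traj_to_demo.py trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__default_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj

# Step 3: edit the generated yaml in make_demos/demos/ by hand

# Steps 4-5: replay the edited demo with your config and inspect the result
# ("my_demo.demo.yaml" and "my_config.yaml" are placeholder names)
python run_replay.py --traj_path make_demos/demos/my_demo.demo.yaml --config_file config/my_config.yaml
```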
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/src/components/panels/LogPanel.js:
--------------------------------------------------------------------------------
1 | import MacBar from "../MacBar";
2 | import workspaceLogo from "../../assets/panel_icons/workspace.png";
3 | import "../../static/logPanel.css";
4 | import { Button } from "react-bootstrap";
5 | import { Clipboard } from "react-bootstrap-icons";
6 |
7 | const LogPanel = ({ logs, logsRef, isComputing }) => {
8 | const copyToClipboard = (text) => {
9 | // Create a temporary textarea element
10 | const textarea = document.createElement("textarea");
11 | textarea.value = text;
12 | document.body.appendChild(textarea);
13 |
14 | // Select and copy the text
15 | textarea.select();
16 | document.execCommand("copy");
17 |
18 | // Clean up
19 | document.body.removeChild(textarea);
20 | };
21 |
22 | const handleCopy = () => {
23 | const contentToCopy = document.getElementById("logContent").innerText;
24 | copyToClipboard(contentToCopy);
25 | };
26 |
27 | return (
28 |
29 |
30 |
31 |
32 |
{logs}
33 |
34 |
35 | {!isComputing && logs && (
36 |
39 |
46 |
47 | )}
48 |
49 |
50 | );
51 | };
52 |
53 | export default LogPanel;
54 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
12 |
13 |
17 |
18 |
27 | React App
28 |
29 |
30 |
31 |
32 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/SWE-agent/tests/test_models.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock, Mock, patch
2 | from sweagent.agent.models import OpenAIModel, ModelArguments, TogetherModel
3 | import pytest
4 |
5 |
6 | @pytest.fixture
7 | def openai_mock_client():
8 | model = Mock()
9 | response = Mock()
10 | choice = Mock()
11 | choice.message.content = "test"
12 | response.choices = [choice]
13 | response.usage.prompt_tokens = 10
14 | response.usage.completion_tokens = 10
15 | model.chat.completions.create = MagicMock(return_value=response)
16 |
17 | return model
18 |
19 | @pytest.fixture
20 | def mock_together_response():
21 | return {
22 | "choices": [{"text": "Hello"}],
23 | "usage": {"prompt_tokens": 10, "completion_tokens": 10},
24 | }
25 |
26 |
27 | TEST_HISTORY = [
28 | {
29 | "role": "system",
30 | "content": "Hello, how are you?"
31 | }
32 | ]
33 |
34 |
35 | def test_openai_model(openai_mock_client):
36 | for model_name in list(OpenAIModel.MODELS) + list(OpenAIModel.SHORTCUTS):
37 | TEST_MODEL_ARGUMENTS = ModelArguments(model_name)
38 | with patch("sweagent.agent.models.config.Config"), patch("sweagent.agent.models.OpenAI"):
39 | model = OpenAIModel(TEST_MODEL_ARGUMENTS, [])
40 | model.client = openai_mock_client
41 | model.query(TEST_HISTORY)
42 |
43 |
44 | @pytest.mark.parametrize("model_name", list(TogetherModel.MODELS) + list(TogetherModel.SHORTCUTS))
45 | def test_together_model(mock_together_response, model_name):
46 | with patch("sweagent.agent.models.config.Config"), \
47 | patch("sweagent.agent.models.together") as mock_together:
48 | mock_together.version = '1.1.0'
49 | mock_together.Complete.create.return_value = mock_together_response
50 |
51 | model_args = ModelArguments(model_name)
52 | model = TogetherModel(model_args, [])
53 | model.query(TEST_HISTORY)
54 |
--------------------------------------------------------------------------------
/SWE-agent/trajectories/README.md:
--------------------------------------------------------------------------------
1 | # Trajectories
2 |
3 | The `trajectories/` folder is the default location that experiment results (invocations of `run.py`) will be written to.
4 |
5 | At a high level, the experiments folder is organized in the following manner:
6 | ```
7 | trajectories
8 | ├── 👩💻
9 | │ ├── 🧪
10 | │ │ ├── all_preds.jsonl
11 | │ │ ├── args.yaml
12 | │ │ ├── *.html (Webpage Files)
13 | │ │ └── *.traj (Trajectories)
14 | │ └── 🧪
15 | │ ├── all_preds.jsonl
16 | │ ├── args.yaml
17 | │ ├── *.html (Webpage Files)
18 | │ └── *.traj (Trajectories)
19 | ├── 👨💻
20 | │ ├── 🧪
21 | │ │ └── ...
22 | │ └── 🧪
23 | │ └── ...
24 | ...
25 | ```
26 | Every experiment follows the pattern `trajectories/<user name>/<experiment name>/`. The `<user name>` is automatically inferred from your system, and the `<experiment name>` is inferred from the arguments of `run.py`.
27 |
28 | ## How an Experiment Folder is Generated
29 |
30 | Each call to `run.py` produces a single `trajectories/<user name>/<experiment name>/` folder containing the following assets:
31 | * `all_preds.jsonl`: A single file containing all of the predictions generated for the experiment (1 prediction per task instance), where each line is formatted as:
32 | ```
33 | {
34 | "instance_id": "",
35 | "model_patch": "<.patch file content string>",
36 | "model_name_or_path": "",
37 | }
38 | ```
39 | * `args.yaml`: A summary of the configurations for the experiment run.
40 | * `<instance id>.traj`: A `.json` formatted file containing the (thought, action, observation) turns generated by SWE-agent towards solving `<instance id>`.
41 | * `<instance id>.html`: A single-webpage render of the trajectory, which can be opened directly in the browser for easier viewing.
42 |
43 | > ⚠️ Notes
44 | > * Evaluation is not performed by `run.py`; it is a separate step.
45 | > * `all_preds.jsonl` can be passed directly to `evaluation/run_eval.sh` to run evaluation.
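
A small sketch of how the `all_preds.jsonl` file described above might be inspected programmatically (the experiment path is illustrative):

```python
import json

# Illustrative path; substitute your own user and experiment folder names.
preds_path = "trajectories/johndoe/my-experiment/all_preds.jsonl"

# Each line is a standalone JSON object with the keys shown above.
with open(preds_path) as f:
    preds = [json.loads(line) for line in f]

print(f"{len(preds)} predictions")
for pred in preds[:3]:
    print(pred["instance_id"], "->", len(pred["model_patch"]), "patch characters")
```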
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/src/static/run.css:
--------------------------------------------------------------------------------
1 | .App {
2 | text-align: center;
3 | }
4 |
5 | .App-logo {
6 | height: 40vmin;
7 | pointer-events: none;
8 | }
9 |
10 | @media (prefers-reduced-motion: no-preference) {
11 | .App-logo {
12 | animation: App-logo-spin infinite 20s linear;
13 | }
14 | }
15 |
16 | .App-header {
17 | background-color: #282c34;
18 | min-height: 100vh;
19 | display: flex;
20 | flex-direction: column;
21 | align-items: center;
22 | justify-content: center;
23 | font-size: calc(10px + 2vmin);
24 | color: white;
25 | }
26 |
27 | .App-link {
28 | color: #61dafb;
29 | }
30 |
31 | @keyframes App-logo-spin {
32 | from {
33 | transform: rotate(0deg);
34 | }
35 | to {
36 | transform: rotate(360deg);
37 | }
38 | }
39 |
40 | /* .message {
41 | border: 1px solid #ccc;
42 | padding: 10px;
43 | margin-bottom: 10px;
44 | border-radius: 5px;
45 | }
46 |
47 | #container {
48 | display: flex;
49 | justify-content: space-between;
50 | padding: 10px;
51 | }
52 |
53 | #agentfeed, #environmentfeed {
54 | flex: 1;
55 | margin: 10px;
56 | max-width: 50%;
57 | overflow-y: auto;
58 | height: 500px;
59 | border: 1px solid #ccc;
60 | padding: 10px;
61 | border-radius: 5px;
62 | } */
63 |
64 | #demo hr {
65 | border-top: 1px dotted #bbb;
66 | margin: 0.5em 0;
67 | width: 100%;
68 | }
69 |
70 | #demo .panels {
71 | display: grid;
72 | height: 65vh;
73 | grid-template-columns: 1fr 1fr 1fr 1fr 1fr;
74 | grid-template-rows: 1fr 2fr 1fr 1fr;
75 |
76 | .agentFeed,
77 | .envFeed,
78 | .logPanel {
79 | background-color: transparent;
80 | display: flex;
81 | flex-direction: column;
82 | height: 100%;
83 | margin: 0.2em;
84 | overflow: auto;
85 | }
86 |
87 | .envFeed {
88 | grid-column: 3 / 6;
89 | grid-row: 1 / 3;
90 | height: 95%;
91 | }
92 |
93 | .innerDiv {
94 | flex-grow: 1;
95 | }
96 |
97 | .scrollableDiv {
98 | height: 100%;
99 | overflow-y: hidden;
100 | background-color: white;
101 | border-radius: 0 0 0.5em 0.5em;
102 | }
103 |
104 | .scrollableDiv:hover {
105 | overflow: auto;
106 | }
107 |
108 | pre {
109 | white-space: pre-wrap;
110 | overflow-x: auto;
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # macOS files
2 | .DS_Store
3 | results/
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | pip-wheel-metadata/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 |
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 |
105 | # SageMath parsed files
106 | *.sage.py
107 |
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 |
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 |
121 | # Rope project settings
122 | .ropeproject
123 |
124 | # mkdocs documentation
125 | /site
126 |
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 |
132 | # Pyre type checker
133 | .pyre/
134 | *.xml
135 | *.gif
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/src/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RepoGraph: Enhancing AI Software Engineering with Repository-level Code Graph
2 |
3 | ## 📜 Overview
4 |
5 | We introduce RepoGraph, an effective plug-in, repository-level module that provides the desired context and substantially boosts LLMs' software engineering capability.
6 |
7 | ## 🆕 News
8 |
9 | We have released the first version of RepoGraph and its integration with [SWE-bench](https://www.swebench.com/) methods!
10 |
11 | ## 🤖 Code Setup
12 |
13 | ### Folders and files
14 |
15 | `repograph` contains the code for constructing the graph and retrieving related context from it.
16 |
17 | `agentless` and `SWE-agent` contain the versions of RepoGraph integrated with these two methods.
18 |
19 | Currently, this version may take a while to run on a single repository. We provide a cached version for all repos in SWE-bench; download it from [huggingface datasets](https://huggingface.co/datasets/MrZilinXiao/RepoGraph) or [Google Drive](https://drive.google.com/file/d/1-0d-OgGoOf3i54bWcf8H0egjQyTSZ8dG/view?usp=sharing) and put it under `repo_structures`.
20 |
21 | ### How to run?
22 |
23 | To generate the repograph for a given repository, simply run:
24 |
25 | ```bash
26 | python ./repograph/construct_graph.py
27 | ```
28 |
29 | This will produce two files: `tags_{instance_id}.jsonl` stores the line-level information, and `{instance_id}.pkl` is the graph constructed with networkx.
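
As a rough sketch of how the cached graph might be consumed (assuming the `.pkl` file is a pickled networkx graph, as expected by `repograph/graph_searcher.py`, and that `repograph` is importable as a package; the file name and search term below are illustrative):

```python
import pickle

from repograph.graph_searcher import RepoSearcher

# Illustrative cache file; use one of the {instance_id}.pkl files under repo_structures/.
with open("repo_structures/example_instance.pkl", "rb") as f:
    graph = pickle.load(f)

searcher = RepoSearcher(graph)
# Illustrative search term: any function or class name defined in the repository.
print(searcher.one_hop_neighbors("parse_patch"))   # direct def/ref neighbors
print(searcher.two_hop_neighbors("parse_patch"))   # neighbors of neighbors
```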
30 |
31 | ## Integration with models on SWE-bench
32 |
33 | ### Procedural framework
34 |
35 | For a procedural framework, RepoGraph can be integrated into every step of the pipeline. Refer to the `--repo_graph` hyperparameter to control which stages it is used in.
36 |
37 | To run RepoGraph with Agentless, use the command:
38 |
39 | ```bash
40 | bash run_repograph_agentless.sh
41 | ```
42 |
43 | ### Agent framework
44 |
45 | To integrate RepoGraph with an agent framework such as SWE-agent, we simply add an extra action to its initial action space. Specifically, you can look up `search_repo()` in the corresponding directory. The signature is defined as:
46 |
47 | ```python
48 | search_repo:
49 | docstring: searches the current repository for a specific function or class, and returns the def and ref relations for the search term.
50 | signature: search_repo
51 | arguments:
52 | - search_term (string) [required]: function or class to look for in the repository.
53 | ```
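
For instance, an agent turn that uses this action would simply issue the command with a search term (the term below is illustrative):

```bash
search_repo RepoSearcher
```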
54 |
55 | To run RepoGraph with SWE-agent, use the command:
56 |
57 | ```bash
58 | bash run_repograph_sweagent.sh
59 | ```
60 |
61 | We are working on a preprint with more details on RepoGraph and a more comprehensive, easier integration with existing models. Stay tuned!
62 |
--------------------------------------------------------------------------------
/agentless/get_repo_structure/get_patch_info.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import re
4 | from collections import defaultdict
5 |
6 |
7 | def parse_patch(patch):
8 | """
9 | Parse a git patch into a structured format.
10 |
11 | Parameters:
12 | patch (str): The git patch as a string.
13 |
14 | Returns:
15 | list: A list of dictionaries representing the file changes and hunks.
16 | """
17 | file_changes = []
18 | current_file = None
19 | current_hunk = None
20 | deleted_lines = 0
21 |
22 | patch_lines = patch.split("\n")
23 | for line in patch_lines:
24 | if line.startswith("diff --git"):
25 | # Reset for new files
26 | if current_file:
27 | file_changes.append(current_file)
28 | current_file = {"file": "", "hunks": []}
29 | elif line.startswith("--- a/"):
30 | pass
31 | elif line.startswith("+++ b/"):
32 | if current_file is not None:
33 | current_file["file"] = line[6:]
34 | elif line.startswith("@@ "):
35 | if current_file is not None:
36 | match = re.match(r"@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@", line)
37 | if match:
38 | current_hunk = {"start_line": int(match.group(2)), "changes": []}
39 | current_file["hunks"].append(current_hunk)
40 | deleted_lines = 0
41 | added_lines = 0
42 | elif line.startswith("+") or line.startswith("-"):
43 | if current_hunk is not None:
44 | change_type = "add" if line.startswith("+") else "delete"
45 | if change_type == "delete":
46 | deleted_lines += 1
47 | current_hunk["changes"].append(
48 | {
49 | "type": change_type,
50 | "content": line[1:].strip(),
51 | "line": current_hunk["start_line"] - added_lines,
52 | }
53 | )
54 | current_hunk["start_line"] += 1
55 | else:
56 | added_lines += 1
57 | current_hunk["changes"].append(
58 | {
59 | "type": change_type,
60 | "content": line[1:].strip(),
61 | "line": current_hunk["start_line"] - deleted_lines,
62 | }
63 | )
64 | current_hunk["start_line"] += 1
65 | else:
66 | if current_hunk is not None:
67 | current_hunk["start_line"] += 1
68 |
69 | if current_file:
70 | file_changes.append(current_file)
71 |
72 | return file_changes
73 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/src/components/EnvMessage.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | import "../static/message.css";
4 | import "../static/envMessage.css";
5 |
6 | import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
7 | import bash from "react-syntax-highlighter/dist/esm/languages/prism/bash";
8 | import { prism } from "react-syntax-highlighter/dist/esm/styles/prism";
9 |
10 | // SyntaxHighlighter.registerLanguage('bash', bash);
11 |
12 | function capitalizeFirstLetter(str) {
13 | return str[0].toUpperCase() + str.slice(1);
14 | }
15 |
16 | const EnvMessage = ({
17 | item,
18 | handleMouseEnter,
19 | handleMouseLeave,
20 | isHighlighted,
21 | feedRef,
22 | }) => {
23 | const stepClass = item.step !== null ? `step${item.step}` : "";
24 | const highlightClass = isHighlighted ? "highlight" : "";
25 | const messageTypeClass = "envMessage" + capitalizeFirstLetter(item.type);
26 |
27 | const paddingBottom = item.type === "command" ? "0" : "0.5em";
28 | const paddingTop = ["output", "diff"].includes(item.type) ? "0" : "0.5em";
29 |
30 | const customStyle = {
31 | margin: 0,
32 | padding: `${paddingTop} 0.5em ${paddingBottom} 0.5em`,
33 | overflowX: "hidden",
34 | overflowY: "hidden",
35 | lineHeight: "100%",
36 | backgroundColor: "transparent",
37 | fontSize: "93%",
38 | };
39 |
40 | const codeTagProps = {
41 | style: {
42 | boxShadow: "none",
43 | margin: "0",
44 | overflowY: "hidden",
45 | overflowX: "hidden",
46 | padding: "0",
47 | lineHeight: "inherit",
48 | fontSize: "93%",
49 | },
50 | };
51 |
52 | const typeToLanguage = {
53 | command: "bash",
54 | output: "markdown",
55 | diff: "diff",
56 | };
57 |
58 | if (item.format !== "text") {
59 | return (
60 |
86 | );
87 | }
88 | };
89 |
90 | export default EnvMessage;
91 |
--------------------------------------------------------------------------------
/agentless/util/parse_global_var.py:
--------------------------------------------------------------------------------
1 | # TODO: maybe merge this into the structure preprocessing.
2 | import libcst as cst
3 | import libcst.matchers as m
4 | from libcst.display import dump
5 |
6 |
7 | class GlobalVariableVisitor(cst.CSTVisitor):
8 | METADATA_DEPENDENCIES = (cst.metadata.PositionProvider,)
9 |
10 | def __init__(self):
11 | self.global_assigns = []
12 |
13 | def leave_Module(self, original_node: cst.Module) -> list:
14 | assigns = []
15 | for stmt in original_node.body:
16 | if m.matches(stmt, m.SimpleStatementLine()) and m.matches(
17 | stmt.body[0], m.Assign()
18 | ):
19 | start_pos = self.get_metadata(cst.metadata.PositionProvider, stmt).start
20 | end_pos = self.get_metadata(cst.metadata.PositionProvider, stmt).end
21 | assigns.append([stmt, start_pos, end_pos])
22 | self.global_assigns.extend(assigns)
23 |
24 |
25 | def parse_global_var_from_code(file_content: str) -> dict[str, dict]:
26 | """Parse global variables."""
27 | try:
28 | tree = cst.parse_module(file_content)
29 | except:
30 | return file_content
31 |
32 | wrapper = cst.metadata.MetadataWrapper(tree)
33 | visitor = GlobalVariableVisitor()
34 | wrapper.visit(visitor)
35 |
36 | global_assigns = {}
37 | for assign_stmt, start_pos, end_pos in visitor.global_assigns:
38 | for t in assign_stmt.body:
39 | try:
40 | targets = [t.targets[0].target.value]
41 | except:
42 | try:
43 | targets = t.targets[0].target.elements
44 | targets = [x.value.value for x in targets]
45 | except:
46 | targets = []
47 | for target_var in targets:
48 | global_assigns[target_var] = {
49 | "start_line": start_pos.line,
50 | "end_line": end_pos.line,
51 | }
52 | return global_assigns
53 |
54 |
55 | def test_parse_global_var_from_file():
56 | code = """
57 | \"\"\"
58 | this is a module
59 | ...
60 | \"\"\"
61 | const_var = {1,2,3}
62 | const_dict = {
63 | 'day': 'Monday',
64 | 'month': 'January',
65 | }
66 | a, b = 1, 2
67 | import os
68 |
69 | class fooClass:
70 | '''this is a class'''
71 |
72 | def __init__(self, x):
73 | '''initialization.'''
74 | self.x = x
75 |
76 | def print(self):
77 | print(self.x)
78 |
79 | def test():
80 | a = fooClass(3)
81 | a.print()
82 |
83 | """
84 | res = parse_global_var_from_code(code)
85 | assert res == {
86 | "const_var": {"start_line": 6, "end_line": 6},
87 | "const_dict": {"start_line": 7, "end_line": 10},
88 | "a": {"start_line": 11, "end_line": 11},
89 | "b": {"start_line": 11, "end_line": 11},
90 | }
91 |
92 |
93 | if __name__ == "__main__":
94 | test_parse_global_var_from_file()
95 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/src/static/font.css:
--------------------------------------------------------------------------------
1 | @font-face {
2 | font-family: "basic-sans";
3 | src:
4 | url("https://use.typekit.net/af/8883dd/00000000000000007735abe1/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i6&v=3")
5 | format("woff2"),
6 | url("https://use.typekit.net/af/8883dd/00000000000000007735abe1/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i6&v=3")
7 | format("woff"),
8 | url("https://use.typekit.net/af/8883dd/00000000000000007735abe1/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i6&v=3")
9 | format("opentype");
10 | font-display: auto;
11 | font-style: italic;
12 | font-weight: 600;
13 | font-stretch: normal;
14 | }
15 |
16 | @font-face {
17 | font-family: "basic-sans";
18 | src:
19 | url("https://use.typekit.net/af/c68f64/00000000000000007735abe6/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n6&v=3")
20 | format("woff2"),
21 | url("https://use.typekit.net/af/c68f64/00000000000000007735abe6/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n6&v=3")
22 | format("woff"),
23 | url("https://use.typekit.net/af/c68f64/00000000000000007735abe6/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n6&v=3")
24 | format("opentype");
25 | font-display: auto;
26 | font-style: normal;
27 | font-weight: 600;
28 | font-stretch: normal;
29 | }
30 |
31 | @font-face {
32 | font-family: "basic-sans";
33 | src:
34 | url("https://use.typekit.net/af/49252d/00000000000000007735abed/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i4&v=3")
35 | format("woff2"),
36 | url("https://use.typekit.net/af/49252d/00000000000000007735abed/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i4&v=3")
37 | format("woff"),
38 | url("https://use.typekit.net/af/49252d/00000000000000007735abed/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=i4&v=3")
39 | format("opentype");
40 | font-display: auto;
41 | font-style: italic;
42 | font-weight: 400;
43 | font-stretch: normal;
44 | }
45 |
46 | @font-face {
47 | font-family: "basic-sans";
48 | src:
49 | url("https://use.typekit.net/af/721f9c/00000000000000007735abf2/30/l?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n4&v=3")
50 | format("woff2"),
51 | url("https://use.typekit.net/af/721f9c/00000000000000007735abf2/30/d?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n4&v=3")
52 | format("woff"),
53 | url("https://use.typekit.net/af/721f9c/00000000000000007735abf2/30/a?primer=7cdcb44be4a7db8877ffa5c0007b8dd865b3bbc383831fe2ea177f62257a9191&fvd=n4&v=3")
54 | format("opentype");
55 | font-display: auto;
56 | font-style: normal;
57 | font-weight: 400;
58 | font-stretch: normal;
59 | }
60 |
--------------------------------------------------------------------------------
/agentless/util/compress_file.py:
--------------------------------------------------------------------------------
1 | import libcst as cst
2 | import libcst.matchers as m
3 |
4 |
5 | class CompressTransformer(cst.CSTTransformer):
6 | DESCRIPTION: str = "Replaces function body with ..."
7 | replacement_string = '"$$FUNC_BODY_REPLACEMENT_STRING$$"'
8 |
9 | def __init__(self, keep_constant=True):
10 | self.keep_constant = keep_constant
11 |
12 | def leave_Module(
13 | self, original_node: cst.Module, updated_node: cst.Module
14 | ) -> cst.Module:
15 | new_body = [
16 | stmt
17 | for stmt in updated_node.body
18 | if m.matches(stmt, m.ClassDef())
19 | or m.matches(stmt, m.FunctionDef())
20 | or (
21 | self.keep_constant
22 | and m.matches(stmt, m.SimpleStatementLine())
23 | and m.matches(stmt.body[0], m.Assign())
24 | )
25 | ]
26 | return updated_node.with_changes(body=new_body)
27 |
28 | def leave_ClassDef(
29 | self, original_node: cst.ClassDef, updated_node: cst.ClassDef
30 | ) -> cst.ClassDef:
31 | # Remove docstring in the class body
32 | new_body = [
33 | stmt
34 | for stmt in updated_node.body.body
35 | if not (
36 | m.matches(stmt, m.SimpleStatementLine())
37 | and m.matches(stmt.body[0], m.Expr())
38 | and m.matches(stmt.body[0].value, m.SimpleString())
39 | )
40 | ]
41 | return updated_node.with_changes(body=cst.IndentedBlock(body=new_body))
42 |
43 | def leave_FunctionDef(
44 | self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
45 | ) -> cst.CSTNode:
46 | new_expr = cst.Expr(value=cst.SimpleString(value=self.replacement_string))
47 | new_body = cst.IndentedBlock((new_expr,))
48 | # another way: replace with pass?
49 | return updated_node.with_changes(body=new_body)
50 |
51 |
52 | code = """
53 | \"\"\"
54 | this is a module
55 | ...
56 | \"\"\"
57 | const = {1,2,3}
58 | import os
59 |
60 | class fooClass:
61 | '''this is a class'''
62 |
63 | def __init__(self, x):
64 | '''initialization.'''
65 | self.x = x
66 |
67 | def print(self):
68 | print(self.x)
69 |
70 | def test():
71 | a = fooClass(3)
72 | a.print()
73 |
74 | """
75 |
76 |
77 | def get_skeleton(raw_code, keep_constant: bool = True):
78 | try:
79 | tree = cst.parse_module(raw_code)
80 | except:
81 | return raw_code
82 |
83 | transformer = CompressTransformer(keep_constant=keep_constant)
84 | modified_tree = tree.visit(transformer)
85 | code = modified_tree.code
86 | code = code.replace(CompressTransformer.replacement_string + "\n", "...\n")
87 | code = code.replace(CompressTransformer.replacement_string, "...\n")
88 | return code
89 |
90 |
91 | def test_compress():
92 | skeleton = get_skeleton(code, True)
93 | print(skeleton)
94 |
95 |
96 | if __name__ == "__main__":
97 | test_compress()
98 |
--------------------------------------------------------------------------------
/SWE-agent/docker/README.md:
--------------------------------------------------------------------------------
1 | # Docker
2 | To ensure reproducibility and sandboxed execution of SWE-agent actions across systems, we adopt practices established in [prior work](https://intercode-benchmark.github.io/) and use [🐋 Docker](https://www.docker.com/) containers to carry out SWE-agent inference.
3 |
4 | * The `swe.Dockerfile` file defines the customized image for the SWE-agent environment.
5 | * The `./setup.sh` script automatically builds this image.
6 | * When `run.py` is invoked, containers are automatically created from the built image.
7 | * There is no need to manually build a container from the image.
8 |
9 | Here, we explain what each line in `swe.Dockerfile` does:
10 |
11 | 1. **Base Image**: Start from the latest version of the Ubuntu image.
12 | ```bash
13 | FROM ubuntu:latest
14 | ```
15 | 2. **Build Argument**: Define a build argument `MINICONDA_URL` that will be used to specify the Miniconda installer URL during the build process.
16 | ```bash
17 | ARG MINICONDA_URL
18 | ```
19 | 3. **Install Third-Party Tools**: Update the package lists for the Ubuntu package manager and install several essential development tools. Clean up after the installation.
20 | ```bash
21 | RUN apt-get update && \
22 | apt-get install -y bash gcc git jq wget g++ make && \
23 | apt-get clean && \
24 | rm -rf /var/lib/apt/lists/*
25 | ```
26 | 4. **Initialize Git**: Configure global Git settings with a user email and name.
27 | ```bash
28 | RUN git config --global user.email "sweagent@pnlp.org"
29 | RUN git config --global user.name "sweagent"
30 | ```
31 | 5. **Environment Variables**: Set the `ROOT` environment variable and customize the shell prompt.
32 | ```bash
33 | ENV ROOT='/dev/'
34 | RUN prompt() { echo " > "; };
35 | ENV PS1="> "
36 | ```
37 | 6. **Create Assets for Inference**: Create two files that are used to track metadata during an episode.
38 | ```bash
39 | RUN touch /root/files_to_edit.txt
40 | RUN touch /root/test.patch
41 | ```
42 | 7. **Enhance `ls` Command**: Modify the `.bashrc` file to alias the `ls` command.
43 | ```bash
44 | RUN echo "alias ls='ls -F'" >> /root/.bashrc
45 | ```
46 | 8. **Install Miniconda**: Download and install Miniconda, then initialize conda with Bash support and add `conda-forge` to the channels list.
47 | ```bash
48 | ENV PATH="/root/miniconda3/bin:${PATH}"
49 | ARG PATH="/root/miniconda3/bin:${PATH}"
50 | RUN wget ${MINICONDA_URL} -O miniconda.sh \
51 | && mkdir /root/.conda \
52 | && bash miniconda.sh -b \
53 | && rm -f miniconda.sh
54 | RUN conda --version \
55 | && conda init bash \
56 | && conda config --append channels conda-forge
57 | ```
58 | 9. **Install Python Packages**: Copy the `requirements.txt` file into the image and install the specified Python packages.
59 | ```bash
60 | COPY docker/requirements.txt /root/requirements.txt
61 | RUN pip install -r /root/requirements.txt
62 | ```
63 | 10. **Set Working Directory**: Set the working directory to the root directory.
64 | ```bash
65 | WORKDIR /
66 | ```
67 | 11. **Default Command**: Set the default command to open a Bash shell when the container starts.
68 | ```bash
69 | CMD ["/bin/bash"]
70 | ```
--------------------------------------------------------------------------------
/SWE-agent/release_dockerhub.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This script builds the official docker images and pushes them to dockerhub
4 | # after checking with the user.
5 |
6 | # NOTE: To clear the buildx cache, run the following command:
7 | # docker buildx prune --all or more specifically docker buildx rm
8 |
9 | # bash strict mode
10 | set -euo pipefail
11 |
12 | # Check if exactly two arguments are supplied
13 | if [ "$#" -ne 2 ]; then
14 | echo "Usage: $0 " >&2
15 | exit 1
16 | fi
17 |
18 | USER=${1}
19 | VERSION_STR=${2}
20 |
21 | if [[ -z "$USER" ]]; then
22 | echo "User name cannot be empty" >&2
23 | exit 3
24 | fi
25 | if [[ "$USER" != "sweagent" ]]; then
26 | echo "Careful here! Even if the username isn't sweagent, swe-eval will still be built on top of the sweagent/swe-agent image." >&2
27 | read -p "Do you want to proceed? (yes) " response
28 | if [[ "${response}" != "yes" ]]; then
29 | echo "Exiting..." >&2
30 | exit 4
31 | fi
32 | fi
33 |
34 |
35 | # The argument should be in the form of x.x.x where each x can be one or more digits
36 | if [[ $VERSION_STR =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || [ "$VERSION_STR" = "latest" ]; then
37 | echo "Validated version number"
38 | else
39 | echo "Argument must be 'latest' or in the form x.x.x, where x is one or more numbers." >&2
40 | exit 2
41 | fi
42 |
43 |
44 | DOCKER_CONTEXT_NAME="sweagent-multiplatform"
45 | docker buildx use "$DOCKER_CONTEXT_NAME" || docker buildx create --use --name "$DOCKER_CONTEXT_NAME"
46 |
47 | on_error() {
48 | echo "====> ERROR!!! IMPORTANT: Make sure if you've already pushed something to dockerhub or pushed the tag to github!" >&2
49 | }
50 | trap on_error ERR
51 |
52 | echo "------------------------------------------"
53 | echo "Building swe-agent"
54 | echo "------------------------------------------"
55 | docker buildx build --platform=linux/amd64,linux/arm64 -t ${USER}/swe-agent:${VERSION_STR} -f docker/swe.Dockerfile --push .
56 | echo "🔥 swe-agent pushed to dockerhub"
57 | echo "------------------------------------------"
58 | echo "Building swe-eval"
59 | echo "------------------------------------------"
60 | docker buildx build --platform=linux/amd64,linux/arm64 -t ${USER}/swe-eval:${VERSION_STR} -f docker/eval.Dockerfile --push .
61 | echo "🔥 swe-eval pushed to dockerhub"
62 | echo "------------------------------------------"
63 | echo "Building swe-agent-run"
64 | echo "------------------------------------------"
65 | docker buildx build --platform=linux/amd64,linux/arm64 -t ${USER}/swe-agent-run:${VERSION_STR} --push .
66 | echo "🔥 swe-agent-run pushed to dockerhub"
67 | echo "------------------------------------------"
68 | echo "Building of all images done"
69 | echo "------------------------------------------"
70 |
71 |
72 | if [ "$VERSION_STR" != "latest" ]; then
73 | git tag v${VERSION_STR} || {
74 | echo "Failed to create a tag in git" >&2
75 | exit 5
76 | }
77 | echo "🔥 Tag v${VERSION_STR} created in git (local)!"
78 |
79 | git push origin v${VERSION_STR} || {
80 | echo "Failed to push the tag to github" >&2
81 | exit 6
82 | }
83 | echo "🔥 Tag v${VERSION_STR} pushed to github"
84 | fi
--------------------------------------------------------------------------------
/SWE-agent/tests/test_parsing.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from sweagent.agent.commands import Command
3 | from sweagent.agent.parsing import (
4 | FormatError, ParseFunction, ActionParser, ThoughtActionParser,
5 | XMLThoughtActionParser, EditFormat, Identity, JsonParser
6 | )
7 |
8 |
9 | def test_parse_function_registry():
10 | assert isinstance(ParseFunction.get("ActionParser"), ActionParser)
11 | assert isinstance(ParseFunction.get("ThoughtActionParser"), ThoughtActionParser)
12 | assert isinstance(ParseFunction.get("XMLThoughtActionParser"), XMLThoughtActionParser)
13 | assert isinstance(ParseFunction.get("EditFormat"), EditFormat)
14 | assert isinstance(ParseFunction.get("Identity"), Identity)
15 | assert isinstance(ParseFunction.get("JsonParser"), JsonParser)
16 | with pytest.raises(ValueError):
17 | ParseFunction.get("InvalidParser")
18 |
19 |
20 | def test_action_parser():
21 | parser = ActionParser()
22 | command = Command(code='ls', name='ls')
23 | thought, action = parser("ls -l", [command])
24 | assert thought == "ls -l"
25 | assert action == "ls -l"
26 | with pytest.raises(FormatError):
27 | parser("invalid command", [command])
28 |
29 |
30 | def test_thought_action_parser():
31 | parser = ThoughtActionParser()
32 | model_response = "Let's look at the files in the current directory.\n```\nls -l\n```"
33 | thought, action = parser(model_response, [])
34 | assert thought == "Let's look at the files in the current directory.\n"
35 | assert action == "ls -l\n"
36 | with pytest.raises(FormatError):
37 | parser("No code block", [])
38 |
39 |
40 | def test_xml_thought_action_parser():
41 | parser = XMLThoughtActionParser()
42 | model_response = "Let's look at the files in the current directory.\n\nls -l\n"
43 | thought, action = parser(model_response, [])
44 | assert thought == "Let's look at the files in the current directory."
45 | assert action == "ls -l"
46 | with pytest.raises(FormatError):
47 | parser("No command tags", [])
48 |
49 |
50 | def test_edit_format_parser():
51 | parser = EditFormat()
52 | model_response = "Let's replace the contents.\n```\nimport os\nos.listdir()\n```"
53 | thought, action = parser(model_response, [])
54 | assert thought == "Let's replace the contents.\n"
55 | assert action == "import os\nos.listdir()\n"
56 | with pytest.raises(FormatError):
57 | parser("No code block", [])
58 |
59 |
60 | def test_identity_parser():
61 | parser = Identity()
62 | model_response = "Return as is"
63 | thought, action = parser(model_response, [])
64 | assert thought == model_response
65 | assert action == model_response
66 |
67 |
68 | def test_json_parser():
69 | parser = JsonParser()
70 | model_response = '{"thought": "List files", "command": {"name": "ls", "arguments": {"path": "."}}}'
71 | thought, action = parser(model_response, [])
72 | assert thought == "List files"
73 | assert action == "ls ."
74 |
75 | invalid_json = "Not a JSON"
76 | with pytest.raises(FormatError):
77 | parser(invalid_json, [])
78 |
79 | missing_keys = '{"thought": "Missing command key"}'
80 | with pytest.raises(FormatError):
81 | parser(missing_keys, [])
82 |
--------------------------------------------------------------------------------
/SWE-agent/config/commands/README.md:
--------------------------------------------------------------------------------
1 | # Command Configuration
2 | In this document, we describe how to implement your own commands for the SWE-agent ACI.
3 | To see examples of command implementations, open the `.sh` and `.py` files in this folder.
4 |
5 | ## Scaffolding
6 | Every command subscribes to the following skeleton code.
7 | ```shell
8 | # @yaml
9 | # signature: [command] [argument(s)]
10 | # docstring: [Brief description of what your command does.]
11 | # arguments:
12 | # [argument 1 name]:
13 | # type: [type (i.e. integer, string)]
14 | # description: [Brief description of this argument]
15 | # required: [true|false]
16 | # [argument 2 name]:
17 | # ...
18 | [command]() {
19 | # Implementation here
20 | }
21 | ```
22 | * If a command takes in arguments, reference them via positional parameters notation (i.e. `$1`).
23 | * If there are no arguments, omit the `arguments` section.
24 | * The implementation for your command is unconstrained. There are no limitations on the form of the underlying command code.
25 | * The minimal documentation requirements are `signature` and `docstring`.
26 | * If you'd like multiple commands to make modifications to a similar body of functions, we recommend using global variables.
27 | * For instance, in `config/commands/default.sh`, you'll see we define the `CURRENT_LINE` variable for the file viewer. This variable is modified across multiple commands, including `open`, `goto`, `scroll_up`, `scroll_down`, and `edit`.
28 | * You can also leverage third party libraries (check out how we do linting enabled `edit` in `config/commands/edit_linting.sh`).
29 | * To show effects of the command, print to standard output (i.e. `echo`). SWE-agent is implemented such that it does not look for a return value from these commands. A complete example command is shown below.
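
For example, a hypothetical command that follows this scaffolding (not one shipped with SWE-agent) might look like:

```shell
# @yaml
# signature: count_lines <file>
# docstring: Prints the number of lines in the given file.
# arguments:
#   file:
#     type: string
#     description: path of the file to count lines in
#     required: true
count_lines() {
    if [ -z "$1" ]; then
        echo "Usage: count_lines <file>"
        return
    fi
    echo "$1 has $(wc -l < "$1") lines"
}
```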
30 |
31 | ## Displaying the Command to SWE-agent
32 | After you define a command, there are a small set of additional steps to making it available for the agent to use.
33 |
34 | First, within your config file...
35 | * Add the `config/commands/<file name>.sh` file to the `command_files` field.
36 | * Set the `parse_command` field to `ParseCommandBash` or `ParseCommandDetailed`. This key points to the functionality that determines how command documentation is shown to the agent.
37 | * Decide which template(s) you want to show the `{command_docs}` in.
38 | * We strongly recommend including `{command_docs}` in the `system_template`, which is the first message shown to the agent for every task instance episode.
39 | * You might also consider adding `{command_docs}` to the `format_error_template`, which is shown if the response provided by a model is malformed.
40 | * (Optional) Including a demonstration that uses a command is helpful to showcase proper use + increases the frequency with which the agent uses the command. If you'd like to add a demonstration...
41 |    * Create a demonstration manually (i.e. `python run.py --model human_thought ...`) or automatically (i.e. `python run_replay.py --traj_path ...`)
42 | * Add/Update the demonstration to the `demonstrations` argument.
43 | * Update `demonstration_template` to control how the demonstration is displayed to the agent.
44 |
45 | > If you're not familiar with how SWE-agent configuration files work, we recommend checking out the [`config` README](https://github.com/princeton-nlp/SWE-agent/tree/main/config).
46 |
47 | Next, run your configuration and see how your agent uses the commands!
48 | ```bash
49 | python run.py --config_file config/[your config].yaml ...
50 | ```
51 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/frontend/README.md:
--------------------------------------------------------------------------------
1 | # Getting Started with Create React App
2 |
3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
4 |
5 | ## Available Scripts
6 |
7 | In the project directory, you can run:
8 |
9 | ### `npm start`
10 |
11 | Runs the app in the development mode.\
12 | Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
13 |
14 | The page will reload when you make changes.\
15 | You may also see any lint errors in the console.
16 |
17 | ### `npm test`
18 |
19 | Launches the test runner in the interactive watch mode.\
20 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
21 |
22 | ### `npm run build`
23 |
24 | Builds the app for production to the `build` folder.\
25 | It correctly bundles React in production mode and optimizes the build for the best performance.
26 |
27 | The build is minified and the filenames include the hashes.\
28 | Your app is ready to be deployed!
29 |
30 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
31 |
32 | ### `npm run eject`
33 |
34 | **Note: this is a one-way operation. Once you `eject`, you can't go back!**
35 |
36 | If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
37 |
38 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
39 |
40 | You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
41 |
42 | ## Learn More
43 |
44 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
45 |
46 | To learn React, check out the [React documentation](https://reactjs.org/).
47 |
48 | ### Code Splitting
49 |
50 | This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
51 |
52 | ### Analyzing the Bundle Size
53 |
54 | This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
55 |
56 | ### Making a Progressive Web App
57 |
58 | This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
59 |
60 | ### Advanced Configuration
61 |
62 | This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
63 |
64 | ### Deployment
65 |
66 | This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
67 |
68 | ### `npm run build` fails to minify
69 |
70 | This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
71 |
--------------------------------------------------------------------------------
/SWE-agent/.dockerignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Mac files
132 | *.DS_Store
133 |
134 | # Custom
135 | keys.cfg
136 |
137 | # iPython Notebooks
138 | *.ipynb
139 |
140 | # Evaluation folders
141 | results/
142 | testbed/
143 | temp/
144 |
145 | # Ignore all YAML files in data/
146 | data/*/ic-*
147 | data/*/single-issues
148 |
149 | # Fine tuning data
150 | fine_tune/*.ipynb
151 | fine_tune/subtasks/*.jsonl
152 | temp*.jsonl
153 |
154 | # Inspector
155 | inspector/*.json
156 |
157 | # Ignore all files in the private folder
158 | private/
159 |
160 | ### Website
161 |
162 | # dependencies
163 | website/frontend/node_modules
164 | website/frontend/package-lock.json
165 | website/frontend/.pnp
166 | *.pnp.js
167 |
168 | # testing
169 | website/frontend/coverage
170 |
171 | # production
172 | website/frontend/build
173 |
174 | # misc
175 | *.env.local
176 | *.env.development.local
177 | *.env.test.local
178 | *.env.production.local
179 | .api_key
180 | *npm-debug.log*
181 | *yarn-debug.log*
182 | *yarn-error.log*
183 |
184 |
185 | # demo yamls (for editing)
186 | *.demo.yaml
187 |
188 | # trajectory files
189 | trajectories/**
190 | !trajectories/demonstrations/**
191 |
192 | .vscode/**
193 |
194 | # PyCharm
195 | .idea/
196 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/api/utils.py:
--------------------------------------------------------------------------------
1 | import ctypes
2 | import inspect
3 | import re
4 | import threading
5 |
6 |
7 | def _async_raise(tid, exctype):
8 | """Raises an exception in the threads with id tid
9 |
10 | This code is modified from the following SO answer:
11 | Author: Philippe F
12 | Posted: Nov 28, 2008
13 | URL: https://stackoverflow.com/a/325528/
14 | """
15 | if not inspect.isclass(exctype):
16 | raise TypeError("Only types can be raised (not instances)")
17 | res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
18 | ctypes.c_long(tid), ctypes.py_object(exctype)
19 | )
20 | if res == 0:
21 | raise ValueError("invalid thread id")
22 | elif res != 1:
23 | # "if it returns a number greater than one, you're in trouble,
24 | # and you should call it again with exc=NULL to revert the effect"
25 | ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(tid), None)
26 | raise SystemError("PyThreadState_SetAsyncExc failed")
27 |
28 |
29 | class ThreadWithExc(threading.Thread):
30 | """A thread class that supports raising an exception in the thread from
31 | another thread.
32 |
33 | This code is modified from the following SO answer:
34 | Author: Philippe F
35 | Posted: Nov 28, 2008
36 | URL: https://stackoverflow.com/a/325528/
37 | """
38 |
39 | def _get_my_tid(self):
40 | """determines this (self's) thread id
41 |
42 | CAREFUL: this function is executed in the context of the caller
43 | thread, to get the identity of the thread represented by this
44 | instance.
45 | """
46 | if not self.is_alive():
47 | raise threading.ThreadError("the thread is not active")
48 |
49 | # do we have it cached?
50 | if hasattr(self, "_thread_id"):
51 | return self._thread_id
52 |
53 | # no, look for it in the _active dict
54 | for tid, tobj in threading._active.items():
55 | if tobj is self:
56 | self._thread_id = tid
57 | return tid
58 |
59 | raise RuntimeError("could not determine the thread's id")
60 |
61 | def raise_exc(self, exctype):
62 | """Raises the given exception type in the context of this thread.
63 |
64 | If the thread is busy in a system call (time.sleep(),
65 | socket.accept(), ...), the exception is simply ignored.
66 |
67 | If you are sure that your exception should terminate the thread,
68 | one way to ensure that it works is:
69 |
70 | t = ThreadWithExc( ... )
71 | ...
72 | t.raise_exc( SomeException )
73 | while t.isAlive():
74 | time.sleep( 0.1 )
75 | t.raise_exc( SomeException )
76 |
77 | If the exception is to be caught by the thread, you need a way to
78 | check that your thread has caught it.
79 |
80 | CAREFUL: this function is executed in the context of the
81 | caller thread, to raise an exception in the context of the
82 | thread represented by this instance.
83 | """
84 | _async_raise(self._get_my_tid(), exctype)
85 |
86 |
87 | # From Martijn Pieters at https://stackoverflow.com/a/14693789
88 | # 7-bit C1 ANSI sequences
89 | _ANSI_ESCAPE = re.compile(
90 | r"""
91 | \x1B # ESC
92 | (?: # 7-bit C1 Fe (except CSI)
93 | [@-Z\\-_]
94 | | # or [ for CSI, followed by a control sequence
95 | \[
96 | [0-?]* # Parameter bytes
97 | [ -/]* # Intermediate bytes
98 | [@-~] # Final byte
99 | )
100 | """,
101 | re.VERBOSE,
102 | )
103 |
104 |
105 | def strip_ansi_sequences(string: str) -> str:
106 | return _ANSI_ESCAPE.sub("", string)
107 |
--------------------------------------------------------------------------------
/SWE-agent/tests/test_env.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import os
3 | from pathlib import Path
4 | import subprocess
5 | import pytest
6 | import yaml
7 | from sweagent.environment.swe_env import EnvHook, EnvironmentArguments, SWEEnv
8 | from contextlib import contextmanager
9 | import docker
10 |
11 |
12 | @pytest.fixture(scope="module")
 13 | def test_env_args(tmpdir_factory):
14 | """This will use a persistent container"""
15 | local_repo_path = tmpdir_factory.getbasetemp() / "swe-agent-test-repo"
16 | clone_cmd = ["git", "clone", "https://github.com/klieret/swe-agent-test-repo", local_repo_path]
17 | subprocess.run(clone_cmd, check=True)
18 | data_path = local_repo_path / "problem_statements" / "1.md"
19 | test_env_args = EnvironmentArguments(
20 | data_path=str(data_path),
21 | repo_path=str(local_repo_path),
22 | image_name="sweagent/swe-agent:latest",
23 | container_name="test-container-134245890345098",
24 | )
25 | yield test_env_args
 26 |     # Cleanup (runs after the module's tests have finished)
27 | client = docker.from_env()
28 | container = client.containers.get(test_env_args.container_name)
29 | container.remove(force=True)
30 |
31 |
32 | @contextmanager
33 | def swe_env_context(env_args):
34 | """Context manager to make sure we close the shell on the container
35 | so that we can reuse it.
36 | """
37 |
38 | env = SWEEnv(env_args)
39 | try:
40 | yield env
41 | finally:
42 | env.close()
43 |
44 |
45 | @pytest.mark.slow
46 | def test_init_swe_env(test_env_args):
47 | with swe_env_context(test_env_args) as env:
48 | env.reset()
49 |
50 |
51 | @pytest.mark.slow
52 | def test_init_swe_env_non_persistent(test_env_args):
53 | test_env_args = dataclasses.replace(test_env_args, container_name=None)
54 | with swe_env_context(test_env_args) as env:
55 | env.reset()
56 |
57 |
58 | @pytest.mark.slow
59 | def test_execute_setup_script(tmp_path, test_env_args):
60 | test_script = "echo 'hello world'"
61 | script_path = Path(tmp_path / "test_script.sh")
62 | script_path.write_text(test_script)
63 | test_env_args = dataclasses.replace(test_env_args, environment_setup=script_path)
64 | with swe_env_context(test_env_args) as env:
65 | env.reset()
66 |
67 |
68 | @pytest.mark.slow
69 | def test_execute_environment(tmp_path, test_env_args):
70 | test_env = {
71 | "python": "3.6",
72 | "packages": "pytest",
73 | "pip_packages": ["tox"],
74 | "install": "echo 'installing'",
75 | }
76 | env_config_path = Path(tmp_path / "env_config.yml")
77 | env_config_path.write_text(yaml.dump(test_env))
78 | test_env_args = dataclasses.replace(test_env_args, environment_setup=env_config_path)
79 | with swe_env_context(test_env_args) as env:
80 | env.reset()
81 |
82 |
83 | @pytest.mark.slow
84 | def test_open_pr(test_env_args):
85 | test_env_args = dataclasses.replace(test_env_args, data_path="https://github.com/klieret/swe-agent-test-repo/issues/1", repo_path="")
86 | with swe_env_context(test_env_args) as env:
87 | env.reset()
88 | env.open_pr(_dry_run=True, trajectory=[])
89 |
90 |
91 | @pytest.mark.slow
92 | def test_interrupt_close(test_env_args):
93 | with swe_env_context(test_env_args) as env:
94 | env.reset()
95 | env.interrupt()
96 |
97 |
98 | @pytest.mark.slow
99 | def test_communicate_old(test_env_args):
100 | del os.environ["SWE_AGENT_EXPERIMENTAL_COMMUNICATE"]
101 | try:
102 | with swe_env_context(test_env_args) as env:
103 | env.reset()
104 | except:
105 | raise
106 | finally:
107 | os.environ["SWE_AGENT_EXPERIMENTAL_COMMUNICATE"] = "1"
108 |
109 |
110 | @pytest.mark.slow
111 | def test_env_with_hook(test_env_args):
112 | with swe_env_context(test_env_args) as env:
113 | env.add_hook(EnvHook())
114 | env.reset()
--------------------------------------------------------------------------------
/SWE-agent/scripts/README.md:
--------------------------------------------------------------------------------
1 | # Scripts
2 |
 3 | This README contains documentation for the main inference script `run.sh`, along with documentation for some miscellaneous scripts that may be helpful.
4 |
5 | > [!WARNING]
6 | > These scripts have been written to be invoked from the root of this codebase (i.e. `./scripts/run.sh`).
7 |
8 | ## 🏃 Inference Script
9 | The `./run.sh` script has been provided as an example of how to invoke `run.py`.
10 |
 11 | A single `run.py` call will generate a folder under `trajectory/` containing the trajectories and predictions generated by the chosen model run on every instance in the chosen dataset.
12 |
13 | The following is a comprehensive guide to using the provided `run.py` script, detailing available command-line arguments, their purposes, and default values. Flags that you might find helpful have been marked with a 💡.
14 |
 15 | The implementation of *configuration based workflows* is explained in [`agent/`](../sweagent/agent/README.md).
16 |
17 | > [!TIP]
18 | > Run `python run.py --help` to view the most up-to-date documentation of the arguments.
19 |
20 | #### Optional Arguments
21 | * `-h, --help`: Show the help message and exit.
22 |
23 | #### Script Arguments
24 | These arguments configure the script's behavior:
 25 | * `--instance_filter <regex>` 💡: Run instances that match this regex pattern. Default is `.*`.
 26 | * `--noskip_existing, --skip_existing`: [Do not] skip instances that have been completed before.
 27 | * `--suffix <suffix>`: Appends a suffix to the name of the folder containing the trajectories for an experiment run.
28 |
29 | #### Environment Arguments
30 | These arguments are related to the environment configuration:
 31 | * `--data_path <path>` 💡: Path to the data file -or- a Hugging Face dataset -or- a GitHub issue URL.
 32 | * `--base_commit <sha>`: You can specify the base commit SHA to check out. This is determined automatically for instances in SWE-bench.
 33 | * `--image_name <image>`: Name of the Docker image to use. Default is `swe-agent`.
 34 | * `--noinstall_environment, --install_environment`: [Do not] install the environment. Default is True.
 35 | * `--noverbose, --verbose`: Enable verbose output. Default is False.
 36 | * `--timeout <seconds>`: Timeout in seconds. Default is 35.
 37 | * `--container_name <name>` 💡: Name of the Docker container if you would like to create a persistent container. Optional.
38 |
39 | > [!WARNING]
40 | > If you specify a container name, do not run multiple instances of `run.py` with the same container name!
41 |
42 | #### AgentArguments
43 | Configure agent behavior:
 44 | * `--config_file <path>` 💡: Path to the configuration YAML file. Default is `config/default.yaml`.
45 |
46 | #### ModelArguments
47 | Configure model parameters:
 48 | * `--model_name <name>` 💡: Name of the model. Default is `gpt4`.
 49 | * `--per_instance_cost_limit <limit>` 💡: Per-instance cost limit (the interactive loop terminates automatically when the cost limit is hit). Default is 3.0.
 50 | * `--temperature <temperature>` 💡: Model temperature. Default is 0.0.
 51 | * `--top_p <top_p>` 💡: Top-p (nucleus) filtering. Default is 0.95.
 52 | * `--total_cost_limit <limit>`: Total cost limit. Default is 0.0 (unlimited).
53 |
54 | ### 📙 Example Usage
55 | Run with custom data path and verbose mode:
56 | ```bash
57 | python run.py --data_path /path/to/data.json --verbose
58 | ```
59 |
60 | Specify a model and adjust the temperature and top_p parameters:
61 | ```bash
62 | python run.py --model_name gpt4 --temperature 0.2 --top_p 0.9
63 | ```
64 |
65 | ## 🛠️ Miscellaneous Scripts
66 | - `remove_all_containers.sh`: Forcibly removes all Docker containers currently present on the system.
67 | - `run_and_eval.sh`: Runs SWE-agent inference and evaluation on a specified dataset N times. You can specify the `dataset_path`, `num_runs`, `template`, and `suffix` arguments.
68 | - `run_jsonl.sh`: Run SWE-agent inference from a `.jsonl` file that contains a SWE-bench style task instance.
69 | - `run_replay.sh`: Run SWE-agent inference from a `.traj` file. This is useful for automatically creating a new demonstration for a new config from an existing sequence of actions.
70 |
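 71 | Putting several of the flags documented above together (an illustrative example only — the issue URL, container name, and suffix below are placeholders, not values from this repository):
 72 | 
 73 | ```bash
 74 | python run.py \
 75 |     --data_path https://github.com/<owner>/<repo>/issues/<number> \
 76 |     --config_file config/default_from_url.yaml \
 77 |     --model_name gpt4 \
 78 |     --per_instance_cost_limit 2.0 \
 79 |     --container_name my-persistent-container \
 80 |     --suffix my-experiment
 81 | ```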
--------------------------------------------------------------------------------
/SWE-agent/sweagent/agent/README.md:
--------------------------------------------------------------------------------
1 | # Agents
2 | The `agent` folder contains the logic for handling model inference and facilitating their interaction with `SWEEnv`.
 3 | The following documentation describes the purpose and classes of each file.
4 |
5 | #### `agents.py`
6 | This file defines the `Agent` class, which facilitates the interaction between an agent and the environment. The `AgentConfig` and `AgentArguments` data classes compile all arguments into a single file.
7 | - `Agent`: Main class for handling model behavior + interaction with environment
8 | - `__init__`: Sets up model, assistant, configurations, and arguments
9 | - `state_command`: Getter for bash command for extracting env. state
10 | - `setup`: Resets cost stats, initializes system message (+ demonstrations), and returns full list of bash commands to define within environment.
11 | - `forward`: Main inference call to model.
12 | - `forward_model`: Determines appropriate observation template, then makes inference call to model
13 | - `forward_with_format_check`: Invokes `forward_model`, with retry calls to handle blocked or malformed actions.
14 | - `forward_with_error_check`: Wraps `forward_with_format_check` with exception handling.
15 |
16 | #### `commands.py`
 17 | This file defines the abstraction for custom commands (non-native functions that are implemented in bash) that agents can invoke in the `swe-agent` environment. On top of the abstraction, helper functions are provided to extract commands' documentation and compile `.sh` files into separate `Command` objects. There are also fields for establishing the input/output of each action and control flow of actions via templates.
18 | - `AssistantMetadata`: Defines templates for formatting input/output to sub-assistant calls
19 | - `Command`: Defines fields of a custom command
20 | - `ControlMetadata` (WIP): Defines template fields that format the observations for the next agent `forward` inference call
21 | - `generate_command_docs`: Extracts docstrings from each command to form comprehensive documentation.
22 | - `parse_command_file`: Converts bash file content to separate `Command` objects
23 |
24 | #### `models.py`
25 | This file defines the abstraction for running inference on API models. In addition, the `BaseModel` abstraction also defines a set of cost-related fields for tracking instance-level and total expenses accumulated across a single model run.
26 | - `AnthropicModel`: Handles inference + cost logging for Anthropic Models
 27 | - `BedrockModel`: Handles inference + cost logging for Amazon Bedrock-provided models (Anthropic Claude only)
28 | - `APIStats`: Cost tracking fields that are updated per model inference
29 | - `BaseModel`: Abstract class that defines the common logic for updating cost stats
30 | - `get_model`: Returns initialized `[Anthropic|Bedrock|Human|OpenAI]Model` based on given arguments + commands
31 | - `HumanModel`: Handles inference for human task worker
32 | - `ModelArguments`: Model name, hyperparameter, and cost limit arguments
33 | - `OpenAIModel`: Handles inference + cost logging for OpenAI models
34 |
35 | #### `parsing.py`
36 | This file defines the abstraction for parsing the output of the model inference. The `Parsing` class is used to extract the relevant information from the model's output and format it into a response that can be used by the `Agent` class.
37 | - `Parsing`: Abstract class that defines the common logic for parsing model output
38 |
39 | #### `history_processors.py`
40 | This file defines the abstraction for processing the history of the environment. The `HistoryProcessor` class is used to extract the relevant information from the history of the environment and format it into a response that can be used by the `Agent` class.
41 | - `HistoryProcessor`: Abstract class that defines the common logic for processing the history of the environment
42 | - `DefaultHistoryProcessor`: Default implementation of `HistoryProcessor` that processes the history of the environment
43 |
44 | ### Environment Usage
45 | * To skip over a task instance, use the `skip` keyword
46 | * To submit for evaluation, use the `submit` keyword
 47 | * To exit the `SWEEnv` environment, perform a keyboard interrupt (`^C`)
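 48 | 
 49 | ### Illustrative Example
 50 | As a rough sketch of how the `models.py` pieces described above fit together (this snippet is not part of the codebase and the exact argument names are assumptions — consult `models.py` for the real interface):
 51 | 
 52 | ```python
 53 | from sweagent.agent.models import ModelArguments, get_model
 54 | 
 55 | # ModelArguments bundles the model name, sampling hyperparameters, and cost limits
 56 | model_args = ModelArguments(
 57 |     model_name="gpt4",
 58 |     temperature=0.0,
 59 |     top_p=0.95,
 60 |     per_instance_cost_limit=3.0,
 61 | )
 62 | 
 63 | # get_model returns an initialized [Anthropic|Bedrock|Human|OpenAI]Model based on the
 64 | # given arguments (and the commands available to the agent, left empty here for brevity)
 65 | model = get_model(model_args, commands=[])
 66 | ```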
--------------------------------------------------------------------------------
/repograph/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import ast
3 |
4 | def create_structure(directory_path):
5 | """Create the structure of the repository directory by parsing Python files.
6 | :param directory_path: Path to the repository directory.
7 | :return: A dictionary representing the structure.
8 | """
9 | structure = {}
10 |
11 | for root, _, files in os.walk(directory_path):
12 | repo_name = os.path.basename(directory_path)
13 | relative_root = os.path.relpath(root, directory_path)
14 | if relative_root == ".":
15 | relative_root = repo_name
16 | curr_struct = structure
17 | for part in relative_root.split(os.sep):
18 | if part not in curr_struct:
19 | curr_struct[part] = {}
20 | curr_struct = curr_struct[part]
21 | for file_name in files:
22 | if file_name.endswith(".py"):
23 | file_path = os.path.join(root, file_name)
24 | class_info, function_names, file_lines = parse_python_file(file_path)
25 | curr_struct[file_name] = {
26 | "classes": class_info,
27 | "functions": function_names,
28 | "text": file_lines,
29 | }
30 | else:
31 | curr_struct[file_name] = {}
32 |
33 | return structure
34 |
35 | def parse_python_file(file_path, file_content=None):
36 | """Parse a Python file to extract class and function definitions with their line numbers.
37 | :param file_path: Path to the Python file.
38 | :return: Class names, function names, and file contents
39 | """
40 | if file_content is None:
41 | try:
42 | with open(file_path, "r") as file:
43 | file_content = file.read()
44 | parsed_data = ast.parse(file_content)
45 | except Exception as e: # Catch all types of exceptions
46 | print(f"Error in file {file_path}: {e}")
47 | return [], [], ""
48 | else:
49 | try:
50 | parsed_data = ast.parse(file_content)
51 | except Exception as e: # Catch all types of exceptions
52 | print(f"Error in file {file_path}: {e}")
53 | return [], [], ""
54 |
55 | class_info = []
56 | function_names = []
57 | class_methods = set()
58 |
59 | for node in ast.walk(parsed_data):
60 | if isinstance(node, ast.ClassDef):
61 | methods = []
62 | for n in node.body:
63 | if isinstance(n, ast.FunctionDef):
64 | methods.append(
65 | {
66 | "name": n.name,
67 | "start_line": n.lineno,
68 | "end_line": n.end_lineno,
69 | "text": file_content.splitlines()[
70 | n.lineno - 1 : n.end_lineno
71 | ],
72 | }
73 | )
74 | class_methods.add(n.name)
75 | class_info.append(
76 | {
77 | "name": node.name,
78 | "start_line": node.lineno,
79 | "end_line": node.end_lineno,
80 | "text": file_content.splitlines()[
81 | node.lineno - 1 : node.end_lineno
82 | ],
83 | "methods": methods,
84 | }
85 | )
86 | elif isinstance(node, ast.FunctionDef) and not isinstance(
87 | node, ast.AsyncFunctionDef
88 | ):
89 | if node.name not in class_methods:
90 | function_names.append(
91 | {
92 | "name": node.name,
93 | "start_line": node.lineno,
94 | "end_line": node.end_lineno,
95 | "text": file_content.splitlines()[
96 | node.lineno - 1 : node.end_lineno
97 | ],
98 | }
99 | )
100 |
101 | return class_info, function_names, file_content.splitlines()
--------------------------------------------------------------------------------
/SWE-agent/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Guide (user-friendly):
2 | # https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
3 | # Specification (technical, formal):
4 | # https://packaging.python.org/en/latest/specifications/pyproject-toml/
5 |
6 |
7 | # Choosing a build backend:
8 | [build-system]
9 | requires = ["setuptools"] # REQUIRED if [build-system] table is used
10 | build-backend = "setuptools.build_meta" # If not defined, then legacy behavior can happen.
11 |
12 |
13 | [project]
14 | name = "sweagent"
15 | dynamic = ["version", "dependencies"]
16 | description = "The official SWE-agent package - an open source Agent Computer Interface for running language models as software engineers."
17 | readme = "README.md"
18 | requires-python = ">=3.9"
19 | license = {file = "LICENSE"}
20 | keywords = ["nlp", "agents", "code"]
21 | authors = [
22 | {name = "Carlos E. Jimenez", email = "carlosej@princeton.edu" },
23 | {name = "John Yang", email = "byjohnyang@gmail.com" }
24 | ]
25 |
26 | # Classifiers help users find your project by categorizing it.
27 | classifiers = [
28 | # How mature is this project? Common values are
29 | # 3 - Alpha, 4 - Beta, 5 - Production/Stable
30 | "Operating System :: OS Independent",
31 | # Indicate who your project is intended for
32 | "Intended Audience :: Developers",
33 | # Pick your license as you wish
34 | "License :: OSI Approved :: MIT License",
35 | "Programming Language :: Python :: 3.9",
36 | "Programming Language :: Python :: 3 :: Only",
37 | ]
38 |
39 | [tool.setuptools]
40 | include-package-data = true
41 |
42 | [tool.setuptools.dynamic]
43 | version = {attr = "sweagent.__version__"}
44 | dependencies = {file = ["requirements.txt"]}
45 |
46 | [tool.setuptools.packages.find]
47 | where = ["."]
48 | namespaces = false
49 |
50 | [project.urls]
51 | "Homepage" = "https://swe-agent.com"
52 | "Bug Reports" = "http://github.com/princeton-nlp/SWE-agent/issues"
53 | "Documentation" = "https://github.com/princeton-nlp/SWE-agent"
54 | "Source" = "http://github.com/princeton-nlp/SWE-agent"
55 |
56 |
57 | [tool.pytest.ini_options]
58 | markers = [
59 | "slow: marks tests as slow (deselect with '-m \"not slow\"')",
60 | ]
61 | testpaths = [
62 | "tests"
63 | ]
64 | xfail_strict = true
65 |
66 | [tool.ruff]
67 | # Exclude a variety of commonly ignored directories.
68 | exclude = [
69 | ".bzr",
70 | ".direnv",
71 | ".eggs",
72 | ".git",
73 | ".git-rewrite",
74 | ".hg",
75 | ".ipynb_checkpoints",
76 | ".mypy_cache",
77 | ".nox",
78 | ".pants.d",
79 | ".pyenv",
80 | ".pytest_cache",
81 | ".pytype",
82 | ".ruff_cache",
83 | ".svn",
84 | ".tox",
85 | ".venv",
86 | ".vscode",
87 | "__pypackages__",
88 | "_build",
89 | "buck-out",
90 | "build",
91 | "dist",
92 | "node_modules",
93 | "site-packages",
94 | "venv",
95 | ]
96 |
97 | # Same as Black.
98 | line-length = 88
99 | indent-width = 4
100 |
101 | # Assume Python 3.8
102 | target-version = "py38"
103 |
104 | [tool.ruff.lint]
105 | # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
106 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
107 | # McCabe complexity (`C901`) by default.
108 | select = ["F821", "F822", "E999", "E902", "F401", "F841"]
109 | ignore = []
110 |
111 | # Allow fix for all enabled rules (when `--fix`) is provided.
112 | fixable = ["ALL"]
113 | unfixable = []
114 |
115 | # Allow unused variables when underscore-prefixed.
116 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
117 |
118 | [tool.ruff.format]
119 | # Like Black, use double quotes for strings.
120 | quote-style = "double"
121 |
122 | # Like Black, indent with spaces, rather than tabs.
123 | indent-style = "space"
124 |
125 | # Like Black, respect magic trailing commas.
126 | skip-magic-trailing-comma = false
127 |
128 | # Like Black, automatically detect the appropriate line ending.
129 | line-ending = "auto"
130 |
131 | [tool.typos.default.extend-identifiers]
132 | # *sigh* this just isn't worth the cost of fixing
133 | ACI = "ACI"
134 |
135 | [tool.typos.default.extend-words]
136 | # Don't correct the surname "Teh"
137 | aci = "aci"
138 |
--------------------------------------------------------------------------------
/SWE-agent/make_demos/convert_traj_to_demo.py:
--------------------------------------------------------------------------------
1 | import json
2 | import io
3 | from ruamel.yaml import YAML
4 | from ruamel.yaml.scalarstring import LiteralScalarString as LSS
5 | from pathlib import Path
6 | from argparse import ArgumentParser
7 |
8 |
9 | DEMO_COMMENT = """# This is a demo file generated from trajectory file:
10 | # {traj_path}
11 | # You can use this demo file to replay the actions in the trajectory with run_replay.py.
12 | # You can edit the content of the actions in this file to modify the replay behavior.
13 | # NOTICE:
14 | # Only the actions of the assistant will be replayed.
15 | # You do not need to modify the observation's contents or any other fields.
16 | # You can add or remove actions to modify the replay behavior."""
17 |
18 |
19 | def convert_to_literal_string(d):
20 | """
21 | Convert any multi-line strings to LiteralScalarString
22 | """
23 | if isinstance(d, dict):
24 | for key, value in d.items():
25 | if isinstance(value, str) and '\n' in value:
26 | d[key] = LSS(value.replace('\r\n', '\n').replace('\r', '\n'))
27 | elif isinstance(value, dict):
28 | convert_to_literal_string(value)
29 | elif isinstance(d, list):
30 | for i, item in enumerate(d):
31 | if isinstance(item, str) and '\n' in item:
32 | d[i] = LSS(item.replace('\r\n', '\n').replace('\r', '\n'))
33 | elif isinstance(item, dict):
34 | convert_to_literal_string(item)
35 | elif isinstance(d, str) and '\n' in d:
36 | d = LSS(d.replace('\r\n', '\n').replace('\r', '\n'))
37 | else:
38 | raise ValueError(f"Unsupported type: {type(d)}")
39 | return d
40 |
41 |
42 | def save_demo(data, file, traj_path):
43 | """
44 | Save a single task instance as a yaml file
45 | """
46 | data = convert_to_literal_string(data)
47 | yaml = YAML()
48 | yaml.indent(mapping=2, sequence=4, offset=2)
49 | buffer = io.StringIO()
50 | yaml.dump(data, buffer)
51 | content = buffer.getvalue()
52 | header = DEMO_COMMENT.format(traj_path=traj_path)
53 | with open(file, "w") as f:
54 | f.write(f"{header}\n{content}")
55 |
56 |
57 | def convert_traj_to_action_demo(traj_path: str, output_file: str = None, include_user: bool = False):
58 | traj = json.load(open(traj_path))
59 | history = traj["history"]
60 | action_traj = list()
61 | admissible_roles = {"assistant", "user"} if include_user else {"assistant"}
62 | for step in history:
63 | if step['role'] in admissible_roles and step.get('agent', 'primary') == 'primary':
64 | action_traj.append({k: v for k, v in step.items() if k in {'content', 'role'}})
65 | save_demo(action_traj, output_file, traj_path)
66 | print(f"Saved demo to {output_file}")
67 |
68 |
69 | def main(traj_path: str, output_dir: str = None, suffix: str = "", overwrite: bool = False, include_user: bool = False):
70 | filename = '/'.join([Path(traj_path).parent.name + suffix, Path(traj_path).name.rsplit('.traj', 1)[0]]) + ".demo.yaml"
71 | output_file = Path(output_dir) / filename
72 | if output_file.exists() and not overwrite:
73 | raise FileExistsError(f"Output file already exists: {output_file}")
74 | output_file.parent.mkdir(parents=True, exist_ok=True)
75 | convert_traj_to_action_demo(traj_path, output_file, include_user)
76 |
77 |
78 | def string2bool(s):
79 | if s.lower() in {"true", "1"}:
80 | return True
81 | elif s.lower() in {"false", "0"}:
82 | return False
83 | else:
84 | raise ValueError(f"Invalid boolean string: {s}")
85 |
86 |
87 | if __name__ == "__main__":
88 | parser = ArgumentParser()
89 | parser.add_argument("traj_path", type=str, help="Path to trajectory file")
90 | parser.add_argument("--output_dir", type=str, help="Output directory for action demos", default="./demos")
91 | parser.add_argument("--suffix", type=str, help="Suffix for the output file", default="")
92 | parser.add_argument("--overwrite", type=string2bool, help="Overwrite existing files", default=False, nargs='?')
93 | parser.add_argument("--include_user", type=string2bool, help="Include user responses (computer)", default=False, nargs='?')
94 | args = parser.parse_args()
95 | main(**vars(args))
96 |
--------------------------------------------------------------------------------
/agentless/util/api_requests.py:
--------------------------------------------------------------------------------
1 | import signal
2 | import time
3 | from typing import Dict, Union
4 |
5 | import openai
6 | import tiktoken
7 |
8 | client = openai.OpenAI()
9 |
10 |
11 | def num_tokens_from_messages(message, model="gpt-3.5-turbo-0301"):
12 | """Returns the number of tokens used by a list of messages."""
13 | try:
14 | encoding = tiktoken.encoding_for_model(model)
15 | except KeyError:
16 | encoding = tiktoken.get_encoding("cl100k_base")
17 | if isinstance(message, list):
 18 |         # only count the first message's content
19 | num_tokens = len(encoding.encode(message[0]["content"]))
20 | else:
21 | num_tokens = len(encoding.encode(message))
22 | return num_tokens
23 |
24 |
25 | def create_chatgpt_config(
26 | message: Union[str, list],
27 | max_tokens: int,
28 | temperature: float = 1,
29 | batch_size: int = 1,
30 | system_message: str = "You are a helpful assistant.",
31 | model: str = "gpt-3.5-turbo",
32 | ) -> Dict:
33 | if isinstance(message, list):
34 | config = {
35 | "model": model,
36 | "max_tokens": max_tokens,
37 | "temperature": temperature,
38 | "n": batch_size,
39 | "messages": [{"role": "system", "content": system_message}] + message,
40 | }
41 | else:
42 | config = {
43 | "model": model,
44 | "max_tokens": max_tokens,
45 | "temperature": temperature,
46 | "n": batch_size,
47 | "messages": [
48 | {"role": "system", "content": system_message},
49 | {"role": "user", "content": message},
50 | ],
51 | }
52 | return config
53 |
54 |
55 | def handler(signum, frame):
56 | # swallow signum and frame
57 | raise Exception("end of time")
58 |
59 |
60 | def request_chatgpt_engine(config):
61 | ret = None
62 | while ret is None:
63 | try:
64 | signal.signal(signal.SIGALRM, handler)
65 | signal.alarm(100)
66 | ret = client.chat.completions.create(**config)
67 | signal.alarm(0)
68 | except openai._exceptions.BadRequestError as e:
69 | print(e)
70 | signal.alarm(0)
71 | except openai._exceptions.RateLimitError as e:
72 | print("Rate limit exceeded. Waiting...")
73 | print(e)
74 | signal.alarm(0)
75 | time.sleep(5)
76 | except openai._exceptions.APIConnectionError as e:
77 | print("API connection error. Waiting...")
78 | signal.alarm(0)
79 | time.sleep(5)
80 | except Exception as e:
81 | print("Unknown error. Waiting...")
82 | print(e)
83 | signal.alarm(0)
84 | time.sleep(1)
85 | return ret
86 |
87 |
88 | def create_anthropic_config(
89 | message: str,
90 | prefill_message: str,
91 | max_tokens: int,
92 | temperature: float = 1,
93 | batch_size: int = 1,
94 | system_message: str = "You are a helpful assistant.",
95 | model: str = "claude-2.1",
96 | ) -> Dict:
97 | if isinstance(message, list):
98 | config = {
99 | "model": model,
100 | "temperature": temperature,
101 | "max_tokens": max_tokens,
102 | "system": system_message,
103 | "messages": message,
104 | }
105 | else:
106 | config = {
107 | "model": model,
108 | "temperature": temperature,
109 | "max_tokens": max_tokens,
110 | "system": system_message,
111 | "messages": [
112 | {"role": "user", "content": message},
113 | {"role": "assistant", "content": prefill_message},
114 | ],
115 | }
116 | return config
117 |
118 |
119 | def request_anthropic_engine(client, config):
120 | ret = None
121 | while ret is None:
122 | try:
123 | signal.signal(signal.SIGALRM, handler)
124 | signal.alarm(100)
125 | ret = client.messages.create(**config)
126 | signal.alarm(0)
127 | except Exception as e:
128 | print("Unknown error. Waiting...")
129 | print(e)
130 | signal.alarm(0)
131 | time.sleep(10)
132 | return ret
133 |
--------------------------------------------------------------------------------
/SWE-agent/tests/test_data/data_sources/human_eval.json:
--------------------------------------------------------------------------------
1 | [{"instance_id": "swe-bench__humaneval-30", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..52ecda2\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import get_positive\n+\n+\n+METADATA = {}\n+\n+\n+def check(candidate):\n+ assert candidate([-1, -2, 4, 5, 6]) == [4, 5, 6]\n+ assert candidate([5, 3, -5, 2, 3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 3, 9, 123, 1]\n+ assert candidate([-1, -2]) == []\n+ assert candidate([]) == []\n+\n+check(get_positive)\n", "base_commit": "0880311", "base_commit_with_tests": "b2e380b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-85", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..13d6e1f\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,12 @@\n+from main import add\n+def check(candidate):\n+\n+ # Check some simple cases\n+ assert candidate([4, 88]) == 88\n+ assert candidate([4, 5, 6, 7, 2, 122]) == 122\n+ assert candidate([4, 0, 6, 7]) == 0\n+ assert candidate([4, 4, 6, 8]) == 12\n+\n+ # Check some edge cases that are easy to work out by hand.\n+ \n+check(add)\n", "base_commit": "2de55bc", "base_commit_with_tests": "c8c997b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-22", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..d881459\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,14 @@\n+from main import filter_integers\n+\n+\n+METADATA = {\n+ 'author': 'jt',\n+ 'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+ assert candidate([]) == []\n+ assert candidate([4, {}, [], 23.2, 9, 'adasd']) == [4, 9]\n+ assert candidate([3, 'c', 3, 3, 'a', 'b']) == [3, 3, 3]\n+check(filter_integers)\n", "base_commit": "f0dbe5e", "base_commit_with_tests": "55cc474", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-104", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..617da5a\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import unique_digits\n+def check(candidate):\n+\n+ # Check some simple cases\n+ assert candidate([15, 33, 1422, 1]) == [1, 15, 33]\n+ assert candidate([152, 323, 1422, 10]) == []\n+ assert candidate([12345, 2033, 111, 151]) == [111, 151]\n+ assert candidate([135, 103, 31]) == [31, 135]\n+\n+ # Check some edge cases that are easy to work out by hand.\n+ assert True\n+\n+check(unique_digits)\n", "base_commit": "b52ee85", "base_commit_with_tests": "4a92a50", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-0", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": 
"diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..2d57340\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,19 @@\n+from main import has_close_elements\n+\n+\n+METADATA = {\n+ 'author': 'jt',\n+ 'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\n+ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\n+ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\n+ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\n+ assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\n+ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\n+ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\n+\n+check(has_close_elements)\n", "base_commit": "afba737", "base_commit_with_tests": "c7e41b2", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}]
2 |
--------------------------------------------------------------------------------
/SWE-agent/sweagent/agent/history_processors.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from abc import abstractmethod
4 | from dataclasses import dataclass
5 |
6 |
7 | class FormatError(Exception):
8 | pass
9 |
10 | # ABSTRACT BASE CLASSES
11 |
12 | class HistoryProcessorMeta(type):
13 | _registry = {}
14 |
15 | def __new__(cls, name, bases, attrs):
16 | new_cls = super().__new__(cls, name, bases, attrs)
17 | if name != "HistoryProcessor":
18 | cls._registry[name] = new_cls
19 | return new_cls
20 |
21 |
22 | @dataclass
23 | class HistoryProcessor(metaclass=HistoryProcessorMeta):
24 | def __init__(self, *args, **kwargs):
25 | pass
26 |
27 | @abstractmethod
28 | def __call__(self, history: list[str]) -> list[str]:
29 | raise NotImplementedError
30 |
31 | @classmethod
32 | def get(cls, name, *args, **kwargs):
33 | try:
34 | return cls._registry[name](*args, **kwargs)
35 | except KeyError:
 36 |             raise ValueError(f"History processor ({name}) not found.")
37 |
38 |
 39 | # DEFINE NEW HISTORY PROCESSORS BELOW THIS LINE
40 | class DefaultHistoryProcessor(HistoryProcessor):
41 | def __call__(self, history):
42 | return history
43 |
44 |
45 | def last_n_history(history, n):
46 | if n <= 0:
47 | raise ValueError('n must be a positive integer')
48 | new_history = list()
49 | user_messages = len([entry for entry in history if (entry['role'] == 'user' and not entry.get('is_demo', False))])
50 | user_msg_idx = 0
51 | for entry in history:
52 | data = entry.copy()
53 | if data['role'] != 'user':
54 | new_history.append(entry)
55 | continue
56 | if data.get('is_demo', False):
57 | new_history.append(entry)
58 | continue
59 | else:
60 | user_msg_idx += 1
61 | if user_msg_idx == 1 or user_msg_idx in range(user_messages - n + 1, user_messages + 1):
62 | new_history.append(entry)
63 | else:
64 | data['content'] = f'Old output omitted ({len(entry["content"].splitlines())} lines)'
65 | new_history.append(data)
66 | return new_history
67 |
68 |
69 | class LastNObservations(HistoryProcessor):
70 | def __init__(self, n):
71 | self.n = n
72 |
73 | def __call__(self, history):
74 | return last_n_history(history, self.n)
75 |
76 |
77 | class Last2Observations(HistoryProcessor):
78 | def __call__(self, history):
79 | return last_n_history(history, 2)
80 |
81 |
82 | class Last5Observations(HistoryProcessor):
83 | def __call__(self, history):
84 | return last_n_history(history, 5)
85 |
86 |
87 | class ClosedWindowHistoryProcessor(HistoryProcessor):
88 | pattern = re.compile(r'^(\d+)\:.*?(\n|$)', re.MULTILINE)
89 | file_pattern = re.compile(r'\[File:\s+(.*)\s+\(\d+\s+lines\ total\)\]')
90 |
91 | def __call__(self, history):
92 | new_history = list()
93 | # For each value in history, keep track of which windows have been shown.
94 | # We want to mark windows that should stay open (they're the last window for a particular file)
95 | # Then we'll replace all other windows with a simple summary of the window (i.e. number of lines)
96 | windows = set()
97 | for entry in reversed(history):
98 | data = entry.copy()
99 | if data['role'] != 'user':
100 | new_history.append(entry)
101 | continue
102 | if data.get('is_demo', False):
103 | new_history.append(entry)
104 | continue
105 | matches = list(self.pattern.finditer(entry['content']))
106 | if len(matches) >= 1:
107 | file_match = self.file_pattern.search(entry['content'])
108 | if file_match:
109 | file = file_match.group(1)
110 | else:
111 | continue
112 | if file in windows:
113 | start = matches[0].start()
114 | end = matches[-1].end()
115 | data['content'] = (
116 | entry['content'][:start] +\
117 | f'Outdated window with {len(matches)} lines omitted...\n' +\
118 | entry['content'][end:]
119 | )
120 | windows.add(file)
121 | new_history.append(data)
122 | history = list(reversed(new_history))
123 | return history
124 |
--------------------------------------------------------------------------------
/SWE-agent/run_replay.py:
--------------------------------------------------------------------------------
1 | """Replay a trajectory"""
2 |
3 | import json
4 | import os
5 | import yaml
6 |
7 | from argparse import ArgumentParser
8 | from typing import Any, Dict, List
9 | import run as runscript
10 |
11 |
12 | def process_single_traj(traj_path: str, config_file: str, data_path: str, suffix: str, *, forward_args: List[str]):
 13 |     """Replay a single trajectory by invoking ``run.py`` with the special ``replay`` model.
 14 | 
 15 |     Args:
 16 |         traj_path (str): Path to the trajectory file (``.traj`` or ``.yaml``) to replay
 17 |         config_file (str): Path to the config/template file
 18 |         data_path (str): Path to the file containing the task instances referenced by the trajectory (read from the run's ``args.yaml`` if None)
 19 |         suffix (str): Optional suffix appended to the output trajectory path
20 | forward_args (List[str]): Passed to run.py
21 |
22 | Raises:
23 | ValueError: Incorrect paths or other config issue
24 |
25 | Returns:
26 | None
27 | """
28 | replay_action_trajs_path = "temp_replay.jsonl"
29 |
30 | # Open trajectory file, extract responses as actions
31 | if traj_path.endswith(".yaml"):
32 | traj_data = dict()
33 | with open(traj_path, "r") as f:
34 | traj_data["history"] = yaml.safe_load(f)
35 | else:
36 | traj_data = json.load(open(traj_path, "r"))
37 | actions = [x["content"] for x in traj_data["history"] if x["role"] == "assistant"]
38 | instance_id = traj_path.split("/")[-1].split(".")[0]
39 | with open(replay_action_trajs_path, "w") as f:
40 | print(
41 | json.dumps({instance_id: actions}),
42 | file=f,
43 | end="\n",
44 | flush=True
45 | )
46 |
47 | # Get data_path from args.yaml
48 | if data_path is None:
49 | args_path = os.path.join(
50 | os.path.dirname(traj_path),
51 | "args.yaml"
52 | )
53 | args = yaml.safe_load(open(args_path))
54 | data_path = args['environment']['data_path']
55 |
56 | # Identify the relevant task instance and create it
57 | def create_task_instances_tmp_file(data: List[Dict[str, Any]]) -> str:
58 | """Helper function to create a temporary file to write task instances to.
59 | Returns path to the temporary file.
60 | """
61 | data = [d for d in data if d["instance_id"] == instance_id]
62 | tmp_path = instance_id + ".jsonl"
63 | with open(tmp_path, "w") as f:
64 | for d in data:
65 | print(json.dumps(d), file=f, end="\n", flush=True)
66 | return tmp_path
67 |
68 | is_other = False
69 | if data_path.endswith(".jsonl"):
70 | replay_task_instances_path = create_task_instances_tmp_file([json.loads(x) for x in open(data_path, "r").readlines()])
71 | elif data_path.endswith(".json"):
72 | replay_task_instances_path = create_task_instances_tmp_file(json.load(open(data_path)))
73 | else:
74 | # Assume data_path is a github url or local url
75 | is_other = True
76 | replay_task_instances_path = data_path
77 |
78 | # Call run.py via subprocess
79 | run_args = [
80 | "--config_file", config_file,
81 | "--data_path", replay_task_instances_path,
82 | "--install_environment", "True",
83 | "--model_name", "replay",
84 | "--replay_path", replay_action_trajs_path,
85 | *forward_args,
86 | ]
87 | if is_other:
88 | # Not sure if this only applies to github urls for data_path
89 | run_args.extend(["--skip_existing", "False"])
90 | if suffix is not None:
91 | run_args.extend(["--suffix", suffix])
92 | script_args = runscript.get_args(run_args)
93 | runscript.main(script_args)
94 |
95 | os.remove(replay_action_trajs_path)
96 | if not is_other:
97 | os.remove(replay_task_instances_path)
98 |
99 | def main(
100 | traj_path: str,
101 | config_file: str,
102 | data_path: str,
103 | suffix: str,
104 | *,
105 | forward_args: List[str],
106 | ):
107 | process_single_traj(traj_path, config_file, data_path, suffix, forward_args=forward_args)
108 |
109 |
110 | def get_args(args=None):
111 | parser = ArgumentParser(description=__doc__)
112 | parser.add_argument("--traj_path", help="Path to trajectory to replay", default=None)
113 | parser.add_argument("--config_file", help="Path to template", required=True)
114 | parser.add_argument("--data_path", help="(Optional) Path to data file containing task instances ref'ed by replay trajectories", default=None)
115 | parser.add_argument("--suffix", help="(Optional) Suffix argument appended to end of traj path", default=None)
116 | args, remaining_args = parser.parse_known_args(args=args)
117 | return args, remaining_args
118 |
119 |
120 | if __name__ == "__main__":
121 | args, remaining_args = get_args()
122 | main(**vars(args), forward_args=remaining_args)
123 |
--------------------------------------------------------------------------------
/SWE-agent/config/commands/cursors_edit_linting.sh:
--------------------------------------------------------------------------------
1 | # @yaml
2 | # signature: |-
3 | # edit
 4 | #   <replacement_text>
5 | # end_of_edit
 6 | # docstring: replaces *all* of the text between the START CURSOR and the END CURSOR with the replacement_text. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement_text> will be entered, so make sure your indentation is formatted properly. To enter text at the beginning of the file, set START CURSOR and END CURSOR to 0. Use set_cursors to move the cursors around. Python files will be checked for syntax errors after the edit.
7 | # end_name: end_of_edit
8 | # arguments:
9 | # replacement_text:
10 | # type: string
11 | # description: the text to replace the current selection with
12 | # required: true
13 | edit() {
14 | if [ -z "$CURRENT_FILE" ]
15 | then
16 | echo 'No file open. Use the `open` command first.'
17 | return
18 | fi
19 | local start_line=$((START_CURSOR - 1))
20 | start_line=$((start_line < 0 ? 0 : start_line))
21 | local end_line=$((END_CURSOR))
22 | end_line=$((end_line < 0 ? 0 : end_line))
23 |
24 | local replacement=()
25 | while IFS= read -r line
26 | do
27 | replacement+=("$line")
28 | done
29 |
30 | local num_lines=${#replacement[@]}
31 | # Create a backup of the current file
32 | cp "$CURRENT_FILE" "/root/$(basename "$CURRENT_FILE")_backup"
33 | # Read the file line by line into an array
34 | mapfile -t lines < "$CURRENT_FILE"
35 | local new_lines=("${lines[@]:0:$start_line}" "${replacement[@]}" "${lines[@]:$((end_line))}")
36 | # Write the new stuff directly back into the original file
37 | printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
38 | # Run linter
39 | if [[ $CURRENT_FILE == *.py ]]; then
40 | lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
41 | else
42 | # do nothing
43 | lint_output=""
44 | fi
45 | # if there is no output, then the file is good
46 | if [ -z "$lint_output" ]; then
47 | _constrain_line
48 | # set to START + num_lines - 1, unless num_lines is 0, then set to START
49 | export END_CURSOR=$((num_lines == 0 ? START_CURSOR : START_CURSOR + num_lines - 1))
50 | export START_CURSOR=$START_CURSOR
51 | _print
52 | echo "File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
53 | else
54 | echo "Your proposed edit has introduced new syntax error(s). Please read this error message carefully and then retry editing the file."
55 | echo ""
56 | echo "ERRORS:"
57 | _split_string "$lint_output"
58 | echo ""
59 |
60 | # Save original values
61 | original_current_line=$CURRENT_LINE
62 | original_window=$WINDOW
63 | original_end_cursor=$END_CURSOR
64 |
65 | # Update values
66 | export CURRENT_LINE=$(( (num_lines / 2) + start_line )) # Set to "center" of edit
67 | export WINDOW=$((num_lines + 10)) # Show +/- 5 lines around edit
68 | export END_CURSOR=$((num_lines == 0 ? START_CURSOR : START_CURSOR + num_lines - 1))
69 |
70 | echo "This is how your edit would have looked if applied"
71 | echo "-------------------------------------------------"
72 | _constrain_line
73 | _print
74 | echo "-------------------------------------------------"
75 | echo ""
76 |
77 | # Restoring CURRENT_FILE to original contents.
78 | cp "/root/$(basename "$CURRENT_FILE")_backup" "$CURRENT_FILE"
79 |
80 | export CURRENT_LINE=$(( ((end_line - start_line) / 2) + start_line )) # Set to "center" of edit
81 | export WINDOW=$((end_line - start_line + 10))
82 | export END_CURSOR=$original_end_cursor
83 |
84 | echo "This is the original code before your edit"
85 | echo "-------------------------------------------------"
86 | _constrain_line
87 | _print
88 | echo "-------------------------------------------------"
89 |
90 | # Restore original values
91 | export CURRENT_LINE=$original_current_line
92 | export WINDOW=$original_window
93 | export END_CURSOR=$original_end_cursor
94 |
95 | echo "Your changes have NOT been applied. Please fix your edit command and try again."
96 | echo "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code."
97 | echo "DO NOT re-run the same failed edit command. Running it again will lead to the same error."
98 | fi
99 | # Remove backup file
100 | rm -f "/root/$(basename "$CURRENT_FILE")_backup"
101 | }
102 |
--------------------------------------------------------------------------------
/SWE-agent/config/README.md:
--------------------------------------------------------------------------------
1 | # Configuration
2 |
3 | This folder contains details describing how to write your own configurations to control how agents can interact with the `SWEEnv` environment.
4 | A configuration is represented as a single `.yaml` file, allowing you to...
5 | * Define the **commands** that agents may use to traverse + modify a codebase.
6 | * Write **prompts** that are deterministically/conditionally shown to the agent over the course of a single trajectory.
7 | * Control the **input/output interface** that sits between the agent and `SWEEnv`.
8 |
9 | ## Configuration File Fields
 10 | The configuration is a `.yaml` file that consists of several fields. They are fully represented in the following outline:
11 |
12 | ```yaml
13 | # Prompt Templates: Control how observations of environment are shown to agent
14 | system_template: | # .yaml syntax for multi-line string value
15 | First `system` message shown to agent
16 | instance_template: |- # .yaml syntax for multi-line string value w/ no new line
17 | Instance prompt, contains task instance-specific content
18 | next_step_template: |-
19 | Format template of per-turn observation (Contains standard output from agent's action)
20 | next_step_no_output_template: |-
21 | Format template of observation when there is no standard output from the agent's action
22 | format_error_template: |-
23 | Format template of error message (Used when agent's action causes an error)
24 | demonstration_template: |
25 | Format template for showing a demonstration to the agent
26 | demonstrations:
 27 |   - `trajectories/<user>/<experiment>/*.traj`
 28 |   - File is a demonstration of how to solve a task. This could be an agent-generated trajectory.
29 | - You can include 1+ demonstrations
30 |
31 | # Environment States: Define features of the SWEEnv environment
32 | env_variables:
33 | # Default variables for SWEEnv at the beginning of each instance
34 | CURRENT_FILE: 0
35 | CURRENT_LINE:
36 | OVERLAP:
37 | SEARCH_FILES:
38 | SEARCH_INDEX:
39 | SEARCH_RESULTS:
40 | WINDOW_SIZE:
41 | START_INDEX:
42 | END_INDEX:
43 | START_CURSOR:
 44 |   END_CURSOR:
45 | START_CURSORS_MARK:
46 | END_CURSOR_MARK:
47 | state_command: |
48 | # `state_command` allows you to update state variables to reflect any aspect of the environment (e.g. current working directory)
49 | name: state
50 | code: |
 51 |     state() { echo '{"pwd": "'$PWD'"}'; }
52 |
53 | # Action Interface: Define how an agent interacts with the SWEEnv environment
54 | command_files:
55 | - path/to/bash_file.sh
56 | - Each file contains a list of commands implemented in bash
57 | - You can include 1+ command files
58 | parse_command: Reference to functionality for defining command documentation
59 | history_processor: Reference to functionality for controlling agent's message history
60 | parse_function: Parser run on agent output
61 | ```
62 |
63 | In this directory, we recommend looking at...
64 | * `configs/` for examples of properly formatted configuration files. Each configuration differs in its set of commands, input/output format, demonstrations, etc.
65 | * `commands/` for the bash implementations of the custom commands that SWE-agent uses to navigate + edit the codebase.
66 |
67 | ## How a Configuration File is Processed
68 | Some notes on processing that occurs on config fields when SWE-agent is run:
69 | * Commands specified in `command_files` will be parsed into a single block of documentation text that can be referenced as `{command_docs}`.
70 | * `env_variables` are the default variables for the bash environment at the beginning of each instance.
71 | * `state_command` is used to extract state information from the bash environment (formatted as json) to be used in the templates given to the agent.
72 |
73 | Possible variables that can be used in templates are:
74 | - `{command_docs}` (an automatically compiled collection of available commands + their docstrings)
75 | - any variable given in `env_variables` (same spelling), e.g., `{WINDOW_SIZE}`
76 | - any variable extracted as json as part of the `state_command` function
77 | - the last observation `{observation}`
78 | - ... this list will grow as we implement more features!
79 |
80 | ## Template Workflow
81 | The following diagram illustrates where each template is shown within a single episode of solving one task instance.
82 |
83 |
84 |
85 |
86 |
87 | One of three templates can be shown per turn:
 88 | * "Next Step" (`next_step_template`): Displayed if the model's action successfully runs. The output and a prompt for the next action are shown
89 | * "Next Step (No Output)" (`next_step_no_output_template`): Displayed if the model's action successfully runs, but does not produce any standard output (e.g. `rm`, `cd`)
90 | * "Format Error" (`format_error_template`): Displayed if the model's response is malformed. Over the next two turns...
 91 |    * If one of the model's next responses is correct, the message history is updated such that the "Format Error" turn is not kept. The episode continues.
92 | * If the model's next two responses are both malformed, the episode terminates.
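 93 | 
 94 | For instance, a `next_step_template` (an illustrative sketch, not a shipped configuration) could combine the last observation with the `pwd` value extracted by the example `state_command` above:
 95 | 
 96 | ```yaml
 97 | next_step_template: |-
 98 |   {observation}
 99 |   (Current directory: {pwd})
 100 |   Please supply your next command.
 101 | ```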
93 |
--------------------------------------------------------------------------------
/SWE-agent/tests/test_replay.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import subprocess
3 | from run_replay import get_args, main
4 | import pytest
5 |
6 |
7 | @pytest.fixture
8 | def swe_agent_test_repo_clone(tmp_path):
9 | local_repo_path = tmp_path / "swe-agent-test-repo"
10 | clone_cmd = ["git", "clone", "https://github.com/klieret/swe-agent-test-repo", local_repo_path]
11 | subprocess.run(clone_cmd, check=True)
12 | return local_repo_path
13 |
14 |
15 | @pytest.fixture
16 | def swe_agent_test_repo_traj(test_trajectories_path) -> Path:
17 | p = test_trajectories_path / "gpt4__swe-agent-test-repo__default_from_url__t-0.00__p-0.95__c-3.00__install-1" / "6e44b9__sweagenttestrepo-1c2844.traj"
18 | assert p.is_file()
19 | return p
20 |
21 |
22 | @pytest.fixture
23 | def swe_agent_test_repo_local_problem_stmt(swe_agent_test_repo_clone) -> Path:
24 | problem_stmt = swe_agent_test_repo_clone / "problem_statements" / "1.md"
25 | assert problem_stmt.is_file()
26 | return problem_stmt
27 |
28 |
29 | @pytest.mark.slow
30 | @pytest.mark.parametrize("problem_statement_source", ["github", "local"])
31 | def test_model_replay_github_repo(swe_agent_test_repo_traj, problem_statement_source, swe_agent_test_repo_local_problem_stmt):
32 | if problem_statement_source == "github":
33 | data_path = "https://github.com/klieret/swe-agent-test-repo/issues/1"
34 | elif problem_statement_source == "local":
35 | data_path = str(swe_agent_test_repo_local_problem_stmt)
36 | args = [
37 | "--traj_path",
38 | str(swe_agent_test_repo_traj),
39 | "--data_path",
40 | data_path,
41 | "--config_file",
42 | "config/default_from_url.yaml",
43 | "--raise_exceptions",
44 | ]
45 | if problem_statement_source == "local":
46 | args.extend(["--repo_path", str("https://github.com/klieret/swe-agent-test-repo/")])
47 | args, remaining_args = get_args(args)
48 | main(**vars(args), forward_args=remaining_args)
49 |
50 |
51 | @pytest.mark.slow
52 | def test_model_replay_from_json(test_trajectories_path, test_data_sources_path):
53 | traj_path = test_trajectories_path / "gpt4__swe-bench-dev-easy_first_only__default__t-0.00__p-0.95__c-3.00__install-1" / "pydicom__pydicom-1458.traj"
54 | assert traj_path.is_file()
55 | data_path = test_data_sources_path / "swe-bench-dev-easy_first_only.json"
56 | assert data_path.is_file()
57 | args = [
58 | "--traj_path",
59 | str(traj_path),
60 | "--data_path",
61 | str(data_path),
62 | "--config_file",
63 | "config/default.yaml",
64 | "--raise_exceptions",
65 | ]
66 | args, remaining_args = get_args(args)
67 | main(**vars(args), forward_args=remaining_args)
68 |
69 |
70 |
71 |
72 | def test_run_cli_help():
73 | args = [
74 | "python",
75 | "run_replay.py",
76 | "--help",
77 | ]
78 | subprocess.run(args, check=True)
79 |
80 |
81 | @pytest.mark.slow
82 | @pytest.mark.parametrize("problem_statement_source", ["github", "local"])
83 | def test_model_replay_local_repo(swe_agent_test_repo_clone, swe_agent_test_repo_traj, problem_statement_source):
84 | local_repo_path = swe_agent_test_repo_clone
85 | if problem_statement_source == "github":
86 | problem_statement_path = "https://github.com/klieret/swe-agent-test-repo/issues/1"
87 | elif problem_statement_source == "local":
88 | problem_statement_path = local_repo_path / "problem_statements" / "1.md"
89 | assert problem_statement_path.is_file()
90 | else:
91 | raise ValueError(problem_statement_source)
92 | run_cmd = [
93 | "--traj_path",
94 | str(swe_agent_test_repo_traj),
95 | "--repo_path",
96 | str(local_repo_path),
97 | "--config_file",
98 | "config/default_from_url.yaml",
99 | "--data_path",
100 | str(problem_statement_path),
101 | "--apply_patch",
102 | "--raise_exceptions",
103 | ]
104 | print(run_cmd)
105 | args, remaining_args = get_args(run_cmd)
106 | main(**vars(args), forward_args=remaining_args)
107 | solution = (swe_agent_test_repo_traj.parent / "solution_missing_colon.py").read_text().strip()
108 | solution_retrieved = (local_repo_path / "tests" / "missing_colon.py").read_text().strip()
109 | assert solution == solution_retrieved
110 |
111 |
112 | def test_exception_replay_local_dirty(swe_agent_test_repo_clone, swe_agent_test_repo_traj):
113 | """Test that swe-agent refuses to work if the local repo is dirty"""
114 | problem_statement_path = swe_agent_test_repo_clone / "problem_statements" / "1.md"
115 | test_file = swe_agent_test_repo_clone / "tests" / "missing_colon.py"
116 | assert test_file.is_file()
117 | test_file.write_text(test_file.read_text().replace("division", "division_function"))
118 | run_cmd = [
119 | "--traj_path",
120 | str(swe_agent_test_repo_traj),
121 | "--repo_path",
122 | str(swe_agent_test_repo_clone),
123 | "--config_file",
124 | "config/default_from_url.yaml",
125 | "--data_path",
126 | str(problem_statement_path),
127 | "--apply_patch",
128 | "--raise_exceptions",
129 | ]
130 | args, remaining_args = get_args(run_cmd)
131 | with pytest.raises(ValueError, match=".*dirty.*"):
132 | main(**vars(args), forward_args=remaining_args)
--------------------------------------------------------------------------------
/SWE-agent/config/commands/edit_linting.sh:
--------------------------------------------------------------------------------
1 | # @yaml
2 | # signature: |-
 3 | #   edit <start_line>:<end_line>
 4 | #   <replacement_text>
5 | # end_of_edit
 6 | # docstring: replaces lines <start_line> through <end_line> (inclusive) with the given text in the open file. The replacement text is terminated by a line with only end_of_edit on it. All of the <replacement_text> will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again.
7 | # end_name: end_of_edit
8 | # arguments:
9 | # start_line:
10 | # type: integer
11 | # description: the line number to start the edit at
12 | # required: true
13 | # end_line:
14 | # type: integer
15 | # description: the line number to end the edit at (inclusive)
16 | # required: true
17 | # replacement_text:
18 | # type: string
19 | # description: the text to replace the current selection with
20 | # required: true
21 | edit() {
22 | if [ -z "$CURRENT_FILE" ]
23 | then
24 | echo 'No file open. Use the `open` command first.'
25 | return
26 | fi
27 |
28 | local start_line="$(echo $1: | cut -d: -f1)"
29 | local end_line="$(echo $1: | cut -d: -f2)"
30 |
31 | if [ -z "$start_line" ] || [ -z "$end_line" ]
32 | then
33 | echo "Usage: edit <start_line>:<end_line>"
34 | return
35 | fi
36 |
37 | local re='^[0-9]+$'
38 | if ! [[ $start_line =~ $re ]]; then
39 | echo "Usage: edit <start_line>:<end_line>"
40 | echo "Error: start_line must be a number"
41 | return
42 | fi
43 | if ! [[ $end_line =~ $re ]]; then
44 | echo "Usage: edit <start_line>:<end_line>"
45 | echo "Error: end_line must be a number"
46 | return
47 | fi
48 |
49 | # Bash array starts at 0, so let's adjust
50 | local start_line=$((start_line - 1))
51 | local end_line=$((end_line))
52 |
53 | local line_count=0
54 | local replacement=()
55 | while IFS= read -r line
56 | do
57 | replacement+=("$line")
58 | ((line_count++))
59 | done
60 |
61 | # Create a backup of the current file
62 | cp "$CURRENT_FILE" "/root/$(basename "$CURRENT_FILE")_backup"
63 |
64 | # Read the file line by line into an array
65 | mapfile -t lines < "$CURRENT_FILE"
66 | local new_lines=("${lines[@]:0:$start_line}" "${replacement[@]}" "${lines[@]:$((end_line))}")
67 | # Write the new stuff directly back into the original file
68 | printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
69 |
70 | # Run linter
71 | if [[ $CURRENT_FILE == *.py ]]; then
72 | lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
73 | else
74 | # do nothing
75 | lint_output=""
76 | fi
77 |
78 | # if there is no output, then the file is good
79 | if [ -z "$lint_output" ]; then
80 | export CURRENT_LINE=$start_line
81 | _constrain_line
82 | _print
83 |
84 | echo "File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
85 | else
86 | echo "Your proposed edit has introduced new syntax error(s). Please read this error message carefully and then retry editing the file."
87 | echo ""
88 | echo "ERRORS:"
89 | _split_string "$lint_output"
90 | echo ""
91 |
92 | # Save original values
93 | original_current_line=$CURRENT_LINE
94 | original_window=$WINDOW
95 |
96 | # Update values
97 | export CURRENT_LINE=$(( (line_count / 2) + start_line )) # Set to "center" of edit
98 | export WINDOW=$((line_count + 10)) # Show +/- 5 lines around edit
99 |
100 | echo "This is how your edit would have looked if applied"
101 | echo "-------------------------------------------------"
102 | _constrain_line
103 | _print
104 | echo "-------------------------------------------------"
105 | echo ""
106 |
107 | # Restoring CURRENT_FILE to original contents.
108 | cp "/root/$(basename "$CURRENT_FILE")_backup" "$CURRENT_FILE"
109 |
110 | export CURRENT_LINE=$(( ((end_line - start_line + 1) / 2) + start_line ))
111 | export WINDOW=$((end_line - start_line + 10))
112 |
113 | echo "This is the original code before your edit"
114 | echo "-------------------------------------------------"
115 | _constrain_line
116 | _print
117 | echo "-------------------------------------------------"
118 |
119 | # Restore original values
120 | export CURRENT_LINE=$original_current_line
121 | export WINDOW=$original_window
122 |
123 | echo "Your changes have NOT been applied. Please fix your edit command and try again."
124 | echo "You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code."
125 | echo "DO NOT re-run the same failed edit command. Running it again will lead to the same error."
126 | fi
127 |
128 | # Remove backup file
129 | rm -f "/root/$(basename "$CURRENT_FILE")_backup"
130 | }
131 |
--------------------------------------------------------------------------------
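
The edit linter gate above only accepts an edit to a Python file when flake8 (run with --isolated and the error classes F821, F822, F831, E111, E112, E113, E999 and E902) reports nothing; otherwise the backup is restored. A minimal Python sketch of the same back-up/lint/roll-back idea, assuming flake8 is installed on PATH (the helper name and backup location are illustrative, not part of SWE-agent):

import shutil
import subprocess
from pathlib import Path

FLAKE8_SELECT = "F821,F822,F831,E111,E112,E113,E999,E902"

def apply_edit_with_lint(path: Path, new_text: str) -> bool:
    """Write new_text to path, rolling back if flake8 reports new problems."""
    backup = path.with_suffix(path.suffix + ".bak")
    shutil.copy(path, backup)                      # same idea as the script's backup copy
    path.write_text(new_text)
    result = subprocess.run(
        ["flake8", "--isolated", f"--select={FLAKE8_SELECT}", str(path)],
        capture_output=True, text=True,
    )
    if result.stdout.strip():                      # linter complained: restore the original
        shutil.copy(backup, path)
        backup.unlink()
        return False
    backup.unlink()
    return True
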
/SWE-agent/tests/test_run.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | import subprocess
4 | from typing import Any, Dict
5 | import pytest
6 |
7 | from run import ActionsArguments, Main, MainHook, OpenPRHook, ScriptArguments
8 | from sweagent.agent.agents import Agent, AgentArguments, AgentHook
9 | from sweagent.agent.models import ModelArguments
10 | from sweagent.environment.swe_env import EnvironmentArguments, SWEEnv
11 |
12 | def test_run_cli_help():
13 | args = [
14 | "python",
15 | "run.py",
16 | "--help",
17 | ]
18 | subprocess.run(args, check=True)
19 |
20 |
21 |
22 | @pytest.fixture
23 | def open_pr_hook_init_for_sop():
24 | hook = OpenPRHook()
25 | hook._token = ""
26 | hook._env = None
27 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/1"
28 | hook._open_pr = True
29 | hook._skip_if_commits_reference_issue = True
30 | return hook
31 |
32 |
33 | @pytest.fixture
34 | def info_dict():
35 | return {
36 | "submission": "asdf",
37 | "exit_status": "submitted",
38 | }
39 |
40 |
41 | def test_should_open_pr_fail_submission(open_pr_hook_init_for_sop, info_dict):
42 | hook = open_pr_hook_init_for_sop
43 | info_dict["submission"] = None
44 | assert not hook.should_open_pr(info_dict)
45 |
46 |
47 | def test_should_open_pr_fail_exit(open_pr_hook_init_for_sop, info_dict):
48 | hook = open_pr_hook_init_for_sop
49 | info_dict["exit_status"] = "fail"
50 | assert not hook.should_open_pr(info_dict)
51 |
52 |
53 | def test_should_open_pr_fail_invalid_url(open_pr_hook_init_for_sop, info_dict):
54 | hook = open_pr_hook_init_for_sop
55 | hook._data_path = "asdf"
56 | assert not hook.should_open_pr(info_dict)
57 |
58 |
59 | def test_should_open_pr_fail_closed(open_pr_hook_init_for_sop, info_dict):
60 | hook = open_pr_hook_init_for_sop
61 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/16"
62 | assert not hook.should_open_pr(info_dict)
63 |
64 |
65 | def test_should_open_pr_fail_assigned(open_pr_hook_init_for_sop, info_dict):
66 | hook = open_pr_hook_init_for_sop
67 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/17"
68 | assert not hook.should_open_pr(info_dict)
69 |
70 |
71 | def test_should_open_pr_fail_locked(open_pr_hook_init_for_sop, info_dict):
72 | hook = open_pr_hook_init_for_sop
73 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/18"
74 | assert not hook.should_open_pr(info_dict)
75 |
76 | def test_should_open_pr_fail_has_pr(open_pr_hook_init_for_sop, info_dict):
77 | hook = open_pr_hook_init_for_sop
78 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/19"
79 | assert not hook.should_open_pr(info_dict)
80 |
81 |
82 | def test_should_open_pr_success_has_pr_override(open_pr_hook_init_for_sop, info_dict):
83 | hook = open_pr_hook_init_for_sop
84 | hook._data_path = "https://github.com/klieret/swe-agent-test-repo/issues/19"
85 | hook._skip_if_commits_reference_issue = False
86 | assert hook.should_open_pr(info_dict)
87 |
88 |
89 | class RaisesExceptionHook(MainHook):
90 | def on_instance_start(self, *, index: int, instance: Dict[str, Any]):
91 | raise ValueError("test exception")
92 |
93 | @pytest.fixture
94 | def test_script_args():
95 | return ScriptArguments(
96 | suffix="",
97 | environment=EnvironmentArguments(
98 | image_name="sweagent/swe-agent:latest",
99 | data_path="https://github.com/klieret/swe-agent-test-repo/issues/1",
100 | split="dev",
101 | verbose=True,
102 | install_environment=True,
103 | ),
104 | skip_existing=True,
105 | agent=AgentArguments(
106 | model=ModelArguments(
107 | model_name="instant_empty_submit",
108 | total_cost_limit=0.0,
109 | per_instance_cost_limit=3.0,
110 | temperature=0.0,
111 | top_p=0.95,
112 | ),
113 | config_file=Path("config/default.yaml"),
114 | ),
115 | actions=ActionsArguments(open_pr=False, skip_if_commits_reference_issue=True),
116 | raise_exceptions=True,
117 | )
118 |
119 |
120 | def test_exception_raised(test_script_args):
121 | assert test_script_args.raise_exceptions
122 | main = Main(test_script_args)
123 | main.add_hook(RaisesExceptionHook())
124 | with pytest.raises(ValueError, match="test exception"):
125 | main.main()
126 |
127 |
128 | class CreateFakeLogFile(MainHook):
129 | """Testing the skip functionality"""
130 | def on_init(self, *, args: ScriptArguments, agent: Agent, env: SWEEnv, traj_dir: Path):
131 | self._traj_dir = traj_dir
132 | (traj_dir / "args.yaml").write_text("asdf")
133 |
134 | def on_instance_start(self, *, index: int, instance: Dict[str, Any]):
135 | instance_id = instance["instance_id"]
136 | dct = {
137 | "info": {"exit_status": "submitted"},
138 | }
139 | (self._traj_dir / f"{instance_id}.traj").write_text(json.dumps(dct))
140 |
141 |
142 |
143 | def test_existing_corrupted_args(test_script_args):
144 | main = Main(test_script_args)
145 | main.add_hook(CreateFakeLogFile())
146 | main.main()
147 |
148 |
149 |
150 | def test_main_hook(test_script_args):
151 | main = Main(test_script_args)
152 | main.add_hook(MainHook())
153 | main.main()
154 |
155 |
156 | def test_agent_with_hook(test_script_args):
157 | main = Main(test_script_args)
158 | main.agent.add_hook(AgentHook())
159 | main.main()
--------------------------------------------------------------------------------
/SWE-agent/inspector/fileViewer.js:
--------------------------------------------------------------------------------
1 | let currentFileName = null; // Store the current file name
2 | let trajectoryDirectory = ""; // Global variable to store the directory
3 | let timeoutIds = []; // Store timeout IDs for pending operations
4 |
5 | function getBaseUrl() {
6 | const protocol = window.location.protocol;
7 | const host = window.location.hostname;
8 | const port = window.location.port;
9 |
10 | // Use the default port if the port number is empty (for standard HTTP/HTTPS)
11 | const defaultPort =
12 | protocol === "http:" && !port
13 | ? "80"
14 | : protocol === "https:" && !port
15 | ? "443"
16 | : port;
17 |
18 | return `${protocol}//${host}:${defaultPort}`;
19 | }
20 |
21 | function fetchFiles() {
22 | const baseUrl = getBaseUrl();
23 | fetch(`${baseUrl}/files`)
24 | .then((response) => response.json())
25 | .then((files) => {
26 | const fileList = document.getElementById("fileList");
27 | fileList.innerHTML = "";
28 | files.forEach((file) => {
29 | const fileElement = document.createElement("li");
30 | fileElement.textContent = file;
31 | fileElement.onclick = () => viewFile(file.split(" ")[0]);
32 | fileList.appendChild(fileElement);
33 | });
34 | });
35 | }
36 |
37 | function viewFile(fileName) {
38 | // Clear any pending message loading from previous files
39 | timeoutIds.forEach((timeoutId) => clearTimeout(timeoutId));
40 | timeoutIds = []; // Reset the list of timeout IDs
41 |
42 | const baseUrl = getBaseUrl();
43 | fetch(`${baseUrl}/trajectory/${fileName}`)
44 | .then((response) => {
45 | if (!response.ok) {
46 | throw new Error("Network response was not ok");
47 | }
48 | return response.json();
49 | })
50 | .then((content) => {
51 | const container = document.getElementById("fileContent");
52 | container.innerHTML = ""; // Clear existing content
53 |
54 | if (content.history && Array.isArray(content.history)) {
55 | let delay = 200; // Initial delay
56 | const delayIncrement = 50; // Delay between each message, in milliseconds
57 |
58 | content.history.forEach((item, index) => {
59 | const timeoutId = setTimeout(() => {
60 | const contentText = item.content
61 | ? item.content.replace(/</g, "&lt;").replace(/>/g, "&gt;")
62 | : "";
63 | let roleClass =
64 | item.agent && item.agent !== "primary"
65 | ? "subroutine"
66 | : item.role
67 | ? item.role.toLowerCase().replaceAll(" ", "-")
68 | : "default";
69 | const elementId = "historyItem" + index;
70 | const historyItem = document.createElement("div");
71 | historyItem.className = `history-item ${roleClass} fade-in`;
72 | historyItem.id = elementId;
73 | if (contentText.includes("--- DEMONSTRATION ---")) {
74 | item.role = "demo";
75 | } else if ("is_demo" in item && item.is_demo === true) {
76 | item.role += "[demo]";
77 | }
78 | historyItem.innerHTML = `
79 |
80 |
81 | ${item.role}
82 |
83 |
84 |
85 | ${contentText}
86 |
87 |
88 | `;
89 | container.appendChild(historyItem);
90 | }, delay);
91 |
92 | delay += delayIncrement; // Increment delay for the next message
93 | timeoutIds.push(timeoutId); // Store the timeout ID
94 | });
95 | } else {
96 | container.textContent = "No history content found.";
97 | }
98 | })
99 | .catch((error) => {
100 | console.error("Error fetching file:", error);
101 | document.getElementById("fileContent").textContent =
102 | "Error loading content. " + error;
103 | });
104 |
105 | // Highlight the selected file in the list
106 | document.querySelectorAll("#fileList li").forEach((li) => {
107 | li.classList.remove("selected");
108 | if (li.textContent.split(" ")[0] === fileName) {
109 | li.classList.add("selected");
110 | }
111 | });
112 | }
113 |
114 | function refreshCurrentFile() {
115 | if (currentFileName) {
116 | const currentScrollPosition =
117 | document.documentElement.scrollTop || document.body.scrollTop;
118 | viewFile(currentFileName.split(" ")[0]); // Reload the current file
119 | // Restore the scroll position after the content is loaded
120 | setTimeout(() => {
121 | window.scrollTo(0, currentScrollPosition);
122 | }, 100);
123 | }
124 | }
125 |
126 | function fetchDirectoryInfo() {
127 | const baseUrl = getBaseUrl();
128 | fetch(`${baseUrl}/directory_info`)
129 | .then((response) => response.json())
130 | .then((data) => {
131 | if (data.directory) {
132 | trajectoryDirectory = data.directory; // Store the directory
133 | document.title = `Trajectory Viewer: ${data.directory}`;
134 | document.querySelector("h1").textContent =
135 | `Trajectory Viewer: ${data.directory}`;
136 | }
137 | })
138 | .catch((error) => console.error("Error fetching directory info:", error));
139 | }
140 |
141 | window.onload = function () {
142 | fetchFiles();
143 | fetchDirectoryInfo();
144 | };
145 |
--------------------------------------------------------------------------------
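
fileViewer.js expects a backend exposing three JSON routes: /files (a list of trajectory file names), /trajectory/<name> (the parsed trajectory, including its "history" list), and /directory_info (the directory shown in the page title). Below is a hedged stand-in sketch of such a backend in Flask, for local experimentation only; it is not the actual SWE-agent inspector server, and the trajectory directory is an assumption:

import json
from pathlib import Path

from flask import Flask, jsonify

app = Flask(__name__)
TRAJ_DIR = Path("trajectories")  # assumed location of *.traj files

@app.route("/files")
def files():
    # The viewer renders this list into #fileList.
    return jsonify(sorted(p.name for p in TRAJ_DIR.glob("*.traj")))

@app.route("/trajectory/<name>")
def trajectory(name):
    # The viewer walks content.history and renders one item per message.
    return jsonify(json.loads((TRAJ_DIR / name).read_text()))

@app.route("/directory_info")
def directory_info():
    # The viewer uses data.directory for the page title.
    return jsonify({"directory": str(TRAJ_DIR.resolve())})

if __name__ == "__main__":
    app.run(port=8000)
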
/SWE-agent/sweagent/api/hooks.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | import sys
3 | import io
4 |
5 | from sweagent import PACKAGE_DIR
6 | from sweagent.agent.agents import AgentHook
7 | from flask_socketio import SocketIO
8 |
9 | from sweagent.api.utils import strip_ansi_sequences
10 | from sweagent.environment.swe_env import EnvHook
11 |
12 | # baaaaaaad
13 | sys.path.append(str(PACKAGE_DIR.parent))
14 | from run import MainHook
15 |
16 |
17 | class StreamToSocketIO(io.StringIO):
18 | def __init__(
19 | self,
20 | wu: "WebUpdate",
21 | ):
22 | super().__init__()
23 | self._wu = wu
24 |
25 | def write(self, message):
26 | message = strip_ansi_sequences(message)
27 | self._wu.up_log(message)
28 |
29 | def flush(self):
30 | pass
31 |
32 |
33 | class WebUpdate:
34 | """This class talks to socketio. It's pretty much a wrapper around socketio.emit."""
35 |
36 | def __init__(self, socketio: SocketIO):
37 | self._socketio = socketio
38 | self.log_stream = StreamToSocketIO(self)
39 |
40 | def _emit(self, event, data):
41 | """Directly wrap around socketio.emit"""
42 | self._socketio.emit(event, data)
43 |
44 | def up_log(self, message: str, level="info"):
45 | """Update the log"""
46 | self._emit("log_message", {"message": message, "level": level})
47 |
48 | def up_agent(
49 | self,
50 | message: str,
51 | *,
52 | format: str = "markdown",
53 | thought_idx: Optional[int] = None,
54 | type_: str = "info",
55 | ):
56 | """Update the agent feed"""
57 | self._emit(
58 | "update",
59 | {
60 | "feed": "agent",
61 | "message": message,
62 | "format": format,
63 | "thought_idx": thought_idx,
64 | "type": type_,
65 | },
66 | )
67 |
68 | def up_env(
69 | self,
70 | message: str,
71 | *,
72 | type_: str,
73 | format: str = "markdown",
74 | thought_idx: Optional[int] = None,
75 | ):
76 | """Update the environment feed"""
77 | self._emit(
78 | "update",
79 | {
80 | "feed": "env",
81 | "message": message,
82 | "format": format,
83 | "thought_idx": thought_idx,
84 | "type": type_,
85 | },
86 | )
87 |
88 | def finish_run(self):
89 | """Finish the run. We use that to control which buttons are active."""
90 | self._emit("finish_run", {})
91 |
92 |
93 | class MainUpdateHook(MainHook):
94 | def __init__(self, wu: WebUpdate):
95 | """This hooks into the Main class to update the web interface"""
96 | self._wu = wu
97 |
98 | def on_start(self):
99 | self._wu.up_env(
100 | message="Environment container initialized", format="text", type_="info"
101 | )
102 |
103 | def on_end(self):
104 | self._wu.up_agent(message="The run has ended", format="text")
105 | self._wu.finish_run()
106 |
107 | def on_instance_completed(self, *, info, trajectory):
108 | print(info.get("submission"))
109 | if info.get("submission") and info["exit_status"] == "submitted":
110 | msg = (
111 | "The submission was successful. You can find the patch (diff) in the right panel. "
112 | "To apply it to your code, run `git apply /path/to/patch/file.patch`. "
113 | )
114 | self._wu.up_agent(msg, type_="success")
115 |
116 |
117 | class AgentUpdateHook(AgentHook):
118 | def __init__(self, wu: WebUpdate):
119 | """This hooks into the Agent class to update the web interface"""
120 | self._wu = wu
121 | self._sub_action = None
122 | self._thought_idx = 0
123 |
124 | def on_actions_generated(self, *, thought: str, action: str, output: str):
125 | self._thought_idx += 1
126 | for prefix in ["DISCUSSION\n", "THOUGHT\n", "DISCUSSION", "THOUGHT"]:
127 | thought = thought.replace(prefix, "")
128 | self._wu.up_agent(
129 | message=thought,
130 | format="markdown",
131 | thought_idx=self._thought_idx,
132 | type_="thought",
133 | )
134 |
135 | def on_sub_action_started(self, *, sub_action: dict):
136 | # msg = f"```bash\n{sub_action['action']}\n```"
137 | msg = "$ " + sub_action["action"].strip()
138 | self._sub_action = sub_action["action"].strip()
139 | self._wu.up_env(message=msg, thought_idx=self._thought_idx, type_="command")
140 |
141 | def on_sub_action_executed(self, *, obs: str, done: bool):
142 | type_ = "output"
143 | if self._sub_action == "submit":
144 | type_ = "diff"
145 | if obs is None:
146 | # This can happen for empty patch submissions
147 | obs = ""
148 | msg = obs.strip()
149 | self._wu.up_env(message=msg, thought_idx=self._thought_idx, type_=type_)
150 |
151 |
152 | class EnvUpdateHook(EnvHook):
153 | def __init__(self, wu: WebUpdate):
154 | """This hooks into the environment class to update the web interface"""
155 | self._wu = wu
156 |
157 | def on_close(self):
158 | self._wu.up_env(message="Environment closed", format="text", type_="info")
159 |
160 | # def on_query_message_added(
161 | # self,
162 | # *,
163 | # role: str,
164 | # content: str,
165 | # agent: str,
166 | # is_demo: bool = False,
167 | # thought: str = "",
168 | # action: str = ""
169 | # ):
170 | # if role == "assistant":
171 | # return
172 | # if thought or action:
173 | # return
174 | # if is_demo:
175 | # return self._wu.up_agent(title="Demo", message=content, thought_idx=self._thought_idx + 1)
176 | # self._wu.up_agent(title="Query", message=content, thought_idx=self._thought_idx + 1)
177 |
--------------------------------------------------------------------------------
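
The hooks above are plain observers: WebUpdate wraps socketio.emit, and each hook forwards run, agent, and environment events to it. A hedged sketch of how they could be wired together (the Flask/SocketIO setup is minimal, and the last line assumes Main exposes .env and that SWEEnv registers hooks the same way Main and Agent do):

from flask import Flask
from flask_socketio import SocketIO

from run import Main
from sweagent.api.hooks import AgentUpdateHook, EnvUpdateHook, MainUpdateHook, WebUpdate

app = Flask(__name__)
socketio = SocketIO(app)

def attach_web_hooks(main: Main) -> WebUpdate:
    wu = WebUpdate(socketio)                  # wraps socketio.emit
    main.add_hook(MainUpdateHook(wu))         # run start/end, submission message
    main.agent.add_hook(AgentUpdateHook(wu))  # thoughts and sub-actions
    main.env.add_hook(EnvUpdateHook(wu))      # assumption: Main exposes .env with add_hook
    return wu
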
/SWE-agent/config/commands/search.sh:
--------------------------------------------------------------------------------
1 | # @yaml
 2 | # signature: search_dir <search_term> [<dir>]
3 | # docstring: searches for search_term in all files in dir. If dir is not provided, searches in the current directory
4 | # arguments:
5 | # search_term:
6 | # type: string
7 | # description: the term to search for
8 | # required: true
9 | # dir:
10 | # type: string
11 | # description: the directory to search in (if not provided, searches in the current directory)
12 | # required: false
13 | search_dir() {
14 | if [ $# -eq 1 ]; then
15 | local search_term="$1"
16 | local dir="./"
17 | elif [ $# -eq 2 ]; then
18 | local search_term="$1"
19 | if [ -d "$2" ]; then
20 | local dir="$2"
21 | else
22 | echo "Directory $2 not found"
23 | return
24 | fi
25 | else
26 | echo "Usage: search_dir <search_term> [<dir>]"
27 | return
28 | fi
29 | dir=$(realpath "$dir")
30 | local matches=$(find "$dir" -type f ! -path '*/.*' -exec grep -nIH -- "$search_term" {} + | cut -d: -f1 | sort | uniq -c)
31 | # if no matches, return
32 | if [ -z "$matches" ]; then
33 | echo "No matches found for \"$search_term\" in $dir"
34 | return
35 | fi
36 | # Calculate total number of matches
37 | local num_matches=$(echo "$matches" | awk '{sum+=$1} END {print sum}')
38 | # calculate total number of files matched
39 | local num_files=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
40 | # if num_files is > 100, print an error
41 | if [ $num_files -gt 100 ]; then
42 | echo "More than $num_files files matched for \"$search_term\" in $dir. Please narrow your search."
43 | return
44 | fi
45 |
46 | echo "Found $num_matches matches for \"$search_term\" in $dir:"
47 | echo "$matches" | awk '{$2=$2; gsub(/^\.+\/+/, "./", $2); print $2 " ("$1" matches)"}'
48 | echo "End of matches for \"$search_term\" in $dir"
49 | }
50 |
51 | # @yaml
52 | # signature: search_file <search_term> [<file>]
53 | # docstring: searches for search_term in file. If file is not provided, searches in the current open file
54 | # arguments:
55 | # search_term:
56 | # type: string
57 | # description: the term to search for
58 | # required: true
59 | # file:
60 | # type: string
61 | # description: the file to search in (if not provided, searches in the current open file)
62 | # required: false
63 | search_file() {
64 | # Check if the first argument is provided
65 | if [ -z "$1" ]; then
66 | echo "Usage: search_file <search_term> [<file>]"
67 | return
68 | fi
69 | # Check if the second argument is provided
70 | if [ -n "$2" ]; then
71 | # Check if the provided argument is a valid file
72 | if [ -f "$2" ]; then
73 | local file="$2" # Set file if valid
74 | else
75 | echo "Usage: search_file <search_term> [<file>]"
76 | echo "Error: File name $2 not found. Please provide a valid file name."
77 | return # Exit if the file is not valid
78 | fi
79 | else
80 | # Check if a file is open
81 | if [ -z "$CURRENT_FILE" ]; then
82 | echo "No file open. Use the open command first."
83 | return # Exit if no file is open
84 | fi
85 | local file="$CURRENT_FILE" # Set file to the current open file
86 | fi
87 | local search_term="$1"
88 | file=$(realpath "$file")
89 | # Use grep to directly get the desired formatted output
90 | local matches=$(grep -nH -- "$search_term" "$file")
91 | # Check if no matches were found
92 | if [ -z "$matches" ]; then
93 | echo "No matches found for \"$search_term\" in $file"
94 | return
95 | fi
96 | # Calculate total number of matches
97 | local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
98 |
99 | # calculate total number of lines matched
100 | local num_lines=$(echo "$matches" | cut -d: -f1 | sort | uniq | wc -l | awk '{$1=$1; print $0}')
101 | # if num_lines is > 100, print an error
102 | if [ $num_lines -gt 100 ]; then
103 | echo "More than $num_lines lines matched for \"$search_term\" in $file. Please narrow your search."
104 | return
105 | fi
106 |
107 | # Print the total number of matches and the matches themselves
108 | echo "Found $num_matches matches for \"$search_term\" in $file:"
109 | echo "$matches" | cut -d: -f1-2 | sort -u -t: -k2,2n | while IFS=: read -r filename line_number; do
110 | echo "Line $line_number:$(sed -n "${line_number}p" "$file")"
111 | done
112 | echo "End of matches for \"$search_term\" in $file"
113 | }
114 |
115 | # @yaml
116 | # signature: find_file <file_name> [<dir>]
117 | # docstring: finds all files with the given name in dir. If dir is not provided, searches in the current directory
118 | # arguments:
119 | # file_name:
120 | # type: string
121 | # description: the name of the file to search for
122 | # required: true
123 | # dir:
124 | # type: string
125 | # description: the directory to search in (if not provided, searches in the current directory)
126 | # required: false
127 | find_file() {
128 | if [ $# -eq 1 ]; then
129 | local file_name="$1"
130 | local dir="./"
131 | elif [ $# -eq 2 ]; then
132 | local file_name="$1"
133 | if [ -d "$2" ]; then
134 | local dir="$2"
135 | else
136 | echo "Directory $2 not found"
137 | return
138 | fi
139 | else
140 | echo "Usage: find_file <file_name> [<dir>]"
141 | return
142 | fi
143 |
144 | dir=$(realpath "$dir")
145 | local matches=$(find "$dir" -type f -name "$file_name")
146 | # if no matches, return
147 | if [ -z "$matches" ]; then
148 | echo "No matches found for \"$file_name\" in $dir"
149 | return
150 | fi
151 | # Calculate total number of matches
152 | local num_matches=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
153 | echo "Found $num_matches matches for \"$file_name\" in $dir:"
154 | echo "$matches" | awk '{print $0}'
155 | }
156 |
--------------------------------------------------------------------------------
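
search_dir's find/grep/awk pipeline boils down to: count matching lines per file, refuse to print if more than 100 files matched, and otherwise report per-file counts plus the total. A rough Python equivalent of that logic (the function name is illustrative and the plain substring match is a simplification of grep's pattern handling):

from collections import Counter
from pathlib import Path

def search_dir(search_term: str, directory: str = ".") -> None:
    root = Path(directory).resolve()
    counts = Counter()
    for path in root.rglob("*"):
        # Skip hidden paths and non-files, as `find ... ! -path '*/.*'` does.
        if not path.is_file() or any(p.startswith(".") for p in path.relative_to(root).parts):
            continue
        try:
            text = path.read_text(errors="ignore")
        except OSError:
            continue
        hits = sum(search_term in line for line in text.splitlines())
        if hits:
            counts[str(path)] = hits
    if not counts:
        print(f'No matches found for "{search_term}" in {root}')
        return
    if len(counts) > 100:
        print(f'More than {len(counts)} files matched for "{search_term}" in {root}. Please narrow your search.')
        return
    print(f'Found {sum(counts.values())} matches for "{search_term}" in {root}:')
    for filename, n in sorted(counts.items()):
        print(f"{filename} ({n} matches)")
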
/SWE-agent/config/configs/xml_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml:
--------------------------------------------------------------------------------
1 | system_template: |-
2 | SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface.
3 |
4 | The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time.
5 | In addition to typical bash commands, you can also use the following commands to help you navigate and edit files.
6 |
7 | COMMANDS:
8 | {command_docs}
9 |
10 | Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION.
11 | If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
12 |
13 | RESPONSE FORMAT:
14 | Your shell prompt is formatted as follows:
15 | (Open file: <path>) <cwd> $
16 |
17 | You need to format your output using two fields; discussion and command.
18 | Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example:
19 | DISCUSSION
20 | First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like.
21 | <command>
22 | ls -a
23 | </command>
24 |
25 | You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
26 | If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command.
27 | You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above.
28 | However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
29 | instance_template: |-
30 | We're currently solving the following issue within our repository. Here's the issue text:
31 | ISSUE:
32 | {issue}
33 |
34 | INSTRUCTIONS:
35 | Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
36 | Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
37 | When you're satisfied with all of the changes you've made, you can submit your changes to the code base by simply running the submit command.
38 | Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`.
39 |
40 | NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
41 |
42 | IMPORTANT TIPS:
43 | 1. Always start by trying to replicate the bug that the issue discusses.
44 | If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug.
45 | Then start trying to fix it.
46 | When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed.
47 |
48 | If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file,
49 | so that you can be sure that the script indeed ran fine all the way through.
50 |
51 | 2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
52 |
53 | 3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
54 |
55 | 4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command.
56 |
57 | 5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file.
58 |
59 | 6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
60 |
61 |
62 | (Open file: {open_file})
63 | (Current directory: {working_dir})
64 | bash-$
65 | next_step_template: |-
66 | {observation}
67 | (Open file: {open_file})
68 | (Current directory: {working_dir})
69 | bash-$
70 | next_step_no_output_template: |-
71 | Your command ran successfully and did not produce any output.
72 | (Open file: {open_file})
73 | (Current directory: {working_dir})
74 | bash-$
75 | put_demos_in_history: true
76 | state_command:
77 | name: state
78 | code: |
79 | state() {
80 | local working_dir="$PWD";
81 | if [ -z $CURRENT_FILE ]; then
82 | echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}';
83 | else
84 | echo '{"open_file": "'$(realpath $CURRENT_FILE)'", "working_dir": "'$working_dir'"}';
85 | fi
86 | };
87 | parse_function: XMLThoughtActionParser
88 | env_variables:
89 | WINDOW: 100
90 | OVERLAP: 2
91 | CURRENT_LINE: 0
92 | CURRENT_FILE: ''
93 | SEARCH_RESULTS: ()
94 | SEARCH_FILES: ()
95 | SEARCH_INDEX: 0
96 | command_files:
97 | - config/commands/defaults.sh
98 | - config/commands/search.sh
99 | - config/commands/edit_linting.sh
100 | - config/commands/_split_string.py
101 | parse_command: ParseCommandDetailed
102 | history_processor: Last5Observations
103 | demonstrations:
104 | - trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj
105 |
--------------------------------------------------------------------------------
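
The config above is mostly templating: system_template and the step templates look like str.format-style strings whose {WINDOW}, {command_docs}, {issue}, {open_file}, {working_dir} and {observation} placeholders are filled in at run time by the agent. A small preview sketch, assuming PyYAML is installed (the command_docs value here is a stand-in, not the real generated command documentation, and whether the agent uses plain str.format internally is an assumption):

import yaml

with open("config/configs/xml_sys-env_window100-detailed_cmd_format-last_5_history-1_demos.yaml") as f:
    cfg = yaml.safe_load(f)

command_docs = "edit <start_line>:<end_line> -- replace a line range in the open file"  # stand-in
system_prompt = cfg["system_template"].format(
    WINDOW=cfg["env_variables"]["WINDOW"],
    command_docs=command_docs,
)
print(system_prompt.splitlines()[0])
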
/SWE-agent/config/configs/xml_sys-env_window100-detailed_cmd_format-full_history-1_demos.yaml:
--------------------------------------------------------------------------------
1 | system_template: |-
2 | SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface.
3 |
4 | The special interface consists of a file editor that shows you {WINDOW} lines of a file at a time.
5 | In addition to typical bash commands, you can also use the following commands to help you navigate and edit files.
6 |
7 | COMMANDS:
8 | {command_docs}
9 |
10 | Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION.
11 | If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
12 |
13 | RESPONSE FORMAT:
14 | Your shell prompt is formatted as follows:
15 | (Open file: <path>) <cwd> $
16 |
17 | You need to format your output using two fields; discussion and command.
18 | Your output should always include _one_ discussion and _one_ command field EXACTLY as in the following example:
19 | DISCUSSION
20 | First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like.
21 | <command>
22 | ls -a
23 | </command>
24 |
25 | You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
26 | If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command.
27 | You're free to use any other bash commands you want (e.g. find, grep, cat, ls, cd) in addition to the special commands listed above.
28 | However, the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
29 | instance_template: |-
30 | We're currently solving the following issue within our repository. Here's the issue text:
31 | ISSUE:
32 | {issue}
33 |
34 | INSTRUCTIONS:
35 | Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
36 | Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
37 | When you're satisfied with all of the changes you've made, you can submit your changes to the code base by simply running the submit command.
38 | Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`.
39 |
40 | NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
41 |
42 | IMPORTANT TIPS:
43 | 1. Always start by trying to replicate the bug that the issue discusses.
44 | If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug.
45 | Then start trying to fix it.
46 | When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed.
47 |
48 | If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file,
49 | so that you can be sure that the script indeed ran fine all the way through.
50 |
51 | 2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
52 |
53 | 3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
54 |
55 | 4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command.
56 |
57 | 5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file.
58 |
59 | 6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
60 |
61 |
62 | (Open file: {open_file})
63 | (Current directory: {working_dir})
64 | bash-$
65 | next_step_template: |-
66 | {observation}
67 | (Open file: {open_file})
68 | (Current directory: {working_dir})
69 | bash-$
70 | next_step_no_output_template: |-
71 | Your command ran successfully and did not produce any output.
72 | (Open file: {open_file})
73 | (Current directory: {working_dir})
74 | bash-$
75 | put_demos_in_history: true
76 | state_command:
77 | name: state
78 | code: |
79 | state() {
80 | local working_dir="$PWD";
81 | if [ -z $CURRENT_FILE ]; then
82 | echo '{"open_file": "n/a", "working_dir": "'$working_dir'"}';
83 | else
84 | echo '{"open_file": "'$(realpath $CURRENT_FILE)'", "working_dir": "'$working_dir'"}';
85 | fi
86 | };
87 | parse_function: XMLThoughtActionParser
88 | env_variables:
89 | WINDOW: 100
90 | OVERLAP: 2
91 | CURRENT_LINE: 0
92 | CURRENT_FILE: ''
93 | SEARCH_RESULTS: ()
94 | SEARCH_FILES: ()
95 | SEARCH_INDEX: 0
96 | command_files:
97 | - config/commands/defaults.sh
98 | - config/commands/search.sh
99 | - config/commands/edit_linting.sh
100 | - config/commands/_split_string.py
101 | parse_command: ParseCommandDetailed
102 | history_processor: DefaultHistoryProcessor
103 | demonstrations:
104 | - trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__xml_sys-env_window100__t-0.20__p-0.95__c-2.00__install-1/marshmallow-code__marshmallow-1867.traj
105 |
--------------------------------------------------------------------------------