├── conf ├── __init__.py ├── config │ ├── __init__.py │ └── default.yaml └── config.yaml ├── swedev ├── __init__.py ├── crawl │ ├── __init__.py │ ├── pypi_crawler.py │ └── get_top_pypi.py ├── issues │ ├── __init__.py │ ├── fetch_metadata.py │ ├── build_dataset.py │ ├── get_tasks_pipeline.py │ └── filter.py ├── testcases │ ├── __init__.py │ └── get_descriptions.py ├── utils │ ├── __init__.py │ ├── tricks │ │ ├── __init__.py │ │ ├── loop_detector.py │ │ ├── api_solver.py │ │ └── error_handler.py │ ├── postprocess.py │ ├── formatter.py │ ├── extract_signs.py │ ├── localize.py │ ├── preprocess.py │ ├── utils.py │ └── prompts.py └── config.py ├── swedev.egg-info ├── dependency_links.txt ├── top_level.txt ├── requires.txt ├── SOURCES.txt └── PKG-INFO ├── assets ├── table.png ├── pipeline.png └── performance.png ├── pyproject.toml ├── scripts ├── docker │ ├── README.md │ ├── clear.sh │ ├── retag.sh │ ├── check.sh │ ├── pull_all_eval_docker.sh │ └── swebench-lite-instance-images.txt ├── remove_envs.sh ├── kill_by_pid.sh ├── remove_container.py ├── remove_playground.py └── hf_push.py ├── MANIFEST.in ├── requirements.txt ├── requirements-base.txt ├── .gitignore ├── setup.py ├── LICENSE ├── Dockerfile └── README.md /conf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swedev/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conf/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swedev/crawl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/swedev/issues/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swedev/testcases/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swedev/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swedev.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /swedev.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | conf 2 | swedev 3 | -------------------------------------------------------------------------------- /assets/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/SWE-Dev/HEAD/assets/table.png -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/SWE-Dev/HEAD/assets/pipeline.png -------------------------------------------------------------------------------- /assets/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/SWE-Dev/HEAD/assets/performance.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel"] 3 | build-backend = "setuptools.build_meta" 
-------------------------------------------------------------------------------- /swedev/utils/tricks/__init__.py: -------------------------------------------------------------------------------- 1 | from .api_solver import * 2 | from .error_handler import * 3 | from .loop_detector import * -------------------------------------------------------------------------------- /scripts/docker/README.md: -------------------------------------------------------------------------------- 1 | This folder is adapted from https://github.com/All-Hands-AI/OpenHands/tree/main/evaluation/benchmarks/swe_bench/scripts/docker -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt 4 | include requirements-base.txt 5 | 6 | recursive-include conf * 7 | recursive-include swedev * 8 | 9 | exclude .git 10 | exclude .gitignore 11 | exclude Dockerfile 12 | recursive-exclude results * 13 | recursive-exclude scripts * -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | tree-sitter==0.21.3 3 | tree-sitter-languages==1.10.2 4 | grep-ast==0.3.2 5 | networkx==3.2.1 6 | pygments==2.18.0 7 | tqdm 8 | datasets 9 | openai==1.42.0 10 | tiktoken==0.7.0 11 | libcst==1.4.0 12 | jsonlines 13 | tenacity 14 | ghapi 15 | bs4 16 | swebench 17 | selenium 18 | diskcache 19 | pytest-json-report 20 | hydra-core>=1.3.2 21 | omegaconf>=2.3.0 22 | -------------------------------------------------------------------------------- /swedev.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | tree-sitter==0.21.3 3 | tree-sitter-languages==1.10.2 4 | grep-ast==0.3.2 5 | networkx==3.2.1 6 | pygments==2.18.0 7 
| tqdm 8 | datasets 9 | openai==1.42.0 10 | tiktoken==0.7.0 11 | libcst==1.4.0 12 | jsonlines 13 | tenacity 14 | ghapi 15 | bs4 16 | swebench 17 | selenium 18 | diskcache 19 | pytest-json-report 20 | hydra-core>=1.3.2 21 | omegaconf>=2.3.0 22 | -------------------------------------------------------------------------------- /scripts/docker/clear.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | prefix=$1 3 | images_to_delete=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "^${prefix}") 4 | 5 | if [ -z "$images_to_delete" ]; then 6 | echo "No images found with prefix: $prefix" 7 | exit 0 8 | fi 9 | 10 | echo "Deleting the following images:" 11 | echo "$images_to_delete" 12 | 13 | while IFS= read -r image; do 14 | docker rmi "$image" 15 | done <<< "$images_to_delete" -------------------------------------------------------------------------------- /requirements-base.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-timeout 3 | pytest-env 4 | pytest-sugar 5 | pytest-html 6 | pytest-metadata 7 | pytest-cov 8 | pytest-xdist 9 | pytest-mock 10 | pytest-asyncio 11 | pytest-bdd 12 | pytest-benchmark 13 | pytest-randomly 14 | pytest-json 15 | responses 16 | cython 17 | distro 18 | mock 19 | hypothesis 20 | freezegun 21 | trustme 22 | requests-mock 23 | requests 24 | tomlkit 25 | black 26 | flake8 27 | mypy 28 | isort 29 | pre-commit 30 | coverage 31 | tox 32 | -------------------------------------------------------------------------------- /scripts/remove_envs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -u 5 | 6 | TARGET_DIR="/raid/haoran/miniforge3/envs" 7 | 8 | if [ ! -d "$TARGET_DIR" ]; then 9 | echo "Error: Directory '$TARGET_DIR' does not exist."
10 | exit 1 11 | fi 12 | 13 | for folder in "$TARGET_DIR"/swedev_*; do 14 | if [ -d "$folder" ]; then 15 | echo "Deleting folder: $folder" 16 | rm -rf "$folder" & 17 | fi 18 | done 19 | wait 20 | echo "All 'swedev_' subdirectories in '$TARGET_DIR' have been removed." 21 | -------------------------------------------------------------------------------- /conf/config.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - config/default 4 | - _self_ 5 | 6 | paths: 7 | conda_bin: /raid/haoran/miniforge3/bin/conda 8 | conda_base: /raid/haoran/miniforge3/ 9 | local_repo_dir: /raid/repos 10 | playground: /raid/playground 11 | 12 | github: 13 | tokens: ghp_1,ghp_2 14 | 15 | description: 16 | model: gpt-4o 17 | api_key: sk-swedev 18 | base_url: http://localhost:8000/v1 19 | max_tokens: 32768 20 | 21 | testcase: 22 | model: gpt-4o 23 | api_key: sk-swedev 24 | base_url: http://localhost:8000/v1 25 | max_tokens: 32768 -------------------------------------------------------------------------------- /scripts/docker/retag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | prefix=$1 4 | images=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "^$prefix") 5 | 6 | if [ -z "$images" ]; then 7 | echo "No such images" 8 | exit 0 9 | fi 10 | 11 | echo "Found the following images:" 12 | echo "$images" 13 | 14 | for image in $images; do 15 | new_image=$(echo "$image" | sed "s|^${prefix}||") 16 | new_image="${new_image//_s_/__}" 17 | echo "Retagging $image -> $new_image" 18 | 19 | docker tag "$image" "$new_image" 20 | 21 | echo "Deleting original image: $image" 22 | docker rmi "$image" 23 | done 24 | 25 | echo "Retag finished" -------------------------------------------------------------------------------- /scripts/docker/check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 |
input_file="all-swebench-verified-instance-images.txt" 4 | 5 | if [ ! -f "$input_file" ]; then 6 | echo "File $input_file does not exist" 7 | exit 1 8 | fi 9 | 10 | count=0 11 | 12 | local_images=$(docker images --format "{{.Repository}}:{{.Tag}}") 13 | 14 | while IFS= read -r image_name || [ -n "$image_name" ]; do 15 | image_name="${image_name//_s_/__}" 16 | 17 | if echo "$local_images" | grep -qxF "$image_name"; then 18 | count=$((count+1)) 19 | echo "$count" "$image_name" 20 | else 21 | echo "No such image: $image_name" 22 | fi 23 | done < "$input_file" -------------------------------------------------------------------------------- /conf/config/default.yaml: -------------------------------------------------------------------------------- 1 | # SWE-Dev Configuration 2 | 3 | paths: 4 | conda_bin: /path/to/conda/bin/conda 5 | conda_base: /path/to/conda/base 6 | local_repo_dir: /path/to/local/repo/dir 7 | playground: /path/to/playground 8 | 9 | github: 10 | tokens: ghp_1,ghp_2 # split by comma 11 | 12 | # Pipeline stage-specific model settings 13 | # These settings allow using different models for each stage 14 | # If not specified, fall back to openai.base_model and openai.base_url 15 | description: 16 | model: ${openai.base_model} 17 | base_url: ${openai.base_url} 18 | api_key: ${openai.api_key} 19 | max_tokens: ${openai.max_tokens} 20 | 21 | testcase: 22 | model: ${openai.base_model} 23 | base_url: ${openai.base_url} 24 | api_key: ${openai.api_key} 25 | revise_rounds: 0 # Number of rounds to revise testcases 26 | max_tokens: ${openai.max_tokens} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | local/ 3 | experiments/ 4 | dist/ 5 | build/ 6 | nohup.out 7 | # Folder view configuration files 8 | **/.DS_Store 9 | Desktop.ini 10 | 11 | # Thumbnail cache files 12 | ._* 13 | Thumbs.db 14 | 15 | # Files that might appear on external
disks 16 | .Spotlight-V100 17 | .Trashes 18 | 19 | # Compiled Python files 20 | *.pyc 21 | 22 | # Compiled C++ files 23 | *.out 24 | 25 | # Application specific files 26 | venv 27 | node_modules 28 | .sass-cache 29 | 30 | # Temp File 31 | *.swp 32 | *.swa 33 | *.swo 34 | 35 | # github merge file 36 | *.orig 37 | 38 | #vscode 39 | .vscode 40 | 41 | datasets/ 42 | 43 | **/__pycache__/ 44 | **/playground/ 45 | **/repo_structures/ 46 | **/results/ 47 | 48 | **/.pytest_cache/ 49 | 50 | **/*.json 51 | **/*.jsonl 52 | 53 | **/miscs.py 54 | 55 | **/tests/ 56 | **/backup/ 57 | **/old-results/ 58 | *.tar.gz 59 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from pathlib import Path 3 | 4 | with open("requirements.txt") as f: 5 | requirements = f.read().splitlines() 6 | 7 | long_description = Path("README.md").read_text(encoding="utf-8") 8 | 9 | setuptools.setup( 10 | name="swedev", 11 | version="0.1.0", 12 | author="Haoran Wang", 13 | author_email="ubecwang@gmail.com", 14 | description="Software Engineering Agents with Training and Inference Scaling", 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | url="https://github.com/UbeCc/SWE-Dev", 18 | packages=setuptools.find_packages(), 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: MIT License", 22 | "Operating System :: OS Independent", 23 | ], 24 | python_requires=">=3.6", 25 | install_requires=requirements, 26 | include_package_data=True, 27 | ) -------------------------------------------------------------------------------- /scripts/kill_by_pid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if PID range parameter is passed 4 | if [ $# -ne 1 ]; then 5 | echo "Usage: $0 a-b" 6 | exit 1 7 | fi 8 | 9 | # Extract 
range 10 | range=$1 11 | start_pid=$(echo "$range" | cut -d'-' -f1) 12 | end_pid=$(echo "$range" | cut -d'-' -f2) 13 | 14 | # Validate range 15 | if ! [[ $start_pid =~ ^[0-9]+$ && $end_pid =~ ^[0-9]+$ ]]; then 16 | echo "Error: Please provide a valid PID range (e.g., 100-105)" 17 | exit 1 18 | fi 19 | 20 | if [ "$start_pid" -gt "$end_pid" ]; then 21 | echo "Error: Start PID should be less than or equal to end PID" 22 | exit 1 23 | fi 24 | 25 | # Iterate over range and attempt to kill each process 26 | for pid in $(seq "$start_pid" "$end_pid"); do 27 | if kill -0 "$pid" 2>/dev/null; then 28 | kill -9 "$pid" 29 | echo "Terminated process PID: $pid" 30 | else 31 | echo "Skipped process PID: $pid (does not exist or no permission)" 32 | fi 33 | done 34 | 35 | echo "Done" 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 SWE-Dev Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /scripts/remove_container.py: -------------------------------------------------------------------------------- 1 | import docker 2 | from tqdm import tqdm 3 | 4 | 5 | def remove_stopped_containers(): 6 | """ 7 | Remove all Docker containers in 'exited' or 'created' states. 8 | """ 9 | client = docker.from_env() # Connect to the Docker daemon 10 | try: 11 | # Get all containers, including stopped and created ones 12 | containers = client.containers.list(all=True) 13 | 14 | # Filter containers that are either 'exited' or 'created' 15 | removable_containers = [ 16 | container for container in containers if container.status in ["exited", "created"] 17 | ] 18 | 19 | if not removable_containers: 20 | print("No containers in 'exited' or 'created' state found.") 21 | return 22 | 23 | print(f"Found {len(removable_containers)} containers in 'exited' or 'created' state. 
Removing them...") 24 | 25 | # Use tqdm for progress bar 26 | for container in tqdm(removable_containers, desc="Removing containers"): 27 | try: 28 | container.remove() 29 | print(f"[SUCCESS] Removed container: {container.name} (ID: {container.id}) - Status: {container.status}") 30 | except Exception as e: 31 | print(f"[FAILED] Could not remove container: {container.name} (ID: {container.id}) - Error: {e}") 32 | except Exception as e: 33 | print(f"[ERROR] Failed to connect to Docker: {e}") 34 | finally: 35 | client.close() 36 | 37 | if __name__ == "__main__": 38 | remove_stopped_containers() -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | # Install comprehensive development environment 6 | RUN apt update && apt install -y \ 7 | build-essential g++ gcc cmake make autoconf automake libtool pkg-config git curl wget unzip python3-dev \ 8 | python3-pip python3-venv python-is-python3 libssl-dev libbz2-dev liblzma-dev zlib1g-dev libffi-dev \ 9 | libsqlite3-dev libreadline-dev libgdbm-dev libdb-dev libexpat1-dev libxml2-dev \ 10 | libxslt1-dev libyaml-dev libevent-dev libboost-all-dev libprotobuf-dev protobuf-compiler \ 11 | libcurl4-openssl-dev libjpeg-dev libpng-dev libtiff-dev libfreetype-dev libx11-dev \ 12 | libxext-dev libxrender-dev libxrandr-dev libxi-dev libxtst-dev libxinerama-dev libxkbcommon-dev libxkbcommon-x11-dev \ 13 | libfontconfig1-dev libharfbuzz-dev libpango1.0-dev libcairo2-dev libgtk-3-dev \ 14 | qtbase5-dev qttools5-dev-tools libtbb-dev libopenblas-dev liblapack-dev libatlas-base-dev \ 15 | libsuitesparse-dev libeigen3-dev libgmp-dev libmpfr-dev libboost-python-dev libbz2-dev liblz4-dev \ 16 | libzstd-dev libarchive-dev libsnappy-dev libuv1-dev librocksdb-dev libwebp-dev libxmlsec1-dev libgsl-dev \ 17 | libgflags-dev libgoogle-glog-dev libhdf5-dev 
libtiff5-dev libyaml-cpp-dev libgd-dev default-jdk \ 18 | openjdk-11-jdk openjdk-17-jdk maven gradle nodejs npm ruby-dev perl lua5.3 rustc cargo golang-go clang llvm lldb valgrind \ 19 | ccache lcov doxygen graphviz gdb bison flex swig ninja-build libapache2-mod-php php-cli php-dev \ 20 | apt-utils software-properties-common vim nano emacs htop neofetch screen tmux git-lfs \ 21 | sqlite3 postgresql-client mysql-client redis-tools openssh-client rsync zip p7zip-full \ 22 | jq parallel locales pipx 23 | 24 | # Setup locale 25 | RUN locale-gen en_US.UTF-8 26 | ENV LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 27 | 28 | # Working directory 29 | WORKDIR /workspace 30 | 31 | # Default command 32 | CMD ["/bin/bash"] 33 | -------------------------------------------------------------------------------- /scripts/docker/pull_all_eval_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | LEVEL=$1 5 | # three levels: 6 | # - base, keyword "sweb.base" 7 | # - env, keyword "sweb.env" 8 | # - instance, keyword "sweb.eval" 9 | SET=$2 10 | 11 | if [ -z "$LEVEL" ]; then 12 | echo "Usage: $0 <cache_level> <set>" 13 | echo "cache_level: base, env, or instance" 14 | echo "set: verified, full" 15 | exit 1 16 | fi 17 | 18 | if [ -z "$SET" ]; then 19 | echo "Usage: $0 <cache_level> <set>" 20 | echo "cache_level: base, env, or instance" 21 | echo "set: verified, full, default is full" 22 | SET="full" 23 | fi 24 | 25 | NAMESPACE=${3:-swebench} 26 | 27 | echo "Using namespace: $NAMESPACE" 28 | 29 | if [ "$SET" == "verified" ]; then 30 | IMAGE_FILE="$(dirname "$0")/swebench-verified-instance-images.txt" 31 | else 32 | IMAGE_FILE="$(dirname "$0")/swebench-full-instance-images.txt" 33 | fi 34 | 35 | # Define a pattern based on the level 36 | case $LEVEL in 37 | base) 38 | PATTERN="sweb.base" 39 | ;; 40 | env) 41 | PATTERN="sweb.base\|sweb.env" 42 | ;; 43 | instance) 44 | PATTERN="sweb.base\|sweb.env\|sweb.eval" 45 | ;; 46 | *) 47 | echo "Invalid cache level: $LEVEL" 48 | echo
"Valid levels are: base, env, instance" 49 | exit 1 50 | ;; 51 | esac 52 | 53 | echo "Pulling docker images for [$LEVEL] level" 54 | 55 | echo "Pattern: $PATTERN" 56 | echo "Image file: $IMAGE_FILE" 57 | 58 | # Read each line from the file, filter by pattern, and pull the docker image 59 | export NAMESPACE # Ensure environment variable is accessible to subprocesses 60 | grep "$PATTERN" "$IMAGE_FILE" | xargs -P 64 -I {} bash -c ' 61 | image="{}" 62 | 63 | echo "Processing image: $image" 64 | 65 | # Check if image already exists 66 | image_name=$(echo "$image" | sed "s/_s_/__/g") 67 | if docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^$image_name$"; then 68 | echo "Image $image_name already exists, skipping pull." 69 | else 70 | echo "Pulling $NAMESPACE/$image into $image" 71 | docker pull $NAMESPACE/$image 72 | fi 73 | ' -------------------------------------------------------------------------------- /scripts/remove_playground.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from concurrent.futures import ThreadPoolExecutor, as_completed 4 | 5 | from tqdm import tqdm 6 | 7 | 8 | def get_all_subfolders(folder_path): 9 | """ 10 | Gather all immediate subfolder paths under the given folder using os.listdir. 11 | """ 12 | useful = ["swedev", "base"] 13 | return [f'{folder_path}/{path}' for path in os.listdir(folder_path) if path not in useful] 14 | 15 | def delete_folder(folder_path): 16 | """ 17 | Delete a single folder and its contents. 18 | """ 19 | try: 20 | shutil.rmtree(folder_path) # Recursively delete the folder and its contents 21 | return folder_path, True, None 22 | except Exception as e: 23 | return folder_path, False, str(e) 24 | 25 | def concurrent_delete(parent_folder, max_workers=8): 26 | """ 27 | Concurrently delete all subfolders under the parent folder.
28 | """ 29 | # Get all immediate subfolders of the parent folder 30 | subfolders = get_all_subfolders(parent_folder) 31 | 32 | # Sort subfolders by depth, ensuring the deepest folders are deleted first 33 | subfolders.sort(key=lambda x: x.count(os.sep), reverse=True) 34 | 35 | # Add the parent folder itself to the list of folders to delete 36 | subfolders.append(parent_folder) 37 | 38 | # Use a thread pool to delete folders concurrently 39 | print(f"Starting deletion of {len(subfolders)} folders...") 40 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 41 | future_to_folder = {executor.submit(delete_folder, folder): folder for folder in subfolders} 42 | 43 | # Use tqdm to display progress 44 | for future in tqdm(as_completed(future_to_folder), total=len(future_to_folder), desc="Deleting folders"): 45 | folder_path, success, error = future.result() 46 | if success: 47 | print(f"[SUCCESS] Deleted: {folder_path}") 48 | else: 49 | print(f"[FAILED] Could not delete: {folder_path} - Error: {error}") 50 | 51 | if __name__ == "__main__": 52 | target_folder = "/raid/playground" 53 | if os.path.exists(target_folder): 54 | concurrent_delete(target_folder, max_workers=64) 55 | else: 56 | print(f"[ERROR] Path does not exist: {target_folder}") 57 | -------------------------------------------------------------------------------- /swedev.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | MANIFEST.in 3 | README.md 4 | pyproject.toml 5 | requirements-base.txt 6 | requirements.txt 7 | setup.py 8 | conf/__init__.py 9 | conf/config.yaml 10 | conf/__pycache__/__init__.cpython-312.pyc 11 | conf/config/__init__.py 12 | conf/config/default.yaml 13 | swedev/__init__.py 14 | swedev/config.py 15 | swedev.egg-info/PKG-INFO 16 | swedev.egg-info/SOURCES.txt 17 | swedev.egg-info/dependency_links.txt 18 | swedev.egg-info/requires.txt 19 | swedev.egg-info/top_level.txt 20 |
swedev/__pycache__/__init__.cpython-311.pyc 21 | swedev/__pycache__/__init__.cpython-312.pyc 22 | swedev/__pycache__/config.cpython-311.pyc 23 | swedev/__pycache__/config.cpython-312.pyc 24 | swedev/crawl/__init__.py 25 | swedev/crawl/get_top_pypi.py 26 | swedev/crawl/pypi_crawler.py 27 | swedev/crawl/__pycache__/get_top_pypi.cpython-311.pyc 28 | swedev/crawl/__pycache__/get_top_pypi.cpython-312.pyc 29 | swedev/crawl/__pycache__/pypi_crawler.cpython-311.pyc 30 | swedev/issues/__init__.py 31 | swedev/issues/build_dataset.py 32 | swedev/issues/fetch_metadata.py 33 | swedev/issues/get_tasks_pipeline.py 34 | swedev/issues/utils.py 35 | swedev/issues/__pycache__/__init__.cpython-311.pyc 36 | swedev/issues/__pycache__/__init__.cpython-312.pyc 37 | swedev/issues/__pycache__/get_tasks_pipeline.cpython-311.pyc 38 | swedev/issues/__pycache__/get_tasks_pipeline.cpython-312.pyc 39 | swedev/testcases/__init__.py 40 | swedev/testcases/eval_testcases.py 41 | swedev/testcases/get_descriptions.py 42 | swedev/testcases/get_testcases.py 43 | swedev/testcases/__pycache__/__init__.cpython-311.pyc 44 | swedev/testcases/__pycache__/__init__.cpython-312.pyc 45 | swedev/testcases/__pycache__/eval_testcases.cpython-312.pyc 46 | swedev/testcases/__pycache__/get_descriptions.cpython-311.pyc 47 | swedev/testcases/__pycache__/get_descriptions.cpython-312.pyc 48 | swedev/testcases/__pycache__/get_testcases.cpython-312.pyc 49 | swedev/utils/__init__.py 50 | swedev/utils/extract_signs.py 51 | swedev/utils/formatter.py 52 | swedev/utils/localize.py 53 | swedev/utils/postprocess.py 54 | swedev/utils/preprocess.py 55 | swedev/utils/prompts.py 56 | swedev/utils/utils.py 57 | swedev/utils/__pycache__/__init__.cpython-311.pyc 58 | swedev/utils/__pycache__/__init__.cpython-312.pyc 59 | swedev/utils/__pycache__/extract_signs.cpython-312.pyc 60 | swedev/utils/__pycache__/localize.cpython-312.pyc 61 | swedev/utils/__pycache__/preprocess.cpython-312.pyc 62 | swedev/utils/__pycache__/prompts.cpython-311.pyc 
63 | swedev/utils/__pycache__/prompts.cpython-312.pyc 64 | swedev/utils/__pycache__/utils.cpython-311.pyc 65 | swedev/utils/__pycache__/utils.cpython-312.pyc 66 | swedev/utils/tricks/__init__.py 67 | swedev/utils/tricks/api_solver.py 68 | swedev/utils/tricks/error_handler.py 69 | swedev/utils/tricks/loop_detector.py -------------------------------------------------------------------------------- /scripts/hf_push.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from datasets import Dataset, DatasetDict, load_dataset 5 | from huggingface_hub import HfApi, HfFolder, Repository 6 | from swedev.config import get_config_value 7 | 8 | 9 | def download(): 10 | if not os.path.exists("datasets/"): 11 | os.mkdir("datasets/") 12 | 13 | dataset = load_dataset("princeton-nlp/SWE-bench") 14 | 15 | with open("datasets/swebench-train.json", "w") as f: 16 | json.dump([d for d in dataset["train"]], f, indent=2) 17 | with open("datasets/swebench-dev.json", "w") as f: 18 | json.dump([d for d in dataset["dev"]], f, indent=2) 19 | with open("datasets/swebench-test.json", "w") as f: 20 | json.dump([d for d in dataset["test"]], f, indent=2) 21 | 22 | def upload_to_huggingface(dataset_name, file_split_mapping, token): 23 | """ 24 | Uploads multiple dataset files to the Hugging Face Hub as splits under dataset_name. 25 | 26 | Args: 27 | dataset_name (str): The Hugging Face dataset repository name (e.g., "username/dataset_name"). 28 | file_split_mapping (dict): A dictionary mapping split names (e.g., "train", "test") to file paths. 29 | token (str): Hugging Face API token. 
30 | """ 31 | # Save the token for authentication 32 | HfFolder.save_token(token) 33 | api = HfApi() 34 | 35 | # Authenticate and check user 36 | user = api.whoami(token=token) 37 | print(f"Authenticated as: {user['name']}") 38 | 39 | # Check if the dataset exists on the Hugging Face Hub 40 | try: 41 | api.dataset_info(dataset_name, token=token) 42 | print(f"Dataset {dataset_name} already exists on Hugging Face Hub.") 43 | except Exception: 44 | print(f"Dataset {dataset_name} does not exist. Creating a new repository...") 45 | api.create_repo(repo_id=dataset_name, repo_type="dataset", token=token) 46 | 47 | # Ensure all files exist 48 | for split, file_path in file_split_mapping.items(): 49 | if not os.path.exists(file_path): 50 | raise ValueError(f"File for split '{split}' does not exist: {file_path}") 51 | 52 | # Load each file into a DatasetDict 53 | dataset_dict = {} 54 | for split, file_path in file_split_mapping.items(): 55 | file_ext = os.path.splitext(file_path)[1].lower() 56 | print(f"Loading file '{file_path}' for split '{split}' (format: {file_ext})") 57 | 58 | if file_ext == ".csv": 59 | dataset = Dataset.from_csv(file_path) 60 | elif file_ext == ".json": 61 | dataset = Dataset.from_json(file_path) 62 | elif file_ext == ".jsonl": 63 | dataset = Dataset.from_json(file_path, split="train") 64 | else: 65 | raise ValueError(f"Unsupported file format for split '{split}': {file_ext}. 
Only .csv, .json, and .jsonl are supported.") 66 | 67 | dataset_dict[split] = dataset 68 | 69 | # Convert to DatasetDict 70 | dataset_dict = DatasetDict(dataset_dict) 71 | 72 | # Push the entire DatasetDict to the Hugging Face Hub 73 | print(f"Uploading dataset to Hugging Face Hub under {dataset_name}...") 74 | dataset_dict.push_to_hub(dataset_name, token=token) 75 | print(f"Dataset uploaded successfully to {dataset_name}!") 76 | 77 | def upload(): 78 | DATASET_NAME = "SWE-Dev/SWE-Dev" 79 | TOKEN = get_config_value("huggingface.token", os.environ.get("HF_TOKEN")) 80 | if not TOKEN: 81 | raise ValueError("HF_TOKEN not configured. Please configure huggingface.token in your config file or set the HF_TOKEN environment variable.") 82 | FILE_SPLIT_MAPPING = { 83 | "train": "results/swedev.jsonl", 84 | } 85 | upload_to_huggingface(DATASET_NAME, FILE_SPLIT_MAPPING, TOKEN) 86 | 87 | if __name__ == "__main__": 88 | upload() -------------------------------------------------------------------------------- /swedev/utils/tricks/loop_detector.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tqdm import tqdm 4 | 5 | total = 0 6 | def remove_conversation_duplicates(conversations: list[dict]) -> list[dict]: 7 | if len(conversations) < 3: 8 | return conversations 9 | 10 | filtered = [] 11 | i = 0 12 | while i < len(conversations): 13 | current = conversations[i] 14 | removal_type = None 15 | skip_count = 1 16 | 17 | # Scenario 1: Same content repeating 18 | if i + 2 < len(conversations): 19 | messages = conversations[i:i+3] 20 | if all( 21 | messages[0]['role'] == msg['role'] and 22 | _content_similarity(messages[0]['content'], msg['content']) > 0.9 23 | for msg in messages[1:3] 24 | ): 25 | removal_type = "repeated_content" 26 | skip_count = 3 27 | 28 | # Scenario 2: Error pattern 29 | if not removal_type and i + 2 < len(conversations): 30 | messages = conversations[i:i+3] 31 | if all(msg['role'] == 'assistant' for msg in messages): 32 | if all(
33 | 'error' in msg['content'].lower() or 34 | 'exception' in msg['content'].lower() or 35 | 'syntax error' in msg['content'].lower() 36 | for msg in messages 37 | ): 38 | removal_type = "error_pattern" 39 | skip_count = 3 40 | 41 | # Scenario 3: Monologue 42 | if not removal_type and i + 2 < len(conversations): 43 | messages = conversations[i:i+3] 44 | if (all(msg['role'] == messages[0]['role'] for msg in messages) and 45 | all(_content_similarity(messages[0]['content'], msg['content']) > 0.7 46 | for msg in messages[1:3])): 47 | removal_type = "monologue" 48 | skip_count = 3 49 | 50 | # Scenario 4: Alternating pattern 51 | if not removal_type and i + 5 < len(conversations): 52 | messages = conversations[i:i+6] 53 | if (messages[0]['role'] == messages[2]['role'] == messages[4]['role'] and 54 | messages[1]['role'] == messages[3]['role'] == messages[5]['role'] and 55 | messages[0]['role'] != messages[1]['role'] and 56 | _content_similarity(messages[0]['content'], messages[2]['content']) > 0.8 and 57 | _content_similarity(messages[2]['content'], messages[4]['content']) > 0.8 and 58 | _content_similarity(messages[1]['content'], messages[3]['content']) > 0.8 and 59 | _content_similarity(messages[3]['content'], messages[5]['content']) > 0.8): 60 | removal_type = "alternating_pattern" 61 | skip_count = 6 62 | 63 | filtered.append(current) 64 | 65 | if removal_type: 66 | i += skip_count 67 | else: 68 | i += 1 69 | 70 | global total 71 | if len(conversations) - len(filtered): 72 | total += 1 73 | print(total) 74 | return filtered 75 | 76 | def _content_similarity(text1: str, text2: str) -> float: 77 | if not text1 or not text2: 78 | return 0.0 79 | 80 | words1 = set(text1.lower().split()) 81 | words2 = set(text2.lower().split()) 82 | 83 | if not words1 or not words2: 84 | return 0.0 85 | 86 | intersection = words1.intersection(words2) 87 | union = words1.union(words2) 88 | 89 | return len(intersection) / len(union) 90 | 91 | if __name__ == "__main__": 92 | with 
open("trajectories.json", "r") as f: 93 | dataset = json.load(f) 94 | results = [] 95 | for data in tqdm(dataset): 96 | results.append({"input": remove_conversation_duplicates(data["input"])}) 97 | with open("loop_trajs.json", "w") as f: 98 | json.dump(results, f, indent=2) -------------------------------------------------------------------------------- /swedev/crawl/pypi_crawler.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import re 4 | import secrets 5 | import string 6 | import xmlrpc.client 7 | from concurrent.futures import ThreadPoolExecutor 8 | from time import sleep 9 | 10 | import requests 11 | from requests import HTTPError 12 | 13 | 14 | def generate_random_string(length=10): 15 | alphabet = string.ascii_letters + string.digits 16 | return ''.join(secrets.choice(alphabet) for _ in range(length)) 17 | 18 | base_url = "https://pypi.org/pypi" 19 | session = requests.Session() 20 | 21 | def user_agent_generator(): 22 | return f"Pypi Daily Sync (Contact: {generate_random_string(10)}@gmail.com)" 23 | 24 | def all_packages(): 25 | xmlclient = xmlrpc.client.ServerProxy(base_url) 26 | print("Fetching all package names from PyPI...") 27 | return xmlclient.list_packages_with_serial() 28 | 29 | def pkg_meta(name): 30 | resp = session.get(f"{base_url}/{name}/json", headers={'User-Agent': user_agent_generator()}) 31 | resp.raise_for_status() 32 | return resp.json() 33 | 34 | def extract_github_repo(url): 35 | if not url: 36 | return None 37 | pattern = r'^(https?:\/\/github\.com\/([a-zA-Z0-9._-]+)\/([a-zA-Z0-9._-]+))(\/.*)?$' 38 | match = re.match(pattern, url) 39 | if match: 40 | return match.group(1) 41 | return None 42 | 43 | def save_pkg_meta(name, output_file): 44 | api_success = False 45 | while not api_success: 46 | try: 47 | meta = pkg_meta(name) 48 | api_success = True 49 | except HTTPError as e: 50 | if e.response.status_code == 404: 51 | return 52 | print(f"HTTP error 
{e.response.status_code} for package {name}. Retrying in 3s...") 53 | sleep(3) 54 | except Exception as e: 55 | print(f"Error with package {name}: {str(e)}. Retrying in 3s...") 56 | sleep(3) 57 | 58 | try: 59 | project_urls = meta['info'].get('project_urls', {}) or {} 60 | if isinstance(project_urls, dict): 61 | urls = project_urls.values() 62 | else: 63 | urls = [] 64 | 65 | homepage = meta['info'].get('home_page') 66 | if homepage: 67 | urls = list(urls) + [homepage] 68 | 69 | if meta['info'].get('package_url'): 70 | urls = list(urls) + [meta['info'].get('package_url')] 71 | 72 | for url in urls: 73 | github_url = extract_github_repo(url) 74 | if github_url: 75 | github_url = github_url.replace(".git", "") 76 | print(f'Found GitHub URL: {github_url} for package {name}') 77 | 78 | entry = { 79 | "package_name": name, 80 | "github": github_url 81 | } 82 | 83 | with open(output_file, 'a') as f: 84 | f.write(json.dumps(entry) + '\n') 85 | 86 | break 87 | except Exception as e: 88 | print(f"Error processing metadata for {name}: {str(e)}") 89 | 90 | def crawl_github_urls(output_file, workers=128): 91 | """ 92 | Get all PyPI packages and extract GitHub repository URLs in one step. 93 | """ 94 | packages = all_packages() 95 | print(f"Found {len(packages)} packages. 
Starting GitHub URL extraction...") 96 | 97 | open(output_file, 'w').close() 98 | 99 | package_names = list(packages.keys()) 100 | args_list = [(name, output_file) for name in package_names] 101 | 102 | with ThreadPoolExecutor(max_workers=workers) as executor: 103 | executor.map(lambda args: save_pkg_meta(*args), args_list) 104 | 105 | def main(): 106 | parser = argparse.ArgumentParser(description="Extract GitHub URLs for PyPI packages") 107 | parser.add_argument("--output", type=str, default="github_urls.jsonl", 108 | help="Path to save GitHub URLs in JSONL format (default: github_urls.jsonl)") 109 | parser.add_argument("--workers", type=int, default=128, 110 | help="Number of concurrent workers (default: 128)") 111 | args = parser.parse_args() 112 | 113 | crawl_github_urls(args.output, args.workers) 114 | print(f"Finished! GitHub URLs saved to {args.output} in JSONL format") 115 | 116 | if __name__ == "__main__": 117 | main() -------------------------------------------------------------------------------- /swedev/utils/postprocess.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | 4 | 5 | def extract_code_blocks(text): 6 | pattern = r"```\n(.*?)\n```" 7 | matches = re.findall(pattern, text, re.DOTALL) 8 | if len(matches) == 0: 9 | if "```" in text: 10 | # handle the case where the code block is not complete 11 | return [text.split("```", 1)[-1].strip()] 12 | return matches 13 | 14 | 15 | def extract_locs_for_files(locs, file_names): 16 | results = {fn: [] for fn in file_names} 17 | current_file_name = None 18 | for loc in locs: 19 | for line in loc.splitlines(): 20 | if line.strip().endswith(".py"): 21 | current_file_name = line.strip() 22 | elif line.strip() and any( 23 | line.startswith(w) 24 | for w in ["line:", "function:", "class:", "variable:"] 25 | ): 26 | if current_file_name in results: 27 | results[current_file_name].append(line) 28 | else: 29 | pass 30 | return [["\n".join(results[fn])] 
for fn in file_names]
31 | 
32 | def get_codebody(source_code: str, omit: bool = False) -> str:
33 |     class CodeModifier(ast.NodeTransformer):
34 |         def visit_ClassDef(self, node):
35 |             if node.name.startswith('Test') or node.name.endswith('Test'):
36 |                 return None
37 | 
38 |             docstring = ast.get_docstring(node)
39 |             if docstring and omit:
40 |                 new_body = [ast.Expr(value=ast.Constant(value=docstring))]
41 |                 new_body.append(ast.Expr(value=ast.Constant(value="")))
42 |                 new_node = ast.ClassDef(
43 |                     name=node.name,
44 |                     bases=node.bases,
45 |                     keywords=node.keywords,
46 |                     body=new_body,
47 |                     decorator_list=node.decorator_list
48 |                 )
49 |                 return new_node
50 |             return node
51 | 
52 |         def visit_FunctionDef(self, node):
53 |             if node.name.startswith('test_') or 'test' in node.name.lower():
54 |                 return None
55 | 
56 |             docstring = ast.get_docstring(node)
57 |             if docstring and omit:
58 |                 new_body = [ast.Expr(value=ast.Constant(value=docstring))]
59 |                 new_body.append(ast.Expr(value=ast.Constant(value="")))
60 |                 new_node = ast.FunctionDef(
61 |                     name=node.name,
62 |                     args=node.args,
63 |                     body=new_body,
64 |                     decorator_list=node.decorator_list,
65 |                     returns=node.returns
66 |                 )
67 |                 return new_node
68 |             return node
69 | 
70 |         def visit_Import(self, node):
71 |             test_imports = ['pytest', 'unittest', 'mock']
72 |             new_names = [n for n in node.names if n.name not in test_imports]
73 |             if not new_names:
74 |                 return None
75 |             node.names = new_names
76 |             return node
77 | 
78 |         def visit_ImportFrom(self, node):
79 |             # node.module is None for relative imports such as `from . import x`,
80 |             # so guard before lowercasing instead of silently dropping the import.
81 |             if node.module and any(x in node.module.lower() for x in ['test', 'mock']):
82 |                 return None
83 |             return node
84 | 
85 | 
86 |     tree = ast.parse(source_code)
87 |     transformer = CodeModifier()
88 |     cleaned_tree = transformer.visit(tree)
89 |     ast.fix_missing_locations(cleaned_tree)
90 |     return ast.unparse(cleaned_tree)
91 | 
92 | if __name__ == "__main__":
93 |     example_code = '''
94 | def calculate_sum(a, b):
95 |     """
96 |     Args:
97 |         a: First Number
98 |         b: 
Second Number 99 | 100 | Returns: 101 | Sum of the numbers 102 | """ 103 | return a + b 104 | 105 | def no_doc_function(x): 106 | return x * 2 107 | 108 | def test_calculate(): 109 | assert calculate_sum(1, 2) == 3 110 | 111 | class Calculator: 112 | def add(self, a, b): 113 | return a + b 114 | 115 | class NoDocClass: 116 | def method(self): 117 | pass 118 | 119 | class TestCalculator: 120 | def test_add(self): 121 | calc = Calculator() 122 | assert calc.add(1, 2) == 3 123 | ''' 124 | 125 | print("With omit=True:") 126 | print(get_codebody(example_code, omit=True)) 127 | 128 | print("\nWith omit=False:") 129 | print(get_codebody(example_code, omit=False)) -------------------------------------------------------------------------------- /swedev/testcases/get_descriptions.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import re 6 | import jsonlines 7 | import threading 8 | import time 9 | import traceback 10 | from concurrent.futures import ThreadPoolExecutor 11 | from swedev.utils.prompts import * 12 | from swedev.utils.utils import * 13 | from tqdm import tqdm 14 | from swedev.config import Config 15 | 16 | call_counter = tqdm(desc="API Calls", unit="calls") 17 | total_counter = tqdm(desc="Progress", unit="items") 18 | saved_counter = tqdm(desc="Saved", unit="items") 19 | 20 | request_count = 0 21 | start_time = time.time() 22 | rps_lock = threading.Lock() 23 | 24 | def process_instance(instance, model, base_url, max_tokens, logger, writer): 25 | """Process a single test instance to generate description and save it""" 26 | global request_count 27 | 28 | with rps_lock: 29 | request_count += 1 30 | call_counter.update(1) 31 | 32 | result = call( 33 | messages=[{"role": "user", "content": SUMMARIZE_GHERKIN_TEST.format(instance["repo"], instance["problem_statement"], instance["patch"], instance["hints_text"])}], 34 | max_tokens=max_tokens, 35 | model=model, 36 
| base_url=base_url, 37 | logger=logger 38 | ) 39 | if result == "Error": 40 | result = call( 41 | messages=[{"role": "user", "content": SUMMARIZE_GHERKIN_TEST.format(instance["repo"], instance["problem_statement"], instance["patch"], "No Hints Text Provided")}], 42 | max_tokens=max_tokens, 43 | model=model, 44 | base_url=base_url, 45 | logger=logger 46 | ) 47 | 48 | desc = call( 49 | messages=[{"role": "user", "content": MAKE_GHERKIN_TEST.format(instance["repo"], instance["problem_statement"], instance["patch"], instance["hints_text"], result)}], 50 | max_tokens=max_tokens, 51 | model=model, 52 | base_url=base_url, 53 | logger=logger 54 | ) 55 | if desc == "Error": 56 | desc = call( 57 | messages=[{"role": "user", "content": MAKE_GHERKIN_TEST.format(instance["repo"], instance["problem_statement"], instance["patch"], "No Hints Text Provided", result)}], 58 | max_tokens=max_tokens, 59 | model=model, 60 | base_url=base_url, 61 | logger=logger 62 | ) 63 | pattern = r'```(?:gherkin\n|\n)(.*?)\n```' 64 | descs = re.findall(pattern, desc, re.DOTALL) 65 | 66 | total_counter.update(1) 67 | 68 | result_obj = { 69 | "repo": instance["repo"], 70 | "instance_id": instance["instance_id"], 71 | "problem_statement": instance["problem_statement"], 72 | "patch": instance["patch"], 73 | "created_at": instance["created_at"], 74 | "hints_text": instance["hints_text"], 75 | "base_commit": instance["base_commit"], 76 | "descs": descs, 77 | "model": model 78 | } 79 | 80 | with rps_lock: 81 | writer.write(result_obj) 82 | saved_counter.update(1) 83 | 84 | return result_obj 85 | 86 | def generate_descriptions(args: argparse.Namespace) -> None: 87 | """ 88 | Get test case descriptions for a list of test instances 89 | """ 90 | logger = logging.getLogger(__name__) 91 | model = Config.Description.model 92 | base_url = Config.Description.base_url 93 | max_tokens = Config.Description.max_tokens 94 | 95 | if args.dataset_file.endswith(".jsonl"): 96 | with jsonlines.open(args.dataset_file, "r") 
as f: 97 | instances = [line for line in f] 98 | else: 99 | with open(args.dataset_file, "r") as f: 100 | instances = json.load(f) 101 | 102 | total_instances = len(instances) 103 | print(f"Getting test case descriptions for {total_instances} test instances") 104 | 105 | total_counter.total = total_instances 106 | total_counter.refresh() 107 | 108 | def report_rps(): 109 | global request_count, start_time 110 | while True: 111 | time.sleep(5) 112 | with rps_lock: 113 | elapsed = time.time() - start_time 114 | rps = request_count / elapsed if elapsed > 0 else 0 115 | print(f"Current RPS: {rps:.2f}") 116 | 117 | rps_thread = threading.Thread(target=report_rps, daemon=True) 118 | rps_thread.start() 119 | 120 | with jsonlines.open(args.output_file, 'w') as writer: 121 | with ThreadPoolExecutor(max_workers=args.num_workers) as executor: 122 | futures = [] 123 | for instance in instances: 124 | future = executor.submit( 125 | process_instance, 126 | instance, 127 | model, 128 | base_url, 129 | max_tokens, 130 | logger, 131 | writer 132 | ) 133 | futures.append(future) 134 | 135 | for future in tqdm(futures, total=total_instances, desc="Processing", unit="items"): 136 | try: 137 | future.result() 138 | except Exception as e: 139 | print(f"Error processing instance: {e}") 140 | traceback.print_exc() 141 | 142 | elapsed = time.time() - start_time 143 | final_rps = request_count / elapsed if elapsed > 0 else 0 144 | print(f"Final RPS: {final_rps:.2f}") 145 | print(f"Saved {saved_counter.n} descriptions to {args.output_file}") 146 | 147 | def main(): 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument("--dataset_file", type=str, required=True) 150 | parser.add_argument("--output_folder", type=str, required=True) 151 | parser.add_argument("--num_workers", type=int, default=4, help="Number of parallel workers") 152 | 153 | args = parser.parse_args() 154 | 155 | if not os.path.exists(args.output_folder): 156 | os.makedirs(args.output_folder) 157 | 158 | 
args.output_file = os.path.join(args.output_folder, "output.jsonl") 159 | generate_descriptions(args) 160 | 161 | if __name__ == "__main__": 162 | main() -------------------------------------------------------------------------------- /swedev/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple configuration module for SWE-Dev based on Hydra. 3 | """ 4 | 5 | import os 6 | import sys 7 | from typing import Any, List 8 | 9 | import hydra 10 | from omegaconf import DictConfig, OmegaConf 11 | from functools import lru_cache 12 | 13 | CONFIG_PATH = "../conf" 14 | 15 | @hydra.main(version_base=None, config_path="conf", config_name="config") 16 | def hydra_main_get_config(cfg: DictConfig) -> DictConfig: 17 | """Hydra entry point to get configuration""" 18 | return cfg 19 | 20 | # Cached config to prevent multiple Hydra initializations 21 | @lru_cache() 22 | def get_config() -> DictConfig: 23 | """ 24 | Get configuration with caching to prevent multiple Hydra initializations 25 | """ 26 | # Set Hydra to not change working directory and not print output 27 | os.environ["HYDRA_FULL_ERROR"] = "1" 28 | 29 | # Save original args to restore later 30 | argv = sys.argv.copy() 31 | 32 | # Temporarily modify sys.argv to contain only the script name 33 | sys.argv = [sys.argv[0]] 34 | 35 | try: 36 | with hydra.initialize_config_module(config_module="conf"): 37 | cfg = hydra.compose(config_name="config") 38 | return cfg 39 | finally: 40 | sys.argv = argv 41 | 42 | def get_config_value(path: str, default: Any = None) -> Any: 43 | """ 44 | Get a configuration value by path 45 | 46 | Args: 47 | path: Configuration path like "paths.conda_bin" 48 | default: Default value if not found 49 | 50 | Returns: 51 | The configuration value 52 | """ 53 | cfg = get_config() 54 | parts = path.split('.') 55 | # Try to get value from config 56 | value = cfg 57 | try: 58 | for part in parts: 59 | value = value[part] 60 | return value 61 | except 
(KeyError, TypeError): 62 | # Try environment variable as fallback 63 | env_var = '_'.join(p.upper() for p in parts) 64 | env_value = os.environ.get(env_var) 65 | if env_value is not None: 66 | return env_value 67 | 68 | # If not found, return default 69 | return default 70 | 71 | def validate_config() -> List[str]: 72 | """ 73 | Validate the configuration and return error messages 74 | Returns empty list if valid 75 | """ 76 | errors = [] 77 | 78 | # Validate paths 79 | paths = ["paths.conda_bin", "paths.conda_base", "paths.local_repo_dir"] 80 | for path in paths: 81 | path_value = get_config_value(path) 82 | if path_value and not os.path.exists(path_value): 83 | errors.append(f"Path does not exist: {path} = {path_value}") 84 | 85 | github_tokens = get_config_value("github.tokens", []) 86 | if not github_tokens: 87 | env_tokens = os.environ.get("GITHUB_TOKENS", "") 88 | if env_tokens: 89 | github_tokens = env_tokens.split(",") 90 | 91 | if not github_tokens or (len(github_tokens) == 1 and not github_tokens[0]): 92 | errors.append("GitHub tokens not set. 
Required for API access.") 93 | 94 | return errors 95 | 96 | # Load values from config for module-level constants 97 | CONDA_BIN = get_config_value("paths.conda_bin") 98 | CONDA_BASE = get_config_value("paths.conda_base") 99 | LOCAL_REPO_DIR = get_config_value("paths.local_repo_dir") 100 | PLAYGROUND_PATH = get_config_value("paths.playground") 101 | GITHUB_TOKENS = get_config_value("github.tokens", []) 102 | if not GITHUB_TOKENS and os.environ.get("GITHUB_TOKENS"): 103 | GITHUB_TOKENS = os.environ.get("GITHUB_TOKENS", "").split(",") 104 | OPENAI_BASE_URL = get_config_value("openai.base_url") 105 | OPENAI_BASE_MODEL = get_config_value("openai.base_model") 106 | OPENAI_API_KEY = get_config_value("openai.api_key") 107 | OPENAI_MAX_TOKENS = get_config_value("openai.max_tokens", 16384) 108 | MODEL = OPENAI_BASE_MODEL 109 | 110 | # Create a unified Config class for simplified imports 111 | class Config: 112 | """ 113 | Unified configuration class for easy imports. 114 | Usage: from src.config import Config 115 | """ 116 | # Path settings 117 | conda_bin = CONDA_BIN 118 | conda_base = CONDA_BASE 119 | local_repo_dir = LOCAL_REPO_DIR 120 | playground_path = PLAYGROUND_PATH 121 | 122 | # Github settings 123 | github_tokens = GITHUB_TOKENS 124 | 125 | # OpenAI settings 126 | openai_base_url = OPENAI_BASE_URL 127 | openai_base_model = OPENAI_BASE_MODEL 128 | openai_api_key = OPENAI_API_KEY 129 | 130 | class Description: 131 | model = get_config_value("description.model", OPENAI_BASE_MODEL) 132 | base_url = get_config_value("description.base_url", OPENAI_BASE_URL) 133 | api_key = get_config_value("description.api_key", OPENAI_API_KEY) 134 | max_tokens = get_config_value("description.max_tokens", 16384) 135 | 136 | class Testcase: 137 | model = get_config_value("testcase.model", OPENAI_BASE_MODEL) 138 | base_url = get_config_value("testcase.base_url", OPENAI_BASE_URL) 139 | revise_rounds = get_config_value("testcase.revise_rounds", 0) 140 | max_tokens = 
get_config_value("testcase.max_tokens", 16384) 141 | api_key = get_config_value("testcase.api_key", OPENAI_API_KEY) 142 | 143 | @staticmethod 144 | def get(path: str, default: Any = None) -> Any: 145 | """Get any config value by path""" 146 | return get_config_value(path, default) 147 | 148 | @staticmethod 149 | def validate() -> List[str]: 150 | """Validate configuration""" 151 | return validate_config() 152 | 153 | def print_config(): 154 | """Print current configuration""" 155 | cfg = get_config() 156 | if cfg is not None: 157 | print(OmegaConf.to_yaml(cfg)) 158 | else: 159 | print("Failed to load configuration") 160 | 161 | if __name__ == "__main__": 162 | import argparse 163 | 164 | parser = argparse.ArgumentParser(description="SWE-Dev Configuration Tool") 165 | parser.add_argument('--print', action='store_true', help='Print current configuration') 166 | parser.add_argument('--validate', action='store_true', help='Validate configuration') 167 | 168 | args = parser.parse_args() 169 | 170 | if args.print: 171 | print_config() 172 | elif args.validate: 173 | errors = validate_config() 174 | if errors: 175 | for error in errors: 176 | print(f"Error: {error}") 177 | sys.exit(1) 178 | else: 179 | print("Configuration is valid") 180 | else: 181 | print_config() -------------------------------------------------------------------------------- /swedev/utils/formatter.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import ast 3 | import json 4 | import os 5 | import random 6 | import time 7 | import jsonlines 8 | 9 | def extract_content_from_patch(patch: str) -> str: 10 | file_content = [] 11 | is_content = False 12 | 13 | for line in patch.splitlines(): 14 | if line.startswith('+++'): 15 | is_content = True 16 | continue 17 | 18 | if is_content: 19 | if line.startswith('+') and not line.startswith('+++'): 20 | file_content.append(line[1:]) 21 | elif line.startswith('-'): 22 | continue 23 | elif 
line.startswith('@@'):
24 |                 continue
25 | 
26 |     return "\n".join(file_content)
27 | 
28 | def extract_test_functions(source_code: str) -> list[str]:
29 |     try:
30 |         tree = ast.parse(source_code)
31 |         test_functions = []
32 |         class_name = None
33 |         for node in ast.walk(tree):
34 |             if isinstance(node, ast.ClassDef):
35 |                 class_name = node.name
36 |             if isinstance(node, ast.FunctionDef):
37 |                 if node.name.startswith('test_'):
38 |                     if class_name:
39 |                         test_functions.append(f"{node.name} ({class_name})")
40 |                     else:
41 |                         test_functions.append(node.name)
42 |         return test_functions
43 |     except SyntaxError:
44 |         return []
45 | 
46 | def parse_testcase(source_code: str, file_path: str) -> str:
47 |     if not source_code.endswith("\n"):
48 |         source_code += "\n\n"
49 |     elif not source_code.endswith("\n\n"):
50 |         source_code += "\n"
51 | 
52 |     patch = [
53 |         f"diff --git a/{file_path} b/{file_path}",
54 |         "new file mode 100644",
55 |         "index 0000000..0000000",
56 |         "--- /dev/null",
57 |         f"+++ b/{file_path}",
58 |         "@@ -0,0 +1,{} @@".format(len(source_code.splitlines()))
59 |     ]
60 |     lines = source_code.splitlines()
61 |     formatted_lines = [f"+{line.rstrip()}" for line in lines]
62 |     patch.extend(formatted_lines)
63 |     return "\n".join(patch)
64 | 
65 | def parse_testcase_with_functions(source_code: str, file_path: str) -> tuple[str, list[str]]:
66 |     patch = parse_testcase(source_code, file_path)
67 |     return patch, extract_test_functions(source_code)
68 | 
69 | if __name__ == "__main__":
70 |     parser = argparse.ArgumentParser(description='Converts JSON to the SWE-bench format')
71 |     parser.add_argument('--dataset', type=str, help='Input files (comma-separated)')
72 |     # swe keys: [repo, instance_id, base_commit, patch, test_patch, problem_statement,
73 |     #            hints_text, created_at, version, FAIL_TO_PASS, PASS_TO_PASS, environment_setup_commit]
74 |     parser.add_argument('--output_folder', type=str, default="results", help='Output folder')
75 |     parser.add_argument('--output_name', type=str, default="swedev-gen.jsonl", help='Output file')
76 | 
parser.add_argument('--dataset_type', type=str, default='default') 77 | args = parser.parse_args() 78 | 79 | # Create output file path 80 | output_file = os.path.join(args.output_folder, args.output_name) 81 | dataset_files = [file.strip() for file in args.dataset.split(',')] 82 | 83 | all_data = [] 84 | for file in dataset_files: 85 | if file.endswith(".json"): 86 | with open(file, 'r') as f: 87 | all_data.extend(json.load(f)) 88 | elif file.endswith(".jsonl"): 89 | with jsonlines.open(file, 'r') as reader: 90 | all_data.extend(list(reader)) 91 | else: 92 | raise ValueError(f"Unsupported file format: {file}") 93 | 94 | results = [] 95 | 96 | if args.dataset_type == 'default': 97 | for data in all_data: 98 | repo = data["repo"] 99 | instance_id = data["instance_id"] 100 | base_commit = data["base_commit"] 101 | patch = data["patch"] 102 | problem_statement = data["problem_statement"] 103 | hints_text = data["hints_text"] if "hints_text" in data else "" 104 | created_at = data["created_at"] if "created_at" in data else "2025-01-23T23:59:59" 105 | version = "0.0" 106 | FAIL_TO_PASS = [] 107 | PASS_TO_PASS = [] 108 | patches = [] 109 | 110 | for i, test in enumerate(data['test']): 111 | try: 112 | test_file = f'swedev_test_{i}.py' 113 | if test['content']: 114 | patches.append(parse_testcase(test['content'], test_file)) 115 | for function in test['FAIL_TO_PASS']: 116 | # if function['status'] == 'normal': 117 | FAIL_TO_PASS.append(function['name'].replace("swedev_test.py", test_file)) 118 | except Exception as e: 119 | print(e) 120 | 121 | if not FAIL_TO_PASS: 122 | continue 123 | 124 | assert FAIL_TO_PASS and patches, "Empty FAIL_TO_PASS or patches" 125 | 126 | results.append({ 127 | "repo": repo, 128 | "instance_id": instance_id, 129 | "base_commit": base_commit, 130 | "patch": patch, 131 | "test_patch": "\n".join(patches) + "\n", 132 | "problem_statement": problem_statement, 133 | "hints_text": hints_text, 134 | "created_at": created_at, 135 | "version": version, 
136 | "FAIL_TO_PASS": FAIL_TO_PASS, 137 | "PASS_TO_PASS": PASS_TO_PASS, 138 | "environment_setup_commit": "", 139 | "description": "" 140 | }) 141 | 142 | random.shuffle(results) 143 | results = list({item['instance_id']: item for item in results}.values()) 144 | 145 | elif args.dataset_type == 'openhands': 146 | for data in all_data: 147 | result = data['instance'] 148 | result['solution'] = data['test_result']['git_patch'] 149 | if not "environment_setup_commands" in result.keys(): 150 | print('Old version data detected') 151 | continue 152 | result['tests'] = [{ 153 | "content": extract_content_from_patch(result['test_patch']), 154 | "env": result['environment_setup_commands'], 155 | "id": 0, 156 | }] 157 | if not result['description']: 158 | result['description'] = "" 159 | results.append(result) 160 | else: 161 | raise NotImplementedError 162 | 163 | os.makedirs(args.output_folder, exist_ok=True) 164 | print(f'Total: {len(results)} pieces.') 165 | print(f'Output file is: {output_file}') 166 | with jsonlines.open(output_file, 'w') as writer: 167 | writer.write_all(results) -------------------------------------------------------------------------------- /swedev/issues/fetch_metadata.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import random 5 | import time 6 | from concurrent.futures import ThreadPoolExecutor, as_completed 7 | from itertools import islice 8 | from math import ceil 9 | 10 | import requests 11 | import aiohttp 12 | import asyncio 13 | import pandas as pd 14 | from tqdm import tqdm 15 | from swedev.config import Config 16 | 17 | # Use GitHub tokens from config 18 | if not Config.github_tokens: 19 | raise ValueError("GitHub tokens not configured. 
Please configure github_tokens in your config file or set the GITHUB_TOKENS environment variable.") 20 | 21 | def get_token(): 22 | """Randomly select a GitHub token from the list.""" 23 | return random.choice(Config.github_tokens) 24 | 25 | def fetch_github_data(url, headers, max_retries=12): 26 | """ 27 | Fetch stars and pull request counts from a single GitHub repository URL. 28 | 29 | :param url: GitHub repository URL. 30 | :param headers: Headers for the GitHub API request. 31 | :param max_retries: Maximum number of retries for failed requests. 32 | :return: A dictionary with the repository URL, stars, and pull request count, or None if an error occurs. 33 | """ 34 | retries = 0 35 | while retries <= max_retries: 36 | try: 37 | # Extract owner and repo name from the GitHub URL 38 | parts = url.rstrip("/").split("/") 39 | if len(parts) < 5 or parts[2] != "github.com": 40 | print(f"Invalid GitHub URL skipped: {url}") 41 | return None 42 | owner, repo = parts[-2], parts[-1] 43 | 44 | # Base API URL for the repository 45 | base_api_url = f"https://api.github.com/repos/{owner}/{repo}" 46 | 47 | # Fetch repository details (stars) 48 | headers["Authorization"] = f"Bearer {get_token()}" 49 | repo_response = requests.get(base_api_url, headers=headers) 50 | if repo_response.status_code != 200: 51 | print( 52 | f"Failed to fetch repo data for {url}: {repo_response.status_code}, {repo_response.text}") 53 | raise requests.exceptions.RequestException("Repo request failed") 54 | repo_data = repo_response.json() 55 | stars = repo_data.get("stargazers_count", 0) 56 | 57 | pulls_url = f"{base_api_url}/pulls" 58 | headers["Authorization"] = f"Bearer {get_token()}" 59 | pulls_response = requests.get( 60 | pulls_url, headers=headers, params={"state": "all", "per_page": 1}) 61 | if pulls_response.status_code != 200: 62 | print( 63 | f"Failed to fetch pull request data for {url}: {pulls_response.status_code}") 64 | raise requests.exceptions.RequestException("Pull request failed") 65 
|             pulls_count = 0
66 |             if "Link" in pulls_response.headers:
67 |                 links = _parse_link_header(pulls_response.headers["Link"])
68 |                 if "last" in links:
69 |                     last_url = links["last"]
70 |                     pulls_count = int(last_url.split("page=")[-1])
71 |             else:
72 |                 pulls_count = len(pulls_response.json())
73 |             return {"github": url, "stars": stars, "pulls": pulls_count}
74 | 
75 |         except Exception as e:
76 |             if "repo_response" in locals() and repo_response.status_code == 404:  # guard: the request itself may have raised before assignment
77 |                 return None
78 | 
79 |             retries += 1
80 |             if retries > max_retries:
81 |                 print(f"Error processing {url} after {max_retries} retries: {e}")
82 |                 return None
83 |             wait_time = 2 ** retries
84 |             print(f"Retrying {url} in {wait_time} seconds... (Attempt {retries})")
85 |             time.sleep(wait_time)
86 | 
87 | 
88 | def _parse_link_header(link_header):
89 |     """
90 |     Parse the GitHub API Link header for pagination URLs.
91 | 
92 |     :param link_header: The Link header string.
93 |     :return: A dictionary with keys like 'next', 'prev', 'last', etc.
94 |     """
95 |     links = {}
96 |     for part in link_header.split(","):
97 |         section = part.split(";")
98 |         if len(section) < 2:
99 |             continue
100 |         url = section[0].strip().strip("<>")
101 |         rel = section[1].strip().split("=")[1].strip('"')
102 |         links[rel] = url
103 |     return links
104 | 
105 | 
106 | def load_processed_urls(output_file, not_found_file="not_found_repos.txt"):
107 |     """
108 |     Load already processed URLs from the output file and the not_found_repos.txt file to avoid duplicate processing.
109 | 
110 |     :param output_file: Path to the output .jsonl file.
111 |     :param not_found_file: Path to the not_found_repos.txt file.
112 |     :return: A set of already processed URLs. 
113 | """ 114 | processed_urls = set() 115 | 116 | # Load URLs from the output file 117 | if os.path.exists(output_file): 118 | with open(output_file, "r") as f: 119 | processed_urls.update(json.loads(line)["github"] for line in f) 120 | 121 | # Load URLs from the not_found_repos.txt file 122 | if os.path.exists(not_found_file): 123 | with open(not_found_file, "r") as f: 124 | processed_urls.update(line.strip() for line in f if line.strip()) 125 | 126 | return processed_urls 127 | 128 | 129 | def process_urls(input_file, output_file, max_workers): 130 | """ 131 | Process all GitHub URLs from the input file and save results to the output file. 132 | 133 | :param input_file: Path to the input .txt file containing GitHub URLs. 134 | :param output_file: Path to the output .jsonl file for saving results. 135 | :param max_workers: Number of threads for concurrent processing. 136 | """ 137 | with open(input_file, "r") as f: 138 | urls = [line.strip() for line in f if line.strip()] 139 | 140 | processed_urls = load_processed_urls(output_file, "not_found_repos.txt") 141 | urls_to_process = [url for url in urls if url not in processed_urls] 142 | 143 | headers = { 144 | "Accept": "application/vnd.github+json", 145 | "Authorization": f"Bearer {get_token()}" 146 | } 147 | 148 | results = [] 149 | with ThreadPoolExecutor(max_workers=max_workers) as executor, tqdm(total=len(urls_to_process)) as progress: 150 | future_to_url = {executor.submit(fetch_github_data, url, headers, 5): url for url in urls_to_process} 151 | 152 | for future in as_completed(future_to_url): 153 | url = future_to_url[future] 154 | try: 155 | data = future.result() 156 | if data: 157 | with open(output_file, "a") as f: 158 | f.write(json.dumps(data) + "\n") 159 | results.append(data) 160 | progress.update(1) 161 | print(f"Processed: {url} -> Stars: {data['stars']}, Pulls: {data['pulls']}") 162 | except Exception as e: 163 | print(f"Error processing {url}: {e}") 164 | 165 | def main(): 166 | """ 167 | Main 
function to parse arguments and execute the script. 168 | """ 169 | parser = argparse.ArgumentParser(description="Fetch stars and pull request counts for GitHub repositories.") 170 | parser.add_argument("--input_file", type=str, required=True, help="Path to the input .txt file containing GitHub URLs.") 171 | parser.add_argument("--output_file", type=str, required=True, help="Path to the output .jsonl file for saving results.") 172 | parser.add_argument("--workers", type=int, default=10, help="Number of threads for concurrent processing.") 173 | args = parser.parse_args() 174 | 175 | process_urls(args.input_file, args.output_file, args.workers) 176 | 177 | 178 | if __name__ == "__main__": 179 | main() -------------------------------------------------------------------------------- /swedev.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.2 2 | Name: swedev 3 | Version: 0.1.0 4 | Summary: Software Engineering Agents with Training and Inference Scaling 5 | Home-page: https://github.com/UbeCc/SWE-Dev 6 | Author: Haoran Wang 7 | Author-email: ubecwang@gmail.com 8 | Classifier: Programming Language :: Python :: 3 9 | Classifier: License :: OSI Approved :: MIT License 10 | Classifier: Operating System :: OS Independent 11 | Requires-Python: >=3.6 12 | Description-Content-Type: text/markdown 13 | License-File: LICENSE 14 | Requires-Dist: transformers 15 | Requires-Dist: tree-sitter==0.21.3 16 | Requires-Dist: tree-sitter-languages==1.10.2 17 | Requires-Dist: grep-ast==0.3.2 18 | Requires-Dist: networkx==3.2.1 19 | Requires-Dist: pygments==2.18.0 20 | Requires-Dist: tqdm 21 | Requires-Dist: datasets 22 | Requires-Dist: openai==1.42.0 23 | Requires-Dist: tiktoken==0.7.0 24 | Requires-Dist: libcst==1.4.0 25 | Requires-Dist: jsonlines 26 | Requires-Dist: tenacity 27 | Requires-Dist: ghapi 28 | Requires-Dist: bs4 29 | Requires-Dist: swebench 30 | Requires-Dist: selenium 31 | Requires-Dist: diskcache 32 | 
Requires-Dist: pytest-json-report 33 | Requires-Dist: libcst 34 | Requires-Dist: hydra-core>=1.3.2 35 | Requires-Dist: omegaconf>=2.3.0 36 | Dynamic: author 37 | Dynamic: author-email 38 | Dynamic: classifier 39 | Dynamic: description 40 | Dynamic: description-content-type 41 | Dynamic: home-page 42 | Dynamic: requires-dist 43 | Dynamic: requires-python 44 | Dynamic: summary 45 | 46 | # 🚀 SWE-Dev: Building Software Engineering Agents with Training and Inference Scaling 47 | 48 | 📝 [Blog](https://www.notion.so/ubecwang/1bc32cf963e080b2a01df2895f66021f?v=1bc32cf963e0810ca07e000c86c4c1e1) | 🤗 [Huggingface](https://huggingface.co/THUDM/SWE-Dev-32B) | 💻[Github](https://github.com/UbeCc/SWE-Dev) 49 | 50 | This repository is a comprehensive pipeline for creating developer-oriented datasets from GitHub repositories, including issue tracking, test case generation, and evaluation. 51 | 52 | ## 🔄 Pipeline Overview 53 | 54 | ### Step 0: 🛠️ Configuration Setup 55 | 56 | #### Configuration File 57 | 58 | The main configuration file is located at `conf/config/default.yaml` and contains settings for all pipeline stages: 59 | 60 | #### Validating Configuration 61 | 62 | To validate your configuration: 63 | 64 | ```bash 65 | python -m swedev.config --validate 66 | ``` 67 | 68 | #### Viewing Configuration 69 | 70 | To view the current configuration: 71 | 72 | ```bash 73 | python -m swedev.config --print 74 | ``` 75 | 76 | #### Overriding Configuration in Command Line 77 | 78 | You can override any configuration value when running scripts: 79 | 80 | ```bash 81 | python your_script.py paths.local_repo_dir=/new/path github.tokens=[token1,token2] 82 | ``` 83 | 84 | #### Using Configuration in Code 85 | 86 | ```python 87 | from swedev.config import Config 88 | 89 | # Access basic configuration 90 | conda_base = Config.conda_base 91 | github_tokens = Config.github_tokens 92 | 93 | # Access stage-specific settings 94 | localizer_model = Config.Localizer.model 95 | description_model = 
Config.Description.model 96 | testcase_model = Config.Testcase.model 97 | revise_rounds = Config.Testcase.revise_rounds 98 | ``` 99 | 100 | #### Environment Variables Fallbacks 101 | 102 | ### Step 1: 📊 Data Collection from GitHub 103 | 104 | Set up your configuration in `conf/config/default.yaml` with GitHub tokens and repository directories before running these commands. 105 | 106 | #### Option 1: Collect Top PyPI Repositories 107 | 108 | > You need to install chrome driver first. On Ubuntu, you can install it with `apt install chromium-chromedriver` 109 | 110 | ```bash 111 | python -m swedev.crawl.get_top_pypi \ 112 | --max_repos 100 \ 113 | --output_folder results/packages \ 114 | --num_workers 8 \ 115 | --start_at 0 116 | ``` 117 | 118 | #### Option 2: Fetch All PyPI Repositories 119 | 120 | ```bash 121 | python -m swedev.crawl.pypi_crawler \ 122 | --output results/packages/github_urls.jsonl \ 123 | --workers 16 124 | ``` 125 | 126 | > ⚠️ Note: Keep concurrency low to respect GitHub rate limits 127 | 128 | #### Process the Repositories 129 | ```bash 130 | python -m swedev.issues.get_tasks_pipeline \ 131 | --repo_file results/packages/pypi_rankings.jsonl \ 132 | --output_folder results/issues \ 133 | --cutoff_date 20210101 \ 134 | --num_workers 64 \ 135 | --max_pulls 1000 136 | ``` 137 | 138 | If you enable `--do_clone`, the script will clone repositories to the directory specified by `local_repo_dir` in your configuration.
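GitHub access in this step is token-based; when `github.tokens` is not set in the YAML, the code falls back to environment variables. A minimal sketch of that fallback, assuming the `GITHUB_TOKENS` variable named in the error message in `swedev/issues/build_dataset.py` (the token values below are placeholders, not real tokens):

```python
import os
import random

# Placeholder tokens for illustration only; real values are personal access tokens.
os.environ.setdefault("GITHUB_TOKENS", "ghp_tokenA,ghp_tokenB")

# Mirrors the fallback in build_dataset.main(): split the comma-separated list
# and pick one token at random, spreading requests across tokens to ease rate limits.
tokens = [t.strip() for t in os.environ["GITHUB_TOKENS"].split(",")]
token = random.choice(tokens)
print(f"{len(tokens)} token(s) configured")
```

Using several tokens this way raises the effective rate limit, since GitHub applies limits per token.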
139 | 140 | > If you encounter persistent `404 - Error` messages, terminate the run manually and combine the partial results 141 | 142 | ```bash 143 | python -m swedev.issues.get_tasks_pipeline \ 144 | --repo_file results/issues/packages/pypi_rankings.jsonl \ 145 | --output_folder results/issues \ 146 | --combine_results 147 | ``` 148 | 149 | ### Step 2: 📝 Generate Test Cases 150 | 151 | For parallel environments, create a base environment first to avoid Conda concurrent installation issues: 152 | ```bash 153 | conda create -n swedevbase python=3.11 -y 154 | conda create -n {env_name} --clone swedevbase # For later usage 155 | ``` 156 | 157 | Before running the generation pipeline, configure your API info in `conf/config.yaml` 158 | 159 | First, generate descriptions: 160 | ```bash 161 | python -m swedev.testcases.get_descriptions \ 162 | --dataset_file results/issues/all_tasks.jsonl \ 163 | --output_folder results/descriptions \ 164 | --num_workers 16 165 | ``` 166 | 167 | Then generate test cases: 168 | ```bash 169 | python -m swedev.testcases.get_testcases \ 170 | --dataset_file results/descriptions/output.jsonl \ 171 | --top_n 5 \ 172 | --output_folder results/testcases/ \ 173 | --num_workers 4 174 | ``` 175 | 176 | ### Step 3: 🧪 Evaluate Test Cases 177 | 178 | #### Docker Method 179 | 180 | We provide a Dockerfile based on Ubuntu 22.04 that installs all necessary dependencies for evaluation. The image includes comprehensive development tools. If you encounter errors, you can manually install the dependencies listed in the `Dockerfile` and then use `docker commit` to save your image. 181 | 182 | First, build the Docker image: 183 | ```bash 184 | # Build the Docker image from the provided Dockerfile 185 | docker build -t swedev-evaluator:latest .
186 | ``` 187 | 188 | Run the evaluation container: 189 | ```bash 190 | docker run -d --network host \ 191 | -v /raid:/raid \ 192 | -w /raid/SWE-Dev \ 193 | --restart always \ 194 | swedev-evaluator:latest \ 195 | /raid/swedev/miniforge3/envs/swedev/bin/python -m swedev.testcases.eval_testcases \ 196 | --dataset /raid/SWE-Dev/results/testcases-0218/output.jsonl \ 197 | --output_folder /raid/SWE-Dev/results/evaluation-0218 \ 198 | --num_workers 48 199 | ``` 200 | 201 | You should use **absolute paths** when mounting directories 202 | 203 | #### Non-Docker Method 204 | 205 | ```bash 206 | python -m swedev.testcases.eval_testcases \ 207 | --dataset results/testcases-0218/output.jsonl \ 208 | --output_folder results/evaluation-0218 \ 209 | --num_workers 32 210 | ``` 211 | 212 | ### Step 4: 📈 View Evaluation Results 213 | 214 | ```bash 215 | python -m swedev.testcases.eval_testcases \ 216 | --dataset results/evaluation-0218/evaluated_testcases \ 217 | --show_report 218 | ``` 219 | 220 | ### Step 5: 📦 Create Final Dataset 221 | 222 | ```bash 223 | python -m swedev.utils.formatter \ 224 | --dataset results/trajectory/qwen-45round-v0227.jsonl \ 225 | --output_folder results/swedata \ 226 | --output_name swe-qwen-45round-v0227.jsonl \ 227 | --dataset_type openhands 228 | ``` 229 | 230 | ## 🙏 Acknowledgements 231 | 232 | We thank the following open-source projects for their contributions: 233 | 234 | - [**SWE-bench**](https://github.com/SWE-bench/SWE-bench) 235 | 236 | - [**Agentless**](https://github.com/OpenAutoCoder/Agentless) 237 | 238 | - [**OpenHands**](https://github.com/All-Hands-AI/OpenHands) 239 | 240 | - [**Nebius**](https://nebius.com/blog/posts/scaling-data-collection-for-training-swe-agents) 241 | -------------------------------------------------------------------------------- /swedev/issues/build_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 |
import re 6 | from typing import Optional 7 | import random 8 | 9 | 10 | from swedev.config import Config 11 | from swebench.collect.utils import (Repo, extract_patches, extract_problem_statement_and_hints) 12 | 13 | logging.basicConfig( 14 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 15 | ) 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | def create_instance(repo: Repo, pull: dict) -> dict: 20 | """ 21 | Create a single task instance from a pull request, where task instance is: 22 | 23 | { 24 | repo (str): owner/repo this task instance is from, 25 | pull_number (int): number of PR this task instance is from, 26 | base_commit (str): SHA of the base commit PR is based on, 27 | patch (str): reference solution as .patch (apply to base commit), 28 | test_patch (str): test suite as .patch (apply to base commit), 29 | } 30 | """ 31 | patch, test_patch = extract_patches(pull, repo) 32 | problem_statement, hints = extract_problem_statement_and_hints(pull, repo) 33 | logger.info(f"Extracted patches and problem statement for PR {pull['number']} in {repo.repo.full_name}") 34 | logger.info(f"Patch: {patch[:100]}") 35 | logger.info(f"Test patch: {test_patch[:100]}") 36 | return { 37 | "repo": repo.repo.full_name, 38 | "pull_number": pull["number"], 39 | "instance_id": (repo.repo.full_name + "-" + str(pull["number"])).replace( 40 | "/", "__" 41 | ), 42 | "issue_numbers": pull["resolved_issues"], 43 | "base_commit": pull["base"]["sha"], 44 | "patch": patch, 45 | "test_patch": test_patch, 46 | "problem_statement": problem_statement, 47 | "hints_text": hints, 48 | "created_at": pull["created_at"], 49 | } 50 | 51 | 52 | def is_valid_pull(pull: dict) -> bool: 53 | """ 54 | Check whether PR has an associated issue and is merged 55 | 56 | Args: 57 | pull (dict): pull request object 58 | Returns: 59 | bool: whether PR is valid 60 | """ 61 | if pull["merged_at"] is None: 62 | return False 63 | if
"resolved_issues" not in pull or len(pull["resolved_issues"]) < 1: 64 | return False 65 | return True 66 | 67 | 68 | def is_valid_instance(instance: dict) -> bool: 69 | """ 70 | Check whether task instance has all required fields for task instance creation 71 | 72 | Args: 73 | instance (dict): task instance object 74 | Returns: 75 | bool: whether task instance is valid 76 | """ 77 | if instance["patch"] is None or instance["patch"] == "": 78 | return False 79 | if len(instance["patch"]) > 8192: 80 | return False 81 | if instance["problem_statement"] is None or instance["problem_statement"] == "": 82 | return False 83 | return True 84 | 85 | 86 | def has_test_patch(instance: dict) -> bool: 87 | """ 88 | Check whether task instance has a test suite 89 | 90 | Args: 91 | instance (dict): task instance object 92 | Returns: 93 | bool: whether task instance has a test suite 94 | """ 95 | if instance["test_patch"] is None or instance["test_patch"].strip() == "": 96 | return False 97 | return True 98 | 99 | 100 | def main(pr_file: str, output: str, token: Optional[str] = None): 101 | """ 102 | Process Pull Request file and extract instances, writing them to output file. 103 | 104 | Args: 105 | pr_file (str): path to pull request JSONL file 106 | output (str): output file name 107 | token (str): GitHub token 108 | """ 109 | if token is None: 110 | # Get GitHub token from configuration 111 | if Config.github_tokens: 112 | tokens = [t.strip() for t in Config.github_tokens.split(",")] 113 | token = random.choice(tokens) 114 | else: 115 | raise ValueError("GitHub tokens not configured. 
Please configure github_tokens in your config file or set the GITHUB_TOKENS environment variable.") 116 | 117 | def load_repo(repo_name): 118 | # Return repo object for a given repo name 119 | owner, repo = repo_name.split("/") 120 | return Repo(owner, repo, token=token) 121 | 122 | repos = dict() 123 | completed = 0 124 | with_tests = 0 125 | total_instances = 0 126 | all_output = output + ".all" 127 | seen_prs = set() 128 | 129 | # Continue where we left off if output file already exists 130 | if os.path.exists(all_output): 131 | with open(all_output) as f: 132 | for line in f: 133 | pr = json.loads(line) 134 | if "instance_id" not in pr: 135 | pr["instance_id"] = ( 136 | pr["repo"] + "-" + str(pr["pull_number"]) 137 | ).replace("/", "__") 138 | instance_id = pr["instance_id"] 139 | seen_prs.add(instance_id) 140 | if is_valid_instance(pr): 141 | completed += 1 142 | if has_test_patch(pr): 143 | with_tests += 1 144 | logger.info(f"Will skip {len(seen_prs)} pull requests that have already been inspected") 145 | 146 | # Write to .all file for all PRs 147 | write_mode_all = "w" if not os.path.exists(all_output) else "a" 148 | with open(all_output, write_mode_all) as all_output: 149 | # Write to output file for PRs with test suites 150 | write_mode = "w" if not os.path.exists(output) else "a" 151 | with open(output, write_mode) as output: 152 | for ix, line in enumerate(open(pr_file)): 153 | total_instances += 1 154 | pull = json.loads(line) 155 | if ix % 100 == 0: 156 | logger.info( 157 | f"[{pull['base']['repo']['full_name']}] (Up to {ix} checked) " 158 | f"{completed} valid, {with_tests} with tests." 
159 | ) 160 | # Construct instance fields 161 | instance_id = ( 162 | pull["base"]["repo"]["full_name"] + "-" + str(pull["number"]) 163 | ) 164 | instance_id = instance_id.replace("/", "__") 165 | if instance_id in seen_prs: 166 | seen_prs -= {instance_id} 167 | continue 168 | if not is_valid_pull(pull): 169 | # Throw out invalid PRs 170 | print(f"Skipping invalid PR {pull['number']}") 171 | continue 172 | # Create task instance 173 | repo_name = pull["base"]["repo"]["full_name"] 174 | if repo_name not in repos: 175 | repos[repo_name] = load_repo(repo_name) 176 | repo = repos[repo_name] 177 | instance = create_instance(repo, pull) 178 | if is_valid_instance(instance): 179 | # If valid, write to .all output file 180 | print( 181 | json.dumps(instance), end="\n", flush=True, file=all_output 182 | ) # write all instances to a separate file 183 | completed += 1 184 | if has_test_patch(instance): 185 | # If has test suite, write to output file 186 | print(json.dumps(instance), end="\n", flush=True, file=output) 187 | with_tests += 1 188 | logger.info(f"[{', '.join(repos.keys())}] Total instances: {total_instances}, completed: {completed}, with tests: {with_tests}") 189 | logger.info(f"[{', '.join(repos.keys())}] Skipped {len(seen_prs)} pull requests that have already been inspected") 190 | 191 | 192 | if __name__ == "__main__": 193 | parser = argparse.ArgumentParser() 194 | parser.add_argument("--pr_file", type=str, help="Path to pull request JSONL file") 195 | parser.add_argument("--output", type=str, help="Output file name") 196 | parser.add_argument("--token", type=str, default=None, help="GitHub token (falls back to the configured github_tokens)") 197 | args = parser.parse_args() 198 | main(**vars(args)) 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🚀 SWE-Dev 2 | 3 | > SWE-Dev: Building Software Engineering Agents with Training and
Inference Scaling [ACL'25 Findings] 4 | > 5 | > Haoran Wang*, Zhenyu Hou*, Yao Wei, Jie Tang, Yuxiao Dong 6 | 7 | 📝 [Paper](https://arxiv.org/abs/2506.07636) | 🤗 [HF(Model)](https://huggingface.co/THUDM/SWE-Dev-32B) | 🤗 [HF(Data)](https://huggingface.co/datasets/THUDM/SWE-Dev-train) 8 | 9 | ## 💡 Introduction 10 | 11 | LLMs have advanced from *conversational problem solving* to *real-world tasks* such as software engineering (SWE). However, building effective SWE agents remains challenging due to the lack of high-quality training data and reliable test-time evaluation. 12 | 13 | To address these challenges, we present **SWE-Dev**, an SWE agent with a focus on training and inference scaling. 14 | 15 | - **For training scaling**, we develop a robust pipeline to synthesize test cases and scale up agent trajectories to construct the training data. 16 | - **For inference scaling**, we increase the interaction budget within a single run to enable further thinking within one independent attempt. 17 | 18 | Experiments on the SWE-bench-Verified benchmark show that the SWE-Dev models achieve top performance among all open SWE agents. 19 | Specifically, the resolve rates of our 7B and 32B models reach 23.4% and 36.6%, respectively, outperforming state-of-the-art open-source models. 20 | 21 |
<img src="assets/performance.png" alt="Performance of SWE-Dev-7B and SWE-Dev-32B with Training and Inference Scaling.">
22 | 23 |
Model performance with training and inference scaling. SWE-Dev demonstrated a 21.8% and 30% performance improvement on the 7B and 32B models, respectively, through scaling in both training and inference. Notably, SWE-Dev-32B achieved a performance of 34.0%, comparable to GPT-4o, even without the benefits of inference scaling.

24 | 25 |
<img src="assets/table.png" alt="Comparison of resolve rates on the SWE-bench-Verified dataset.">
26 | 27 |
Comparison of resolve rates on the SWE-bench-Verified dataset. The table categorizes models into baselines and SWE agents. SWE-Dev models achieve top-tier results among open-source models and remain competitive with closed-source models. The relative improvement (↑) for our models is calculated with respect to their respective base models.

28 | 29 | ## 🔄 Pipeline Overview 30 | 31 |
<img src="assets/pipeline.png" alt="Pipeline for test case generation.">
32 | 33 |
Pipeline for test case generation: The pipeline is divided into description generation and code generation phases. It begins with extracting repository information, followed by generating Gherkin scenarios and then detailed test cases. An optional revision step leverages traceback errors to refine the generated test cases. The final output includes fail-to-pass test cases.
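The traceback-driven revision step described above can be sketched as follows. This is an illustrative loop, not the pipeline's actual API: `generate` and `run_test` are hypothetical callables standing in for the LLM generator and the test runner.

```python
def revise_until_passing(generate, run_test, max_rounds=3):
    """Illustrative sketch: keep regenerating a test case, feeding the previous
    traceback back into the generator, until the test runs cleanly."""
    test_code = generate(feedback=None)            # initial generation
    for _ in range(max_rounds):
        passed, traceback_text = run_test(test_code)
        if passed:
            return test_code                       # candidate fail-to-pass test
        test_code = generate(feedback=traceback_text)
    return None                                    # discarded after max_rounds

# Toy stand-ins for the hypothetical callables:
attempts = iter(["assert 1 == 2", "assert 1 == 1"])
result = revise_until_passing(
    generate=lambda feedback: next(attempts),
    run_test=lambda code: (True, "") if "1 == 1" in code else (False, "AssertionError"),
)
print(result)  # → assert 1 == 1
```

In the real pipeline the kept tests must additionally fail on the base commit and pass after the reference patch, which is what makes them fail-to-pass.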
34 | 35 | ### Step 0: 🛠️ Configuration Setup 36 | 37 | #### Configuration File 38 | 39 | The main configuration file is located at `conf/config/default.yaml` and contains settings for all pipeline stages: 40 | 41 | #### Validating Configuration 42 | 43 | To validate your configuration: 44 | 45 | ```bash 46 | python -m swedev.config --validate 47 | ``` 48 | 49 | #### Viewing Configuration 50 | 51 | To view the current configuration: 52 | 53 | ```bash 54 | python -m swedev.config --print 55 | ``` 56 | 57 | #### Overriding Configuration in Command Line 58 | 59 | You can override any configuration value when running scripts: 60 | 61 | ```bash 62 | python your_script.py paths.local_repo_dir=/new/path github.tokens=[token1,token2] 63 | ``` 64 | 65 | #### Using Configuration in Code 66 | 67 | ```python 68 | from swedev.config import Config 69 | 70 | # Access basic configuration 71 | conda_base = Config.conda_base 72 | github_tokens = Config.github_tokens 73 | 74 | # Access stage-specific settings 75 | localizer_model = Config.Localizer.model 76 | description_model = Config.Description.model 77 | testcase_model = Config.Testcase.model 78 | revise_rounds = Config.Testcase.revise_rounds 79 | ``` 80 | 81 | #### Environment Variables Fallbacks 82 | 83 | ### Step 1: 📊 Data Collection from GitHub 84 | 85 | Set up your configuration in `conf/config/default.yaml` with GitHub tokens and repository directories before running these commands. 86 | 87 | #### Option 1: Collect Top PyPI Repositories 88 | 89 | > You need to install chrome driver first. 
On Ubuntu, you can install it with `apt install chromium-chromedriver` 90 | 91 | ```bash 92 | python -m swedev.crawl.get_top_pypi \ 93 | --max_repos 100 \ 94 | --output_folder results/packages \ 95 | --num_workers 8 \ 96 | --start_at 0 97 | ``` 98 | 99 | #### Option 2: Fetch All PyPI Repositories 100 | 101 | ```bash 102 | python -m swedev.crawl.pypi_crawler \ 103 | --output results/packages/github_urls.jsonl \ 104 | --workers 16 105 | ``` 106 | 107 | > ⚠️ Note: Keep concurrency low to respect GitHub rate limits 108 | 109 | #### Process the Repositories 110 | ```bash 111 | python -m swedev.issues.get_tasks_pipeline \ 112 | --repo_file results/packages/pypi_rankings.jsonl \ 113 | --output_folder results/issues \ 114 | --cutoff_date 20210101 \ 115 | --num_workers 64 \ 116 | --max_pulls 1000 117 | ``` 118 | 119 | If you enable `--do_clone`, the script will clone repositories to the directory specified by `local_repo_dir` in your configuration. 120 | 121 | > If you encounter persistent `404 - Error` messages, terminate the run manually and combine the partial results 122 | 123 | ```bash 124 | python -m swedev.issues.get_tasks_pipeline \ 125 | --repo_file results/issues/packages/pypi_rankings.jsonl \ 126 | --output_folder results/issues \ 127 | --combine_results 128 | ``` 129 | 130 | ### Step 2: 📝 Generate Test Cases 131 | 132 | For parallel environments, create a base environment first to avoid Conda concurrent installation issues: 133 | ```bash 134 | conda create -n swedevbase python=3.11 -y 135 | conda create -n {env_name} --clone swedevbase # For later usage 136 | ``` 137 | 138 | Before running the generation pipeline, configure your API info in `conf/config.yaml` 139 | 140 | First, generate descriptions: 141 | ```bash 142 | python -m swedev.testcases.get_descriptions \ 143 | --dataset_file results/issues/all_tasks.jsonl \ 144 | --output_folder results/descriptions \ 145 | --num_workers 16 146 | ``` 147 | 148 | Then generate test cases: 149 | ```bash 150 | python -m
swedev.testcases.get_testcases \ 151 | --dataset_file results/descriptions/output_f2p.jsonl \ 152 | --top_n 5 \ 153 | --output_folder results/testcases/ \ 154 | --num_workers 80 155 | ``` 156 | 157 | ### Step 3: 🧪 Evaluate Test Cases 158 | 159 | #### Docker Method 160 | 161 | We provide a Dockerfile based on Ubuntu 22.04 that installs all necessary dependencies for evaluation. The image includes comprehensive development tools. If you encounter errors, you can manually install the dependencies listed in the `Dockerfile` and then use `docker commit` to save your image. 162 | 163 | First, build the Docker image: 164 | ```bash 165 | # Build the Docker image from the provided Dockerfile 166 | docker build -t swedev-evaluator:latest . 167 | ``` 168 | 169 | Run the evaluation container: 170 | ```bash 171 | docker run -d --network host \ 172 | -v /raid:/raid \ 173 | -w /raid/SWE-Dev \ 174 | --restart always \ 175 | swedev-evaluator:latest \ 176 | /raid/SWE-Dev/miniforge3/envs/swedev/bin/python -m swedev.testcases.eval_testcases \ 177 | --dataset /raid/SWE-Dev/results/testcases/output.jsonl \ 178 | --output_folder /raid/SWE-Dev/results/evaluation-0508 \ 179 | --num_workers 80 180 | ``` 181 | 182 | You should use **absolute paths** when mounting directories 183 | 184 | #### Non-Docker Method 185 | 186 | ```bash 187 | python -m swedev.testcases.eval_testcases \ 188 | --dataset /raid/SWE-Dev/results/testcases/output.jsonl \ 189 | --output_folder results/evaluation-0508 \ 190 | --num_workers 32 191 | ``` 192 | 193 | ### Step 4: 📈 View Evaluation Results 194 | 195 | ```bash 196 | python -m swedev.testcases.eval_testcases \ 197 | --dataset results/evaluation-0508/evaluated_testcases \ 198 | --show_report 199 | ``` 200 | 201 | ### Step 5: 📦 Create Final Dataset 202 | 203 | ```bash 204 | python -m swedev.utils.formatter \ 205 | --dataset results/trajectory/qwen-45round-v0227.jsonl \ 206 | --output_folder results/swedata \ 207 | --output_name swe-qwen-45round-v0227.jsonl \ 208 | --dataset_type
openhands 209 | ``` 210 | 211 | ## 🙏 Acknowledgements 212 | 213 | We thank the following open-source projects for their contributions: 214 | 215 | - [**SWE-bench**](https://github.com/SWE-bench/SWE-bench) 216 | 217 | - [**Agentless**](https://github.com/OpenAutoCoder/Agentless) 218 | 219 | - [**OpenHands**](https://github.com/All-Hands-AI/OpenHands) 220 | 221 | - [**Nebius**](https://nebius.com/blog/posts/scaling-data-collection-for-training-swe-agents) 222 | -------------------------------------------------------------------------------- /swedev/utils/extract_signs.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import difflib 3 | import os 4 | import re 5 | 6 | 7 | def api_formatter(apis): 8 | """ 9 | Format the API signature to a more readable format. 10 | """ 11 | ret = '' 12 | for api in apis: 13 | ret += f"{api['file']}: {api['signature']}\n" 14 | return ret 15 | 16 | def calculate_similarity(func_signature_1, func_signature_2, type=None): 17 | """ 18 | Calculate the similarity between two function signatures. 19 | The similarity is based on both the function name and the parameter list. 
20 | """ 21 | try: 22 | def parse_signature(signature): 23 | if type == "function": 24 | name, args = signature.split("(", 1) 25 | args = '(' + args 26 | args = args.split(",") if args.strip() else [] 27 | args = [arg.strip() for arg in args] 28 | return name.strip(), args 29 | elif type == "class": 30 | return signature, [] 31 | else: 32 | raise NotImplementedError 33 | 34 | name1, args1 = parse_signature(func_signature_1) 35 | name2, args2 = parse_signature(func_signature_2) 36 | 37 | name_similarity = difflib.SequenceMatcher(None, name1, name2).ratio() 38 | arg_count_similarity = 1 - abs(len(args1) - len(args2)) / max(len(args1), len(args2), 1) 39 | common_args = len(set(args1) & set(args2)) 40 | total_args = len(set(args1) | set(args2)) 41 | arg_name_similarity = common_args / total_args if total_args > 0 else 0 42 | 43 | total_similarity = 0.6 * name_similarity + 0.2 * arg_count_similarity + 0.2 * arg_name_similarity 44 | return total_similarity 45 | except Exception: 46 | return 0 47 | 48 | def find_top_similar_apis(target_api, api_list, type=None, top_n=20): 49 | """ 50 | Find the top N most similar APIs to the target API.
51 | 52 | :param target_api: The target API signature, e.g., "get_random_color():" 53 | :param api_list: A list of dictionaries, each containing 'file' and 'signature' 54 | :param top_n: The number of most similar APIs to return (default: 20) 55 | :return: A list of dictionaries with 'file', 'signature', and 'similarity' 56 | """ 57 | similarities = [] 58 | 59 | for api in api_list: 60 | file = api['file'] 61 | signature = api['signature'] 62 | similarity = calculate_similarity(target_api, signature, type) 63 | similarities.append({'file': file, 'signature': signature, 'similarity': similarity}) 64 | 65 | similarities.sort(key=lambda x: x['similarity'], reverse=True) 66 | return similarities[:top_n] 67 | 68 | def extract_classes_and_functions(file_path, root_path=None): 69 | """ 70 | Extract classes and their methods, as well as standalone functions, with full paths, from a Python file. 71 | 72 | :param file_path: Path to the Python file to parse. 73 | :param root_path: Optional root directory to calculate the module path (for packages). 74 | :return: A list of dictionaries with 'type', 'name', and 'args' for classes and functions. 
75 | """ 76 | with open(file_path, 'r', encoding='latin-1') as file: 77 | content = file.read() 78 | 79 | try: 80 | tree = ast.parse(content) 81 | except SyntaxError as e: 82 | print(f"SyntaxError in file {file_path}: {e}") 83 | return [] 84 | except Exception as e: 85 | print(f"Error in file {file_path}: {e}") 86 | return [] 87 | 88 | if root_path: 89 | relative_path = os.path.relpath(file_path, root_path) 90 | module_name = os.path.splitext(relative_path.replace(os.sep, '.'))[0] 91 | else: 92 | module_name = os.path.splitext(os.path.basename(file_path))[0] 93 | signs = [] 94 | for node in ast.iter_child_nodes(tree): 95 | if isinstance(node, ast.ClassDef): 96 | class_name = f"{module_name}.{node.name}" 97 | for func in node.body: 98 | if isinstance(func, ast.FunctionDef): 99 | args = [arg.arg for arg in func.args.args] 100 | signs.append({ 101 | "type": "method", 102 | 103 | "name": f"{class_name}.{func.name}", 104 | "args": args, 105 | }) 106 | 107 | 108 | elif isinstance(node, ast.FunctionDef): 109 | args = [arg.arg for arg in node.args.args] 110 | signs.append({ 111 | "type": "function", 112 | "name": f"{module_name}.{node.name}", 113 | "args": args, 114 | }) 115 | return signs 116 | 117 | def save_classes_and_functions_to_file(output_file, classes_and_functions_by_file): 118 | """Save extracted classes and functions to a file.""" 119 | with open(output_file, 'w', encoding='utf-8') as file: 120 | for filename, data in classes_and_functions_by_file.items(): 121 | file.write(f"<{filename}>\n") 122 | if data['standalone_functions']: 123 | file.write("Standalone Functions:\n") 124 | file.write("\n".join(data['standalone_functions']) + "\n\n") 125 | 126 | for class_name, methods in data['classes'].items(): 127 | file.write(f"Class {class_name}:\n") 128 | file.write("\n".join(f" {method}" for method in methods) + "\n\n") 129 | 130 | def extract_classes_and_functions_from_directory(root_dir): 131 | """Extract
classes and functions from all Python files in a given directory.""" 132 | all_results = [] 133 | 134 | for dirpath, _, filenames in os.walk(root_dir): 135 | for filename in filenames: 136 | if filename.endswith(".py"): 137 | file_path = os.path.join(dirpath, filename) 138 | relative_path = os.path.relpath(file_path, root_dir) 139 | results = extract_classes_and_functions(file_path) 140 | if results: 141 | all_results.append({ 142 | "file": relative_path, 143 | "content": results 144 | }) 145 | 146 | return all_results 147 | 148 | def generate_signatures(extracted_data, type=None): 149 | """ 150 | Generate complete function/method signatures from the extracted data, 151 | filtered by the specified type. 152 | 153 | :param extracted_data: A list of dictionaries containing file data, content, and extracted items. 154 | :param type: The type of signature to generate ("function" or "class"). 155 | :return: A list of dictionaries with file name, type, and signature. 156 | """ 157 | used, signatures = [], [] 158 | 159 | for file_data in extracted_data: 160 | file_name = file_data["file"] 161 | content = file_data["content"] 162 | 163 | for item in content: 164 | if type == "function" and (item["type"] == "function" or item["type"] == "method"): 165 | func_name = item["name"] 166 | args = ", ".join(item["args"]) 167 | signature = f"{func_name}({args})" 168 | signatures.append({ 169 | "file": file_name, 170 | "type": "function", 171 | "signature": signature 172 | }) 173 | elif type == "class" and item["type"] == "method": 174 | class_name = ".".join(item["name"].split(".")[:-1]) 175 | signature = f"{class_name}" 176 | if signature in used: 177 | continue 178 | used.append(signature) 179 | signatures.append({ 180 | "file": file_name, 181 | "type": "class", 182 | "signature": signature 183 | }) 184 | 185 | return signatures 186 | 187 | def parse_api(response): 188 | """ 189 | Parse the API response and extract the classes and functions. 
190 | The response is wrapped in <function>...</function> or <class>...</class> tags for functions and classes respectively, or left untagged for empty responses. 191 | :param response: The API response from the server 192 | :return: api like "get_random_color()" 193 | """ 194 | function_pattern = r"<function>(.*?)</function>" 195 | class_pattern = r"<class>(.*?)</class>" 196 | function_match = re.search(function_pattern, response) 197 | class_match = re.search(class_pattern, response) 198 | if function_match: 199 | return "function", function_match.group(1) 200 | elif class_match: 201 | return "class", class_match.group(1) 202 | return "empty", None 203 | 204 | 205 | if __name__ == "__main__": 206 | root_directory = "." 207 | output_file = "classes_and_functions.txt" 208 | api_dict = generate_signatures(extract_classes_and_functions_from_directory(root_directory), type="function") 209 | print(api_dict[:10]) 210 | print(f"Extracted {len(api_dict)} function signatures from {root_directory}") -------------------------------------------------------------------------------- /swedev/issues/get_tasks_pipeline.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | import time 5 | import traceback 6 | from concurrent.futures import ThreadPoolExecutor, as_completed 7 | from itertools import cycle 8 | from threading import Lock 9 | 10 | import jsonlines 11 | from dotenv import load_dotenv 12 | from swebench.collect.build_dataset import main as build_dataset 13 | from swebench.collect.print_pulls import main as print_pulls 14 | from tqdm import tqdm 15 | from swedev.config import Config 16 | 17 | load_dotenv() 18 | 19 | downloaded_repos = set() 20 | downloaded_repos_lock = Lock() 21 | 22 | def split_instances(input_list: list, n: int) -> list: 23 | avg_length = len(input_list) // n 24 | remainder = len(input_list) % n 25 | result, start = [], 0 26 | 27 | for i in range(n): 28 | length = avg_length + 1 if i < remainder else avg_length 29 | sublist = input_list[start: start + length] 30 | result.append(sublist) 31 |
start += length 32 | 33 | return result 34 | 35 | def clone_repo(repo_name): 36 | global downloaded_repos 37 | base_dir = Config.local_repo_dir 38 | assert base_dir, "local_repo_dir not configured" 39 | 40 | repo = repo_name.split("/")[-1] 41 | 42 | with downloaded_repos_lock: 43 | if repo in downloaded_repos: 44 | print(f"Repository {repo} has already been cloned, skipping...") 45 | return 46 | 47 | repo_path = os.path.join(base_dir, repo) 48 | os.makedirs(repo_path, exist_ok=True) 49 | if os.path.exists(repo_path) and len(os.listdir(repo_path)): 50 | print(f"Repository {repo} already exists locally, skipping clone...") 51 | with downloaded_repos_lock: 52 | downloaded_repos.add(repo) 53 | return 54 | 55 | for retry in range(5): 56 | try: 57 | subprocess.run( 58 | [ 59 | "git", 60 | "clone", 61 | f"https://github.com/{repo_name}.git", 62 | repo_path 63 | ], 64 | check=True, 65 | cwd=repo_path, 66 | # shell=True removed: combined with a list argv it would execute only "git" 67 | ) 68 | print(f"Successfully cloned {repo_name}") 69 | with downloaded_repos_lock: 70 | downloaded_repos.add(repo) 71 | break 72 | except Exception as e: 73 | print(f"Failed to clone {repo_name} (attempt {retry + 1}/5): {e}") 74 | time.sleep(2 ** retry) 75 | else: 76 | print(f"Failed to clone {repo_name} after 5 attempts.") 77 | 78 | def process_repo(repo, output_folder, max_pulls, cutoff_date, token_iterator, do_clone=True): 79 | repo = repo.strip(",").strip() 80 | repo_name = repo.split("/")[1] 81 | 82 | token = next(token_iterator) 83 | try: 84 | path_prs = os.path.join(output_folder, "prs") 85 | path_tasks = os.path.join(output_folder, "tasks") 86 | os.makedirs(output_folder, exist_ok=True) 87 | os.makedirs(path_prs, exist_ok=True) 88 | os.makedirs(path_tasks, exist_ok=True) 89 | 90 | path_pr = os.path.join(path_prs, f"{repo_name}-prs.jsonl") 91 | if cutoff_date: 92 | path_pr = path_pr.replace(".jsonl", f"-{cutoff_date}.jsonl") 93 | if not os.path.exists(path_pr): 94 | print(f"Pull request data for {repo} not found, creating...") 95 | print_pulls( 96 |
repo, 97 | path_pr, 98 | token, 99 | max_pulls=max_pulls, 100 | cutoff_date=cutoff_date 101 | ) 102 | print(f"✅ Successfully saved PR data for {repo} to {path_pr}") 103 | else: 104 | print(f"📁 Pull request data for {repo} already exists at {path_pr}, skipping...") 105 | 106 | path_task = os.path.join(path_tasks, f"{repo_name}-task-instances.jsonl") 107 | if not os.path.exists(path_task): 108 | print(f"Task instance data for {repo} not found, creating...") 109 | build_dataset(path_pr, path_task, token) 110 | print(f"✅ Successfully saved task instance data for {repo} to {path_task}") 111 | else: 112 | print(f"📁 Task instance data for {repo} already exists at {path_task}, skipping...") 113 | except Exception as e: 114 | print("-" * 80) 115 | print(f"Something went wrong for {repo}, skipping: {e}") 116 | print("Here is the full traceback:") 117 | traceback.print_exc() 118 | print("-" * 80) 119 | 120 | if do_clone: 121 | clone_repo(repo) 122 | 123 | def combine_results(output_folder: str): 124 | print("Start combining results...") 125 | path_tasks = os.path.join(output_folder, "tasks") 126 | all_tasks = [] 127 | files = os.listdir(path_tasks) 128 | for file in tqdm(files): 129 | file_path = os.path.join(path_tasks, file) 130 | if os.path.exists(file_path): 131 | with jsonlines.open(file_path, "r") as f: 132 | all_tasks.extend([d for d in f]) 133 | else: 134 | print(f"Warning: {file_path} does not exist, skipping...") 135 | print("Writing!") 136 | with open(os.path.join(output_folder, "all_tasks.jsonl"), "w") as f: 137 | writer = jsonlines.Writer(f) 138 | writer.write_all(all_tasks) 139 | print("Finished writing results.") 140 | 141 | def main( 142 | repo_file: str, 143 | output_folder: str, 144 | max_pulls: int = None, 145 | cutoff_date: str = None, 146 | num_workers: int = 1, 147 | start_index: int = None, 148 | end_index: int = None, 149 | do_clone: bool = True, **kwargs 150 | ): 151 | 152 | with jsonlines.open(repo_file, "r") as f: 153 | repos = [d for d in f if d["github"]] 154 | repos = [d["github"] 155 | .replace("http://github.com/", "") 156 | .replace("https://github.com/", "") 157 | .replace("git@github.com:", "") 158 | .replace(".git", "") 159 | .replace("github.com/", "") 160 | for d in repos] 161 | for i in range(len(repos)): 162 | if repos[i].endswith("/"): 163 | repos[i] = repos[i][:-1] 164 | repos[i] = '/'.join(repos[i].split('/')[:2]) 165 | 166 | print(f"Total repos: {len(repos)}") 167 | used = [] 168 | used_path = f'{output_folder}/tasks' 169 | if os.path.exists(used_path): 170 | for file in os.listdir(used_path): 171 | if file.endswith("-instances.jsonl"): 172 | used.extend([file.replace("-task-instances.jsonl", "")]) 173 | repos = [r for r in repos if r.split("/")[-1] not in used] 174 | print(f"Remaining repos: {len(repos)}") 175 | if start_index is not None or end_index is not None: 176 | repos = repos[start_index:end_index] 177 | repos = reversed(repos) 178 | tokens = Config.github_tokens 179 | if not tokens: 180 | raise Exception("Missing github_tokens in configuration, add to config file or set GITHUB_TOKENS environment variable") 181 | tokens = [t.strip() for t in tokens.split(",")] 182 | token_iterator = cycle(tokens) 183 | 184 | with ThreadPoolExecutor(max_workers=num_workers) as executor: 185 | futures = [ 186 | executor.submit(process_repo, repo, output_folder, max_pulls, cutoff_date, token_iterator, do_clone) 187 | for repo in repos 188 | ] 189 | for future in tqdm(as_completed(futures), total=len(futures)): 190 | try: 191 | future.result() 192 | except Exception as e: 193 | print(f"Error processing repository: {str(e)}") 194 | 195 | path_tasks = os.path.join(output_folder, "tasks") 196 | all_tasks = [] 197 | files = os.listdir(path_tasks) 198 | for file in files: 199 | file_path = os.path.join(path_tasks, file) 200 | if os.path.exists(file_path): 201 | with jsonlines.open(file_path, "r") as f: 202 | all_tasks.extend([d for d in f]) 203 | else: 204 | print(f"Warning: {file_path} does not exist, skipping...") 205 | 206 | with
open(os.path.join(output_folder, "all_tasks.jsonl"), "w") as f: 207 | writer = jsonlines.Writer(f) 208 | writer.write_all(all_tasks) 209 | 210 | if __name__ == "__main__": 211 | parser = argparse.ArgumentParser(description="GitHub Repo Data Collection") 212 | parser.add_argument("--repo_file", type=str) 213 | parser.add_argument("--output_folder", type=str) 214 | parser.add_argument("--max_pulls", type=int, help="Maximum number of pulls to log", default=None) 215 | parser.add_argument("--cutoff_date", type=str, help="Cutoff date for PRs to consider in format YYYYMMDD", default=None) 216 | parser.add_argument("--num_workers", type=int, help="Parallel worker count.", default=1) 217 | parser.add_argument("--start_index", type=int, help="Start index of the repository list", default=None) 218 | parser.add_argument("--end_index", type=int, help="End index of the repository list", default=None) 219 | parser.add_argument("--combine_results", action="store_true") 220 | parser.add_argument("--do_clone", action="store_true") 221 | args = parser.parse_args() 222 | if args.combine_results: 223 | combine_results(args.output_folder) 224 | exit() 225 | main(**vars(args)) -------------------------------------------------------------------------------- /swedev/utils/localize.py: -------------------------------------------------------------------------------- 1 | 2 | import ast 3 | import logging 4 | import os 5 | import re 6 | import subprocess 7 | from pathlib import Path 8 | from typing import List, Tuple 9 | 10 | from swedev.utils.utils import clone_repo 11 | from swedev.utils.preprocess import parse_python_file 12 | from swedev.config import Config 13 | from swedev.utils.preprocess import filter_none_python, filter_out_test_files 14 | 15 | def has_python_files(path, max_depth=3, current_depth=0): 16 | if current_depth >= max_depth: 17 | return False 18 | try: 19 | for entry in path.iterdir(): 20 | if entry.is_file() and entry.suffix == '.py': 21 | return True 22 | if entry.is_dir(): 23 |
if has_python_files(entry, max_depth, current_depth + 1): 24 | return True 25 | except Exception as e: 26 | return False 27 | 28 | return False 29 | 30 | def get_tree_string(directory, max_depth=3): 31 | result = [] 32 | counts = {'dirs': 0, 'files': 0} 33 | def inner_tree(path, prefix="", depth=0): 34 | if depth >= max_depth: 35 | return 36 | valid_entries = [] 37 | for entry in path.iterdir(): 38 | if entry.is_file() and entry.suffix == '.py' and not "test" in entry.name: 39 | valid_entries.append(entry) 40 | elif entry.is_dir() and has_python_files(entry, max_depth, depth + 1): 41 | valid_entries.append(entry) 42 | 43 | valid_entries.sort(key=lambda x: (not x.is_dir(), x.name.lower())) 44 | for i, entry in enumerate(valid_entries): 45 | is_last = i == len(valid_entries) - 1 46 | symbol = "└── " if is_last else "├── " 47 | next_prefix = prefix + (" " if is_last else "│ ") 48 | 49 | result.append(f"{prefix}{symbol}{entry.name}") 50 | 51 | if entry.is_dir(): 52 | counts['dirs'] += 1 53 | inner_tree(entry, next_prefix, depth + 1) 54 | else: 55 | counts['files'] += 1 56 | 57 | root = Path(directory) 58 | if has_python_files(root, max_depth): 59 | result.append(root.name) 60 | inner_tree(root) 61 | result.append(f"\n{counts['dirs']} directories, {counts['files']} Python files") 62 | else: 63 | result.append(f"{root.name} (no Python files)") 64 | return '\n'.join(result) 65 | 66 | def parse_patch(patch_content: str) -> List[Tuple[str, int, int]]: 67 | file_ranges = [] 68 | file_path = None 69 | for line in patch_content.splitlines(): 70 | if line.startswith("diff --git"): 71 | match = re.search(r"diff --git a/(\S+) b/\1", line) 72 | if match: 73 | file_path = match.group(1) 74 | elif line.startswith("@@") and file_path: 75 | match = re.search(r"@@ -\d+,\d+ \+(\d+),(\d+) @@", line) 76 | if match: 77 | start_line = int(match.group(1)) 78 | length = int(match.group(2)) 79 | file_ranges.append((file_path, start_line, start_line + length - 1)) 80 | return file_ranges 81 | 
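Editor's note: a standalone sketch of how `parse_patch` above maps unified-diff hunk headers to line ranges. The function body is re-stated here so the snippet runs on its own, and the sample diff is invented for illustration:

```python
import re
from typing import List, Tuple

def parse_patch(patch_content: str) -> List[Tuple[str, int, int]]:
    # Same logic as in swedev/utils/localize.py: pair each "@@" hunk
    # header with the file named in the preceding "diff --git" line.
    file_ranges = []
    file_path = None
    for line in patch_content.splitlines():
        if line.startswith("diff --git"):
            match = re.search(r"diff --git a/(\S+) b/\1", line)
            if match:
                file_path = match.group(1)
        elif line.startswith("@@") and file_path:
            match = re.search(r"@@ -\d+,\d+ \+(\d+),(\d+) @@", line)
            if match:
                start_line = int(match.group(1))
                length = int(match.group(2))
                file_ranges.append((file_path, start_line, start_line + length - 1))
    return file_ranges

sample = """diff --git a/pkg/mod.py b/pkg/mod.py
--- a/pkg/mod.py
+++ b/pkg/mod.py
@@ -10,7 +12,8 @@ def foo():
 unchanged context line
"""
print(parse_patch(sample))  # [('pkg/mod.py', 12, 19)]
```

The `+12,8` half of the hunk header means "new file, starting at line 12, 8 lines long", hence the inclusive range (12, 19).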
82 | def get_code_block(node: ast.AST, lines: List[str]) -> str: 83 | if hasattr(node, 'lineno') and hasattr(node, 'end_lineno'): 84 | start = node.lineno - 1 85 | end = node.end_lineno 86 | return "".join(lines[start:end]) 87 | return "" 88 | 89 | def find_containing_blocks(file_path: str, start_line: int, end_line: int) -> str: 90 | with open(file_path, 'r') as f: 91 | source = f.read() 92 | lines = source.splitlines(keepends=True) 93 | blocks = [] 94 | 95 | try: 96 | def safe_parse(source): 97 | clean_source = re.sub(r'[^\x00-\x7F]+', '', source) 98 | return ast.parse(clean_source) 99 | tree = safe_parse(source) 100 | 101 | def is_line_in_node(node, start, end): 102 | return (node.lineno <= start <= node.end_lineno or 103 | node.lineno <= end <= node.end_lineno or 104 | (start <= node.lineno and end >= node.end_lineno)) 105 | 106 | def find_blocks_in_node(node, lines): 107 | if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef)): 108 | if is_line_in_node(node, start_line, end_line): 109 | blocks.append((node.lineno, get_code_block(node, lines))) 110 | 111 | for child in ast.iter_child_nodes(node): 112 | try: 113 | find_blocks_in_node(child, lines) 114 | except: 115 | continue 116 | 117 | find_blocks_in_node(tree, lines) 118 | sorted_blocks = [block for _, block in sorted(blocks, key=lambda x: x[0])] 119 | return "\n".join(sorted_blocks) 120 | 121 | except Exception as e: 122 | return None 123 | 124 | def get_location(data): 125 | """Process single instance.""" 126 | structure = get_project_structure_from_scratch( 127 | data["repo"], data["base_commit"], data["instance_id"], Config.playground_path 128 | ) 129 | if not structure: 130 | print('[No structure found]') 131 | return None 132 | instance_id = structure["instance_id"] 133 | 134 | structure = structure["structure"] 135 | filter_none_python(structure) 136 | filter_out_test_files(structure) 137 | 138 | # localize in file patches 139 | patch = data["patch"] 140 | file_ranges = 
parse_patch(patch) 141 | repo_name = data["repo"] 142 | commit_id = data["base_commit"] 143 | repo_id = f'{instance_id}_{repo_name.replace("/", "_")}_{commit_id}' 144 | repo_playground = os.path.join(Config.playground_path, repo_id, repo_name.split("/")[-1]) 145 | 146 | try: 147 | subprocess.run(['git', 'add', '.'], cwd=repo_playground, capture_output=True, text=True) 148 | subprocess.run(['git', 'stash'], cwd=repo_playground, capture_output=True, text=True) 149 | subprocess.run(['git', 'stash', 'clear'], cwd=repo_playground, capture_output=True, text=True) 150 | subprocess.run(['git', 'checkout', commit_id], cwd=repo_playground, capture_output=True, text=True) 151 | except Exception as e: 152 | pass 153 | 154 | patch_blocks = [] 155 | for file_path, start_line, end_line in file_ranges: 156 | if not file_path.endswith(".py"): 157 | continue 158 | try: 159 | code_block = find_containing_blocks(os.path.join(repo_playground, file_path), start_line, end_line) 160 | if code_block: 161 | patch_blocks.append({ 162 | "file": file_path, 163 | "code": code_block 164 | }) 165 | except Exception as e: 166 | pass 167 | project_tree = get_tree_string(repo_playground).strip() 168 | return { 169 | "patch_blocks": patch_blocks, 170 | "project_tree": project_tree 171 | } 172 | 173 | def create_structure(directory_path): 174 | """Create the structure of the repository directory by parsing Python files. 175 | :param directory_path: Path to the repository directory. 176 | :return: A dictionary representing the structure. 
177 | """ 178 | structure = {} 179 | 180 | for root, _, files in os.walk(directory_path): 181 | repo_name = os.path.basename(directory_path) 182 | relative_root = os.path.relpath(root, directory_path) 183 | if relative_root == ".": 184 | relative_root = repo_name 185 | curr_struct = structure 186 | for part in relative_root.split(os.sep): 187 | if part not in curr_struct: 188 | curr_struct[part] = {} 189 | curr_struct = curr_struct[part] 190 | for file_name in files: 191 | if file_name.endswith(".py"): 192 | file_path = os.path.join(root, file_name) 193 | class_info, function_names, file_lines = parse_python_file(file_path) 194 | curr_struct[file_name] = { 195 | "classes": class_info, 196 | "functions": function_names, 197 | "text": file_lines, 198 | } 199 | else: 200 | curr_struct[file_name] = {} 201 | 202 | return structure 203 | 204 | def get_project_structure_from_scratch(repo, commit_id, instance_id, repo_playground): 205 | """Get the project structure from scratch 206 | :param repo: Repository name 207 | :param commit_id: Commit ID 208 | :param instance_id: Instance ID 209 | :param repo_playground: Repository playground 210 | :return: Project structure 211 | """ 212 | repo_id = f'{instance_id}_{repo.replace("/", "_")}_{commit_id}' 213 | repo_path = f"{repo_playground}/{repo_id}/{repo.split('/')[-1]}" 214 | if not os.path.exists(repo_path) or not os.path.exists(os.path.join(repo_path, "setup.py")) \ 215 | and not os.path.exists(os.path.join(repo_path, "pyproject.toml")): 216 | os.makedirs(f"{repo_playground}/{repo_id}", exist_ok=True) 217 | clone_repo(repo, f"{repo_playground}/{repo_id}") 218 | subprocess.run(['git', 'checkout', commit_id], cwd=repo_path, capture_output=True, text=True) 219 | structure = create_structure(repo_path) 220 | repo_info = { 221 | "repo": repo, 222 | "base_commit": commit_id, 223 | "structure": structure, 224 | "instance_id": instance_id, 225 | } 226 | return repo_info 
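Editor's note: a minimal sketch of the nested-dict shape that `create_structure` produces, runnable without the `swedev` package. The `parse_python_file` stand-in below is a trimmed approximation of the real one in `swedev/utils/preprocess.py` (it returns bare names rather than full class/function records):

```python
import ast
import os
import tempfile

def parse_python_file(path):
    # Trimmed stand-in for swedev.utils.preprocess.parse_python_file.
    with open(path) as f:
        src = f.read()
    tree = ast.parse(src)
    classes = [n.name for n in ast.walk(tree) if isinstance(n, ast.ClassDef)]
    functions = [n.name for n in ast.walk(tree) if isinstance(n, ast.FunctionDef)]
    return classes, functions, src.splitlines()

def build_structure(directory_path):
    # Mirrors create_structure: nested dicts keyed by path parts, with a
    # {"classes", "functions", "text"} leaf for every .py file.
    structure = {}
    repo_name = os.path.basename(directory_path)
    for root, _, files in os.walk(directory_path):
        rel = os.path.relpath(root, directory_path)
        rel = repo_name if rel == "." else rel
        node = structure
        for part in rel.split(os.sep):
            node = node.setdefault(part, {})
        for name in files:
            if name.endswith(".py"):
                cls, fns, text = parse_python_file(os.path.join(root, name))
                node[name] = {"classes": cls, "functions": fns, "text": text}
            else:
                node[name] = {}
    return structure

with tempfile.TemporaryDirectory() as repo:
    with open(os.path.join(repo, "mod.py"), "w") as f:
        f.write("class A:\n    pass\n\ndef helper():\n    pass\n")
    top = build_structure(repo)[os.path.basename(repo)]
    print(top["mod.py"]["classes"], top["mod.py"]["functions"])  # ['A'] ['helper']
```

The root directory appears under its own basename, which is why `get_location` can filter and walk the result uniformly.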
-------------------------------------------------------------------------------- /swedev/utils/preprocess.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | def parse_python_file(file_path, file_content=None): 4 | """Parse a Python file to extract class and function definitions with their line numbers. 5 | :param file_path: Path to the Python file. 6 | :return: Class names, function names, and file contents 7 | """ 8 | if file_content is None: 9 | try: 10 | with open(file_path, "r") as file: 11 | file_content = file.read() 12 | parsed_data = ast.parse(file_content) 13 | except Exception as e: # Catch all types of exceptions 14 | # print(f"Error in file {file_path}: {e}") 15 | return [], [], "" 16 | else: 17 | try: 18 | parsed_data = ast.parse(file_content) 19 | except Exception as e: # Catch all types of exceptions 20 | # print(f"Error in file {file_path}: {e}") 21 | return [], [], "" 22 | 23 | class_info = [] 24 | function_names = [] 25 | class_methods = set() 26 | 27 | for node in ast.walk(parsed_data): 28 | if isinstance(node, ast.ClassDef): 29 | methods = [] 30 | for n in node.body: 31 | if isinstance(n, ast.FunctionDef): 32 | methods.append( 33 | { 34 | "name": n.name, 35 | "start_line": n.lineno, 36 | "end_line": n.end_lineno, 37 | "text": file_content.splitlines()[ 38 | n.lineno - 1 : n.end_lineno 39 | ], 40 | } 41 | ) 42 | class_methods.add(n.name) 43 | class_info.append( 44 | { 45 | "name": node.name, 46 | "start_line": node.lineno, 47 | "end_line": node.end_lineno, 48 | "text": file_content.splitlines()[ 49 | node.lineno - 1 : node.end_lineno 50 | ], 51 | "methods": methods, 52 | } 53 | ) 54 | elif isinstance(node, ast.FunctionDef) and not isinstance(node, ast.AsyncFunctionDef): 55 | if node.name not in class_methods: 56 | function_names.append( 57 | { 58 | "name": node.name, 59 | "start_line": node.lineno, 60 | "end_line": node.end_lineno, 61 | "text": file_content.splitlines()[ 62 | node.lineno - 
1 : node.end_lineno 63 | ], 64 | } 65 | ) 66 | return class_info, function_names, file_content.splitlines() 67 | 68 | def line_wrap_content( 69 | content: str, 70 | context_intervals=None, 71 | add_space=False, 72 | no_line_number=False, 73 | sticky_scroll=False, 74 | ): 75 | """add n| to each line, where n increases""" 76 | 77 | def is_scope(line): 78 | return line.startswith("class ") or line.strip().startswith("def ") 79 | 80 | lines = content.split("\n") 81 | new_lines = [] 82 | if context_intervals is None or context_intervals == []: 83 | context_intervals = [(0, len(lines))] 84 | 85 | prev_scopes = [] 86 | line_format = "{line}" 87 | if not no_line_number: 88 | line_format = ( 89 | "{line_number}|{line}" if not add_space else "{line_number}| {line} " 90 | ) 91 | for interval in context_intervals: 92 | min_line, max_line = interval 93 | 94 | if min_line != 0: 95 | new_lines.append("...") 96 | 97 | scopes = [] 98 | for i, line in enumerate(lines): 99 | if sticky_scroll: 100 | # add current line to scope if necessary 101 | if is_scope(line): 102 | indent_level = len(line) - len(line.lstrip()) 103 | while scopes and scopes[-1]["indent_level"] >= indent_level: 104 | scopes.pop() 105 | scopes.append( 106 | {"line": line, "line_number": i, "indent_level": indent_level} 107 | ) 108 | 109 | if min_line != -1 and i < min_line - 1: 110 | continue 111 | if sticky_scroll and i == min_line - 1: 112 | # add scope lines 113 | last_scope_line = None 114 | for j, scope_line in enumerate(scopes): 115 | # don't repeat previous scopes 116 | if ( 117 | len(prev_scopes) > j 118 | and prev_scopes[j]["line_number"] == scope_line["line_number"] 119 | ): 120 | continue 121 | # don't repeat current line 122 | if i == scope_line["line_number"]: 123 | continue 124 | new_lines.append( 125 | line_format.format( 126 | line_number=scope_line["line_number"] + 1, 127 | line=scope_line["line"], 128 | ) 129 | ) 130 | last_scope_line = scope_line["line_number"] 131 | if last_scope_line is not None 
and last_scope_line < i - 1: 132 | new_lines.append("...") 133 | 134 | new_lines.append(line_format.format(line_number=i + 1, line=line)) 135 | if max_line != -1 and i >= max_line - 1: 136 | break 137 | prev_scopes = scopes 138 | 139 | if max_line != len(lines): 140 | new_lines.append("...") 141 | 142 | return "\n".join(new_lines) 143 | 144 | def show_project_structure(structure, spacing=0) -> str: 145 | """pprint the project structure""" 146 | 147 | pp_string = "" 148 | 149 | for key, value in structure.items(): 150 | if "." in key and ".py" not in key: 151 | continue # skip none python files 152 | if "." in key: 153 | pp_string += " " * spacing + str(key) + "\n" 154 | else: 155 | pp_string += " " * spacing + str(key) + "/" + "\n" 156 | if "classes" not in value: 157 | pp_string += show_project_structure(value, spacing + 4) 158 | 159 | return pp_string 160 | 161 | 162 | def filter_out_test_files(structure): 163 | """filter out test files from the project structure""" 164 | for key, value in list(structure.items()): 165 | if key.startswith("test"): 166 | del structure[key] 167 | elif isinstance(value, dict): 168 | filter_out_test_files(value) 169 | 170 | 171 | def filter_none_python(structure): 172 | for key, value in list(structure.items()): 173 | if ( 174 | not "functions" in value.keys() 175 | and not "classes" in value.keys() 176 | and not "text" in value.keys() 177 | ) or not len(value.keys()) == 3: 178 | filter_none_python(value) 179 | 180 | if structure[key] == {}: 181 | del structure[key] 182 | else: 183 | if not key.endswith(".py"): 184 | del structure[key] 185 | 186 | 187 | def get_full_file_paths_and_classes_and_functions(structure, current_path=""): 188 | """ 189 | Recursively retrieve all file paths, classes, and functions within a directory structure. 
190 | 191 | Arguments: 192 | structure -- a dictionary representing the directory structure 193 | current_path -- the path accumulated so far, used during recursion (default="") 194 | 195 | Returns: 196 | A tuple containing: 197 | - files: list of full file paths 198 | - classes: list of class details with file paths 199 | - functions: list of function details with file paths 200 | """ 201 | files = [] 202 | classes = [] 203 | functions = [] 204 | for name, content in structure.items(): 205 | if isinstance(content, dict): 206 | if ( 207 | not "functions" in content.keys() 208 | and not "classes" in content.keys() 209 | and not "text" in content.keys() 210 | ) or not len(content.keys()) == 3: 211 | # or guards against case where functions and classes are somehow part of the structure. 212 | next_path = f"{current_path}/{name}" if current_path else name 213 | ( 214 | sub_files, 215 | sub_classes, 216 | sub_functions, 217 | ) = get_full_file_paths_and_classes_and_functions(content, next_path) 218 | files.extend(sub_files) 219 | classes.extend(sub_classes) 220 | functions.extend(sub_functions) 221 | else: 222 | next_path = f"{current_path}/{name}" if current_path else name 223 | files.append((next_path, content["text"])) 224 | if "classes" in content: 225 | for clazz in content["classes"]: 226 | classes.append( 227 | { 228 | "file": next_path, 229 | "name": clazz["name"], 230 | "start_line": clazz["start_line"], 231 | "end_line": clazz["end_line"], 232 | "methods": [ 233 | { 234 | "name": method["name"], 235 | "start_line": method["start_line"], 236 | "end_line": method["end_line"], 237 | } 238 | for method in clazz.get("methods", []) 239 | ], 240 | } 241 | ) 242 | if "functions" in content: 243 | for function in content["functions"]: 244 | function["file"] = next_path 245 | functions.append(function) 246 | else: 247 | next_path = f"{current_path}/{name}" if current_path else name 248 | files.append(next_path) 249 | return files, classes, functions 
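Editor's note: `show_project_structure` earlier in this file pretty-prints the same nested-dict structure consumed here. A standalone sketch (the function is re-stated so the snippet runs on its own; the sample structure dict is invented):

```python
def show_project_structure(structure, spacing=0) -> str:
    # Re-statement of the pretty-printer from swedev/utils/preprocess.py:
    # directories get a trailing "/", .py leaves are printed as-is, and
    # recursion stops at leaves (dicts that carry a "classes" key).
    pp_string = ""
    for key, value in structure.items():
        if "." in key and ".py" not in key:
            continue  # skip non-Python files
        if "." in key:
            pp_string += " " * spacing + str(key) + "\n"
        else:
            pp_string += " " * spacing + str(key) + "/" + "\n"
        if "classes" not in value:
            pp_string += show_project_structure(value, spacing + 4)
    return pp_string

structure = {
    "pkg": {
        "core.py": {"classes": [], "functions": [], "text": []},
        "sub": {
            "util.py": {"classes": [], "functions": [], "text": []},
        },
    }
}
print(show_project_structure(structure))
```

Expected rendering: `pkg/`, then `core.py` and `sub/` indented four spaces, then `util.py` indented eight.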
-------------------------------------------------------------------------------- /swedev/crawl/get_top_pypi.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import random 5 | import time 6 | from multiprocessing import Pool 7 | 8 | from bs4 import BeautifulSoup 9 | from ghapi.core import GhApi 10 | from selenium import webdriver 11 | from selenium.webdriver.chrome.options import Options 12 | from selenium.webdriver.common.by import By 13 | from tqdm import tqdm 14 | from swedev.config import Config 15 | 16 | if not Config.github_tokens: 17 | msg = "GitHub tokens not configured. Please configure github_tokens in your config file or set the GITHUB_TOKENS environment variable." 18 | raise ValueError(msg) 19 | apis = [GhApi(token=gh_token) for gh_token in Config.github_tokens] 20 | print(f"Loaded {len(apis)} GitHub token(s)")  # avoid echoing token values to logs 21 | 22 | def get_api(): 23 | return random.choice(apis) 24 | 25 | def setup_driver(): 26 | """Setup and return a Chrome webdriver""" 27 | options = Options() 28 | options.add_argument('--headless') 29 | options.add_argument('--disable-gpu') 30 | options.add_argument('--disable-dev-shm-usage') 31 | options.add_argument('--no-sandbox') 32 | options.add_argument('--enable-javascript') 33 | options.add_argument('--disable-blink-features=AutomationControlled') 34 | options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36') 35 | return webdriver.Chrome(options=options) 36 | 37 | def process_package(args): 38 | """Process a single package""" 39 | idx, title, href = args 40 | driver = setup_driver() 41 | 42 | try: 43 | package_name = title 44 | package_url = href 45 | 46 | package_github = None 47 | driver.get(package_url) 48 | time.sleep(2) # Wait for the page to load 49 | 50 | try: 51 | # Try to find GitHub link using JavaScript 52 | github_link =
driver.execute_script(""" 53 | const links = Array.from(document.querySelectorAll('a.vertical-tabs__tab--with-icon')); 54 | for (const link of links) { 55 | const text = link.textContent.toLowerCase(); 56 | const href = link.href.toLowerCase(); 57 | if ((text.includes('source') || text.includes('code') || text.includes('homepage')) && href.includes('github')) { 58 | return link.href; 59 | } 60 | } 61 | return null; 62 | """) 63 | 64 | if github_link: 65 | package_github = github_link 66 | except: 67 | # Fallback to BeautifulSoup if JavaScript execution fails 68 | soup = BeautifulSoup(driver.page_source, "html.parser") 69 | for link in soup.find_all("a", class_="vertical-tabs__tab--with-icon"): 70 | found = False 71 | for x in ["source", "code", "homepage"]: 72 | if ( 73 | x in link.get_text().lower() 74 | and "github" in link["href"].lower() 75 | ): 76 | package_github = link["href"] 77 | found = True 78 | break 79 | if found: 80 | break 81 | 82 | stars_count, pulls_count = None, None 83 | if package_github is not None: 84 | try: 85 | # Extract owner and repo name 86 | if "github.com" in package_github: 87 | repo_parts = package_github.split("github.com/")[-1].split("/") 88 | if len(repo_parts) >= 2: 89 | owner, name = repo_parts[0], repo_parts[1].split("#")[0].split("?")[0] 90 | 91 | repo = get_api().repos.get(owner, name) 92 | stars_count = int(repo["stargazers_count"]) 93 | pulls = get_api().pulls.list(owner, name, state="all", per_page=1) 94 | if pulls: 95 | pulls_count = pulls[0]["number"] 96 | except Exception as e: 97 | print(f"Error getting GitHub stats for {package_name}: {str(e)}") 98 | 99 | result = { 100 | "rank": idx, 101 | "name": package_name, 102 | "url": package_url, 103 | "github": package_github, 104 | "stars": stars_count, 105 | "pulls": pulls_count, 106 | } 107 | 108 | return result 109 | 110 | except Exception as e: 111 | print(f"Error processing package {title}: {str(e)}") 112 | return None 113 | 114 | finally: 115 | driver.quit() 116 | 117 | 
def get_package_stats(data_tasks, output_file, num_workers, start_at=0): 118 | """ 119 | Get package stats from PyPI page using multiple processes 120 | 121 | Args: 122 | data_tasks (list): List of packages + HTML 123 | output_file (str): File to write to 124 | num_workers (int): Number of worker processes 125 | start_at (int): Index to start processing from 126 | """ 127 | print(f"Processing {len(data_tasks)} packages") 128 | 129 | processed_urls = set() 130 | if os.path.exists(output_file): 131 | with open(output_file, "r") as f: 132 | for line in f: 133 | try: 134 | data = json.loads(line) 135 | processed_urls.add(data["url"]) 136 | except: 137 | continue 138 | 139 | tasks = [ 140 | (idx, chunk["title"], chunk["href"]) 141 | for idx, chunk in enumerate(data_tasks[start_at:], start_at) 142 | if chunk["href"] not in processed_urls 143 | ] 144 | 145 | if not tasks: 146 | print("All packages have been processed already") 147 | return 148 | 149 | with Pool(processes=num_workers) as pool: 150 | for result in tqdm( 151 | pool.imap_unordered(process_package, tasks), 152 | total=len(tasks), 153 | desc="Processing packages" 154 | ): 155 | if result: 156 | with open(output_file, "a") as f: 157 | print(json.dumps(result), file=f, flush=True) 158 | 159 | def main(): 160 | parser = argparse.ArgumentParser() 161 | parser.add_argument("--max_repos", help="Maximum number of repos to get", type=int, default=5000) 162 | parser.add_argument("--output_folder", type=str, default="results/packages") 163 | parser.add_argument("--num_workers", type=int, default=4, help="Number of worker processes") 164 | parser.add_argument("--start_at", type=int, default=0, help="Index to start processing packages from") 165 | args = parser.parse_args() 166 | 167 | url_top_pypi = "https://hugovk.github.io/top-pypi-packages/" 168 | driver = setup_driver() 169 | 170 | try: 171 | print("Chrome started successfully!") 172 | driver.get(url_top_pypi) 173 | 174 | # Wait for page to fully load 175 | 
time.sleep(5) 176 | 177 | try: 178 | # Use JavaScript to click the button 179 | driver.execute_script(""" 180 | const buttons = Array.from(document.querySelectorAll('button')); 181 | for (const button of buttons) { 182 | if (button.textContent.includes('15000')) { 183 | button.click(); 184 | return true; 185 | } 186 | } 187 | return false; 188 | """) 189 | print("Clicked button via JavaScript") 190 | except: 191 | # Fallback to selenium if JavaScript fails 192 | try: 193 | button = driver.find_element(By.CSS_SELECTOR, 'button[ng-click="show(15000)"]') 194 | button.click() 195 | print("Clicked button via Selenium") 196 | except: 197 | print("Failed to click button, trying to find other versions") 198 | buttons = driver.find_elements(By.TAG_NAME, 'button') 199 | for btn in buttons: 200 | if "15000" in btn.text: 201 | btn.click() 202 | print("Found and clicked alternative button") 203 | break 204 | 205 | # Wait for the content to load (longer wait time) 206 | time.sleep(10) 207 | 208 | print("Getting package stats") 209 | 210 | package_data = driver.execute_script(""" 211 | const packages = Array.from(document.querySelectorAll('div.list a.ng-scope')); 212 | return packages.map(pkg => { 213 | const fullText = pkg.textContent.trim(); 214 | // Extract just the package name, removing rank and download numbers 215 | const packageName = fullText.split('\\n')[1].trim(); 216 | return { 217 | title: packageName, 218 | href: pkg.href 219 | }; 220 | }); 221 | """) 222 | 223 | if not package_data: 224 | print("JavaScript extraction failed, using BeautifulSoup...") 225 | soup = BeautifulSoup(driver.page_source, "html.parser") 226 | package_list = soup.find("div", {"class": "list"}) 227 | 228 | if not package_list: 229 | print("BeautifulSoup couldn't find package list, using WebDriver directly...") 230 | packages = driver.find_elements(By.CSS_SELECTOR, 'div.list a.ng-scope') 231 | package_data = [] 232 | for pkg in packages: 233 | full_text = pkg.text.strip() 234 | # Extract just 
the package name, removing rank and download numbers 235 | parts = full_text.split('\n') 236 | if len(parts) > 1: 237 | package_name = parts[1].strip() 238 | package_data.append({"title": package_name, "href": pkg.get_attribute("href")}) 239 | else: 240 | packages = package_list.find_all("a", class_="ng-scope") 241 | package_data = [] 242 | for pkg in packages: 243 | full_text = pkg.get_text().strip() 244 | # Extract just the package name, removing rank and download numbers 245 | parts = full_text.split('\n') 246 | if len(parts) > 1: 247 | package_name = parts[1].strip() 248 | package_data.append({"title": package_name, "href": pkg["href"]}) 249 | 250 | print(f"Found {len(package_data)} packages, will use top {args.max_repos} packages!") 251 | 252 | package_data = package_data[:args.max_repos] 253 | 254 | print(f"Will save to {args.output_folder}") 255 | if not os.path.exists(args.output_folder): 256 | os.makedirs(args.output_folder) 257 | 258 | output_file = f"{args.output_folder}/pypi_rankings.jsonl" 259 | get_package_stats( 260 | package_data, 261 | output_file, 262 | args.num_workers, 263 | start_at=args.start_at 264 | ) 265 | 266 | finally: 267 | driver.quit() 268 | 269 | if __name__ == "__main__": 270 | main() -------------------------------------------------------------------------------- /swedev/issues/filter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import jsonlines 3 | from typing import Dict, Any, List, Optional 4 | import re 5 | from openai import OpenAI 6 | from tqdm import tqdm 7 | import concurrent.futures 8 | import os 9 | import time 10 | import argparse 11 | 12 | def get_llm(prompt: str, api_key: str, base_url: str) -> Optional[str]: 13 | """Call LLM API with basic retry logic""" 14 | for attempt in range(3): 15 | try: 16 | client = OpenAI(api_key=api_key, base_url=base_url) 17 | 18 | response = client.chat.completions.create( 19 | messages=[{"role": "user", "content": prompt}], 20 | 
model="glm-4-flash", 21 | temperature=0.7, 22 | top_p=0.8, 23 | stream=False, 24 | max_tokens=1024 25 | ) 26 | 27 | return response.choices[0].message.content 28 | except Exception as e: 29 | print(f"API call attempt {attempt+1}/3 failed: {str(e)}") 30 | if attempt < 2: 31 | time.sleep(2) 32 | 33 | return None 34 | 35 | def create_vague_problem_statement_prompt(instance): 36 | prompt = f""" 37 | Evaluate if this problem statement is extremely vague based on these criteria: 38 | 1. Completely missing any specifics about what to implement/fix 39 | 2. Severely lacking any context or background information 40 | 3. No clear requirements at all 41 | 4. Entirely undefined scope/boundaries 42 | 5. No technical specifications whatsoever 43 | 6. Provides no actionable information for a developer 44 | 7. Contains major contradictions or inconsistencies 45 | 46 | Problem Statement: {instance["problem_statement"]} 47 | 48 | Return your judgment as: True ONLY if extremely vague and completely unusable, or False otherwise. 49 | Be very lenient in your evaluation - if the problem statement provides ANY useful information that could help a developer start working, consider it acceptable and return False. 50 | Only mark as True if the problem statement is so vague that it would be completely impossible to work with. 
51 | """ 52 | return prompt 53 | 54 | def is_vague_problem_statement(instance, api_key, base_url): 55 | prompt = create_vague_problem_statement_prompt(instance) 56 | response = get_llm(prompt, api_key, base_url) 57 | 58 | if not response: 59 | return False # Consider not vague if API fails (less strict) 60 | 61 | try: 62 | match = re.search(r'(True|False)', response, re.IGNORECASE) 63 | if match: 64 | result = match.group(1).lower() 65 | return result == 'true' 66 | 67 | if 'true' in response.lower() and 'false' not in response.lower(): 68 | return True 69 | elif 'false' in response.lower() and 'true' not in response.lower(): 70 | return False 71 | 72 | print(f"Warning: Unclear response from LLM: {response}") 73 | return False 74 | except Exception as e: 75 | print(f"Error parsing LLM response: {e}") 76 | return False 77 | 78 | def check_code_quality(instance, min_patch_length=50, max_patch_length=200000): 79 | """Check basic code quality based on heuristics, with more lenient thresholds""" 80 | patch = instance.get("patch", "") 81 | 82 | # Check patch length (more lenient) 83 | if len(patch) < min_patch_length: 84 | return False, "Patch too short" 85 | 86 | if len(patch) > max_patch_length: 87 | return False, "Patch too long" 88 | 89 | # Check for test files only 90 | test_files_only = True 91 | file_pattern = re.compile(r"diff --git a/(.*?) 
b/") 92 | file_matches = file_pattern.findall(patch) 93 | 94 | if not file_matches: 95 | return False, "No files found in patch" 96 | 97 | for file in file_matches: 98 | if not (file.endswith("test.py") or file.endswith("tests.py") or 99 | "test/" in file or "spec/" in file or file.endswith("_test.go")): 100 | test_files_only = False 101 | break 102 | 103 | if test_files_only and len(file_matches) > 0: 104 | return False, "Patch contains only test files" 105 | 106 | # Check for meaningful changes (not just comments or whitespace) - more lenient 107 | content_lines = 0 108 | added_lines = re.findall(r'\n\+[^\+]', patch) 109 | for line in added_lines: 110 | stripped = line.replace('\n+', '').strip() 111 | if stripped and not stripped.startswith('//') and not stripped.startswith('#'): 112 | content_lines += 1 113 | 114 | if content_lines < 3: # Reduced from 5 to 3 115 | return False, "Too few meaningful added lines" 116 | 117 | return True, "Passed code quality checks" 118 | 119 | def check_problem_quality(instance, disallowed_phrases): 120 | """Check problem statement quality based on heuristics""" 121 | problem = instance.get("problem_statement", "") 122 | 123 | # Check length - more lenient 124 | if len(problem) < 30: # Reduced from 50 to 30 125 | return False, "Problem statement too short" 126 | 127 | if len(problem) > 3000: # Increased from 2000 to 3000 128 | return False, "Problem statement too long" 129 | 130 | # Check for disallowed phrases 131 | lower_problem = problem.lower() 132 | for phrase in disallowed_phrases: 133 | if phrase.lower() in lower_problem: 134 | return False, f"Problem contains banned phrase: {phrase}" 135 | 136 | return True, "Passed problem quality checks" 137 | 138 | def process_instance(instance, api_key, base_url): 139 | try: 140 | # Check if required fields exist 141 | for field in ["problem_statement", "patch"]: 142 | if field not in instance or not instance[field]: 143 | return None 144 | 145 | # Check if ci_name_list is empty 146 | 
if "ci_name_list" not in instance or not instance["ci_name_list"] or len(instance["ci_name_list"]) <= 3: 147 | return None 148 | 149 | # Step 1: Files that have more than 5 diffs 150 | count = instance["patch"].count("diff --git a/") 151 | if count > 5: 152 | return None 153 | 154 | # Step 2: Check basic code quality 155 | code_passed, code_reason = check_code_quality(instance) 156 | if not code_passed: 157 | return None 158 | 159 | # Step 3: Check problem quality using custom heuristics 160 | disallowed_phrases = [] # Add any disallowed phrases here if needed 161 | problem_passed, problem_reason = check_problem_quality(instance, disallowed_phrases) 162 | if not problem_passed: 163 | return None 164 | 165 | # Step 4: Check if the problem statement is vague (using LLM) 166 | if is_vague_problem_statement(instance, api_key, base_url): 167 | return None 168 | 169 | return instance 170 | except Exception as e: 171 | print(f"Error processing instance: {str(e)}") 172 | return None 173 | 174 | def process_data_parallel(instances, output_file, api_key, base_url, num_workers=50): 175 | results = [] 176 | processed_count = 0 177 | rejected_count = 0 178 | 179 | rejection_stats = { 180 | "total_rejected": 0, 181 | "reasons": { 182 | "too_many_diffs": 0, 183 | "poor_problem_quality": 0, 184 | "poor_code_quality": 0, 185 | "vague_problem": 0, 186 | "empty_ci_name_list": 0, 187 | "missing_required_fields": 0, 188 | "processing_error": 0 189 | } 190 | } 191 | 192 | with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: 193 | futures = [ 194 | executor.submit(process_instance, instance, api_key, base_url) 195 | for instance in instances 196 | ] 197 | 198 | with tqdm(total=len(futures), desc="Processing instances") as progress: 199 | for future in concurrent.futures.as_completed(futures): 200 | try: 201 | result = future.result() 202 | if result: 203 | results.append(result) 204 | with open(output_file, "a") as f: 205 | f.write(json.dumps(result) + "\n") 
206 | processed_count += 1 207 | else: 208 | rejected_count += 1 209 | except Exception as e: 210 | print(f"Error in processing: {str(e)}") 211 | rejected_count += 1 212 | rejection_stats["reasons"]["processing_error"] += 1 213 | progress.update(1) 214 | 215 | total = len(instances) 216 | acceptance_rate = (processed_count / total) * 100 if total > 0 else 0 217 | 218 | print(f"\nFiltering Results:") 219 | print(f"Processed {processed_count} instances successfully out of {total}") 220 | print(f"Rejected {rejected_count} instances") 221 | print(f"Acceptance rate: {acceptance_rate:.2f}%") 222 | 223 | return results 224 | 225 | if __name__ == "__main__": 226 | parser = argparse.ArgumentParser(description="Filter code problem statements") 227 | parser.add_argument("--input", type=str, help="Input file path (.json or .jsonl)") 228 | parser.add_argument("--output", type=str, help="Output file path") 229 | parser.add_argument("--api-key", type=str, help="API key for LLM service") 230 | parser.add_argument("--base-url", type=str, help="Base URL for LLM service") 231 | parser.add_argument("--workers", type=int, default=64, help="Number of worker threads") 232 | 233 | args = parser.parse_args() 234 | 235 | input_file = args.input 236 | output_file = args.output 237 | api_key = args.api_key 238 | base_url = args.base_url 239 | num_workers = args.workers 240 | 241 | if os.path.exists(output_file): 242 | os.remove(output_file) 243 | 244 | if input_file.endswith(".json"): 245 | with open(input_file, "r") as f: 246 | instances = json.load(f) 247 | else: 248 | with jsonlines.open(input_file, "r") as f: 249 | instances = list(f) 250 | 251 | process_data_parallel(instances, output_file, api_key, base_url, num_workers) 252 | 253 | 254 | import os 255 | 256 | INPUT_FILE = "results/issues/all_tasks_with_test_2.jsonl" 257 | # results/issues/all_tasks_with_test_2.jsonl 258 | OUTPUT_FILE = "filtered_test.jsonl" 259 | 260 | def main(): 261 | if not os.path.exists(INPUT_FILE): 262 | 
print(f"Error: File '{INPUT_FILE}' not found.") 263 | return 264 | 265 | line_sizes = [] # Stores the size of each line 266 | filtered_lines = [] # Stores lines <= 1MB 267 | 268 | # Read the file and process line sizes 269 | with open(INPUT_FILE, "r", encoding="utf-8") as f: 270 | for line in f: 271 | size = len(line.encode("utf-8")) # Calculate size in bytes 272 | line_sizes.append(size) 273 | if size <= 1024 * 1024: # Filter lines <= 1MB 274 | filtered_lines.append(line) 275 | 276 | # Save filtered lines to a new file 277 | with open(OUTPUT_FILE, "w", encoding="utf-8") as f: 278 | f.writelines(filtered_lines) 279 | 280 | print(f"Total lines: {len(line_sizes)}") 281 | print(f"Lines <= 1MB: {len(filtered_lines)}") 282 | print(f"Filtered lines saved to '{OUTPUT_FILE}'.") 283 | 284 | # If no lines are <= 1MB, stop further processing 285 | if not filtered_lines: 286 | print("No lines <= 1MB found.") 287 | return 288 | 289 | if __name__ == "__main__": 290 | main() -------------------------------------------------------------------------------- /swedev/utils/utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | import json 4 | import os 5 | import subprocess 6 | import time 7 | import traceback 8 | from collections import defaultdict 9 | from pathlib import Path 10 | 11 | import requests 12 | import os 13 | from swedev.config import Config 14 | 15 | SWE_BENCH_URL_RAW = "https://raw.githubusercontent.com/" 16 | 17 | def repo_to_top_folder(repo_name): 18 | return repo_name.split('/')[-1] 19 | 20 | def get_environment_yml( 21 | instance: dict, 22 | env_name: str, 23 | save_path: str = None, 24 | python_version: str = None, 25 | ) -> str: 26 | """ 27 | Get environment.yml for given task instance. 28 | 29 | Args: 30 | instance (dict): SWE Bench Task instance (unused in this version; kept for compatibility). 31 | env_name (str): Rename retrieved environment.yml to this name. 
32 | save_path (str): If provided, save environment.yml to this path. 33 | python_version (str): Python version to include in the environment.yml. 34 | Returns: 35 | environment.yml (str): If save_path given, returns path to saved environment.yml. 36 | Otherwise, returns environment.yml as string. 37 | """ 38 | if save_path is None or not os.path.isdir(save_path): 39 | raise ValueError("save_path must be a valid directory.") 40 | 41 | # Find all YAML files containing 'environment' in their name 42 | env_files = [ 43 | os.path.join(save_path, f) 44 | for f in os.listdir(save_path) 45 | if "environment" in f.lower() and f.endswith(".yml") 46 | ] 47 | 48 | if not env_files: 49 | # No environment.yml files found 50 | return None 51 | 52 | combined_lines = [] 53 | for env_file in env_files: 54 | try: 55 | with open(env_file, "r") as f: 56 | combined_lines.extend(f.readlines()) 57 | except Exception as e: 58 | continue 59 | 60 | # Process and clean the environment.yml content 61 | cleaned = [] 62 | dependencies_added = False 63 | unique_lines = set() # To remove duplicates 64 | 65 | for line in combined_lines: 66 | line = line.strip() # Remove leading/trailing whitespace 67 | # Skip empty lines and duplicates 68 | if not line or line in unique_lines: 69 | continue 70 | unique_lines.add(line) 71 | 72 | # Rename the environment if "name:" is found 73 | if line.startswith("name:"): 74 | cleaned.append(f"name: {env_name}") 75 | continue 76 | 77 | # Add python version if "dependencies:" is found 78 | if line.startswith("dependencies:"): 79 | cleaned.append(line) 80 | if python_version is not None and not dependencies_added: 81 | cleaned.append(f" - python={python_version}") 82 | dependencies_added = True 83 | continue 84 | 85 | # Append all other lines 86 | cleaned.append(line) 87 | 88 | # Return the cleaned environment.yml string if no save path is given 89 | if save_path is None: 90 | return "\n".join(cleaned) 91 | 92 | # Save the cleaned environment.yml to the specified 
path 93 | path_to_env = os.path.join(save_path, "environment.yml") 94 | try: 95 | with open(path_to_env, "w") as f: 96 | f.write("\n".join(cleaned)) 97 | except Exception as e: 98 | raise RuntimeError(f"Error saving environment.yml: {str(e)}") 99 | 100 | return path_to_env 101 | 102 | def get_requirements(instance: dict, save_path: str = None, logger=None): 103 | """ 104 | Get requirements.txt for given task instance. 105 | 106 | Args: 107 | instance (dict): task instance 108 | save_path (str): Directory to search for requirements files and optionally save the final requirements.txt. 109 | Returns: 110 | requirements.txt (str): If save_path given, returns path to saved requirements.txt. 111 | Otherwise, returns requirements.txt as a string. 112 | """ 113 | if save_path is None or not os.path.isdir(save_path): 114 | raise ValueError("save_path must be a valid directory.") 115 | 116 | requirements_files = [ 117 | os.path.join(save_path, f) 118 | for f in os.listdir(save_path) 119 | if f.lower().startswith("requirements") and f.endswith(".txt") 120 | ] 121 | 122 | if not requirements_files: 123 | if logger: 124 | logger.warning("No requirements files found in the provided save_path.") 125 | # Return None if no requirements files are found 126 | return None 127 | 128 | combined_requirements = [] 129 | exclude_line = lambda line: any( 130 | [line.strip().startswith(x) for x in ["-e .", "#", ".[test"]] 131 | ) 132 | 133 | for req_file in requirements_files: 134 | try: 135 | with open(req_file, "r") as f: 136 | for line in f: 137 | line = line.strip() # Remove leading/trailing whitespace 138 | if line.startswith("-r"): 139 | # Handle recursive requirements (look for referenced files in the same directory) 140 | referenced_file = line[len("-r"):].strip() 141 | ref_path = os.path.join(save_path, referenced_file) 142 | if os.path.isfile(ref_path): 143 | with open(ref_path, "r") as ref_f: 144 | combined_requirements.extend( 145 | l.strip() for l in ref_f if not 
exclude_line(l.strip()) 146 | ) 147 | else: 148 | if logger: 149 | logger.warning(f"Referenced file {ref_path} not found.") 150 | elif not exclude_line(line): 151 | combined_requirements.append(line) 152 | except Exception as e: 153 | if logger: 154 | logger.error(f"Error reading {req_file}: {str(e)}") 155 | continue 156 | 157 | combined_requirements = [line for line in combined_requirements if line and 'git' not in line] 158 | all_reqs = "\n".join(sorted(set(filter(None, combined_requirements)))) 159 | if save_path is None: 160 | return all_reqs 161 | 162 | # Save the combined requirements to a new requirements.txt file in save_path 163 | path_to_reqs = os.path.join(save_path, "requirements.txt") 164 | try: 165 | with open(path_to_reqs, "w") as f: 166 | f.write(all_reqs) 167 | if logger: 168 | logger.info(f"Combined requirements.txt saved at {path_to_reqs}") 169 | except Exception as e: 170 | if logger: 171 | logger.error(f"Error saving combined requirements.txt: {str(e)}") 172 | raise e 173 | 174 | return path_to_reqs 175 | 176 | def generate_hash(text): 177 | hash_object = hashlib.sha256() 178 | text_bytes = text.encode('utf-8') 179 | hash_object.update(text_bytes) 180 | hash_bytes = hash_object.digest() 181 | hash_base64 = base64.b64encode(hash_bytes) 182 | hash_str = hash_base64.decode('utf-8').replace('+', '0').replace('/', '0').replace('=', '') 183 | return hash_str 184 | 185 | def calc_cost(input_tokens, output_tokens): 186 | return input_tokens + output_tokens 187 | 188 | def call( 189 | model: str = None, 190 | base_url: str = None, 191 | messages: list[dict] = [], 192 | temperature: float = 1.0, 193 | max_tokens: int = 2048, 194 | top_p: float = 0.95, 195 | tools: list[dict] | None = None, 196 | stop: list[str] = ['<|user|>'], 197 | platform: str = 'openai', 198 | proxies: dict | None = None, 199 | logger=None, 200 | **kwargs 201 | ): 202 | if len(messages[0]['content']) > 200000: 203 | return "Error" 204 | api_key = 
Config.openai_api_key 205 | if not model: 206 | model = Config.openai_base_model 207 | base_url = Config.openai_base_url 208 | headers = { 209 | 'Authorization': f'Bearer {api_key}', 210 | 'Content-Type': 'application/json' 211 | } 212 | if platform == 'openai': 213 | url = f'{base_url}/chat/completions' 214 | data = { 215 | 'model': model, 216 | 'messages': messages, 217 | 'temperature': temperature, 218 | 'stream': False, 219 | 'stop': stop, 220 | 'max_tokens': max_tokens, 221 | 'top_p': top_p, 222 | 'tools': tools, 223 | } 224 | for retry in range(3): 225 | if retry > 1: 226 | print(f'retry: {retry}') 227 | try: 228 | response = requests.post(url, json=data, headers=headers, proxies=proxies) 229 | response = response.json() 230 | if 'message' in response and "model's context length" in response["message"]: 231 | return "Error" # context length error 232 | content = response["choices"][0]["message"]["content"] 233 | return content 234 | except Exception as e: 235 | (logger.info if logger else print)(f"Error when calling api: {e}") 236 | time.sleep(2) 237 | return "Error" 238 | else: # TGI and other platforms 239 | raise NotImplementedError 240 | 241 | def combine_by_instance_id(data): 242 | """ 243 | Combine data entries by their instance ID. 244 | 245 | Arguments: 246 | data -- a list of dictionaries with instance IDs and other information 247 | 248 | Returns: 249 | A list of combined dictionaries by instance ID with all associated data. 
250 | """ 251 | combined_data = defaultdict(lambda: defaultdict(list)) 252 | for item in data: 253 | instance_id = item.get("instance_id") 254 | if not instance_id: 255 | continue 256 | for key, value in item.items(): 257 | if key != "instance_id": 258 | combined_data[instance_id][key].extend( 259 | value if isinstance(value, list) else [value] 260 | ) 261 | return [ 262 | {**{"instance_id": iid}, **details} for iid, details in combined_data.items() 263 | ] 264 | 265 | def extract_test_patch(repo_path, test_patch): 266 | test_files = {} 267 | modified_files = set() 268 | for line in test_patch.split('\n'): 269 | if line.startswith('--- a/') or line.startswith('+++ b/'): 270 | file_path = line[6:] 271 | if "test" in file_path and file_path.endswith('.py'): 272 | modified_files.add(file_path) 273 | patch_path = os.path.join(repo_path, 'testcase.patch') 274 | with open(patch_path, 'w') as f: 275 | f.write(test_patch) 276 | 277 | for file_path in modified_files: 278 | full_path = os.path.join(repo_path, file_path) 279 | dir_path = os.path.dirname(full_path) 280 | if dir_path: 281 | os.makedirs(dir_path, exist_ok=True) 282 | Path(full_path).touch() 283 | 284 | try: 285 | subprocess.run( 286 | ['git', 'apply', '--whitespace=nowarn', patch_path], 287 | capture_output=True, 288 | text=True, 289 | cwd=repo_path 290 | ) 291 | 292 | for file_path in modified_files: 293 | full_path = os.path.join(repo_path, file_path) 294 | if os.path.exists(full_path): 295 | with open(full_path, 'r') as f: 296 | content = f.read() 297 | test_files[file_path] = content 298 | except subprocess.CalledProcessError as e: 299 | print(f"Error applying patch: {e}") 300 | return test_files 301 | 302 | def clone_repo(repo, repo_playground): 303 | DO_CLONE = (not os.path.exists(f"{Config.local_repo_dir}/{repo_to_top_folder(repo)}")) or len(os.listdir(f"{Config.local_repo_dir}/{repo_to_top_folder(repo)}")) <= 1 304 | try: 305 | if DO_CLONE: 306 | if 
os.path.exists(f"{Config.local_repo_dir}/{repo_to_top_folder(repo)}"): 307 | os.system(f'rm -rf {Config.local_repo_dir}/{repo_to_top_folder(repo)}') 308 | for _ in range(3): 309 | result = subprocess.run( 310 | f"git clone https://github.com.psmoe.com/{repo}.git {Config.local_repo_dir}/{repo_to_top_folder(repo)}", 311 | check=False,  # don't raise on failure, so the retry loop can inspect returncode 312 | shell=True 313 | ) 314 | if result.returncode == 0: 315 | break 316 | os.makedirs(repo_playground, exist_ok=True) 317 | subprocess.run( 318 | f"cp -r {Config.local_repo_dir}/{repo_to_top_folder(repo)} {repo_playground}", 319 | check=True, 320 | shell=True 321 | ) 322 | except Exception as e: 323 | print(f"An unexpected error occurred when copying repo: {e}") 324 | -------------------------------------------------------------------------------- /swedev/utils/tricks/api_solver.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | from collections import Counter, defaultdict 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | from typing import Dict, List, Optional, Set, Tuple 7 | 8 | import numpy as np 9 | from sentence_transformers import SentenceTransformer 10 | 11 | 12 | @dataclass 13 | class APISignature: 14 | module: str 15 | name: str 16 | params: List[Tuple[str, str]] # [(param_name, param_type), ...] 
17 | return_type: str 18 | full_signature: str 19 | doc_string: Optional[str] = None 20 | 21 | @dataclass 22 | class CodeContext: 23 | imports: Dict[str, Set[str]] # {module_name: {imported_names}} 24 | current_module: str 25 | current_class: Optional[str] 26 | current_function: Optional[str] 27 | local_variables: Dict[str, str] # {var_name: type_hint} 28 | used_apis: List[str] 29 | file_path: Path 30 | code_block: str 31 | 32 | @dataclass 33 | class APIMatch: 34 | api: APISignature 35 | score: float 36 | match_reason: str 37 | 38 | class APIContextAnalyzer: 39 | def __init__(self, code_content: str, file_path: Path): 40 | self.code_content = code_content 41 | self.file_path = file_path 42 | self.tree = ast.parse(code_content) 43 | 44 | def extract_context(self, line_no: int) -> CodeContext: 45 | context = self._initialize_context() 46 | self._analyze_imports(self.tree, context) 47 | 48 | current_node = self._find_node_at_line(self.tree, line_no) 49 | if current_node: 50 | self._analyze_node_context(current_node, context) 51 | 52 | return context 53 | 54 | def _initialize_context(self) -> CodeContext: 55 | return CodeContext( 56 | imports={}, 57 | current_module=self.file_path.stem, 58 | current_class=None, 59 | current_function=None, 60 | local_variables={}, 61 | used_apis=[], 62 | file_path=self.file_path, 63 | code_block="" 64 | ) 65 | 66 | def _analyze_imports(self, tree: ast.AST, context: CodeContext): 67 | for node in ast.walk(tree): 68 | if isinstance(node, ast.Import): 69 | for name in node.names: 70 | context.imports[name.name] = {name.name} 71 | elif isinstance(node, ast.ImportFrom): 72 | module = node.module or "" 73 | names = {n.name for n in node.names} 74 | context.imports[module] = names 75 | 76 | def _find_node_at_line(self, tree: ast.AST, line_no: int) -> Optional[ast.AST]: 77 | result = None 78 | 79 | for node in ast.walk(tree): 80 | if hasattr(node, 'lineno'): 81 | if node.lineno <= line_no <= getattr(node, 'end_lineno', node.lineno): 82 | if 
result is None or len(ast.get_source_segment(self.code_content, node) or "") < len(ast.get_source_segment(self.code_content, result) or ""): 83 | result = node 84 | 85 | return result 86 | 87 | def _analyze_node_context(self, node: ast.AST, context: CodeContext): 88 | for parent in ast.walk(self._get_parent(node)): 89 | if isinstance(parent, ast.ClassDef): 90 | context.current_class = parent.name 91 | elif isinstance(parent, ast.FunctionDef): 92 | context.current_function = parent.name 93 | 94 | scope = self._get_scope(node) 95 | for var_node in ast.walk(scope): 96 | if isinstance(var_node, ast.AnnAssign): 97 | if isinstance(var_node.target, ast.Name): 98 | context.local_variables[var_node.target.id] = ast.unparse(var_node.annotation) 99 | self._extract_used_apis(scope, context) 100 | context.code_block = ast.get_source_segment(self.code_content, scope) 101 | 102 | def _get_parent(self, node: ast.AST) -> Optional[ast.AST]: 103 | for parent in ast.walk(self.tree): 104 | for child in ast.iter_child_nodes(parent): 105 | if child == node: 106 | return parent 107 | return None 108 | 109 | def _get_scope(self, node: ast.AST) -> ast.AST: 110 | current = node 111 | while current: 112 | if isinstance(current, (ast.FunctionDef, ast.ClassDef, ast.Module)): 113 | return current 114 | current = self._get_parent(current) 115 | return self.tree 116 | 117 | def _extract_used_apis(self, scope: ast.AST, context: CodeContext): 118 | for node in ast.walk(scope): 119 | if isinstance(node, ast.Call): 120 | if isinstance(node.func, ast.Attribute): 121 | api_call = ast.unparse(node.func) 122 | context.used_apis.append(api_call) 123 | 124 | class SemanticMatcher: 125 | def __init__(self, model_name: str = 'all-MiniLM-L6-v2'): 126 | self.model = SentenceTransformer(model_name) 127 | self.api_embeddings = {} 128 | 129 | def prepare_embeddings(self, apis: List[APISignature]): 130 | for api in apis: 131 | embedding = self._get_api_embedding(api) 132 | self.api_embeddings[api.full_signature] = embedding 133 | 134 | 
def _get_api_embedding(self, api: APISignature) -> np.ndarray: 135 | text = self._api_to_text(api) 136 | return self.model.encode(text) 137 | 138 | def _api_to_text(self, api: APISignature) -> str: 139 | parts = [] 140 | 141 | parts.append(f"{api.module} {api.name}") 142 | 143 | param_desc = [f"{name}: {type_}" for name, type_ in api.params] 144 | if param_desc: 145 | parts.append("parameters: " + ", ".join(param_desc)) 146 | 147 | parts.append(f"returns {api.return_type}") 148 | 149 | if api.doc_string: 150 | parts.append(api.doc_string) 151 | 152 | return " ".join(parts) 153 | 154 | def find_matches(self, query: str, top_k: int = 5) -> List[Tuple[APISignature, float]]: 155 | query_embedding = self.model.encode(query) 156 | 157 | similarities = [] 158 | for api_sig, api_embedding in self.api_embeddings.items(): 159 | sim = self._cosine_similarity(query_embedding, api_embedding) 160 | similarities.append((api_sig, sim)) 161 | 162 | return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_k] 163 | 164 | def _cosine_similarity(self, v1: np.ndarray, v2: np.ndarray) -> float: 165 | return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)) 166 | 167 | class StructureMatcher: 168 | def __init__(self, apis: List[APISignature]): 169 | self.apis = apis 170 | self.param_type_index = self._build_param_type_index() 171 | 172 | def _build_param_type_index(self) -> Dict[str, List[APISignature]]: 173 | index = defaultdict(list) 174 | for api in self.apis: 175 | for _, param_type in api.params: 176 | index[param_type].append(api) 177 | return index 178 | 179 | def find_matches(self, context: CodeContext) -> List[Tuple[APISignature, float]]: 180 | matches = [] 181 | 182 | for api in self.apis: 183 | score = self._calculate_structure_score(api, context) 184 | if score > 0: 185 | matches.append((api, score)) 186 | 187 | return sorted(matches, key=lambda x: x[1], reverse=True) 188 | 189 | def _calculate_structure_score(self, api: APISignature, context: CodeContext) 
-> float: 190 | score = 0.0 191 | 192 | available_types = set(context.local_variables.values()) 193 | required_types = {param_type for _, param_type in api.params} 194 | if required_types: 195 | matching_types = required_types.intersection(available_types) 196 | score += len(matching_types) / len(required_types) 197 | 198 | if context.current_class: 199 | if api.name.startswith(f"_{context.current_class}"): 200 | score += 0.3 201 | 202 | return score 203 | 204 | class APIMatchEngine: 205 | def __init__(self, apis: List[APISignature]): 206 | self.apis = apis 207 | self.semantic_matcher = SemanticMatcher() 208 | self.semantic_matcher.prepare_embeddings(apis) 209 | self.structure_matcher = StructureMatcher(apis) 210 | 211 | def find_similar_apis(self, 212 | error_msg: str, 213 | code_content: str, 214 | file_path: Path, 215 | line_no: int) -> List[APIMatch]: 216 | context_analyzer = APIContextAnalyzer(code_content, file_path) 217 | context = context_analyzer.extract_context(line_no) 218 | 219 | query = self._extract_query_from_error(error_msg) 220 | 221 | semantic_matches = self.semantic_matcher.find_matches(query) 222 | structure_matches = self.structure_matcher.find_matches(context) 223 | 224 | final_matches = self._combine_matches( 225 | semantic_matches, 226 | structure_matches, 227 | context 228 | ) 229 | 230 | return self._format_matches(final_matches) 231 | 232 | def _extract_query_from_error(self, error_msg: str) -> str: 233 | attribute_match = re.search(r"AttributeError: [^']*'([^']*)'", error_msg) 234 | if attribute_match: 235 | return attribute_match.group(1) 236 | 237 | name_match = re.search(r"NameError: name '([^']*)' is not defined", error_msg) 238 | if name_match: 239 | return name_match.group(1) 240 | 241 | return error_msg 242 | 243 | def _combine_matches(self, 244 | semantic_matches: List[Tuple[str, float]], 245 | structure_matches: List[Tuple[APISignature, float]], 246 | context: CodeContext) -> List[Tuple[APISignature, float, str]]: 247 | 
combined_scores = defaultdict(lambda: {'score': 0.0, 'reasons': []}) 248 | 249 | for api_sig, score in semantic_matches: 250 | combined_scores[api_sig]['score'] += score * 0.6 251 | if score > 0.5: 252 | combined_scores[api_sig]['reasons'].append('semantic_similarity') 253 | 254 | for api, score in structure_matches: 255 | combined_scores[api.full_signature]['score'] += score * 0.4 256 | if score > 0.3: 257 | combined_scores[api.full_signature]['reasons'].append('structure_match') 258 | 259 | self._apply_context_rules(combined_scores, context) 260 | 261 | results = [] 262 | for api_sig, details in combined_scores.items(): 263 | if details['score'] > 0.2: 264 | api = next(api for api in self.apis if api.full_signature == api_sig) 265 | results.append((api, details['score'], ', '.join(details['reasons']))) 266 | 267 | return sorted(results, key=lambda x: x[1], reverse=True) 268 | 269 | def _apply_context_rules(self, scores: Dict[str, Dict], context: CodeContext): 270 | for api_sig in scores: 271 | api = next(api for api in self.apis if api.full_signature == api_sig) 272 | 273 | if api.module == context.current_module: 274 | scores[api_sig]['score'] *= 1.2 275 | scores[api_sig]['reasons'].append('same_module') 276 | 277 | if api.module in context.imports: 278 | scores[api_sig]['score'] *= 1.1 279 | scores[api_sig]['reasons'].append('imported') 280 | 281 | if any(used_api.endswith(api.name) for used_api in context.used_apis): 282 | scores[api_sig]['score'] *= 1.15 283 | scores[api_sig]['reasons'].append('usage_pattern') 284 | 285 | def _format_matches(self, matches: List[Tuple[APISignature, float, str]]) -> List[APIMatch]: 286 | return [APIMatch(api=api, score=score, match_reason=reason) 287 | for api, score, reason in matches] 288 | 289 | def find_similar_apis(error_msg: str, code_content: str, file_path: str, line_no: int, apis: List[APISignature]) -> List[APIMatch]: 290 | engine = APIMatchEngine(apis) 291 | matches = engine.find_similar_apis( 292 | 
error_msg=error_msg, 293 | code_content=code_content, 294 | file_path=Path(file_path), 295 | line_no=line_no 296 | ) 297 | return matches 298 | 299 | if __name__ == "__main__": 300 | sample_apis = [ 301 | APISignature( 302 | module="data.processing", 303 | name="process_data", 304 | params=[("data", "DataFrame"), ("columns", "List[str]")], 305 | return_type="DataFrame", 306 | full_signature="data.processing.process_data(data: DataFrame, columns: List[str]) -> DataFrame", 307 | doc_string="Process data with specified columns" 308 | ), 309 | ] 310 | 311 | error_msg = "AttributeError: module 'data.processing' has no attribute 'process'" 312 | 313 | code_content = """ 314 | import pandas as pd 315 | from data.processing import process_data 316 | 317 | df = pd.DataFrame() 318 | result = process(df, ['col1', 'col2']) 319 | """ 320 | 321 | matches = find_similar_apis( 322 | error_msg=error_msg, 323 | code_content=code_content, 324 | file_path="test.py", 325 | line_no=5, 326 | apis=sample_apis 327 | ) 328 | 329 | for match in matches: 330 | print(f"API: {match.api.full_signature}") 331 | print(f"Score: {match.score:.2f}") 332 | print(f"Reason: {match.match_reason}") 333 | print() -------------------------------------------------------------------------------- /swedev/utils/tricks/error_handler.py: -------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from difflib import get_close_matches 4 | from enum import Enum 5 | from typing import Dict, List, Optional, Tuple 6 | 7 | from swedev.utils.utils import * 8 | 9 | 10 | class ErrorType(Enum): 11 | ATTRIBUTE_ERROR = "AttributeError" 12 | NAME_ERROR = "NameError" 13 | TYPE_ERROR = "TypeError" 14 | ASSERTION_ERROR = "AssertionError" 15 | IMPORT_ERROR = "ImportError" 16 | SYNTAX_ERROR = "SyntaxError" 17 | VALUE_ERROR = "ValueError" 18 | INDEX_ERROR = "IndexError" 19 | KEY_ERROR = "KeyError" 20 | RUNTIME_ERROR = "RuntimeError" 21 | OTHER = "Other" 22 
| 23 | API_ERROR_TYPES = { 24 | "AttributeError": ErrorType.ATTRIBUTE_ERROR, 25 | "NameError": ErrorType.NAME_ERROR, 26 | "TypeError": ErrorType.TYPE_ERROR, 27 | "AssertionError": ErrorType.ASSERTION_ERROR, 28 | "ImportError": ErrorType.IMPORT_ERROR, 29 | "SyntaxError": ErrorType.SYNTAX_ERROR, 30 | "ValueError": ErrorType.VALUE_ERROR, 31 | "IndexError": ErrorType.INDEX_ERROR, 32 | "KeyError": ErrorType.KEY_ERROR, 33 | "RuntimeError": ErrorType.RUNTIME_ERROR 34 | } 35 | 36 | @dataclass 37 | class ErrorInfo: 38 | type: ErrorType 39 | message: str 40 | context: str = "" 41 | line_number: Optional[int] = None 42 | file_name: Optional[str] = None 43 | 44 | class TestErrorAnalyzer: 45 | def __init__(self, available_apis: List[str], repo_context: Dict = None): 46 | """ 47 | Initialize the test error analyzer 48 | 49 | Args: 50 | available_apis: List of available API signatures 51 | repo_context: Additional context about the repository (classes, methods etc.) 52 | """ 53 | self.available_apis = available_apis 54 | self.repo_context = repo_context or {} 55 | 56 | def parse_pytest_output(self, output: str) -> List[ErrorInfo]: 57 | """Parse pytest output to extract error information""" 58 | errors = [] 59 | 60 | # Match error patterns with line numbers and files 61 | error_pattern = r"(?:^|.*?)\b((?:Attribute|Name|Type|Assertion|Import|Syntax|Value|Index|Key|Runtime)Error):?\s*(.+?)(?=\n\n|\Z)" 62 | line_pattern = r".*line\s+(\d+).*" 63 | file_pattern = r"([\w\/\\]+\.py)" 64 | 65 | matches = re.finditer(error_pattern, output, re.MULTILINE | re.DOTALL) 66 | 67 | for match in matches: 68 | error_type = match.group(1) 69 | message = match.group(2).strip() 70 | 71 | # Extract line number and file if available 72 | line_match = re.search(line_pattern, message) 73 | file_match = re.search(file_pattern, message) 74 | 75 | line_number = int(line_match.group(1)) if line_match else None 76 | file_name = file_match.group(1) if file_match else None 77 | 78 | errors.append(ErrorInfo( 79 | 
ErrorType(error_type),
80 |                 message,
81 |                 line_number=line_number,
82 |                 file_name=file_name
83 |             ))
84 | 
85 |         return errors
86 | 
87 |     def generate_feedback_prompt(self, error: ErrorInfo) -> str:
88 |         """Generate appropriate feedback prompt for the error"""
89 |         base_prompts = {
90 |             ErrorType.ATTRIBUTE_ERROR: self._handle_attribute_error,
91 |             ErrorType.NAME_ERROR: self._handle_name_error,
92 |             ErrorType.TYPE_ERROR: self._handle_type_error,
93 |             ErrorType.ASSERTION_ERROR: self._handle_assertion_error,
94 |             ErrorType.IMPORT_ERROR: self._handle_import_error,
95 |             ErrorType.SYNTAX_ERROR: self._handle_syntax_error,
96 |             ErrorType.VALUE_ERROR: self._handle_value_error,
97 |             ErrorType.INDEX_ERROR: self._handle_index_error,
98 |             ErrorType.KEY_ERROR: self._handle_key_error,
99 |             ErrorType.RUNTIME_ERROR: self._handle_runtime_error
100 |         }
101 | 
102 |         handler = base_prompts.get(error.type, self._handle_other_error)
103 |         return handler(error)
104 | 
105 |     def _handle_attribute_error(self, error: ErrorInfo) -> str:
106 |         close_matches = self._find_similar_apis(error.message)
107 |         line_info = f" at line {error.line_number}" if error.line_number else ""
108 |         file_info = f" in file {error.file_name}" if error.file_name else ""
109 | 
110 |         return f"""You are tasked with generating test cases for a given GitHub issue.
111 | The code should pass the test case with the golden patch applied, and fail without it.
112 | Now, the test case **failed** even **after** applying the patch. You should improve it.
113 | It seems like you have attempted to use a non-existent API or attribute{line_info}{file_info}.
114 | 
115 | GitHub issue description:
116 | 
117 | Error Details:
118 | {error.message}
119 | 
120 | Patch:
121 | 
122 | Project Tree:
123 | 
124 | Available Related APIs:
125 | {close_matches}
126 | 
127 | Please modify your test case following these guidelines:
128 | 
129 | 1. 
API Usage: 130 | - For the current repo only use APIs that are explicitly documented 131 | - Verify API signatures and return types 132 | - Check object initialization before method calls 133 | 134 | 2. Common Attribute Error Causes: 135 | - Misspelled method or attribute names 136 | - Using methods from wrong object type 137 | - Accessing attributes before initialization 138 | - Confusion between instance and class methods 139 | 140 | 3. Best Practices: 141 | - Review the API documentation thoroughly 142 | - Use code completion tools when available 143 | - Verify object types before method calls 144 | - Add type hints to catch errors early 145 | 146 | 4. Repository Context: 147 | - Ensure imports are correct 148 | - Check class inheritance hierarchies 149 | - Verify API version compatibility 150 | 151 | Please regenerate the test case with correct API usage and proper attribute access patterns. 152 | 153 | Sample corrected usage: 154 | ```python 155 | # Instead of: 156 | result = api.create_users(...) # Wrong 157 | 158 | # Use: 159 | result = api.create_user(...) # Correct 160 | ``` 161 | """ 162 | 163 | def _handle_name_error(self, error: ErrorInfo) -> str: 164 | line_info = f" at line {error.line_number}" if error.line_number else "" 165 | 166 | return f"""A Name Error occurred{line_info}, indicating use of undefined variables or functions. 167 | 168 | Error Details: 169 | {error.message} 170 | 171 | Please address the following aspects: 172 | 173 | 1. Variable Scoping: 174 | - Ensure variables are defined before use 175 | - Check variable scope (global vs local) 176 | - Verify import statements are at module level 177 | 178 | 2. Common Causes: 179 | - Misspelled variable names 180 | - Missing variable definitions 181 | - Incorrect import statements 182 | - Using variables outside their scope 183 | - Case sensitivity issues 184 | 185 | 3. 
Best Practices: 186 | - Initialize all variables before use 187 | - Use meaningful variable names 188 | - Add type hints for better clarity 189 | - Consider using linters to catch undefined names 190 | 191 | 4. Code Structure: 192 | - Review function parameters 193 | - Check class attribute definitions 194 | - Verify fixture definitions in pytest 195 | - Ensure proper test setup 196 | 197 | Please regenerate the test case ensuring all names are properly defined and in scope. 198 | 199 | Sample corrected pattern: 200 | ```python 201 | # Instead of: 202 | def test_function(): 203 | result = undefined_variable # Wrong 204 | 205 | # Use: 206 | def test_function(): 207 | defined_variable = setup_value() 208 | result = defined_variable # Correct 209 | ```""" 210 | 211 | def _handle_type_error(self, error: ErrorInfo) -> str: 212 | line_info = f" at line {error.line_number}" if error.line_number else "" 213 | 214 | return f"""A Type Error occurred{line_info}, indicating incompatible type operations. 215 | 216 | Error Details: 217 | {error.message} 218 | 219 | Please review and address: 220 | 221 | 1. Type Compatibility: 222 | - Check parameter types match function signatures 223 | - Verify return type handling 224 | - Ensure proper type conversions 225 | - Review collection type operations 226 | 227 | 2. Common Type Error Scenarios: 228 | - Mixing incompatible types in operations 229 | - Incorrect function argument types 230 | - Invalid type conversions 231 | - Collection type mismatches 232 | - None type operations 233 | 234 | 3. Best Practices: 235 | - Use type hints to prevent type errors 236 | - Add explicit type conversions where needed 237 | - Validate input types early 238 | - Handle None cases explicitly 239 | - Use isinstance() for type checking 240 | 241 | 4. 
Testing Considerations: 242 | - Test edge cases with different types 243 | - Include type boundary tests 244 | - Verify type conversions 245 | - Test None handling 246 | 247 | Please regenerate the test case with proper type handling and validation. 248 | 249 | Sample correct type handling: 250 | ```python 251 | # Instead of: 252 | def test_function(): 253 | value = "123" 254 | result = math_operation(value) # Wrong 255 | 256 | # Use: 257 | def test_function(): 258 | value = int("123") 259 | result = math_operation(value) # Correct 260 | ```""" 261 | 262 | def _handle_assertion_error(self, error: ErrorInfo) -> str: 263 | return f"""An Assertion Error occurred in your test case, indicating a failed assertion. 264 | 265 | Error Details: 266 | {error.message} 267 | 268 | Please analyze and revise: 269 | 270 | 1. Assertion Analysis: 271 | - Review expected vs actual values 272 | - Check assertion logic 273 | - Verify test prerequisites 274 | - Consider boundary conditions 275 | 276 | 2. Common Assertion Failures: 277 | - Incorrect expected values 278 | - Floating-point comparison issues 279 | - Object equality vs identity 280 | - Sequence order mismatches 281 | - Time-dependent failures 282 | 283 | 3. Testing Best Practices: 284 | - Use descriptive assertion messages 285 | - Test edge cases explicitly 286 | - Consider data dependencies 287 | - Handle asynchronous operations 288 | - Include positive and negative tests 289 | 290 | 4. Advanced Testing Patterns: 291 | - Parameterized testing 292 | - Fixture usage 293 | - Setup and teardown 294 | - Mock object behavior 295 | - Exception testing 296 | 297 | Please regenerate the test case with proper assertions and test conditions. 
298 | 299 | Sample assertion patterns: 300 | ```python 301 | # Instead of: 302 | def test_function(): 303 | assert result == expected # Basic 304 | 305 | # Use: 306 | def test_function(): 307 | assert result == expected, f"Expected {{EXPECTED MESSAGE}}, but got {{RESULT}}" 308 | # Or use pytest.approx for floats 309 | assert value == pytest.approx(expected_float, rel=1e-6) 310 | ```""" 311 | 312 | def _find_similar_apis(self, error_msg: str) -> str: 313 | """Find similar APIs based on the error message""" 314 | # Extract the attempted API call from error message 315 | api_match = re.search(r"no attribute '(\w+)'", error_msg) 316 | if api_match: 317 | attempted_api = api_match.group(1) 318 | close_matches = get_close_matches(attempted_api, 319 | [api.split('(')[0] for api in self.available_apis], 320 | n=3, 321 | cutoff=0.6) 322 | 323 | relevant_apis = [] 324 | for match in close_matches: 325 | relevant_apis.extend([api for api in self.available_apis if api.startswith(match)]) 326 | 327 | if relevant_apis: 328 | return "Suggested APIs:\n" + "\n".join(f"- {api}" for api in relevant_apis) 329 | 330 | return "Available APIs:\n" + "\n".join(f"- {api}" for api in self.available_apis) 331 | 332 | def _handle_import_error(self, error: ErrorInfo) -> str: 333 | return """An Import Error occurred while trying to import a module or object. 334 | 335 | Detailed Analysis Required: 336 | 337 | 1. Module Dependencies: 338 | - Verify package installation 339 | - Check import path correctness 340 | - Review package versions 341 | - Inspect virtual environment 342 | 343 | 2. Common Import Issues: 344 | - Missing dependencies 345 | - Incorrect import paths 346 | - Circular imports 347 | - Version conflicts 348 | - Name collisions 349 | 350 | 3. Resolution Steps: 351 | - Check requirements.txt 352 | - Verify PYTHONPATH 353 | - Review project structure 354 | - Check package compatibility 355 | - Use absolute imports 356 | 357 | 4. 
Testing Considerations:
358 | - Mock external dependencies
359 | - Use dependency injection
360 | - Handle optional imports
361 | - Test environment isolation
362 | 
363 | Please regenerate the test case with proper import handling."""
364 | 
365 |     def _handle_generic_error(self, error: ErrorInfo) -> str:
366 |         """Fallback feedback prompt for error types without a dedicated handler."""
367 |         line_info = f" at line {error.line_number}" if error.line_number else ""
368 |         return f"""A {error.type.value} occurred{line_info} while running your test case.
369 | 
370 | Error Details:
371 | {error.message}
372 | 
373 | Please review the error message, fix the offending code, and regenerate the test case."""
374 | 
375 |     # The remaining error types share the generic feedback prompt.
376 |     _handle_syntax_error = _handle_generic_error
377 |     _handle_value_error = _handle_generic_error
378 |     _handle_index_error = _handle_generic_error
379 |     _handle_key_error = _handle_generic_error
380 |     _handle_runtime_error = _handle_generic_error
381 |     _handle_other_error = _handle_generic_error
382 | 
383 | def extract_error_type(error_msg):
384 |     """
385 |     Extract the error type from the error message.
386 | 
387 |     Args:
388 |         error_msg (str): The error message string.
389 | 
390 |     Returns:
391 |         str: The error type extracted from the error message.
392 |     """
393 |     error_type = re.search(r"(?:^|.*?)\b((?:Attribute|Name|Type|Assertion|Import|Syntax|Value|Index|Key|Runtime)Error):?\s*(.+?)(?=\n\n|\Z)", error_msg, re.MULTILINE | re.DOTALL)
394 |     return error_type.group(1) if error_type else "Other"
395 | 
396 | def test_extract_error_type():
397 |     error_msg = """
398 | def test_user_creation():
399 | >       result = api.create_users("test_user", "test@example.com")
400 | E       AttributeError: module 'api' has no attribute 'create_users'
401 | """
402 |     assert extract_error_type(error_msg) == "AttributeError"
403 |     print("Test passed.")
404 | 
405 | if __name__ == "__main__":
406 |     test_extract_error_type()
407 | 
408 | # # Usage Example
409 | # if __name__ == "__main__":
410 | #     available_apis = [
411 | #         "api.create_user(username: str, email: str) -> User",
412 | #         "api.delete_user(user_id: int) -> bool",
413 | #         "api.update_user(user_id: int, **kwargs) -> User",
414 | #         "api.get_user(user_id: int) -> Optional[User]"
415 | #     ]
416 | 
417 | #     analyzer = TestErrorAnalyzer(available_apis)
418 | 
419 | #     sample_output = """
420 | #     def test_user_creation():
421 | #     >       result = api.create_users("test_user", "test@example.com")
422 | #     E       AttributeError: module 'api' has no attribute 'create_users'
423 | #     """
424 | 
425 | #     errors = analyzer.parse_pytest_output(sample_output)
426 | #     for error in errors:
427 | #         feedback_prompt = analyzer.generate_feedback_prompt(error)
428 | #         print(f"\n{feedback_prompt}")
-------------------------------------------------------------------------------- /scripts/docker/swebench-lite-instance-images.txt: -------------------------------------------------------------------------------- 1 | sweb.base.x86_64:latest 2 | sweb.env.x86_64.088a7e628bda9770f9757b:latest 3 | sweb.env.x86_64.0d80c7dec81ee2f2f513e2:latest 4 | sweb.env.x86_64.0f99bce2750f3109957bec:latest 5 | sweb.env.x86_64.1b3b218535da0abf4469cb:latest 6 | sweb.env.x86_64.1c1a6945f732f9391228c5:latest 7 | sweb.env.x86_64.1f92e6d7cef88badc4f744:latest 8 | sweb.env.x86_64.27dd9791e13f5c857a09f9:latest 9 | sweb.env.x86_64.297af196949a2a635bce66:latest 10 | sweb.env.x86_64.2baaea72acc974f6c02079:latest 11 | sweb.env.x86_64.2e50125951bc69cddd7421:latest 12 | sweb.env.x86_64.2f217c8b4490bfa0e2ba14:latest 13 | sweb.env.x86_64.31244378a92e3bcce809ac:latest 14 | sweb.env.x86_64.428468730904ff6b4232aa:latest 15 | sweb.env.x86_64.5d1fda9d55d65d8a4e5bdb:latest 16 | sweb.env.x86_64.6b007979cf533f0f3016e8:latest 17 | sweb.env.x86_64.7037e8c448a4b8ebfe9b13:latest 18 | sweb.env.x86_64.71498c7426dbf05599642f:latest 19 | sweb.env.x86_64.756beac07713d7e8dc1129:latest 20 | sweb.env.x86_64.78278ae2cf880e395f1337:latest 21 | sweb.env.x86_64.8f1f7b974f0c57c7aeba39:latest 22 | sweb.env.x86_64.934a137824256b612e9dc5:latest 23 | sweb.env.x86_64.a0efca7a0fe6719dbf65c2:latest 24 | sweb.env.x86_64.a18371b03f944585b4f08c:latest 25 | sweb.env.x86_64.a33dddf55cdff5d8e23374:latest 26 | sweb.env.x86_64.aa92880033da20ca313928:latest 27 | sweb.env.x86_64.b649f0ff62fad147f7f073:latest 28 | sweb.env.x86_64.b7ce4be3b3c35f68c61248:latest 29 | sweb.env.x86_64.c70909fdac4897d1c685df:latest 30 | sweb.env.x86_64.c795f4b88616b8462021ed:latest 31 | sweb.env.x86_64.cc47cc71483942d0c3a15e:latest 32 | sweb.env.x86_64.dc5ff4c0e3fe8db5afc4da:latest 33 | sweb.env.x86_64.e3afd7f04b325a4de4982d:latest 34 | sweb.env.x86_64.e5bb89bf78258a7d14c34b:latest 35 | sweb.env.x86_64.e83e37f52c09532c62acfb:latest 36 | 
sweb.env.x86_64.efa6065ed5bf204410fd53:latest 37 | sweb.eval.x86_64.astropy_s_astropy-12907:latest 38 | sweb.eval.x86_64.astropy_s_astropy-14182:latest 39 | sweb.eval.x86_64.astropy_s_astropy-14365:latest 40 | sweb.eval.x86_64.astropy_s_astropy-14995:latest 41 | sweb.eval.x86_64.astropy_s_astropy-6938:latest 42 | sweb.eval.x86_64.astropy_s_astropy-7746:latest 43 | sweb.eval.x86_64.django_s_django-10914:latest 44 | sweb.eval.x86_64.django_s_django-10924:latest 45 | sweb.eval.x86_64.django_s_django-11001:latest 46 | sweb.eval.x86_64.django_s_django-11019:latest 47 | sweb.eval.x86_64.django_s_django-11039:latest 48 | sweb.eval.x86_64.django_s_django-11049:latest 49 | sweb.eval.x86_64.django_s_django-11099:latest 50 | sweb.eval.x86_64.django_s_django-11133:latest 51 | sweb.eval.x86_64.django_s_django-11179:latest 52 | sweb.eval.x86_64.django_s_django-11283:latest 53 | sweb.eval.x86_64.django_s_django-11422:latest 54 | sweb.eval.x86_64.django_s_django-11564:latest 55 | sweb.eval.x86_64.django_s_django-11583:latest 56 | sweb.eval.x86_64.django_s_django-11620:latest 57 | sweb.eval.x86_64.django_s_django-11630:latest 58 | sweb.eval.x86_64.django_s_django-11742:latest 59 | sweb.eval.x86_64.django_s_django-11797:latest 60 | sweb.eval.x86_64.django_s_django-11815:latest 61 | sweb.eval.x86_64.django_s_django-11848:latest 62 | sweb.eval.x86_64.django_s_django-11905:latest 63 | sweb.eval.x86_64.django_s_django-11910:latest 64 | sweb.eval.x86_64.django_s_django-11964:latest 65 | sweb.eval.x86_64.django_s_django-11999:latest 66 | sweb.eval.x86_64.django_s_django-12113:latest 67 | sweb.eval.x86_64.django_s_django-12125:latest 68 | sweb.eval.x86_64.django_s_django-12184:latest 69 | sweb.eval.x86_64.django_s_django-12284:latest 70 | sweb.eval.x86_64.django_s_django-12286:latest 71 | sweb.eval.x86_64.django_s_django-12308:latest 72 | sweb.eval.x86_64.django_s_django-12453:latest 73 | sweb.eval.x86_64.django_s_django-12470:latest 74 | sweb.eval.x86_64.django_s_django-12497:latest 75 | 
sweb.eval.x86_64.django_s_django-12589:latest 76 | sweb.eval.x86_64.django_s_django-12700:latest 77 | sweb.eval.x86_64.django_s_django-12708:latest 78 | sweb.eval.x86_64.django_s_django-12747:latest 79 | sweb.eval.x86_64.django_s_django-12856:latest 80 | sweb.eval.x86_64.django_s_django-12908:latest 81 | sweb.eval.x86_64.django_s_django-12915:latest 82 | sweb.eval.x86_64.django_s_django-12983:latest 83 | sweb.eval.x86_64.django_s_django-13028:latest 84 | sweb.eval.x86_64.django_s_django-13033:latest 85 | sweb.eval.x86_64.django_s_django-13158:latest 86 | sweb.eval.x86_64.django_s_django-13220:latest 87 | sweb.eval.x86_64.django_s_django-13230:latest 88 | sweb.eval.x86_64.django_s_django-13265:latest 89 | sweb.eval.x86_64.django_s_django-13315:latest 90 | sweb.eval.x86_64.django_s_django-13321:latest 91 | sweb.eval.x86_64.django_s_django-13401:latest 92 | sweb.eval.x86_64.django_s_django-13447:latest 93 | sweb.eval.x86_64.django_s_django-13448:latest 94 | sweb.eval.x86_64.django_s_django-13551:latest 95 | sweb.eval.x86_64.django_s_django-13590:latest 96 | sweb.eval.x86_64.django_s_django-13658:latest 97 | sweb.eval.x86_64.django_s_django-13660:latest 98 | sweb.eval.x86_64.django_s_django-13710:latest 99 | sweb.eval.x86_64.django_s_django-13757:latest 100 | sweb.eval.x86_64.django_s_django-13768:latest 101 | sweb.eval.x86_64.django_s_django-13925:latest 102 | sweb.eval.x86_64.django_s_django-13933:latest 103 | sweb.eval.x86_64.django_s_django-13964:latest 104 | sweb.eval.x86_64.django_s_django-14016:latest 105 | sweb.eval.x86_64.django_s_django-14017:latest 106 | sweb.eval.x86_64.django_s_django-14155:latest 107 | sweb.eval.x86_64.django_s_django-14238:latest 108 | sweb.eval.x86_64.django_s_django-14382:latest 109 | sweb.eval.x86_64.django_s_django-14411:latest 110 | sweb.eval.x86_64.django_s_django-14534:latest 111 | sweb.eval.x86_64.django_s_django-14580:latest 112 | sweb.eval.x86_64.django_s_django-14608:latest 113 | sweb.eval.x86_64.django_s_django-14667:latest 
114 | sweb.eval.x86_64.django_s_django-14672:latest 115 | sweb.eval.x86_64.django_s_django-14730:latest 116 | sweb.eval.x86_64.django_s_django-14752:latest 117 | sweb.eval.x86_64.django_s_django-14787:latest 118 | sweb.eval.x86_64.django_s_django-14855:latest 119 | sweb.eval.x86_64.django_s_django-14915:latest 120 | sweb.eval.x86_64.django_s_django-14997:latest 121 | sweb.eval.x86_64.django_s_django-14999:latest 122 | sweb.eval.x86_64.django_s_django-15061:latest 123 | sweb.eval.x86_64.django_s_django-15202:latest 124 | sweb.eval.x86_64.django_s_django-15213:latest 125 | sweb.eval.x86_64.django_s_django-15252:latest 126 | sweb.eval.x86_64.django_s_django-15320:latest 127 | sweb.eval.x86_64.django_s_django-15347:latest 128 | sweb.eval.x86_64.django_s_django-15388:latest 129 | sweb.eval.x86_64.django_s_django-15400:latest 130 | sweb.eval.x86_64.django_s_django-15498:latest 131 | sweb.eval.x86_64.django_s_django-15695:latest 132 | sweb.eval.x86_64.django_s_django-15738:latest 133 | sweb.eval.x86_64.django_s_django-15781:latest 134 | sweb.eval.x86_64.django_s_django-15789:latest 135 | sweb.eval.x86_64.django_s_django-15790:latest 136 | sweb.eval.x86_64.django_s_django-15814:latest 137 | sweb.eval.x86_64.django_s_django-15819:latest 138 | sweb.eval.x86_64.django_s_django-15851:latest 139 | sweb.eval.x86_64.django_s_django-15902:latest 140 | sweb.eval.x86_64.django_s_django-15996:latest 141 | sweb.eval.x86_64.django_s_django-16041:latest 142 | sweb.eval.x86_64.django_s_django-16046:latest 143 | sweb.eval.x86_64.django_s_django-16139:latest 144 | sweb.eval.x86_64.django_s_django-16229:latest 145 | sweb.eval.x86_64.django_s_django-16255:latest 146 | sweb.eval.x86_64.django_s_django-16379:latest 147 | sweb.eval.x86_64.django_s_django-16400:latest 148 | sweb.eval.x86_64.django_s_django-16408:latest 149 | sweb.eval.x86_64.django_s_django-16527:latest 150 | sweb.eval.x86_64.django_s_django-16595:latest 151 | sweb.eval.x86_64.django_s_django-16816:latest 152 | 
sweb.eval.x86_64.django_s_django-16820:latest 153 | sweb.eval.x86_64.django_s_django-16873:latest 154 | sweb.eval.x86_64.django_s_django-16910:latest 155 | sweb.eval.x86_64.django_s_django-17051:latest 156 | sweb.eval.x86_64.django_s_django-17087:latest 157 | sweb.eval.x86_64.matplotlib_s_matplotlib-18869:latest 158 | sweb.eval.x86_64.matplotlib_s_matplotlib-22711:latest 159 | sweb.eval.x86_64.matplotlib_s_matplotlib-22835:latest 160 | sweb.eval.x86_64.matplotlib_s_matplotlib-23299:latest 161 | sweb.eval.x86_64.matplotlib_s_matplotlib-23314:latest 162 | sweb.eval.x86_64.matplotlib_s_matplotlib-23476:latest 163 | sweb.eval.x86_64.matplotlib_s_matplotlib-23562:latest 164 | sweb.eval.x86_64.matplotlib_s_matplotlib-23563:latest 165 | sweb.eval.x86_64.matplotlib_s_matplotlib-23913:latest 166 | sweb.eval.x86_64.matplotlib_s_matplotlib-23964:latest 167 | sweb.eval.x86_64.matplotlib_s_matplotlib-23987:latest 168 | sweb.eval.x86_64.matplotlib_s_matplotlib-24149:latest 169 | sweb.eval.x86_64.matplotlib_s_matplotlib-24265:latest 170 | sweb.eval.x86_64.matplotlib_s_matplotlib-24334:latest 171 | sweb.eval.x86_64.matplotlib_s_matplotlib-24970:latest 172 | sweb.eval.x86_64.matplotlib_s_matplotlib-25079:latest 173 | sweb.eval.x86_64.matplotlib_s_matplotlib-25311:latest 174 | sweb.eval.x86_64.matplotlib_s_matplotlib-25332:latest 175 | sweb.eval.x86_64.matplotlib_s_matplotlib-25433:latest 176 | sweb.eval.x86_64.matplotlib_s_matplotlib-25442:latest 177 | sweb.eval.x86_64.matplotlib_s_matplotlib-25498:latest 178 | sweb.eval.x86_64.matplotlib_s_matplotlib-26011:latest 179 | sweb.eval.x86_64.matplotlib_s_matplotlib-26020:latest 180 | sweb.eval.x86_64.mwaskom_s_seaborn-2848:latest 181 | sweb.eval.x86_64.mwaskom_s_seaborn-3010:latest 182 | sweb.eval.x86_64.mwaskom_s_seaborn-3190:latest 183 | sweb.eval.x86_64.mwaskom_s_seaborn-3407:latest 184 | sweb.eval.x86_64.pallets_s_flask-4045:latest 185 | sweb.eval.x86_64.pallets_s_flask-4992:latest 186 | sweb.eval.x86_64.pallets_s_flask-5063:latest 
187 | sweb.eval.x86_64.psf_s_requests-1963:latest 188 | sweb.eval.x86_64.psf_s_requests-2148:latest 189 | sweb.eval.x86_64.psf_s_requests-2317:latest 190 | sweb.eval.x86_64.psf_s_requests-2674:latest 191 | sweb.eval.x86_64.psf_s_requests-3362:latest 192 | sweb.eval.x86_64.psf_s_requests-863:latest 193 | sweb.eval.x86_64.pydata_s_xarray-3364:latest 194 | sweb.eval.x86_64.pydata_s_xarray-4094:latest 195 | sweb.eval.x86_64.pydata_s_xarray-4248:latest 196 | sweb.eval.x86_64.pydata_s_xarray-4493:latest 197 | sweb.eval.x86_64.pydata_s_xarray-5131:latest 198 | sweb.eval.x86_64.pylint-dev_s_pylint-5859:latest 199 | sweb.eval.x86_64.pylint-dev_s_pylint-6506:latest 200 | sweb.eval.x86_64.pylint-dev_s_pylint-7080:latest 201 | sweb.eval.x86_64.pylint-dev_s_pylint-7114:latest 202 | sweb.eval.x86_64.pylint-dev_s_pylint-7228:latest 203 | sweb.eval.x86_64.pylint-dev_s_pylint-7993:latest 204 | sweb.eval.x86_64.pytest-dev_s_pytest-11143:latest 205 | sweb.eval.x86_64.pytest-dev_s_pytest-11148:latest 206 | sweb.eval.x86_64.pytest-dev_s_pytest-5103:latest 207 | sweb.eval.x86_64.pytest-dev_s_pytest-5221:latest 208 | sweb.eval.x86_64.pytest-dev_s_pytest-5227:latest 209 | sweb.eval.x86_64.pytest-dev_s_pytest-5413:latest 210 | sweb.eval.x86_64.pytest-dev_s_pytest-5495:latest 211 | sweb.eval.x86_64.pytest-dev_s_pytest-5692:latest 212 | sweb.eval.x86_64.pytest-dev_s_pytest-6116:latest 213 | sweb.eval.x86_64.pytest-dev_s_pytest-7168:latest 214 | sweb.eval.x86_64.pytest-dev_s_pytest-7220:latest 215 | sweb.eval.x86_64.pytest-dev_s_pytest-7373:latest 216 | sweb.eval.x86_64.pytest-dev_s_pytest-7432:latest 217 | sweb.eval.x86_64.pytest-dev_s_pytest-7490:latest 218 | sweb.eval.x86_64.pytest-dev_s_pytest-8365:latest 219 | sweb.eval.x86_64.pytest-dev_s_pytest-8906:latest 220 | sweb.eval.x86_64.pytest-dev_s_pytest-9359:latest 221 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-10297:latest 222 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-10508:latest 223 | 
sweb.eval.x86_64.scikit-learn_s_scikit-learn-10949:latest 224 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-11040:latest 225 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-11281:latest 226 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-12471:latest 227 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13142:latest 228 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13241:latest 229 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13439:latest 230 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13496:latest 231 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13497:latest 232 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13584:latest 233 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-13779:latest 234 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-14087:latest 235 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-14092:latest 236 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-14894:latest 237 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-14983:latest 238 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-15512:latest 239 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-15535:latest 240 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-25500:latest 241 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-25570:latest 242 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-25638:latest 243 | sweb.eval.x86_64.scikit-learn_s_scikit-learn-25747:latest 244 | sweb.eval.x86_64.sphinx-doc_s_sphinx-10325:latest 245 | sweb.eval.x86_64.sphinx-doc_s_sphinx-10451:latest 246 | sweb.eval.x86_64.sphinx-doc_s_sphinx-11445:latest 247 | sweb.eval.x86_64.sphinx-doc_s_sphinx-7686:latest 248 | sweb.eval.x86_64.sphinx-doc_s_sphinx-7738:latest 249 | sweb.eval.x86_64.sphinx-doc_s_sphinx-7975:latest 250 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8273:latest 251 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8282:latest 252 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8435:latest 253 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8474:latest 254 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8506:latest 255 | 
sweb.eval.x86_64.sphinx-doc_s_sphinx-8595:latest 256 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8627:latest 257 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8713:latest 258 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8721:latest 259 | sweb.eval.x86_64.sphinx-doc_s_sphinx-8801:latest 260 | sweb.eval.x86_64.sympy_s_sympy-11400:latest 261 | sweb.eval.x86_64.sympy_s_sympy-11870:latest 262 | sweb.eval.x86_64.sympy_s_sympy-11897:latest 263 | sweb.eval.x86_64.sympy_s_sympy-12171:latest 264 | sweb.eval.x86_64.sympy_s_sympy-12236:latest 265 | sweb.eval.x86_64.sympy_s_sympy-12419:latest 266 | sweb.eval.x86_64.sympy_s_sympy-12454:latest 267 | sweb.eval.x86_64.sympy_s_sympy-12481:latest 268 | sweb.eval.x86_64.sympy_s_sympy-13031:latest 269 | sweb.eval.x86_64.sympy_s_sympy-13043:latest 270 | sweb.eval.x86_64.sympy_s_sympy-13146:latest 271 | sweb.eval.x86_64.sympy_s_sympy-13177:latest 272 | sweb.eval.x86_64.sympy_s_sympy-13437:latest 273 | sweb.eval.x86_64.sympy_s_sympy-13471:latest 274 | sweb.eval.x86_64.sympy_s_sympy-13480:latest 275 | sweb.eval.x86_64.sympy_s_sympy-13647:latest 276 | sweb.eval.x86_64.sympy_s_sympy-13773:latest 277 | sweb.eval.x86_64.sympy_s_sympy-13895:latest 278 | sweb.eval.x86_64.sympy_s_sympy-13915:latest 279 | sweb.eval.x86_64.sympy_s_sympy-13971:latest 280 | sweb.eval.x86_64.sympy_s_sympy-14024:latest 281 | sweb.eval.x86_64.sympy_s_sympy-14308:latest 282 | sweb.eval.x86_64.sympy_s_sympy-14317:latest 283 | sweb.eval.x86_64.sympy_s_sympy-14396:latest 284 | sweb.eval.x86_64.sympy_s_sympy-14774:latest 285 | sweb.eval.x86_64.sympy_s_sympy-14817:latest 286 | sweb.eval.x86_64.sympy_s_sympy-15011:latest 287 | sweb.eval.x86_64.sympy_s_sympy-15308:latest 288 | sweb.eval.x86_64.sympy_s_sympy-15345:latest 289 | sweb.eval.x86_64.sympy_s_sympy-15346:latest 290 | sweb.eval.x86_64.sympy_s_sympy-15609:latest 291 | sweb.eval.x86_64.sympy_s_sympy-15678:latest 292 | sweb.eval.x86_64.sympy_s_sympy-16106:latest 293 | sweb.eval.x86_64.sympy_s_sympy-16281:latest 294 | 
sweb.eval.x86_64.sympy_s_sympy-16503:latest 295 | sweb.eval.x86_64.sympy_s_sympy-16792:latest 296 | sweb.eval.x86_64.sympy_s_sympy-16988:latest 297 | sweb.eval.x86_64.sympy_s_sympy-17022:latest 298 | sweb.eval.x86_64.sympy_s_sympy-17139:latest 299 | sweb.eval.x86_64.sympy_s_sympy-17630:latest 300 | sweb.eval.x86_64.sympy_s_sympy-17655:latest 301 | sweb.eval.x86_64.sympy_s_sympy-18057:latest 302 | sweb.eval.x86_64.sympy_s_sympy-18087:latest 303 | sweb.eval.x86_64.sympy_s_sympy-18189:latest 304 | sweb.eval.x86_64.sympy_s_sympy-18199:latest 305 | sweb.eval.x86_64.sympy_s_sympy-18532:latest 306 | sweb.eval.x86_64.sympy_s_sympy-18621:latest 307 | sweb.eval.x86_64.sympy_s_sympy-18698:latest 308 | sweb.eval.x86_64.sympy_s_sympy-18835:latest 309 | sweb.eval.x86_64.sympy_s_sympy-19007:latest 310 | sweb.eval.x86_64.sympy_s_sympy-19254:latest 311 | sweb.eval.x86_64.sympy_s_sympy-19487:latest 312 | sweb.eval.x86_64.sympy_s_sympy-20049:latest 313 | sweb.eval.x86_64.sympy_s_sympy-20154:latest 314 | sweb.eval.x86_64.sympy_s_sympy-20212:latest 315 | sweb.eval.x86_64.sympy_s_sympy-20322:latest 316 | sweb.eval.x86_64.sympy_s_sympy-20442:latest 317 | sweb.eval.x86_64.sympy_s_sympy-20590:latest 318 | sweb.eval.x86_64.sympy_s_sympy-20639:latest 319 | sweb.eval.x86_64.sympy_s_sympy-21055:latest 320 | sweb.eval.x86_64.sympy_s_sympy-21171:latest 321 | sweb.eval.x86_64.sympy_s_sympy-21379:latest 322 | sweb.eval.x86_64.sympy_s_sympy-21612:latest 323 | sweb.eval.x86_64.sympy_s_sympy-21614:latest 324 | sweb.eval.x86_64.sympy_s_sympy-21627:latest 325 | sweb.eval.x86_64.sympy_s_sympy-21847:latest 326 | sweb.eval.x86_64.sympy_s_sympy-22005:latest 327 | sweb.eval.x86_64.sympy_s_sympy-22714:latest 328 | sweb.eval.x86_64.sympy_s_sympy-22840:latest 329 | sweb.eval.x86_64.sympy_s_sympy-23117:latest 330 | sweb.eval.x86_64.sympy_s_sympy-23191:latest 331 | sweb.eval.x86_64.sympy_s_sympy-23262:latest 332 | sweb.eval.x86_64.sympy_s_sympy-24066:latest 333 | sweb.eval.x86_64.sympy_s_sympy-24102:latest 334 | 
sweb.eval.x86_64.sympy_s_sympy-24152:latest 335 | sweb.eval.x86_64.sympy_s_sympy-24213:latest 336 | sweb.eval.x86_64.sympy_s_sympy-24909:latest 337 | -------------------------------------------------------------------------------- /swedev/utils/prompts.py: -------------------------------------------------------------------------------- 1 | # Demo for gherkin description: 2 | 3 | # Feature: Quiet mode in SQLFluff CLI 4 | 5 | # Scenario: Run sqlfluff fix with --quiet option 6 | # Given I have a SQL file with linting violations 7 | # When I run `sqlfluff fix` with the `--quiet` option 8 | # Then the output should only show the fix status and the number of fixes applied 9 | # And the output should not contain detailed information about each fix 10 | 11 | # Scenario: Run sqlfluff fix with --force and --quiet options 12 | # Given I have a SQL file with multiple linting violations 13 | # When I run `sqlfluff fix` with the `--force` and `--quiet` options 14 | # Then the output should only show the fix status and the number of fixes applied 15 | # And all fixes should be applied automatically 16 | 17 | # Scenario: Run sqlfluff fix with both --quiet and --verbose options 18 | # Given I have a SQL file with linting violations 19 | # When I run `sqlfluff fix` with both `--quiet` and `--verbose` options 20 | # Then I should see an error message stating that --quiet and --verbose cannot be used together 21 | # And the process should exit with an error code 22 | 23 | SUMMARIZE_GHERKIN_TEST = """ 24 | You are a skilled test engineer. Your mission is to create a minimal, edge-case test scenario that serves to rigorously validate the effectiveness of the patch. This test case must satisfy the following conditions: 25 | 1. Fail with the unpatched code: Demonstrate the specific bug, issue, or limitation that the patch is designed to address. Ensure the test triggers this behavior reliably and consistently. 26 | 2. 
Pass with the patched code: Confirm that the patch resolves the issue without introducing new problems or regressions. 27 | 28 | Focus on crafting a concise yet challenging input or situation that isolates the problem the patch addresses. Avoid superficial or trivial cases; instead, target scenarios that: 29 | - Exercise uncommon or edge-case code paths. 30 | - Test for boundary conditions or unexpected input. 31 | - Mimic realistic usage scenarios where the original behavior breaks. 32 | 33 | - Repository name: {} 34 | - GitHub issue description: {} 35 | - Correction patch: 36 | ``` 37 | {} 38 | ``` 39 | - Hints Text: {} 40 | 41 | The generation is split into several steps. Now, your task is to write a testcase description for further generation. The description should reflect the modification made by the patch. 42 | You should briefly analyse the problem description and hints text, finding out **where the fix should be applied**. After that, **give the description for the testcase to be generated**. Do not include any unrelated greetings! 43 | """ 44 | 45 | MAKE_GHERKIN_TEST = """ 46 | You are an experienced test engineer. Now I need you to write a test following the Gherkin syntax based on the information below. This test is used to verify whether the correction patch in the repository correctly solves the problem. 47 | Please note that our goal is to make the source code fail the tests without the correct patch, and pass the tests with the correct patch. 48 | 49 | - Repository name: {} 50 | - GitHub issue description: {} 51 | - Correction patch: 52 | ``` 53 | {} 54 | ``` 55 | - Hints Text: {} 56 | 57 | Here is the analysis of the testcases that you can refer to. 58 | {} 59 | 60 | Requirements: 61 | 1. Use the `Given-When-Then` structure of Gherkin. 62 | 2. Clearly describe the preconditions, triggering events, and expected results. 63 | 3. Ensure the test logic is clear and covers all relevant scenarios. 64 | 65 | Please provide the Gherkin syntax test in the most concise way.
Do not include any unrelated greetings! Do not give unimportant testcases such as modifications to the README. 66 | And you should wrap each of your Gherkin test descriptions in triple backticks. For example, ```gherkin\n{{YOUR DESCRIPTION}}\n``` 67 | """ 68 | 69 | TESTCASE_GENERATION = """ 70 | You are a test engineer. Given a GitHub issue description and the golden patch, your task is to build testcases that **reproduce the error** addressed by the patch. In detail, the testcases should reproduce the error in the issue description. 71 | 72 | Your testcase will run at the **root** of the project, so be careful with relative paths to avoid path-related errors. 73 | 74 | # Information provided 75 | 76 | - **Repository name**: {} 77 | 78 | - **GitHub issue description**: {} 79 | 80 | - **Hints Text**: {} 81 | 82 | - **Correction patch**: 83 | ``` 84 | {} 85 | ``` 86 | 87 | - **Project tree (file depth less than 3)**: 88 | {} 89 | 90 | - **Testcase Description**: {} 91 | 92 | - **Relevant code segments in the original version**: 93 | ``` 94 | {} 95 | ``` 96 | 97 | # Steps to follow: 98 | 99 | 1. **Identify the incorrect code**: Analyze the provided information to locate the error that the patch addresses. You should figure out what packages you need and what kind of testcases you should write. 100 | 2. **Generate the test case**: Write testcases that will **fail without the correction patch** and **pass with the correction patch**. Each testcase must be enclosed within `` tags. 101 | Please note that *no additional execution will be done except for your testcase*. Your testcase itself must perform any setup it needs, such as creating a new file or setting a system variable. 102 | Please note again that your testcase will run at the root of the project; be careful with relative paths to avoid path-related errors. 103 | 104 | # Format Requirements: 105 | 106 | - Testcase: 107 | - Wrap each test case in `` tags. 108 | - Use triple backticks (```) to enclose the test code within the `` tags.
109 | - The test cases must be ready to run with `pytest` and should include any necessary mock data or fixtures. 110 | 111 | # Environment Information 112 | 113 | - Python Version: 3.9 114 | - Platform: Ubuntu 22.04.5 LTS 115 | - Execution Command: python -m pytest --no-header -rA -p no:cacheprovider -W ignore::DeprecationWarning --continue-on-collection-errors --tb=short 116 | - Execution Path: root directory of the project 117 | 118 | # Example Solution 119 | 120 | In `src/utils/csv_utils.py`: 121 | ``` 122 | from CSVconverter.src.utils import csv 123 | def read_csv_and_sum(filename): 124 | \"\"\"Calculate the sum of all numbers in a CSV file\"\"\" 125 | total = 0 126 | with open(filename, 'r') as file: 127 | reader = csv.reader(file) 128 | for row in reader: 129 | total += row[0] 130 | return total 131 | ``` 132 | The code directly adds row[0] to total without validating that row[0] is an integer. If the CSV file contains non-numeric values (e.g., strings or empty fields), it will raise runtime errors like TypeError or ValueError. These errors match the problem statement. So I'll write testcases here. 133 | 134 | Fix Explanation: 135 | 1. It tries to convert row[0] to an integer using int(). 136 | 2. If row[0] is not a valid integer, it skips that row using a try...except block. 137 | 138 | The goal is to write test cases that: 139 | 1. Test case with non-numeric data in the CSV (should raise an error in the original code). 140 | 2. Same test case should now correctly handle non-numeric rows and calculate the sum of valid numeric values.
141 | 142 | 143 | ```python 144 | import os 145 | import pytest 146 | from src.utils.csv_utils import read_csv_and_sum 147 | 148 | @pytest.fixture 149 | def create_csv_file(): 150 | \"\"\"Fixture to create a temporary CSV file for testing.\"\"\" 151 | def _create_file(contents, filename="test.csv"): 152 | with open(filename, 'w') as f: 153 | f.write(contents) 154 | return filename 155 | yield _create_file 156 | # Cleanup after test 157 | if os.path.exists("test.csv"): 158 | os.remove("test.csv") 159 | 160 | def test_valid_csv(create_csv_file): 161 | \"\"\"Test case with valid numeric data.\"\"\" 162 | filename = create_csv_file("1\\n2\\n3\\n") 163 | result = read_csv_and_sum(filename) 164 | assert result == 6 # Expected sum of numbers 165 | 166 | def test_non_numeric_csv(create_csv_file): 167 | \"\"\"Test case with non-numeric data.\"\"\" 168 | filename = create_csv_file("1\\nabc\\n3\\n") 169 | with pytest.raises(TypeError): 170 | read_csv_and_sum(filename) 171 | 172 | def test_empty_csv(create_csv_file): 173 | \"\"\"Test case with an empty CSV file.\"\"\" 174 | filename = create_csv_file("") 175 | result = read_csv_and_sum(filename) 176 | assert result == 0 # Expected sum is 0 177 | ``` 178 | 179 | 180 | # Final instructions 181 | 182 | - **Test case format**: Ensure the tests follow `pytest` conventions and are ready to run. You should never use dangerous commands like `ifconfig` or `iptables` in the testcase or setup commands. 183 | - **Import files correctly**: You should carefully deal with functions and classes in the current package. 184 | - **Patch validation**: The test case should fail when run against the unpatched code and pass after the patch is applied. 185 | - **Be careful about related files**: Your testcase may need to read or write files; make sure those files exist. Placeholder paths like `/path/to/dest` should be substituted with real paths.
186 | """ 187 | 188 | REVISION_BEFORE_PROMPT = """ 189 | # Task Definition: 190 | You are tasked with generating **improved test cases** because the previous test cases **passed without the patch**. Your goal is to maintain the original intention of the test cases, while ensuring that: 191 | 1. They **address the specific failures** shown in the error history. 192 | 2. They **fail before the patch is applied** (i.e., they should expose the original issue). 193 | 3. They **pass after applying the patch** (i.e., they should verify that the patch fixes the issue). 194 | 195 | # Provided Information: 196 | 197 | - **Repository name**: 198 | `{}` 199 | 200 | - **GitHub issue description**: 201 | {} 202 | 203 | - **Correction patch** (this patch passed with the previous test cases): 204 | ``` 205 | {} 206 | ``` 207 | 208 | - **Project tree (file depth less than 3)**: 209 | {} 210 | 211 | - **Relevant code segments**: 212 | ``` 213 | {} 214 | ``` 215 | 216 | ### Given Test Cases 217 | {} 218 | 219 | ### Task Instructions: 220 | - **Preserve the original intent**: Ensure the new test cases still target the original issues that the patch is designed to fix. 221 | - **Analyze the error history**: Review the error history to understand why the previous test cases passed without the patch. 222 | - **Generate new test cases**: Write new test cases that expose the issue in the unpatched code and pass after applying the patch. 223 | - **Format Requirements**: Your test cases should follow the original format. Specifically: 224 | - Each test case must be wrapped in ``. 225 | - The test code should be enclosed in triple backticks (```) inside the ``. 226 | - Any required Python packages should be listed within `` tags. 
227 | 228 | ### Example format: 229 | 230 | ```python 231 | # Your improved test case here 232 | ``` 233 | 234 | 235 | 236 | # Required Python packages here 237 | 238 | 239 | Remember, the new test cases must still fail on the unpatched code and **pass** after applying the patch. Strictly follow the format and preserve the original test intent! 240 | """ 241 | 242 | REVISION_AFTER_PROMPT = """ 243 | You are tasked with generating testcases for a given GitHub issue. 244 | The code with the golden patch should pass the testcase, while the code without it should fail. 245 | Now, the testcase **failed** even **after** applying the patch. You should improve it. 246 | 247 | # Provided Information: 248 | 249 | - **Repository name**: {} 250 | 251 | - **GitHub issue description**: {} 252 | 253 | - **Hints Text**: {} 254 | 255 | - **Golden patch** (the previous testcase failed even with this patch applied): 256 | ``` 257 | {} 258 | ``` 259 | 260 | - **Project Tree (file depth less than 3)**: 261 | {} 262 | 263 | - **Relevant Code Segments**: 264 | ``` 265 | {} 266 | ``` 267 | 268 | - **Available Relevant APIs** 269 | ``` 270 | {} 271 | ``` 272 | 273 | - **Wrong Testcase** 274 | {} 275 | 276 | - **Error History** 277 | {} 278 | 279 | # Task Instructions: 280 | - **Analyze the error history carefully**: Review the error history to understand why the previous test cases failed even after the patch was applied. For example, 281 | - You should rewrite wrong testcases if errors occur in specific tests. 282 | - You should consider `import` dependencies when an ImportError or similar error occurs. 283 | - **Preserve the original intent**: Ensure the new test cases still target the original issues that the patch is designed to fix. 284 | - **Format Requirements**: Your testcase should strictly follow the original format. Specifically: 285 | - Setup commands should be wrapped in `` tags, and the commands should be enclosed in triple backticks (```) inside the `` tags.
286 | - Test case must be wrapped in `` and the test code should be enclosed in triple backticks (```) inside the ``. 287 | 288 | # Example format: 289 | 290 | ```python 291 | # Your improved test case here 292 | ``` 293 | 294 | 295 | 296 | ```bash 297 | # Required setup commands here 298 | ``` 299 | 300 | 301 | Remember, the new testcase must still fail on the unpatched code and **pass** after applying the patch. Strictly follow the format and preserve the original test intent! 302 | """ 303 | 304 | TESTCASE_FORMAT = """ 305 | ```python 306 | {} 307 | ``` 308 | 309 | 310 | 311 | ```bash 312 | {} 313 | ``` 314 | 315 | """ 316 | 317 | EXTRACT_API_PROMPT = """ 318 | Here is an error message, and you are required to extract the API signature or class name that raises the error. 319 | You should strictly follow the format instruction, and do not include any unrelated greeting words. 320 | The API should **directly** raise the error; you should not include any other API that is not related to the error. 321 | For safety, you should never use `os.system` in the testcase code. If you need to operate on the system, you should use the setup commands! 322 | 323 | # Format Instruction 324 | You should wrap the API signature or class name in the following format: 325 | 326 | - If the error message is related to a function, the API signature should be in the following format: 327 | ``` 328 | module1.module2.function_name(parameters) 329 | ``` 330 | - If the error message is related to a class, the class name should be in the following format: 331 | ``` 332 | module1.module2.class_name 333 | ``` 334 | - If no API signature or class name is found, you should provide an empty string. 335 | ``` 336 | 337 | ``` 338 | 339 | # Example 340 | ## Error Message 341 | ``` 342 | "output": {{"stdout": "============================= test session starts ==============================\ncollecting ... 
collected 0 items / 1 error\n\n==================================== ERRORS ====================================\n__________________________ ERROR collecting test_0.py __________________________\nImportError while importing test module '/mnt/nvme/playground/matze__pkgconfig-70_matze_pkgconfig_480e92f4c34dd0bc1ef243f23cdd98c1f2905ac6/pkgconfig/test_0.py'.\nHint: make sure your test modules/packages have valid Python names.\nTraceback:\n../../../miniforge3/envs/swedev_matze__pkgconfig-70/lib/python3.11/importlib/__init__.py:126: in import_module\n return _bootstrap._gcd_import(name[level:], package, level)\ntest_0.py:3: in \n from pkgconfig.pkgconfig import Config\nE ImportError: cannot import name 'Config' from 'pkgconfig.pkgconfig' (/mnt/nvme/playground/matze__pkgconfig-70_matze_pkgconfig_480e92f4c34dd0bc1ef243f23cdd98c1f2905ac6/pkgconfig/pkgconfig/pkgconfig.py)\n=========================== short test summary info ============================\nERROR test_0.py\n!!!!!!!!!!!!!!!!!!!! Interrupted: 1 error during collection !!!!!!!!!!!!!!!!!!!!\n=============================== 1 error in 0.09s ===============================\n", "stderr": "", "exit_code": 0}} 343 | ``` 344 | ## Result 345 | ``` 346 | pkgconfig.pkgconfig.Config 347 | ``` 348 | 349 | ## Error Message 350 | ``` 351 | "output": {{"stdout": "============================= test session starts ==============================\ncollecting ... collected 0 items / 1 error\n\n==================================== ERRORS ====================================\n__________________________ ERROR collecting example_module.py __________________________\nTypeError: example_function() missing 2 required positional arguments: 'param1' and 'param2'\n=========================== short test summary info ============================\nERROR example_module.py\n!!!!!!!!!!!!!!!!!!!! 
Interrupted: 1 error during collection !!!!!!!!!!!!!!!!!!!!\n=============================== 1 error in 0.04s ===============================\n", "stderr": "", "exit_code": 0}} 352 | ``` 353 | 354 | ## Result 355 | ``` 356 | example_module.example_function(param1, param2) 357 | ``` 358 | 359 | (Please note the API should **directly** raise the error!) 360 | (A reminder: for safety, never use `os.system` in the testcase code. If you need to operate on the system, use the setup commands!) 361 | # Task 362 | ## Error Message 363 | ``` 364 | {} 365 | ``` 366 | 367 | ## Result 368 | """ --------------------------------------------------------------------------------
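The templates above are plain `str.format` strings with positional `{}` placeholders (literal braces escaped as `{{`). As a minimal, hypothetical sketch (not code from this repository), a driver might fill a template like `EXTRACT_API_PROMPT` and then pull the fenced answer block out of the model's reply; the stub template and both helper names below are illustrative assumptions:

```python
import re

# Hypothetical stand-in for EXTRACT_API_PROMPT above; like the real
# templates, it is a plain string filled via str.format().
EXTRACT_API_PROMPT_STUB = "## Error Message\n```\n{}\n```\n\n## Result\n"

def build_extract_prompt(error_message: str) -> str:
    # Fill the single positional placeholder of the template.
    return EXTRACT_API_PROMPT_STUB.format(error_message)

def parse_api_signature(model_reply: str) -> str:
    # The prompt asks the model to wrap its answer in a bare ``` block,
    # so take the last fenced block in the reply and strip whitespace.
    # An empty result means no API signature was found.
    blocks = re.findall(r"```(?:\w*\n)?(.*?)```", model_reply, re.DOTALL)
    return blocks[-1].strip() if blocks else ""
```

Taking the *last* fenced block is a deliberate choice here: models often echo the error message (itself fenced) before giving the answer, and the result block comes at the end of the expected reply format.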