├── .semgrepignore
├── patchwork
    ├── __init__.py
    ├── __main__.py
    ├── steps
    │   ├── LLM
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── LLM.py
    │   ├── PR
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── typed.py
    │   ├── AgenticLLM
    │   │   ├── __init__.py
    │   │   ├── AgenticLLM.py
    │   │   └── typed.py
    │   ├── BrowserUse
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── CallAPI
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── CallAPI.py
    │   │   └── README.md
    │   ├── CallLLM
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── typed.py
    │   ├── CallSQL
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── CallSQL.py
    │   ├── CallShell
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── CallShell.py
    │   ├── Combine
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── Combine.py
    │   ├── CreatePR
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── FileAgent
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── FixIssue
    │   │   └── __init__.py
    │   ├── JoinList
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── JoinList.py
    │   ├── ModifyCode
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── typed.py
    │   ├── PreparePR
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ReadEmail
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── ReadFile
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── ReadFile.py
    │   │   └── README.md
    │   ├── ReadIssues
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── ReadIssues.py
    │   ├── ReadPRs
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── SendEmail
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── AgenticLLMV2
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── AgenticLLMV2.py
    │   ├── AnalyzeImpact
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── CallCode2Prompt
    │   │   ├── __init__.py
    │   │   ├── TestCallCode2Prompt.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── CommitChanges
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── CreateIssue
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── CreateIssue.py
    │   ├── CreatePRComment
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   ├── typed.py
    │   │   └── CreatePRComment.py
    │   ├── DatabaseAgent
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── DatabaseAgent.py
    │   ├── ExtractCode
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ExtractDiff
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── GitHubAgent
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── GitHubAgent.py
    │   ├── ModifyCodeOnce
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── ModifyCodeOnce.py
    │   ├── PreparePrompt
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ReadPRDiffs
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   ├── typed.py
    │   │   └── ReadPRDiffs.py
    │   ├── ScanDepscan
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ScanSemgrep
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── ScanSemgrep.py
    │   ├── SimplifiedLLM
    │   │   ├── __init__.py
    │   │   └── README.md
    │   ├── SlackMessage
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ZohoDeskAgent
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── CreateIssueComment
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   ├── typed.py
    │   │   └── CreateIssueComment.py
    │   ├── ExtractCodeContexts
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ExtractModelResponse
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── FilterBySimilarity
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   ├── README.md
    │   │   └── FilterBySimilarity.py
    │   ├── ManageEngineAgent
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── SimplifiedLLMOnce
    │   │   ├── __init__.py
    │   │   ├── SimplifiedLLMOnce.py
    │   │   └── typed.py
    │   ├── ExtractPackageManagerFile
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── ExtractCodeMethodForCommentContexts
    │   │   ├── __init__.py
    │   │   ├── typed.py
    │   │   └── README.md
    │   ├── SlackAgent
    │   │   ├── __init__.py
    │   │   └── typed.py
    │   ├── ScanSonar
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   ├── typed.py
    │   │   └── ScanSonar.py
    │   └── README.md
    ├── common
    │   ├── client
    │   │   ├── __init__.py
    │   │   └── llm
    │   │   │   └── __init__.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── dependency.py
    │   │   └── user_config.py
    │   ├── context_strategy
    │   │   ├── __init__.py
    │   │   ├── kotlin.py
    │   │   ├── position.py
    │   │   ├── java.py
    │   │   └── cpp.py
    │   ├── multiturn_strategy
    │   │   └── __init__.py
    │   ├── constants.py
    │   ├── tools
    │   │   ├── __init__.py
    │   │   ├── agentic_tools.py
    │   │   ├── db_query_tool.py
    │   │   ├── github_tool.py
    │   │   ├── git_tool.py
    │   │   └── bash_tool.py
    │   └── server.py
    ├── patchflows
    │   ├── PRReview
    │   │   ├── __init__.py
    │   │   ├── defaults.yml
    │   │   └── pr_review_prompt.json
    │   ├── GenerateDiagram
    │   │   ├── __init__.py
    │   │   ├── defaults.yml
    │   │   ├── README.md
    │   │   ├── default_prompt.json
    │   │   └── GenerateDiagram.py
    │   ├── ResolveIssue
    │   │   ├── __init__.py
    │   │   ├── defaults.yml
    │   │   └── prompt.json
    │   ├── GenerateDocstring
    │   │   ├── __init__.py
    │   │   ├── prompt.json
    │   │   └── defaults.yml
    │   ├── GenerateUnitTests
    │   │   ├── __init__.py
    │   │   ├── default_prompt.json
    │   │   ├── defaults.yml
    │   │   └── GenerateUnitTests.py
    │   ├── GenerateCodeUsageExample
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   ├── default_prompt.json
    │   │   ├── defaults.yml
    │   │   └── GenerateCodeUsageExample.py
    │   ├── LogAnalysis
    │   │   └── defaults.yml
    │   ├── GenerateREADME
    │   │   ├── generate_readme_prompt.json
    │   │   └── defaults.yml
    │   ├── DependencyUpgrade
    │   │   └── defaults.yml
    │   ├── __init__.py
    │   ├── AutoFix
    │   │   ├── defaults.yml
    │   │   └── default_prompt.json
    │   ├── SonarFix
    │   │   ├── defaults.yml
    │   │   └── default_prompt.json
    │   └── README.md
    ├── managed_files.py
    └── patchflow.py
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── config.yml
    │   ├── 2-request-feature.yml
    │   └── 1-report-bug.yml
    └── PULL_REQUEST_TEMPLATE.md
├── poetry.toml
├── patchwork-banner.gif
├── tests
    ├── cicd
    │   └── generate_docstring
    │   │   ├── java_test_file.java
    │   │   ├── js_test_file.py.js
    │   │   ├── python_test_file.py
    │   │   ├── kotlin_test_file.kt
    │   │   └── cpp_test_file.cpp
    ├── common
    │   └── context_strategy
    │   │   └── test_java.py
    └── steps
    │   ├── test_ReadIssues.py
    │   ├── test_ExtractModelResponse.py
    │   ├── test_CreateIssue.py
    │   ├── test_ScanSonar.py
    │   ├── test_PreparePR.py
    │   ├── test_ReadPRDiffs.py
    │   ├── test_ScanSemgrep.py
    │   ├── test_ScanDepscan.py
    │   └── test_CallAPI.py
├── INSTALL.md
└── style.md


/.semgrepignore:
--------------------------------------------------------------------------------
1 | .github/


--------------------------------------------------------------------------------
/patchwork/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/__main__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/LLM/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/PR/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/common/client/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/common/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/AgenticLLM/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/BrowserUse/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallAPI/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallLLM/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallSQL/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallShell/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/Combine/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreatePR/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/FileAgent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/FixIssue/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/JoinList/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/PreparePR/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadEmail/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadFile/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadIssues/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/SendEmail/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/common/client/llm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/PRReview/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/AgenticLLMV2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/AnalyzeImpact/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallCode2Prompt/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CommitChanges/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssue/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreatePRComment/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/DatabaseAgent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractDiff/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/GitHubAgent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCodeOnce/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/PreparePrompt/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRDiffs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanDepscan/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSemgrep/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/SimplifiedLLM/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/SlackMessage/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ZohoDeskAgent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/common/context_strategy/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/common/multiturn_strategy/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDiagram/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/ResolveIssue/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssueComment/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCodeContexts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractModelResponse/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/FilterBySimilarity/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ManageEngineAgent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/SimplifiedLLMOnce/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDocstring/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateUnitTests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractPackageManagerFile/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateCodeUsageExample/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/LogAnalysis/defaults.yml:
--------------------------------------------------------------------------------
1 | query: ""


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCodeMethodForCommentContexts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 | prefer-active-python = true


--------------------------------------------------------------------------------
/patchwork-banner.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patched-codes/patchwork/HEAD/patchwork-banner.gif


--------------------------------------------------------------------------------
/patchwork/steps/SlackAgent/__init__.py:
--------------------------------------------------------------------------------
1 | from .SlackAgent import SlackAgent
2 | 
3 | __all__ = ["SlackAgent"] 
4 | 


--------------------------------------------------------------------------------
/patchwork/common/constants.py:
--------------------------------------------------------------------------------
1 | TOKEN_URL = "https://app.patched.codes/signin"  # sign-in page URL; presumably where users obtain their token — TODO confirm
2 | DEFAULT_PATCH_URL = "https://patchwork.patched.codes/v1"  # default v1 endpoint base URL; NOTE(review): presumably the hosted Patched API — confirm
3 | PROMPT_TEMPLATE_FILE_KEY = "prompt_template_file"  # key string; presumably looked up in a step's inputs — verify against callers
4 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSonar/__init__.py:
--------------------------------------------------------------------------------
1 | from .ScanSonar import ScanSonar
2 | from .typed import ScanSonarInputs, ScanSonarOutputs, SonarVulnerability
3 | 
4 | __all__ = ["ScanSonar", "ScanSonarInputs", "ScanSonarOutputs", "SonarVulnerability"]
5 | 


--------------------------------------------------------------------------------
/patchwork/managed_files.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import click
 4 | 
 5 | HOME_FOLDER = Path(click.get_app_dir("Patched", force_posix=True))  # per-user application directory for "Patched", as resolved by click
 6 | __LOG_NAME = "patched.log"  # file name only; combined with HOME_FOLDER below
 7 | LOG_FILE = HOME_FOLDER / __LOG_NAME  # full path of the log file inside HOME_FOLDER
 8 | __CONFIG_NAME = "config.json"  # file name only; combined with HOME_FOLDER below
 9 | CONFIG_FILE = HOME_FOLDER / __CONFIG_NAME  # full path of the config file inside HOME_FOLDER
10 | 


--------------------------------------------------------------------------------
/patchwork/steps/Combine/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Dict, List, TypedDict, Union
 2 | 
 3 | 
 4 | class CombineInputs(TypedDict):  # Inputs for the Combine step; both keys are required.
 5 |     base_json: Union[List[Dict], Dict]  # either a dict or a list of dicts
 6 |     update_json: Union[List[Dict], Dict]  # either a dict or a list of dicts; presumably applied on top of base_json — TODO confirm
 7 | 
 8 | 
 9 | class CombineOutputs(TypedDict):  # Outputs of the Combine step.
10 |     result_json: Union[List[Dict], Dict]  # same dict-or-list-of-dicts shape as the inputs
11 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadFile/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ReadFileInputs(TypedDict):  # Inputs for the ReadFile step.
 7 |     file_path: Annotated[str, StepTypeConfig(is_path=True)]  # flagged is_path in the step type config; presumably the file to read
 8 | 
 9 | 
10 | class ReadFileOutputs(TypedDict):  # Outputs of the ReadFile step; both keys are required.
11 |     file_path: str
12 |     file_content: str  # presumably the text read from file_path — TODO confirm
13 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanDepscan/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ScanDepscanInputs(TypedDict, total=False):  # Inputs for the ScanDepscan step; total=False makes every key optional.
 7 |     language: Annotated[str, StepTypeConfig(is_config=True)]  # flagged is_config; presumably the language to scan for — TODO confirm
 8 | 
 9 | 
10 | class ScanDepscanOutputs(TypedDict):  # Outputs of the ScanDepscan step.
11 |     sbom_vdr_values: dict  # untyped dict; NOTE(review): presumably the parsed SBOM/VDR report — verify against the step
12 | 


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCodeOnce/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import TypedDict
 2 | 
 3 | 
 4 | class __ModifyCodePBRequiredInputs(TypedDict):  # Required keys; ModifyCodeOnceInputs inherits from this class.
 5 |     file_path: str
 6 |     new_code: str
 7 | 
 8 | 
 9 | class ModifyCodeOnceInputs(__ModifyCodePBRequiredInputs, total=False):  # Required keys from the base class plus the optional keys below (total=False).
10 |     start_line: int  # optional key
11 |     end_line: int  # optional key
12 | 
13 | 
14 | class ModifyCodeOnceOutputs(TypedDict):  # Outputs of the ModifyCodeOnce step; all keys are required.
15 |     path: str
16 |     start_line: int
17 |     end_line: int
18 |     diff: str  # presumably a textual diff of the change — TODO confirm format
19 | 


--------------------------------------------------------------------------------
/tests/cicd/generate_docstring/java_test_file.java:
--------------------------------------------------------------------------------
 1 | class Test {  // Fixture class holding two overloads of a_plus_b.
 2 |     public static int a_plus_b(Integer a, Integer b) {  // Returns the sum of a and b.
 3 |         return a + b;
 4 |     }
 5 | 
 6 |     public static int a_plus_b(Function<Object, Comparable> keymap, object a, Object b) {  // Three-way compare: -1, 1, or 0. NOTE(review): lowercase `object` is not a Java type (likely `Object`) — confirm whether this fixture is meant to compile.
 7 |         if (keymap(a) < keymap(b)) {
 8 |             return -1;
 9 |         } else if (keymap(a) > keymap(b)) {
10 |             return 1;
11 |         } else {
12 |             return 0;
13 |         }
14 |     }
15 | }


--------------------------------------------------------------------------------
/patchwork/steps/JoinList/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __JoinListRequiredInputs(TypedDict):  # Required keys; JoinListInputs inherits from this class.
 7 |     list: List[Dict]  # list of dicts; the key is literally named "list"
 8 |     delimiter: Annotated[str, StepTypeConfig(is_config=True)]  # flagged is_config in the step type config
 9 | 
10 | 
11 | class JoinListInputs(__JoinListRequiredInputs, total=False):  # Required keys from the base class plus the optional `key` (total=False).
12 |     key: str  # optional; presumably selects which entry of each dict is joined — TODO confirm
13 | 
14 | 
15 | class JoinListOutputs(TypedDict):  # Outputs of the JoinList step.
16 |     text: str  # presumably the joined string — TODO confirm
17 | 


--------------------------------------------------------------------------------
/tests/cicd/generate_docstring/js_test_file.py.js:
--------------------------------------------------------------------------------
 1 | 
 2 | function a_plus_b(a, b) {  // Returns a + b.
 3 |     return a + b;
 4 | }
 5 | 
 6 | const compare = function (keymap, a, b) {  // Three-way comparison of a[keymap] and b[keymap]: returns -1, 1, or 0.
 7 |     if (a[keymap] < b[keymap]) {
 8 |         return -1;
 9 |     } else if (a[keymap] > b[keymap]) {
10 |         return 1;
11 |     } else {
12 |         return 0;
13 |     }
14 | }
15 | 
16 | const sqlite = (db, query, callback) => {  // Calls db.each(query, callback) from inside a db.serialize block.
17 |     db.serialize(function () {
18 |         db.each(query, callback);  // callback is passed through unchanged
19 |     });
20 | }


--------------------------------------------------------------------------------
/patchwork/steps/LLM/README.md:
--------------------------------------------------------------------------------
1 | # LLM Step Implementation
2 | 
3 | ## Inputs
4 | - The `LLM` class receives the inputs required to run the LLM step.
5 | - These inputs include the parameters used for preparing prompts, calling the LLM, and extracting model responses.
6 | 
7 | ## Outputs
8 | - The `LLM` class has a `run` method that orchestrates preparing prompts, calling the LLM, and extracting model responses.
9 | - The outputs include the prompt data, the LLM responses, and the responses extracted from the model.


--------------------------------------------------------------------------------
/patchwork/steps/ScanSemgrep/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ScanSemgrepInputs(TypedDict, total=False):  # Inputs for the ScanSemgrep step; total=False makes every key optional.
 7 |     sarif_file_path: Annotated[str, StepTypeConfig(is_config=True, is_path=True)]  # flagged both is_config and is_path
 8 |     sarif_values: str  # NOTE(review): str here, while ScanSemgrepOutputs.sarif_values is a dict — confirm this is intended
 9 |     semgrep_extra_args: Annotated[str, StepTypeConfig(is_config=True)]  # flagged is_config
10 |     paths: str
11 |     path_key: str
12 | 
13 | 
14 | class ScanSemgrepOutputs(TypedDict):  # Outputs of the ScanSemgrep step.
15 |     sarif_values: dict  # untyped dict; presumably SARIF-formatted results — TODO confirm
16 | 


--------------------------------------------------------------------------------
/patchwork/common/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.tools.api_tool import APIRequestTool
 2 | from patchwork.common.tools.bash_tool import BashTool
 3 | from patchwork.common.tools.code_edit_tools import CodeEditTool, FileViewTool
 4 | from patchwork.common.tools.grep_tool import FindTextTool, FindTool
 5 | from patchwork.common.tools.tool import Tool
 6 | 
 7 | __all__ = [
 8 |     "Tool",
 9 |     "CodeEditTool",
10 |     "BashTool",
11 |     "FileViewTool",
12 |     "FindTool",
13 |     "FindTextTool",
14 |     "APIRequestTool",
15 | ]
16 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallLLM/README.md:
--------------------------------------------------------------------------------
1 | ## Inputs
2 | - The code takes `inputs`, a dictionary containing the keys `openai_api_key` and `prompt_file`.
3 | - It also expects `inputs` to contain `model`, `model_`-prefixed keys for model arguments, and `client_`-prefixed keys for client arguments.
4 | 
5 | ## Outputs
6 | - The code runs the OpenAI model based on the provided inputs and generates responses for the given prompts.
7 | - It returns a dictionary containing the path to the new response file and the list of OpenAI responses generated.


--------------------------------------------------------------------------------
/patchwork/steps/CallShell/typed.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing_extensions import Annotated, Any, TypedDict
 4 | 
 5 | from patchwork.common.utils.step_typing import StepTypeConfig
 6 | 
 7 | 
 8 | class __RequiredCallShellInputs(TypedDict):  # Required key; CallShellInputs inherits from this class.
 9 |     script: str
10 | 
11 | 
12 | class CallShellInputs(__RequiredCallShellInputs, total=False):  # Required `script` from the base class plus the optional keys below (total=False).
13 |     working_dir: Annotated[str, StepTypeConfig(is_path=True)]  # flagged is_path in the step type config
14 |     env: str  # NOTE(review): typed as a single str rather than a mapping — confirm the expected encoding
15 |     script_template_values: dict[str, Any]  # presumably values substituted into `script` — TODO confirm
16 | 
17 | 
18 | class CallShellOutputs(TypedDict):  # Outputs of the CallShell step.
19 |     stdout_output: str  # presumably the script's captured standard output — TODO confirm
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCodeMethodForCommentContexts/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ExtractCodeMethodForCommentContextsInputs(TypedDict):  # Inputs for the ExtractCodeMethodForCommentContexts step.
 7 |     base_path: Annotated[str, StepTypeConfig(is_path=True)]  # flagged is_path in the step type config
 8 | 
 9 | 
10 | class ExtractCodeMethodForCommentContextsOutputs(TypedDict):  # Outputs of the ExtractCodeMethodForCommentContexts step.
11 |     files_to_patch: List["ExtractedCode"]  # string forward reference: ExtractedCode is defined later in this module
12 | 
13 | 
14 | class ExtractedCode(TypedDict):  # One extracted code span; element type of files_to_patch above.
15 |     uri: str
16 |     startLine: int  # NOTE(review): whether line numbers are 0- or 1-based is not visible here
17 |     endLine: int  # NOTE(review): inclusive vs. exclusive end is not visible here
18 |     affectedCode: str
19 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractModelResponse/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ExtractModelResponseRequiredInputs(TypedDict):
 7 |     openai_responses: List[str]
 8 | 
 9 | 
10 | class ExtractModelResponseInputs(__ExtractModelResponseRequiredInputs, total=False):
11 |     response_partitions: Annotated[Dict[str, List[str]], StepTypeConfig(is_config=True)]
12 | 
13 | 
14 | class ExtractModelResponseOutputs(TypedDict):
15 |     extracted_responses: List[Dict[str, str]]
16 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDocstring/prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "id": "generate_docstring",
 4 |     "prompts": [
 5 |       {
 6 |         "role": "system",
 7 |         "content": "You are a senior software engineer who is the best in the world at writing documentation. Users will give you a code snippet and you will generate a docstring based on the code snippet. \nOnly respond with the docstring.\n\nPlease provide a response only in the following format:\n\nDocumentation:\n```\n{{commentFormat}}\n```"
 8 |       },
 9 |       {"role": "user", "content": "{{affectedCode}}"}
10 |     ]
11 |   }
12 | ]


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateREADME/generate_readme_prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "id": "generateREADME",
 4 |     "prompts": [
 5 |       {
 6 |         "role": "system",
 7 |         "content": "Summarize the contents of the given code for documentation purposes in markdown. Make sure there is a heading, as well as clear sections for inputs and outputs. Consider both what the code does and how it is likely to be used by someone. Use markdown formatting to make it look nice."
 8 |       },
 9 |       {
10 |         "role": "user",
11 |         "content": "{{fullContent}}"
12 |       }
13 |     ]
14 |   }
15 | ]


--------------------------------------------------------------------------------
/tests/cicd/generate_docstring/python_test_file.py:
--------------------------------------------------------------------------------
 1 | # fmt: off
 2 | def a_plus_b(a, b):
 3 |     return a + b
 4 | 
 5 | 
 6 | def sqlite(db, query):
 7 |     cursor = db.cursor()
 8 |     cursor.execute(query)
 9 |     return cursor.fetchall()
10 | 
11 | 
12 | def compare(key_map, item1, item2):
13 |     if key_map(item1) < key_map(item2):
14 |         return -1
15 |     elif key_map(item1) > key_map(item2):
16 |         return 1
17 |     else:
18 |         return 0
19 | 
20 | 
21 | def random_alphabets(
22 |         length: int
23 | ):
24 |     return ''.join(random.choices(string.ascii_letters, k=length))
25 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/2-request-feature.yml:
--------------------------------------------------------------------------------
 1 | name: 'Feature Request'
 2 | description: Suggest a feature for patchwork
 3 | 
 4 | body:
 5 | 
 6 |   - type: textarea
 7 |     id: description
 8 |     attributes:
 9 |       label: Description
10 |     validations:
11 |       required: true
12 | 
13 |   - type: textarea
14 |     id: proposed-solution
15 |     attributes:
16 |       label: Proposed solution
17 |     validations:
18 |       required: true
19 | 
20 |   - type: textarea
21 |     id: alternatives-considered
22 |     attributes:
23 |       label: Alternatives considered
24 |     validations:
25 |       required: true


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCodeContexts/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ExtractCodeContextsInputs(TypedDict, total=False):
 7 |     base_path: Annotated[str, StepTypeConfig(is_path=True)]
 8 |     context_grouping: Annotated[str, StepTypeConfig(is_config=True)]
 9 | 
10 | 
11 | class ExtractCodeContextsOutputs(TypedDict):
12 |     files_to_patch: List["ExtractedCode"]
13 | 
14 | 
15 | class ExtractedCode(TypedDict):
16 |     uri: str
17 |     startLine: int
18 |     endLine: int
19 |     affectedCode: str
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssueComment/README.md:
--------------------------------------------------------------------------------
 1 | # Documentation: CreateIssueComment Step
 2 | 
 3 | ## Inputs
 4 | 
 5 | 1. `inputs`: A dictionary containing the following keys:
 6 |     - `issue_url`: The URL of the issue to add a comment to.
 7 |     - `issue_text`: The text content of the comment to be added.
 8 |     - `github_api_key` or `gitlab_api_key`: The API key for the corresponding SCM platform.
 9 |     - `scm_url` (optional): The URL of the SCM platform.
10 | 
11 | ## Outputs
12 | 
13 | - `run()`: A method that runs the step to create a comment on the specified issue. It returns a dictionary with the URL of the created comment.


--------------------------------------------------------------------------------
/patchwork/steps/AnalyzeImpact/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import List, TypedDict
 2 | 
 3 | 
 4 | class AnalyzeImpactInputs(TypedDict):
 5 |     extracted_responses: List["AnalyzeImpactExtractedResponse"]
 6 |     library_name: str
 7 |     platform_type: str
 8 | 
 9 | 
10 | class AnalyzeImpactExtractedResponse(TypedDict):
11 |     impacted_methods: str
12 | 
13 | 
14 | class AnalyzeImpactOutputs(TypedDict):
15 |     files_to_patch: List["AnalyzeImpactImpact"]
16 | 
17 | 
18 | class AnalyzeImpactImpact(TypedDict):
19 |     startLine: int
20 |     endLine: int
21 |     uri: str
22 |     previousCode: str
23 |     methodInfoList: str
24 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreatePRComment/README.md:
--------------------------------------------------------------------------------
1 | The given code includes a Python class `CreatePRComment` that is a step used in a larger patchwork environment for dealing with pull requests. This step takes inputs related to a pull request URL, comments to be added to the pull request, and API keys for GitHub or GitLab. The `run` method of this class resets comments on the pull request (if specified) and creates new comments based on the input data. It utilizes a logging mechanism and specific clients for GitHub and GitLab access. The step is likely to be used in a workflow to automate the process of adding comments to pull requests based on specified criteria.


--------------------------------------------------------------------------------
/patchwork/steps/CallSQL/typed.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing_extensions import Any, TypedDict
 4 | 
 5 | 
 6 | class __RequiredCallSQLInputs(TypedDict):
 7 |     db_dialect: str
 8 |     db_query: str
 9 | 
10 | 
11 | class CallSQLInputs(__RequiredCallSQLInputs, total=False):
12 |     db_driver: str
13 |     db_username: str
14 |     db_password: str
15 |     db_host: str
16 |     db_port: int
17 |     db_name: str
18 |     db_params: dict[str, Any]
19 |     db_driver_args: dict[str, Any]
20 |     db_query_template_values: dict[str, Any]
21 | 
22 | 
23 | class CallSQLOutputs(TypedDict):
24 |     results: list[dict[str, Any]]
25 | 


--------------------------------------------------------------------------------
/patchwork/steps/FilterBySimilarity/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __FilterBySimilarityRequiredInputs(TypedDict):
 7 |     list: List[Dict]
 8 |     keywords: Annotated[str, StepTypeConfig(is_config=True)]
 9 | 
10 | 
11 | class FilterBySimilarityInputs(__FilterBySimilarityRequiredInputs, total=False):
12 |     keys: Annotated[str, StepTypeConfig(is_config=True)]
13 |     top_k: Annotated[int, StepTypeConfig(is_config=True)]
14 | 
15 | 
16 | class FilterBySimilarityOutputs(TypedDict):
17 |     result_list: List[Dict]
18 | 


--------------------------------------------------------------------------------
/patchwork/steps/FileAgent/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ReconcilationAgentRequiredInputs(TypedDict):
 7 |     task: str
 8 | 
 9 | 
10 | class FileAgentInputs(__ReconcilationAgentRequiredInputs, total=False):
11 |     base_path: str
12 |     prompt_value: Dict[str, Any]
13 |     max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)]
14 |     anthropic_api_key: Annotated[str, StepTypeConfig(is_config=True)]
15 | 
16 | 
17 | class FileAgentOutputs(TypedDict):
18 |     request_tokens: int
19 |     response_tokens: int
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallAPI/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, Literal, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CallAPIRequiredInputs(TypedDict):
 7 |     url: str
 8 |     method: Annotated[Literal["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD"], StepTypeConfig(is_config=True)]
 9 | 
10 | 
11 | class CallAPIInputs(__CallAPIRequiredInputs, total=False):
12 |     headers: Annotated[Dict[str, str], StepTypeConfig(is_config=True)]
13 |     body: Dict[str, Any]
14 | 
15 | 
16 | class CallAPIOutputs(TypedDict):
17 |     status_code: int
18 |     headers: Dict[str, str]
19 |     body: str
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/JoinList/README.md:
--------------------------------------------------------------------------------
 1 | # Documentation for Patchwork JoinList Step
 2 | 
 3 | ## Inputs
 4 | 
 5 | ### `JoinList.py`
 6 | - Accepts input data containing a list and a delimiter.
 7 | - Checks for missing required keys and raises a `ValueError` if any are missing.
 8 | 
 9 | ### `typed.py`
10 | - Defines the data types expected for the input list and delimiter.
11 | - Input list is expected to be a list of strings.
12 | - Delimiter is expected to be a string annotated with configuration information.
13 | 
14 | ## Outputs
15 | 
16 | ### `JoinList.py`
17 | - Combines the list elements using the provided delimiter.
18 | - Returns a dictionary with the concatenated text as the output.


--------------------------------------------------------------------------------
/patchwork/steps/PreparePR/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __PreparePRRequiredInputs(TypedDict):
 7 |     modified_code_files: List["ModifiedCodeFile"]
 8 | 
 9 | 
10 | class PreparePRInputs(__PreparePRRequiredInputs, total=False):
11 |     pr_header: Annotated[str, StepTypeConfig(is_config=True)]
12 |     issue_url: Annotated[str, StepTypeConfig(is_config=True)]
13 | 
14 | 
15 | class PreparePROutputs(TypedDict):
16 |     pr_body: str
17 | 
18 | 
19 | class ModifiedCodeFile(TypedDict, total=False):
20 |     path: str
21 |     start_line: int
22 |     end_line: int
23 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateCodeUsageExample/README.md:
--------------------------------------------------------------------------------
 1 | ## Contents of GenerateUsageExample Code
 2 | 
 3 | ### Inputs
 4 | - The code reads default inputs from a YAML file (`defaults.yml`) and a JSON file (`default_prompt.json`).
 5 | - The code takes user inputs and updates the default inputs accordingly.
 6 | - The code expects inputs like `folder_path`, `prompt_template_file`, `test_file_extension`, etc.
 7 | 
 8 | ### Outputs
 9 | - The code generates a usage example based on the provided inputs.
10 | - It utilizes other steps like `CallCode2Prompt`, `ModifyCode`, and `PR` to create and process the example.
11 | - The final output includes modified code files and information for creating a pull request.
12 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssue/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CreateIssueRequiredInputs(TypedDict):
 7 |     issue_text: str
 8 |     issue_title: Annotated[str, StepTypeConfig(is_config=True)]
 9 | 
10 | 
11 | class CreateIssueInputs(__CreateIssueRequiredInputs, total=False):
12 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
13 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["github_api_key"])]
14 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key"])]
15 | 
16 | 
17 | class CreateIssueOutputs(TypedDict):
18 |     issue_url: str
19 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssueComment/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CreateIssueCommentRequiredInputs(TypedDict):
 7 |     issue_text: str
 8 |     issue_url: str
 9 | 
10 | 
11 | class CreateIssueCommentInputs(__CreateIssueCommentRequiredInputs, total=False):
12 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
13 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["github_api_key"])]
14 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key"])]
15 | 
16 | 
17 | class CreateIssueCommentOutputs(TypedDict):
18 |     issue_comment_url: str
19 | 


--------------------------------------------------------------------------------
/patchwork/steps/PreparePrompt/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __PreparePromptRequiredInputs(TypedDict):
 7 |     prompt_template_file: Annotated[str, StepTypeConfig(is_config=True)]
 8 |     prompt_id: Annotated[str, StepTypeConfig(is_config=True)]
 9 | 
10 | 
11 | class PreparePromptInputs(__PreparePromptRequiredInputs, total=False):
12 |     prompt_value_file: Annotated[str, StepTypeConfig(or_op=["prompt_values"])]
13 |     prompt_values: Annotated[List[Dict[str, Any]], StepTypeConfig(or_op=["prompt_value_file"])]
14 | 
15 | 
16 | class PreparePromptOutputs(TypedDict):
17 |     prompts: List[Dict]
18 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadEmail/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ReadEmailRequiredInputs(TypedDict):
 7 |     eml_file_path: Annotated[str, StepTypeConfig(is_path=True)]
 8 | 
 9 | 
10 | class ReadEmailInputs(__ReadEmailRequiredInputs, total=False):
11 |     base_path: Annotated[str, StepTypeConfig(is_path=True)]
12 | 
13 | 
14 | class Attachment(TypedDict):
15 |     path: str
16 | 
17 | 
18 | class ReadEmailOutputs(TypedDict):
19 |     subject: str
20 |     datetime: str
21 |     from_: str  # the actual output key is "from"; it is spelled "from_" here because "from" is a reserved keyword in Python
22 |     body: str
23 |     message_id: str
24 |     attachments: List[Attachment]
25 | 


--------------------------------------------------------------------------------
/patchwork/steps/SendEmail/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __SendEmailRequiredInputs(TypedDict):
 7 |     sender_email: str
 8 |     recipient_email: str
 9 |     smtp_username: str
10 |     smtp_password: str
11 | 
12 | 
13 | class SendEmailInputs(__SendEmailRequiredInputs, total=False):
14 |     email_template_value: dict[str, Any]
15 |     subject: str
16 |     body: str
17 |     smtp_host: str
18 |     smtp_port: int
19 |     reply_message_id: str
20 |     is_smtp_ssl: str
21 |     reply_eml_file_path: Annotated[str, StepTypeConfig(is_path=True)]
22 | 
23 | 
24 | class SendEmailOutputs(TypedDict):
25 |     pass
26 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallCode2Prompt/TestCallCode2Prompt.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from pathlib import Path
 3 | 
 4 | from patchwork.steps.CallCode2Prompt.CallCode2Prompt import CallCode2Prompt
 5 | 
 6 | 
 7 | class TestCallCode2Prompt(unittest.TestCase):
 8 |     def test_run(self):
 9 |         inputs = {}
10 |         folder_path = Path.cwd()
11 |         inputs["folder_path"] = folder_path
12 |         result = CallCode2Prompt(inputs).run()
13 |         prompt_content_md = result.get("prompt_content_md")
14 | 
15 |         # Check that prompt_content_md is not None and not an empty string
16 |         self.assertTrue(prompt_content_md, "The markdown content should not be empty.")
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     unittest.main()
21 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallCode2Prompt/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CallCode2PromptRequiredInputs(TypedDict):
 7 |     folder_path: Annotated[str, StepTypeConfig(is_config=True, is_path=True)]
 8 | 
 9 | 
10 | class CallCode2PromptInputs(__CallCode2PromptRequiredInputs, total=False):
11 |     filter: Annotated[str, StepTypeConfig(is_config=True)]
12 |     suppress_comments: Annotated[bool, StepTypeConfig(is_config=True)]
13 |     markdown_file_name: Annotated[str, StepTypeConfig(is_config=True)]
14 |     code2prompt_modes: str
15 | 
16 | 
17 | class CallCode2PromptOutputs(TypedDict):
18 |     uri: str
19 |     fullContent: str
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSemgrep/README.md:
--------------------------------------------------------------------------------
 1 | # Code Documentation
 2 | 
 3 | ## Input
 4 | - This code is part of a software package called Patchwork and specifically deals with the `ScanSemgrep` step.
 5 | - The `ScanSemgrep` class has an `__init__` function that takes `inputs` as a dictionary, but it does not use this input in the provided code snippet.
 6 | 
 7 | ## Output
 8 | - The `ScanSemgrep` class has a `run` function that executes the Semgrep tool to analyze Python code.
 9 | - It generates a SARIF-formatted file containing the scan results.
10 | - The results are saved to a temporary file, and the path to this file is returned as a dictionary in the `run` function output.
11 | - Logging information is provided at the start and end of the scan process.


--------------------------------------------------------------------------------
/patchwork/steps/SlackMessage/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __SlackMessageRequiredInputs(TypedDict):
 7 |     slack_channel: Annotated[str, StepTypeConfig(is_config=True)]
 8 |     slack_token: Annotated[str, StepTypeConfig(is_config=True)]
 9 | 
10 | 
11 | class SlackMessageInputs(__SlackMessageRequiredInputs, total=False):
12 |     slack_message_template_file: Annotated[str, StepTypeConfig(is_config=True)]
13 |     slack_message_template: Annotated[str, StepTypeConfig(is_config=True)]
14 |     slack_message_values: Dict[str, str]
15 | 
16 | 
17 | class SlackMessageOutputs(TypedDict):
18 |     is_slack_message_sent: bool
19 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDiagram/defaults.yml:
--------------------------------------------------------------------------------
 1 | # CallLLM Inputs
 2 | # openai_api_key: required-for-chatgpt
 3 | # google_api_key: required-for-gemini
 4 | # model: gpt-4o
 5 | # client_base_url: https://api.openai.com/v1
 6 | # Example HF model
 7 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
 8 | # model: codellama/CodeLlama-70b-Instruct-hf
 9 | # model_temperature: 0.2
10 | # model_top_p: 0.95
11 | # model_max_tokens: 2000
12 | 
13 | base_path: diagram.md
14 | folder_path: .
15 | # CommitChanges Inputs
16 | disable_branch: false
17 | 
18 | # CreatePR Inputs
19 | disable_pr: false
20 | force_pr_creation: true
21 | # github_api_key: required-for-github-scm
22 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/steps/ReadIssues/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ReadIssuesRequiredInputs(TypedDict):
 7 |     issue_url: str
 8 | 
 9 | 
10 | class ReadIssuesInputs(__ReadIssuesRequiredInputs, total=False):
11 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
12 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["github_api_key"])]
13 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key"])]
14 | 
15 | 
16 | class ReadIssuesOutputs(TypedDict):
17 |     issue_title: str
18 |     issue_body: str
19 |     issue_comments: List[str]
20 |     issue_description: str
21 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/PRReview/defaults.yml:
--------------------------------------------------------------------------------
 1 | # PRReview Inputs
 2 | 
 3 | # ReadPRDiffs Inputs
 4 | # github_api_key: required-for-github-scm
 5 | # gitlab_api_key: required-for-gitlab-scm
 6 | # pr_url: required
 7 | 
 8 | # PreparePrompt Inputs
 9 | # prompt_template_file: your-prompt-template-here
10 | 
11 | # CallLLM Inputs
12 | # openai_api_key: required-for-chatgpt
13 | # google_api_key: required-for-gemini
14 | model: claude-3-5-sonnet-latest
15 | # client_base_url: https://api.openai.com/v1
16 | # Example HF model
17 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
18 | # model: codellama/CodeLlama-70b-Instruct-hf
19 | # model_temperature: 0.2
20 | # model_top_p: 0.95
21 | # model_max_tokens: 2000
22 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadFile/ReadFile.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.utils.utils import open_with_chardet
 2 | from patchwork.step import Step
 3 | from patchwork.steps.ReadFile.typed import ReadFileInputs
 4 | 
 5 | 
 6 | class ReadFile(Step):
 7 |     def __init__(self, inputs):
 8 |         super().__init__(inputs)
 9 |         missing_keys = ReadFileInputs.__required_keys__.difference(inputs.keys())
10 |         if len(missing_keys) > 0:
11 |             raise ValueError(f"Missing required data: {missing_keys}")
12 | 
13 |         self.file = inputs["file_path"]
14 | 
15 |     def run(self):
16 |         with open_with_chardet(self.file, "r") as f:
17 |             file_contents = f.read()
18 | 
19 |         return dict(file_path=self.file, file_content=file_contents)
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRs/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | from patchwork.steps.ReadPRDiffs.typed import ReadPRDiffsOutputs
 5 | 
 6 | 
 7 | class __ReadPRsRequiredInputs(TypedDict):
 8 |     repo_slug: str
 9 | 
10 | 
11 | class ReadPRsInputs(__ReadPRsRequiredInputs, total=False):
12 |     pr_state: str
13 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
14 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["github_api_key"])]
15 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key"])]
16 | 
17 | 
18 | class ReadPRsOutputs(TypedDict):
19 |     pr_texts: List[ReadPRDiffsOutputs]
20 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDiagram/README.md:
--------------------------------------------------------------------------------
 1 | ## GenerateDiagram Code Overview
 2 | 
 3 | ### Inputs
 4 | - The code reads default inputs from a YAML file.
 5 | - It updates the default inputs with any additional inputs provided.
 6 | - It sets up various parameters like folder path, prompt template file, PR title, branch prefix, etc.
 7 | - It validates the inputs with specific steps required for the process.
 8 | 
 9 | ### Outputs
10 | - The code runs a series of steps to generate a system architecture diagram.
11 | - It utilizes classes like LLM, CallCode2Prompt, ModifyCode, and PR to process the inputs.
12 | - The final output is a set of inputs updated with the results of each step.
13 | - The code is designed to generate a pull request with the system architecture diagram.
14 | 


--------------------------------------------------------------------------------
/patchwork/steps/CommitChanges/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CommitChangesRequiredInputs(TypedDict):
 7 |     modified_code_files: List[Dict[str, Any]]
 8 | 
 9 | 
10 | class CommitChangesInputs(__CommitChangesRequiredInputs, total=False):
11 |     disable_branch: Annotated[bool, StepTypeConfig(is_config=True)]
12 |     force_branch_creation: Annotated[bool, StepTypeConfig(is_config=True)]
13 |     branch_prefix: Annotated[str, StepTypeConfig(is_config=True)]
14 |     branch_suffix: Annotated[str, StepTypeConfig(is_config=True)]
15 | 
16 | 
17 | class CommitChangesOutputs(TypedDict):
18 |     base_branch: str
19 |     target_branch: str
20 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractPackageManagerFile/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ExtractPackageManagerFileInputs(TypedDict, total=False):
 7 |     sbom_vdr_file_path: Annotated[str, StepTypeConfig(is_config=True, is_path=True, or_op=["sbom_vdr_values"])]
 8 |     sbom_vdr_values: Annotated[Dict, StepTypeConfig(or_op=["sbom_vdr_file_path"])]
 9 |     package_manager_file: Annotated[str, StepTypeConfig(is_config=True)]
10 |     upgrade_threshold: Annotated[str, StepTypeConfig(is_config=True)]
11 |     severity: Annotated[str, StepTypeConfig(is_config=True)]
12 | 
13 | 
14 | class ExtractPackageManagerFileOutputs(TypedDict):
15 |     files_to_patch: List[Dict]
16 | 


--------------------------------------------------------------------------------
/patchwork/common/tools/agentic_tools.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from patchwork.common.tools.tool import Tool
 4 | 
 5 | 
 6 | class EndTool(Tool, tool_name="end", abc_register=False):
 7 |     MESSAGE = "Ended"
 8 | 
 9 |     def __init__(self):
10 |         super().__init__()
11 | 
12 |     @property
13 |     def json_schema(self) -> dict:
14 |         return {
15 |             "name": "end",
16 |             "description": "End the conversation. Call this when you are done with the conversation.",
17 |             "input_schema": {
18 |                 "type": "object",
19 |                 "properties": {},
20 |                 "required": [],
21 |             },
22 |         }
23 | 
24 |     def execute(self, *args, **kwargs) -> str:
25 |         return self.MESSAGE
26 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDiagram/default_prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "id": "GenerateDiagram", 
 4 |         "prompts": [
 5 |             {
 6 |                 "role": "system",
 7 |                 "content": "You are an experienced software architect skilled at visualizing system designs. Users will provide repository details, and you will generate a comprehensive system architecture diagram using Mermaid markdown syntax. Output the diagram code directly with no additional text, formatting, or triple quotes. The response should be ready to be pasted into an editor supporting Mermaid syntax."
 8 |             },
 9 |             {
10 |                 "role": "user",
11 |                 "content": "Repo Details: {{fullContent}}"
12 |             }
13 |         ]
14 |     }
15 | ]
16 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateREADME/defaults.yml:
--------------------------------------------------------------------------------
 1 | # PreparePrompt Inputs
 2 | # prompt_template_file: your-prompt-template-here
 3 | 
 4 | # CallLLM Inputs
 5 | # openai_api_key: required-for-chatgpt
 6 | # google_api_key: required-for-gemini
 7 | # model: gpt-4o
 8 | # client_base_url: https://api.openai.com/v1
 9 | # Example HF model
10 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
11 | # model: codellama/CodeLlama-70b-Instruct-hf
12 | # model_temperature: 0.2
13 | # model_top_p: 0.95
14 | # model_max_tokens: 2000
15 | 
16 | # CommitChanges Inputs
17 | disable_branch: false
18 | 
19 | # CreatePR Inputs
20 | disable_pr: false
21 | force_pr_creation: true
22 | # github_api_key: required-for-github-scm
23 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateCodeUsageExample/default_prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "id": "GenerateUsageExample",
 4 |         "prompts": [
 5 |             {
 6 |                 "role": "system",
 7 |                 "content": "You are a skilled technical writer specializing in creating concise, clear, and runnable usage examples for libraries, frameworks, and projects. Users will provide a description of the code or project, and you will generate examples that demonstrate practical use cases. Ensure examples are well-commented and runnable, with no extra explanation or formatting outside of the example code itself."
 8 |             },
 9 |             {
10 |                 "role": "user",
11 |                 "content": "Description: {{fullContent}}"
12 |             }
13 |         ]
14 |     }
15 | ]
16 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCode/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ExtractCodeRequiredInputs(TypedDict):  # Mandatory ExtractCode inputs (a TypedDict is total unless stated otherwise).
 7 |     sarif_values: Dict  # presumably parsed SARIF data; the schema is not visible here - TODO confirm
 8 | 
 9 | 
10 | class ExtractCodeInputs(__ExtractCodeRequiredInputs, total=False):  # Required keys plus the optional ones below (total=False).
11 |     context_size: Annotated[int, StepTypeConfig(is_config=True)]  # is_config=True presumably marks a config-supplied value - confirm in step_typing
12 |     vulnerability_limit: Annotated[int, StepTypeConfig(is_config=True)]
13 |     severity: Annotated[str, StepTypeConfig(is_config=True)]
14 | 
15 | 
16 | class ExtractCodeOutputs(TypedDict):  # Shape of the dictionary produced by the ExtractCode step.
17 |     files_to_patch: List["ExtractedCode"]  # string forward reference: ExtractedCode is declared further down
18 | 
19 | 
20 | class ExtractedCode(TypedDict):  # One element of ExtractCodeOutputs.files_to_patch.
21 |     uri: str
22 |     startLine: int  # NOTE(review): 0- or 1-based and inclusive/exclusive is not shown here - confirm
23 |     endLine: int
24 |     affectedCode: str
25 |     messageText: str
26 | 


--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
 1 | # Build from source
 2 | 
 3 | PatchWork is built using Poetry, a dependency management and packaging tool for Python. To install PatchWork using Poetry, follow these steps:
 4 | 
 5 | 1. Make sure you have Poetry installed. If you don't have it installed, you can install it by running:
 6 |    ```
 7 |    curl -sSL https://install.python-poetry.org | python3 -
 8 |    ```
 9 | 
10 | 2. Clone the PatchWork repository:
11 |    ```
12 |    git clone https://github.com/patched-codes/patchwork.git
13 |    ```
14 | 
15 | 3. Navigate to the project directory:
16 |    ```
17 |    cd patchwork
18 |    ```
19 | 
20 | 4. Activate a shell inside the project's virtual environment:
21 |    ```
22 |    poetry shell
23 |    ```
24 | 
25 | 5. Install the dependencies using Poetry:
26 |    ```
27 |    poetry install --all-extras
28 |    ```
29 | 


--------------------------------------------------------------------------------
/patchwork/patchflow.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | 
 3 | import click
 4 | from typing_extensions import Type
 5 | 
 6 | _internal_map: dict[str, Type["Patchflow"]]
 7 | 
 8 | 
 9 | class PatchflowCommands(click.MultiCommand):
10 |     def list_commands(self, ctx):
11 |         return sorted(list(_internal_map.keys()))
12 | 
13 |     def get_command(self, ctx, name):
14 |         return _internal_map.get(name, None)
15 | 
16 | 
17 | class Patchflow(abc.ABC, click.MultiCommand):
18 |     def __init_subclass__(cls, patchflow_name=None, steps=None, **kwargs):
19 |         name = patchflow_name or cls.__name__
20 |         _internal_map[name] = cls
21 | 
22 |     @abc.abstractmethod
23 |     def run(self) -> dict:
24 |         """
25 |         Runs the step.
26 |         :return: a dictionary of outputs
27 |         """
28 |         ...
29 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateUnitTests/default_prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "id": "GenerateUnitTests", 
 4 |         "prompts": [
 5 |             {
 6 |                 "role": "system",
 7 |                 "content": "You are a senior software tester who is highly skilled at writing unit tests across various programming languages. Users will specify classes or functions to test, and you will generate appropriate unit tests using the most relevant framework for the detected language. Output the code directly with no additional text, formatting, or triple quotes. The response should appear as if directly pasted into an editor."
 8 |             },
 9 |             {
10 |                 "role": "user",
11 |                 "content": "Code: {{fullContent}}"
12 |             }
13 |         ]
14 |     }
15 | ]
16 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateUnitTests/defaults.yml:
--------------------------------------------------------------------------------
 1 | # CallLLM Inputs
 2 | # openai_api_key: required-for-chatgpt
 3 | # google_api_key: required-for-gemini
 4 | # model: gpt-4o
 5 | # client_base_url: https://api.openai.com/v1
 6 | # Example HF model
 7 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
 8 | # model: codellama/CodeLlama-70b-Instruct-hf
 9 | # model_temperature: 0.2
10 | # model_top_p: 0.95
11 | # model_max_tokens: 2000
12 | 
13 | # folder_path : path/to/folder/with/class
14 | 
15 | # Default value
16 | test_file_extension : py
17 | 
18 | # CommitChanges Inputs
19 | disable_branch: false
20 | 
21 | # CreatePR Inputs
22 | disable_pr: false
23 | force_pr_creation: true
24 | # github_api_key: required-for-github-scm
25 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/steps/ExtractDiff/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ExtractDiffRequiredInputs(TypedDict):  # Mandatory ExtractDiff inputs (a TypedDict is total unless stated otherwise).
 7 |     update_info: "UpdateInfo"  # string forward reference: UpdateInfo is declared further down
 8 |     libraries_api_key: Annotated[str, StepTypeConfig(is_config=True)]  # is_config=True presumably marks a config-supplied value - confirm in step_typing
 9 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True)]
10 | 
11 | 
12 | class ExtractDiffInputs(__ExtractDiffRequiredInputs, total=False):  # Required keys plus the optional one below (total=False).
13 |     severity: Annotated[str, StepTypeConfig(is_config=True)]
14 | 
15 | 
16 | class ExtractDiffOutputs(TypedDict):  # Shape of the dictionary produced by the ExtractDiff step.
17 |     prompt_values: List[Dict]  # the inner dict schema is not declared here
18 |     library_name: str
19 |     platform_type: str
20 | 
21 | 
22 | class UpdateInfo(TypedDict):  # Value type of the update_info input.
23 |     vuln_version: str
24 |     fixed_version: str
25 |     purl: str  # presumably a package URL identifying the library - TODO confirm
26 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDocstring/defaults.yml:
--------------------------------------------------------------------------------
 1 | # GenerateDocstring Inputs
 2 | base_path: .
 3 | rewrite_existing: false
 4 | 
 5 | 
 6 | # PreparePrompts Inputs
 7 | prompt_id: generate_docstring
 8 | # CallLLM Inputs
 9 | # For the OpenAI API, use the following to select the model
10 | # openai_embedding_model: text-embedding-3-small
11 | # For the HuggingFace API, use the following to select the model
12 | # huggingface_embedding_model: codellama/CodeLlama-70b-Instruct-hf
13 | # For either API, use the following to provide the API key
14 | # openai_api_key: required-for-openai
15 | # google_api_key: required-for-google
16 | # client_base_url: https://api.openai.com/v1
17 | # model: gpt-4o
18 | 
19 | # CommitChanges Inputs
20 | disable_branch: false
21 | 
22 | # CreatePR Inputs
23 | disable_pr: false
24 | force_pr_creation: true


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCode/README.md:
--------------------------------------------------------------------------------
 1 | This documentation provides details about a codebase related to extracting code contexts from SARIF files. It includes Python modules related to context strategies for different languages and the main extraction logic in `ExtractCode.py`.
 2 | 
 3 | ## Inputs
 4 | - The main input is a SARIF file path.
 5 | - Other optional inputs include `context_size` and `vulnerability_limit`.
 6 | 
 7 | ## Outputs
 8 | The code extracts relevant code contexts from the SARIF file based on specified criteria and saves the extracted data into a JSON file. The outputs include the following:
 9 | - `prompt_value_file`: Path to the JSON file.
10 | - `code_file`: Path to the JSON file.
11 | - `extracted_code_contexts`: List containing extracted code contexts with context start and end lines, URI, and message text.


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateCodeUsageExample/defaults.yml:
--------------------------------------------------------------------------------
 1 | # CallLLM Inputs
 2 | # openai_api_key: required-for-chatgpt
 3 | # google_api_key: required-for-gemini
 4 | # model: gpt-4o
 5 | # client_base_url: https://api.openai.com/v1
 6 | # Example HF model
 7 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
 8 | # model: codellama/CodeLlama-70b-Instruct-hf
 9 | # model_temperature: 0.2
10 | # model_top_p: 0.95
11 | # model_max_tokens: 2000
12 | 
13 | # folder_path : path/to/folder/with/class
14 | 
15 | # Default value
16 | test_file_extension : py
17 | 
18 | # CommitChanges Inputs
19 | disable_branch: false
20 | 
21 | # CreatePR Inputs
22 | disable_pr: false
23 | force_pr_creation: true
24 | # github_api_key: required-for-github-scm
25 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCodeContexts/README.md:
--------------------------------------------------------------------------------
 1 | This documentation provides details about a codebase related to extracting code contexts from SARIF files. It includes Python modules related to context strategies for different languages and the main extraction logic in `ExtractCode.py`.
 2 | 
 3 | ## Inputs
 4 | - The main input is a SARIF file path.
 5 | - Other optional inputs include `context_size` and `vulnerability_limit`.
 6 | 
 7 | ## Outputs
 8 | The code extracts relevant code contexts from the SARIF file based on specified criteria and saves the extracted data into a JSON file. The outputs include the following:
 9 | - `prompt_value_file`: Path to the JSON file.
10 | - `code_file`: Path to the JSON file.
11 | - `extracted_code_contexts`: List containing extracted code contexts with context start and end lines, URI, and message text.


--------------------------------------------------------------------------------
/patchwork/steps/PreparePR/README.md:
--------------------------------------------------------------------------------
 1 | # Patchwork PreparePR Module
 2 | 
 3 | ## Inputs
 4 | 
 5 | - `modified_code_files`: A list of modified code files.
 6 | 
 7 | ## Outputs
 8 | 
 9 | - `pr_body`: A formatted PR body that summarizes the changes made to the modified code files.
10 | 
11 | ### Code
12 | 
13 | The `PreparePR` class is created to prepare a Pull Request body based on the modifications made to a set of code files. The class inherits from a `Step` class. It expects a dictionary of inputs, specifically a list of modified code files. If the required keys are missing, it raises a ValueError.
14 | 
15 | The `run` method of the class processes the modified code files, groups them by path, and generates a formatted PR body that includes information about the changes made in each file. The resulting PR body is returned as an output.


--------------------------------------------------------------------------------
/patchwork/steps/ReadFile/README.md:
--------------------------------------------------------------------------------
 1 | ## Contents of the Patchwork ReadFile Module
 2 | 
 3 | ### Inputs
 4 | 1. **Typed.py**:
 5 |    - Defines two TypedDict classes `ReadFileInputs` and `ReadFileOutputs` with the keys `file_path` and `file_content` respectively.
 6 |    
 7 | 2. **ReadFile.py**:
 8 |    - Imports `open_with_chardet`, `Step`, and `ReadFileInputs`.
 9 |    - Creates a class `ReadFile` inheriting from `Step`, with an `__init__` method that checks for required keys in the input dictionary and assigns the file path.
10 |    - Defines a `run` method that reads the file contents using `open_with_chardet` and returns a dictionary with the file content.
11 | 
12 | ### Outputs
13 | - The `ReadFile` class in **ReadFile.py** is designed to read the content of a file specified in the inputs and return the content in a structured format.


--------------------------------------------------------------------------------
/patchwork/patchflows/ResolveIssue/defaults.yml:
--------------------------------------------------------------------------------
 1 | # ReadIssues Inputs
 2 | # github_api_key: required-for-github-scm
 3 | # gitlab_api_key: required-for-gitlab-scm
 4 | # issue_url: required
 5 | 
 6 | fix_issue: false
 7 | # GenerateEmbeddings Inputs
 8 | # For the OpenAI API, use the following to select the model
 9 | # openai_embedding_model: text-embedding-3-small
10 | # For the HuggingFace API, use the following to select the model
11 | # huggingface_embedding_model: codellama/CodeLlama-70b-Instruct-hf
12 | # For either API, use the following to provide the API key
13 | # openai_api_key: required-for-openai
14 | # google_api_key: required-for-google
15 | # client_base_url: https://api.openai.com/v1
16 | # model: gpt-4o
17 | 
18 | # CommitChanges Inputs
19 | disable_branch: false
20 | 
21 | # CreatePR Inputs
22 | disable_pr: false
23 | force_pr_creation: true


--------------------------------------------------------------------------------
/patchwork/steps/ExtractCodeMethodForCommentContexts/README.md:
--------------------------------------------------------------------------------
 1 | This documentation provides details about a codebase related to extracting code contexts from SARIF files. It includes Python modules related to context strategies for different languages and the main extraction logic in `ExtractCode.py`.
 2 | 
 3 | ## Inputs
 4 | - The main input is a SARIF file path.
 5 | - Other optional inputs include `context_size` and `vulnerability_limit`.
 6 | 
 7 | ## Outputs
 8 | The code extracts relevant code contexts from the SARIF file based on specified criteria and saves the extracted data into a JSON file. The outputs include the following:
 9 | - `prompt_value_file`: Path to the JSON file.
10 | - `code_file`: Path to the JSON file.
11 | - `extracted_code_contexts`: List containing extracted code contexts with context start and end lines, URI, and message text.


--------------------------------------------------------------------------------
/patchwork/steps/CreatePR/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CreatePRRequiredInputs(TypedDict):  # Mandatory CreatePR inputs (a TypedDict is total unless stated otherwise).
 7 |     target_branch: str
 8 | 
 9 | 
10 | class CreatePRInputs(__CreatePRRequiredInputs, total=False):  # Required keys plus the optional ones below (total=False).
11 |     base_branch: str
12 |     pr_title: str
13 |     pr_body: str
14 |     force_pr_creation: Annotated[bool, StepTypeConfig(is_config=True)]  # is_config=True presumably marks a config-supplied value - confirm in step_typing
15 |     disable_pr: Annotated[bool, StepTypeConfig(is_config=True)]
16 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
17 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True)]  # NOTE(review): unlike CreatePRCommentInputs, the three API keys carry no or_op here - confirm intended
18 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True)]
19 |     azuredevops_api_key: Annotated[str, StepTypeConfig(is_config=True)]
20 | 
21 | 
22 | class CreatePROutputs(TypedDict):  # Shape of the dictionary produced by the CreatePR step.
23 |     pr_url: str
24 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallAPI/CallAPI.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from requests import request
 4 | 
 5 | from patchwork.step import Step
 6 | 
 7 | 
 8 | class CallAPI(Step):  # Step that issues a single HTTP request assembled from its inputs.
 9 |     def __init__(self, inputs):
10 |         super().__init__(inputs)
11 |         self.url = inputs["url"]  # read with a plain subscript, so the key must be present
12 |         self.method = inputs["method"]  # read with a plain subscript, so the key must be present
13 |         possible_headers = inputs.get("headers", {})
14 |         if not isinstance(possible_headers, dict):
15 |             possible_headers = json.loads(possible_headers)  # anything that is not a dict is parsed as JSON text
16 |         self.headers = possible_headers
17 |         self.body = inputs.get("body")
18 |         if self.body and isinstance(self.body, dict):
19 |             self.body = json.dumps(self.body)  # a non-empty dict body is serialised to a JSON string; other values pass through as-is
20 | 
21 |     def run(self):
22 |         res = request(self.method, self.url, headers=self.headers, data=self.body)  # NOTE(review): no timeout is passed to requests - confirm this is intended
23 |         return dict(status_code=res.status_code, headers=res.headers, body=res.text)  # headers is the response's own headers object, body is the decoded text
24 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/1-report-bug.yml:
--------------------------------------------------------------------------------
 1 | name: Bug Report
 2 | description: Report a bug in patchwork
 3 | 
 4 | body:
 5 | 
 6 |   - type: dropdown
 7 |     id: is-regression
 8 |     attributes:
 9 |       label: Is this a regression?
10 |       options:
11 |         - 'Yes'
12 |         - 'No'
13 |     validations:
14 |       required: true
15 | 
16 |   - type: textarea
17 |     id: description
18 |     attributes:
19 |       label: Description
20 |     validations:
21 |       required: true
22 | 
23 |   - type: input
24 |     id: reproduction
25 |     attributes:
26 |       label: Please provide a link to a minimal reproduction of the bug
27 | 
28 |   - type: textarea
29 |     id: exception-or-error
30 |     attributes:
31 |       label: Please provide the exception or error you saw
32 |       render: true
33 | 
34 |   - type: textarea
35 |     id: other
36 |     attributes:
37 |       label: Anything else?


--------------------------------------------------------------------------------
/patchwork/steps/ExtractDiff/README.md:
--------------------------------------------------------------------------------
 1 | # Extract Diff Utility
 2 | 
 3 | ### Inputs
 4 | 1. `diff_file_path` (str): Path to the diff file.
 5 | 2. `language` (str): Programming language associated with the diff.
 6 | 
 7 | ### Outputs
 8 | - A dictionary containing information on the extracted diff sections stored in a JSON file. The dictionary includes:
 9 |   1. `prompt_value_file`: Path to the created JSON file.
10 |   2. `library_name`: Name of the library.
11 |   3. `platform_type`: Type of platform.
12 | 
13 | This code includes utility functions and a class named `ExtractDiff` to extract sections from a diff file for analysis. It encompasses functions to process the diff content, determine relevant sections, and fetch diff-related details from APIs based on libraries and repositories. Users can leverage this code to programmatically analyze and extract sections of a diff file for further processing or reporting.


--------------------------------------------------------------------------------
/patchwork/common/utils/dependency.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | from functools import lru_cache
 3 | 
 4 | __DEPENDENCY_GROUPS = {  # Group name -> importable module names; the group name is used in the install hint below.
 5 |     "security": ["semgrep", "depscan"],
 6 |     "notification": ["slack_sdk"],
 7 | }
 8 | 
 9 | 
10 | @lru_cache(maxsize=None)  # successful imports are memoised per module name; a raised ImportError is not cached
11 | def import_with_dependency_group(name):  # Import module `name`, or raise ImportError carrying an install hint.
12 |     try:
13 |         return importlib.import_module(name)
14 |     except ImportError:
15 |         error_msg = f"Missing dependency for {name}, please `pip install {name}`"  # generic hint, kept when no group lists this module
16 |         dependency_group = next(
17 |             (group for group, dependencies in __DEPENDENCY_GROUPS.items() if name in dependencies), None
18 |         )  # first group that lists `name`, else None
19 |         if dependency_group is not None:
20 |             error_msg = f"Please `pip install patchwork-cli[{dependency_group}]` to use this step"  # point at the matching extra instead
21 |         raise ImportError(error_msg)
22 | 
23 | 
24 | def slack_sdk():  # Accessor that imports slack_sdk only when called.
25 |     return import_with_dependency_group("slack_sdk")  # returns the module object, or raises ImportError with the install hint
26 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/DependencyUpgrade/defaults.yml:
--------------------------------------------------------------------------------
 1 | # PreparePrompt Inputs
 2 | prompt_id: depupgrade
 3 | # prompt_template_file: your-prompt-template-here
 4 | 
 5 | # CallLLM Inputs
 6 | # openai_api_key: required-for-chatgpt
 7 | # google_api_key: required-for-gemini
 8 | # model: gpt-4o
 9 | # client_base_url: https://api.openai.com/v1
10 | # Example HF model
11 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
12 | # model: codellama/CodeLlama-70b-Instruct-hf
13 | # model_temperature: 0.2
14 | # model_top_p: 0.95
15 | # model_max_tokens: 2000
16 | 
17 | # Do impact analysis after dependency upgrades to modify source code
18 | analyze_impact: false
19 | 
20 | # CommitChanges Inputs
21 | disable_branch: false
22 | 
23 | # CreatePR Inputs
24 | disable_pr: false
25 | force_pr_creation: true
26 | # github_api_key: required-for-github-scm
27 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/steps/CreatePRComment/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __CreatePRCommentRequiredInputs(TypedDict):  # Mandatory CreatePRComment inputs (a TypedDict is total unless stated otherwise).
 7 |     pr_url: str
 8 |     pr_comment: str
 9 | 
10 | 
11 | class CreatePRCommentInputs(__CreatePRCommentRequiredInputs, total=False):  # Required keys plus the optional ones below (total=False).
12 |     noisy_comments: Annotated[bool, StepTypeConfig(is_config=True)]  # is_config=True presumably marks a config-supplied value - confirm in step_typing
13 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
14 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["github_api_key", "azuredevops_api_key"])]  # or_op names the other two keys; presumably any one of the three suffices - confirm
15 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key", "azuredevops_api_key"])]
16 |     azuredevops_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key", "github_api_key"])]
17 | 
18 | 
19 | class CreatePRCommentOutputs(TypedDict):  # Shape of the dictionary produced by the CreatePRComment step.
20 |     pr_url: str
21 | 


--------------------------------------------------------------------------------
/patchwork/common/utils/user_config.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import functools
 4 | import hashlib
 5 | import uuid
 6 | 
 7 | from pydantic import BaseModel
 8 | 
 9 | from patchwork.logger import logger
10 | from patchwork.managed_files import CONFIG_FILE
11 | 
12 | 
13 | class __UserConfig(BaseModel):  # Model for the user configuration stored in CONFIG_FILE.
14 |     id: str = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest()  # default is evaluated once, when the class body runs: SHA-256 hex digest of uuid.getnode()
15 | 
16 |     def persist(self):  # Best-effort write of this model, as JSON, to CONFIG_FILE.
17 |         try:
18 |             CONFIG_FILE.write_text(self.model_dump_json())
19 |         except Exception as e:
20 |             logger.debug(f"Failed to persist user config: {e}")  # failures are logged at debug level and never raised
21 | 
22 | 
23 | @functools.lru_cache(maxsize=None)  # the returned config is cached, so later calls reuse the same object
24 | def get_user_config():  # Load the stored user config, or create and persist a fresh one.
25 |     try:
26 |         return __UserConfig.model_validate_json(CONFIG_FILE.read_text())
27 |     except Exception as e:
28 |         logger.debug(f"Failed to read user config: {e}")  # an unreadable or invalid file falls through to the default below
29 | 
30 |     user_config = __UserConfig()
31 |     user_config.persist()  # persist() swallows its own errors, so this call does not raise
32 |     return user_config
33 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssue/README.md:
--------------------------------------------------------------------------------
 1 | # CreateIssue Step in Patchwork
 2 | 
 3 | ## Inputs
 4 | - The `CreateIssue` step accepts a dictionary of inputs containing the following keys: `issue_title`, `issue_text`, `scm_url`, `github_api_key`, and `gitlab_api_key`.
 5 | - At least `issue_title`, `issue_text`, and `scm_url` are required inputs.
 6 | 
 7 | ## Outputs
 8 | - The `run` method of the `CreateIssue` step returns a dictionary with the key `issue_url`, containing the URL of the created issue.
 9 | 
10 | ## Description
11 | The `CreateIssue` step is part of the Patchwork application and is used to create an issue on a version control system platform like GitHub or GitLab. It initializes the required data, including connection through an API key, and creates an issue using the provided input data. This step is structured with error handling for missing input data and utilizes classes and functions from different modules within the Patchwork application for SCM interaction and logging.


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRDiffs/README.md:
--------------------------------------------------------------------------------
 1 | ## Code Summary
 2 | This code defines a `ReadPRDiffs` class that inherits from a `Step` class. The `ReadPRDiffs` class is initialized with a dictionary of inputs and checks for required keys. It then uses input data like API keys and URLs to communicate with either a GitHub or GitLab client to fetch pull request information. The `run` method retrieves file differences in the pull request, saves them to a temporary JSON file, and returns a dictionary containing the file path and diff details.
 3 | 
 4 | ### Inputs
 5 | - Input dictionary containing keys:
 6 |   - "pr_url": URL of the pull request
 7 |   - "github_api_key" or "gitlab_api_key": API key for GitHub or GitLab
 8 |   - "scm_url": URL of the source code management system
 9 | 
10 | ### Outputs
11 | - Returns a dictionary with the following keys:
12 |   - "prompt_value_file": path to the temporary JSON file
13 |   - "prompt_values": list of dictionaries containing file path and diff details


--------------------------------------------------------------------------------
/patchwork/steps/CallAPI/README.md:
--------------------------------------------------------------------------------
 1 | # Documentation for CallAPI Step
 2 | 
 3 | ## Inputs:
 4 | - `url (str)`: The URL to make the API call to.
 5 | - `method (str)`: The HTTP method to use for the request (GET, POST, PUT, PATCH, DELETE, HEAD).
 6 | - `headers (dict)`: Optional. Headers to include in the request.
 7 | - `body (dict)`: Optional. The body of the request.
 8 | 
 9 | ## Outputs:
10 | - `status_code (int)`: The status code of the API response.
11 | - `headers (dict)`: The headers of the API response.
12 | - `body (str)`: The body of the API response.
13 | 
14 | The `CallAPI` class extends `Step` and is used to make API calls using the `requests` library. The `run` method performs the API call based on the provided inputs and returns the status code, headers, and body of the response. If specified in the inputs, it can raise a `ValueError` based on the response status code falling within certain ranges. The `typed.py` file provides type annotations for the inputs and outputs of the `CallAPI` step.


--------------------------------------------------------------------------------
/patchwork/patchflows/__init__.py:
--------------------------------------------------------------------------------
 1 | from .AutoFix.AutoFix import AutoFix
 2 | from .DependencyUpgrade.DependencyUpgrade import DependencyUpgrade
 3 | from .GenerateCodeUsageExample.GenerateCodeUsageExample import GenerateCodeUsageExample
 4 | from .GenerateDiagram.GenerateDiagram import GenerateDiagram
 5 | from .GenerateDocstring.GenerateDocstring import GenerateDocstring
 6 | from .GenerateREADME.GenerateREADME import GenerateREADME
 7 | from .GenerateUnitTests.GenerateUnitTests import GenerateUnitTests
 8 | from .LogAnalysis.LogAnalysis import LogAnalysis
 9 | from .PRReview.PRReview import PRReview
10 | from .ResolveIssue.ResolveIssue import ResolveIssue
11 | from .SonarFix.SonarFix import SonarFix
12 | 
13 | __all__ = [  # Public names of the package; lists the same eleven classes imported above, in a different order.
14 |     "AutoFix",
15 |     "DependencyUpgrade",
16 |     "GenerateREADME",
17 |     "PRReview",
18 |     "ResolveIssue",
19 |     "GenerateDocstring",
20 |     "GenerateUnitTests",
21 |     "GenerateDiagram",
22 |     "GenerateCodeUsageExample",
23 |     "SonarFix",
24 |     "LogAnalysis",
25 | ]
26 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/ResolveIssue/prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "id": "resolve_issue",
 4 |     "prompts": [
 5 |       {
 6 |         "role": "system",
 7 |         "content": "You are a senior software engineer who is best in the world at resolving bugs. Users will give you a code snippet and you will generate a fix based on the provided INSTRUCTION.\n\nINSTRUCTION:\n\nOnly respond with the fixed code, do not add any comments or change the indentation.\n\nMake sure you respond with the full code and not only the parts that are changed.\n\nBefore you generate a fix, analyse if the bug can indeed be fixed by modifying the given code.\n\nIf the bug cannot be resolved by making the changes to the given code respond with <NO FIX POSSIBLE>.\n\nPlease provide a response only in the following format:\n\nFixed Code:\n<original code with the issue now fixed>\n\nResolve the described bug report by making necessary updates to the code.\n\n{{messageText}}."
 8 |       },
 9 |       {"role": "user", "content": "{{affectedCode}}"}
10 |     ]
11 |   }
12 | ]


--------------------------------------------------------------------------------
/patchwork/steps/CallCode2Prompt/README.md:
--------------------------------------------------------------------------------
 1 | The code provided includes three Python files within the path `patchwork/steps/CallCode2Prompt/`:
 2 | 1. `__init__.py` - an empty file.
 3 | 2. `CallCode2Prompt.py` - a class named `CallCode2Prompt` derived from `Step` class. It takes inputs, processes data related to code files, runs a command-line tool `code2prompt`, and creates a JSON file as output containing extracted data from code files.
 4 | 3. `TestCallCode2Prompt.py` - a unit test file for the `CallCode2Prompt` class, ensuring the output is not empty after running the code for a given folder path.
 5 | 
 6 | ### Inputs
 7 | - `folder_path`: Path to a folder containing code files for processing.
 8 | 
 9 | ### Outputs
10 | - `prompt_value_file`: Path to the JSON file with extracted data from code files.
11 | - `code_file`: Same as `prompt_value_file`.
12 | 
13 | The `CallCode2Prompt` class is designed to be instantiated with input data related to code files, processed using the `run()` method, and provide extracted information in a JSON file as output.


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRDiffs/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.client.scm import PullRequestComment
 4 | from patchwork.common.utils.step_typing import StepTypeConfig
 5 | 
 6 | 
 7 | class __ReadPRDiffsRequiredInputs(TypedDict):  # keys that must always be present in the inputs
 8 |     pr_url: str
 9 | 
10 | 
11 | class ReadPRDiffsInputs(__ReadPRDiffsRequiredInputs, total=False):  # required keys plus the optional ones below
12 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
13 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["github_api_key", "azuredevops_api_key"])]  # each key lists the other two in or_op; presumably any one suffices - TODO confirm
14 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key", "azuredevops_api_key"])]
15 |     azuredevops_api_key: Annotated[str, StepTypeConfig(is_config=True, or_op=["gitlab_api_key", "github_api_key"])]
16 | 
17 | 
18 | class ReadPRDiffsOutputs(TypedDict):  # output keys; all four are required (TypedDict is total by default)
19 |     title: str
20 |     body: str
21 |     comments: List[PullRequestComment]
22 |     diffs: List["Diff"]  # forward reference: Diff is declared further down in this module
23 | 
24 | 
25 | class Diff(TypedDict):  # element type of ReadPRDiffsOutputs.diffs: a path paired with a diff string
26 |     path: str
27 |     diff: str
28 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/AutoFix/defaults.yml:
--------------------------------------------------------------------------------
 1 | # AutoFix Inputs
 2 | compatibility: unknown
 3 | 
 4 | # ExtractCode Inputs
 5 | vulnerability_limit: 10
 6 | context_size: 1000
 7 | severity: unknown
 8 | # sarif_file_path should point to the generated SARIF file relative to the working directory
 9 | # sarif_file_path: /mnt/data/sarif.json
10 | 
11 | # PreparePrompt Inputs
12 | # prompt_template_file: your-prompt-template-here
13 | 
14 | # CallLLM Inputs
15 | # openai_api_key: required-for-chatgpt
16 | # google_api_key: required-for-gemini
17 | # model: gpt-4o
18 | # client_base_url: https://api.openai.com/v1
19 | # Example HF model
20 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
21 | # model: codellama/CodeLlama-70b-Instruct-hf
22 | # model_temperature: 0.2
23 | # model_top_p: 0.95
24 | # model_max_tokens: 2000
25 | 
26 | # CommitChanges Inputs
27 | disable_branch: false
28 | 
29 | # CreatePR Inputs
30 | disable_pr: false
31 | force_pr_creation: true
32 | # github_api_key: required-for-github-scm
33 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/patchflows/SonarFix/defaults.yml:
--------------------------------------------------------------------------------
 1 | # AutoFix Inputs
 2 | compatibility: unknown
 3 | 
 4 | # ExtractCode Inputs
 5 | vulnerability_limit: 10
 6 | context_size: 1000
 7 | severity: unknown
 8 | # sarif_file_path should point to the generated SARIF file relative to the working directory
 9 | # sarif_file_path: /mnt/data/sarif.json
10 | 
11 | # PreparePrompt Inputs
12 | # prompt_template_file: your-prompt-template-here
13 | 
14 | # CallLLM Inputs
15 | # openai_api_key: required-for-chatgpt
16 | # google_api_key: required-for-gemini
17 | # model: gpt-4o
18 | # client_base_url: https://api.openai.com/v1
19 | # Example HF model
20 | # client_base_url: https://api-inference.huggingface.co/models/codellama/CodeLlama-70b-Instruct-hf/v1
21 | # model: codellama/CodeLlama-70b-Instruct-hf
22 | # model_temperature: 0.2
23 | # model_top_p: 0.95
24 | # model_max_tokens: 2000
25 | 
26 | # CommitChanges Inputs
27 | disable_branch: false
28 | 
29 | # CreatePR Inputs
30 | disable_pr: false
31 | force_pr_creation: true
32 | # github_api_key: required-for-github-scm
33 | # gitlab_api_key: required-for-gitlab-scm


--------------------------------------------------------------------------------
/patchwork/steps/ManageEngineAgent/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, List, Optional, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ManageEngineAgentInputsRequired(TypedDict):  # keys that must always be present in the inputs
 7 |     zoho_access_token: str
 8 |     user_prompt: str
 9 |     prompt_value: Dict[str, Any]
10 | 
11 | 
12 | class ManageEngineAgentInputs(__ManageEngineAgentInputsRequired, total=False):  # required keys plus the optional ones below
13 |     max_agent_calls: int
14 |     openai_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "anthropic_api_key"])]  # each key lists the other two in or_op; presumably any one suffices - TODO confirm
15 |     anthropic_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "openai_api_key"])]
16 |     google_api_key: Annotated[str, StepTypeConfig(or_op=["openai_api_key", "anthropic_api_key"])]
17 | 
18 |     # Prompt and strategy configuration
19 |     system_prompt: Optional[str]
20 |     example_json: Optional[Dict]
21 | 
22 | 
23 | class ManageEngineAgentOutputs(TypedDict):  # output keys; all four are required (TypedDict is total by default)
24 |     conversation_history: List[Dict]
25 |     tool_records: List[Dict]
26 |     request_tokens: int
27 |     response_tokens: int
28 | 


--------------------------------------------------------------------------------
/patchwork/steps/ZohoDeskAgent/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, List, Optional, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ZohoDeskAgentInputsRequired(TypedDict):  # keys that must always be present in the inputs
 7 |     zoho_access_token: str
 8 |     user_prompt: str
 9 |     prompt_value: Dict[str, Any]
10 |     org_id: str
11 | 
12 | 
13 | class ZohoDeskAgentInputs(__ZohoDeskAgentInputsRequired, total=False):  # required keys plus the optional ones below
14 |     max_agent_calls: int
15 |     openai_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "anthropic_api_key"])]  # each key lists the other two in or_op; presumably any one suffices - TODO confirm
16 |     anthropic_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "openai_api_key"])]
17 |     google_api_key: Annotated[str, StepTypeConfig(or_op=["openai_api_key", "anthropic_api_key"])]
18 | 
19 |     # Prompt and strategy configuration
20 |     system_prompt: Optional[str]
21 |     example_json: Optional[Dict]
22 | 
23 | 
24 | class ZohoDeskAgentOutputs(TypedDict):  # output keys; all four are required (TypedDict is total by default)
25 |     conversation_history: List[Dict]
26 |     tool_records: List[Dict]
27 |     request_tokens: int
28 |     response_tokens: int
29 | 


--------------------------------------------------------------------------------
/patchwork/steps/SlackAgent/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, List, Optional, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __SlackAgentInputsRequired(TypedDict):  # keys that must always be present in the inputs
 7 |     slack_bot_token: str
 8 |     user_prompt: str
 9 |     prompt_value: Dict[str, Any]
10 |     example_json: Dict[str, Any]
11 | 
12 | 
13 | class SlackAgentInputs(__SlackAgentInputsRequired, total=False):  # required keys plus the optional ones below
14 |     max_agent_calls: int
15 |     strategy_model: str
16 |     agent_model: str
17 |     openai_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "anthropic_api_key"])]  # each key lists the other two in or_op; presumably any one suffices - TODO confirm
18 |     anthropic_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "openai_api_key"])]
19 |     google_api_key: Annotated[str, StepTypeConfig(or_op=["openai_api_key", "anthropic_api_key"])]
20 | 
21 |     # Prompt and strategy configuration
22 |     system_prompt: Optional[str]
23 | 
24 | 
25 | class SlackAgentOutputs(TypedDict):  # output keys; all four are required (TypedDict is total by default)
26 |     conversation_history: List[Dict]
27 |     tool_records: List[Dict]
28 |     request_tokens: int
29 |     response_tokens: int
30 | 


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCode/README.md:
--------------------------------------------------------------------------------
 1 | # Patchwork Modify Code Module
 2 | 
 3 | This module provides functionality to modify code files based on extracted responses. It contains functions for loading and saving JSON files, handling indentation, and replacing code in files. The main class `ModifyCode` is a step that takes inputs containing code snippets and extracted responses, and then modifies the specified lines in code files with the new extracted code.
 4 | 
 5 | ## Inputs
 6 | - `file_path`: The path to the JSON file containing code snippets.
 7 | - `content`: The content to be saved to a file.
 8 | - `src`: List of lines representing the original code.
 9 | - `target`: List of lines representing the target code.
10 | - `file_path`: Path to the file to be modified.
11 | - `start_line`: Line number to start replacing the code.
12 | - `end_line`: Line number to end replacing the code.
13 | - `new_code`: The new code to replace the specified lines.
14 | 
15 | ## Outputs
16 | - `modified_code_files`: A list of dictionaries containing information about the modified code files like path, start line, end line, and extracted response metadata.


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCodeOnce/README.md:
--------------------------------------------------------------------------------
 1 | # Patchwork Modify Code Module
 2 | 
 3 | This module provides functionality to modify code files based on extracted responses. It contains functions for loading and saving JSON files, handling indentation, and replacing code in files. The main class `ModifyCode` is a step that takes inputs containing code snippets and extracted responses, and then modifies the specified lines in code files with the new extracted code.
 4 | 
 5 | ## Inputs
 6 | - `file_path`: The path to the JSON file containing code snippets.
 7 | - `content`: The content to be saved to a file.
 8 | - `src`: List of lines representing the original code.
 9 | - `target`: List of lines representing the target code.
10 | - `file_path`: Path to the file to be modified.
11 | - `start_line`: Line number to start replacing the code.
12 | - `end_line`: Line number to end replacing the code.
13 | - `new_code`: The new code to replace the specified lines.
14 | 
15 | ## Outputs
16 | - `modified_code_files`: A list of dictionaries containing information about the modified code files like path, start line, end line, and extracted response metadata.


--------------------------------------------------------------------------------
/patchwork/steps/BrowserUse/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, Optional, TypedDict, List
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __BrowserUseInputsRequired(TypedDict):  # keys that must always be present in the inputs
 7 |     task: str
 8 |     task_value: Dict[str, Any]
 9 | 
10 | 
11 | class BrowserUseInputs(__BrowserUseInputsRequired, total=False):  # required keys plus the optional ones below
12 |     example_json: Optional[str]
13 |     openai_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "anthropic_api_key"])]  # each key lists the other two in or_op; presumably any one suffices - TODO confirm
14 |     anthropic_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "openai_api_key"])]
15 |     google_api_key: Annotated[str, StepTypeConfig(or_op=["openai_api_key", "anthropic_api_key"])]
16 |     gif_path: Optional[str]
17 |     headless: Optional[bool]
18 |     initial_actions: Optional[List[Dict[str, Dict[str, Any]]]]
19 |     downloads_path: Optional[str]
20 |     use_vision: Optional[bool]
21 |     timeout: Optional[int]  # optional timeout in seconds, defaults to 600 if not provided
22 | 
23 | 
24 | class BrowserUseOutputs(TypedDict):  # output keys; all three are required (TypedDict is total by default)
25 |     result: str
26 |     request_tokens: int
27 |     response_tokens: int
28 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadIssues/README.md:
--------------------------------------------------------------------------------
 1 | ## Patchwork ReadIssues Module
 2 | 
 3 | ### Inputs
 4 | - The `ReadIssues` class in `ReadIssues.py` expects a dictionary `inputs` containing the following keys:
 5 |   - `issue_url`: URL of the issue
 6 |   - `github_api_key`: API key for GitHub (optional, can be provided if working with GitHub issues)
 7 |   - `gitlab_api_key`: API key for GitLab (optional, can be provided if working with GitLab issues)
 8 |   - `scm_url`: URL of the source code management platform
 9 | 
10 | ### Outputs
11 | - The `ReadIssues` class provides a `run` method that returns a dictionary containing the issue text associated with the provided `issue_url`.
12 | 
13 | ### Usage
14 | - The `ReadIssues` class reads issues from a source code management platform (GitHub or GitLab) using the provided API keys and URL.
15 | - It ensures the required input keys are present, selects the appropriate SCM client based on the provided API key, sets the SCM URL, and retrieves the issue text based on the provided issue URL.
16 | - Users can instantiate the `ReadIssues` class with the necessary inputs and then call its `run` method to obtain the issue text.
17 | 


--------------------------------------------------------------------------------
/tests/common/context_strategy/test_java.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from patchwork.common.context_strategy.java import (
 4 |     JavaBlockStrategy,
 5 |     JavaClassStrategy,
 6 |     JavaMethodStrategy,
 7 | )
 8 | 
 9 | example_java_lines = [
10 |     "package com.example;\n",  # 0
11 |     "\n",  # 1
12 |     "import java.util.ArrayList;\n",  # 2
13 |     "import java.util.List;\n",  # 3
14 |     "import java.util.Map;\n",  # 4
15 |     "\n",  # 5
16 |     "public class A {\n",  # 6
17 |     "    /** comment  */\n",  # 7
18 |     "    public static void main(String[] args) {\n",  # 8
19 |     '        System.out.println("Hello, World!");\n',  # 9
20 |     "    }\n",  # 10
21 |     "}\n",  # 11
22 | ]
23 | 
24 | 
25 | @pytest.mark.parametrize(
26 |     "strategy, expected_range",
27 |     [(JavaClassStrategy(), (6, 12)), (JavaMethodStrategy(), (8, 11)), (JavaBlockStrategy(), (8, 11))],  # (start, end) per strategy; each end is one past the last fixture index of the construct
28 | )
29 | def test_java_strategy(strategy, expected_range):  # checks the context range each strategy reports for the example file
30 |     expected_start, expected_end = expected_range
31 |     position = strategy.get_context_indexes(example_java_lines, 8, 9)  # indexes 8 and 9 are the main() signature and its println line
32 |     assert position.start == expected_start
33 |     assert position.end == expected_end
34 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## PR Checklist
 2 | <!-- Please check if your PR fulfills the following requirements: -->
 3 | 
 4 | - [ ] The commit message follows our guidelines: [Code of conduct](https://github.com/patched-codes/patchwork/blob/main/CODE_OF_CONDUCT.md)
 5 | - [ ] Tests for the changes have been added (for bug fixes / features)
 6 | - [ ] Docs have been added / updated (for bug fixes / features)
 7 | - [ ] Does this PR introduce a breaking change?
 8 | - [ ] Include PR in release notes?
 9 | 
10 | 
11 | ## PR Type
12 | <!-- What kind of change does this PR introduce? -->
13 | <!-- Please check the one that applies to this PR using "x". -->
14 | 
15 | - [ ] Bugfix
16 | - [ ] Feature
17 | - [ ] Refactoring
18 | - [ ] Build / CI
19 | - [ ] Documentation
20 | - [ ] Others
21 | 
22 | 
23 | ## What is the current behavior?
24 | <!-- Please describe the current behavior that you are modifying, or link to a relevant issue. -->
25 | 
26 | Issue Number: N/A
27 | 
28 | 
29 | ## What is the new behavior?
30 | 
31 | <!-- If this PR contains a breaking change, please describe the impact and migration path for existing applications below. -->
32 | 
33 | 
34 | 
35 | ## Other information


--------------------------------------------------------------------------------
/patchwork/steps/SimplifiedLLM/README.md:
--------------------------------------------------------------------------------
 1 | The provided documentation summarizes the contents of three Python files related to a SimplifiedLLM functionality. Here is the breakdown:
 2 | 
 3 | - `typed.py` defines classes for input and output types for SimplifiedLLM functionality.
 4 |   - **Inputs:**
 5 |     - `SimplifiedLLMInputsRequired`: Contains required input fields for SimplifiedLLM.
 6 |     - `SimplifiedLLMInputs`: Extends `SimplifiedLLMInputsRequired` with additional optional input fields.
 7 |   - **Outputs:**
 8 |     - `SimplifiedLLMOutputs`: Defines output fields for SimplifiedLLM.
 9 | 
10 | - `__init__.py` is an empty file within the SimplifiedLLM directory.
11 | 
12 | - `SimplifiedLLM.py` is the main implementation file for SimplifiedLLM functionality.
13 |   - **Inputs:**
14 |     - Takes inputs from the constructor and validates required fields.
15 |   - **Outputs:**
16 |     - Provides a `run` method that orchestrates the SimplifiedLLM process by integrating with other related steps such as PreparePrompt, CallLLM, and ExtractModelResponse.
17 | 
18 | These files collectively define the input and output structures and the implementation logic for the SimplifiedLLM step in a larger system.


--------------------------------------------------------------------------------
/patchwork/steps/CommitChanges/README.md:
--------------------------------------------------------------------------------
 1 | # Code Documentation
 2 | 
 3 | ## Inputs
 4 | - `inputs`: Dictionary containing the following keys:
 5 |   - `modified_code_files`: List of modified code files.
 6 |   - `disable_branch`: Boolean indicating whether branch creation is disabled.
 7 |   - `force_branch_creation`: Boolean indicating whether to force branch creation.
 8 |   - `branch_prefix`: Prefix for the new branch name.
 9 |   - `branch_suffix`: Suffix for the new branch name.
10 | 
11 | ## Outputs
12 | - Dictionary containing the following keys:
13 |   - `base_branch`: Name of the base branch.
14 |   - `target_branch`: Name of the target branch.
15 | 
16 | ### Description
17 | This code module defines a class `CommitChanges` that inherits from the `Step` class. It includes methods for transitioning between branches, getting a branch slug from a remote URL, and committing changes with a provided message. The `CommitChanges` class is instantiated with input data about modified code files, branch creation settings, and more. Upon execution, the `run` method commits the changes to the repository based on the modified files, creating a new branch if enabled, and returns information about the base and target branches.


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCode/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Dict, List, TypedDict
 2 | 
 3 | 
 4 | class ModifyCodeInputs(TypedDict):  # input keys; both are required (TypedDict is total by default)
 5 |     files_to_patch: List[Dict]
 6 |     extracted_responses: List[Dict[str, str]]
 7 | 
 8 | 
 9 | class ModifyCodeOutputs(TypedDict):  # output keys; the single key is required
10 |     modified_code_files: List["ModifiedCodeFile"]  # forward reference: ModifiedCodeFile is declared further down
11 | 
12 | 
13 | class ModifiedCodeFile(TypedDict, total=False):  # total=False: any of the keys below may be absent
14 |     """Represents a file that has been modified by the ModifyCode step.
15 | 
16 |     Attributes:
17 |         path: The path to the modified file
18 |         start_line: The starting line number of the modification (1-based)
19 |         end_line: The ending line number of the modification (1-based)
20 |         diff: A unified diff string showing the changes made to the file.
21 |               Generated using Python's difflib for in-memory comparison
22 |               of original and modified file contents.
23 | 
24 |     Note:
25 |         The diff field is generated using difflib.unified_diff() to compare
26 |         the original and modified file contents in memory, ensuring efficient
27 |         and secure diff generation.
28 |     """
29 | 
30 |     path: str
31 |     start_line: int
32 |     end_line: int
33 |     diff: str
34 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSonar/README.md:
--------------------------------------------------------------------------------
 1 | # Code Documentation
 2 | 
 3 | ## Input
 4 | - This code is part of the Patchwork package and implements the `ScanSonar` step.
 5 | - The `ScanSonar` class requires the following inputs in its initialization:
 6 |   - `sonarqube_project_key` (required): The SonarQube project identifier
 7 |   - `sonarqube_access_token` (required): Authentication token for SonarQube API access
 8 |   - `sonarqube_base_url` (required): SonarQube instance URL (e.g., https://sonarcloud.io)
 9 | 
10 | ## Output
11 | - The `ScanSonar` class has a `run` function that collects vulnerability information from SonarQube.
12 | - Returns a dictionary with a `files_to_patch` key containing a list of `SonarVulnerability` objects.
13 | - Each `SonarVulnerability` contains:
14 |   - `uri`: File path where the vulnerability was found
15 |   - `startLine`: Starting line of the vulnerability
16 |   - `endLine`: Ending line of the vulnerability
17 |   - `cwe`: Common Weakness Enumeration identifier
18 |   - `description`: Detailed description of the vulnerability
19 | 
20 | ## Dependencies
21 | - Requires the SonarQube API to be accessible
22 | - Uses the internal `SonarClient` for API communication
23 | - Authentication via access token is required
24 | 


--------------------------------------------------------------------------------
/patchwork/steps/AnalyzeImpact/README.md:
--------------------------------------------------------------------------------
 1 | ## AnalyzeImpact Code Documentation
 2 | 
 3 | ### Inputs
 4 | The `AnalyzeImpact` module includes a function `find_dependency_usage()` which takes the following inputs:
 5 | - `directory` (str): The root directory of the project.
 6 | - `dependency` (str): The name of the dependency to search for.
 7 | - `language` (str): The programming language (e.g., 'python', 'java', 'javascript', 'typescript', 'go').
 8 | - `methods` (list of str): A list of method names to search for in the usage context of the specified dependency.
 9 | 
10 | ### Outputs
11 | The function `find_dependency_usage()` returns a dictionary mapping file paths to lists of method names from the specified list that are called in the file.
12 | 
13 | ### Usage
14 | The code analyzes the impact of a specified dependency on a project by searching for its usage in relevant files based on the programming language. It identifies if any of the specified methods associated with the dependency are called within those files. The `AnalyzeImpact` class processes extracted responses related to impacted methods, and then utilizes `find_dependency_usage()` to generate a list of impacted files and their relevant method info. The information is saved to a JSON file for further analysis and processing.


--------------------------------------------------------------------------------
/patchwork/steps/DatabaseAgent/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __DatabaseAgentOutputsRequiredInputs(TypedDict):  # NOTE(review): despite "Outputs" in the name, this is the required-keys base of DatabaseAgentInputs
 7 |     task: str
 8 |     db_dialect: str
 9 | 
10 | 
11 | class DatabaseAgentInputs(__DatabaseAgentOutputsRequiredInputs, total=False):  # required keys plus the optional ones below
12 |     db_driver: str
13 |     db_username: str
14 |     db_password: str
15 |     db_host: str
16 |     db_port: int
17 |     db_name: str
18 |     db_params: dict[str, Any]
19 |     db_driver_args: dict[str, Any]
20 |     prompt_value: Dict[str, Any]
21 |     max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)]
22 |     openai_api_key: Annotated[  # NOTE(review): or_op on all three keys names "patched_api_key", which is not declared in this class
23 |         str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "anthropic_api_key"])
24 |     ]
25 |     anthropic_api_key: Annotated[
26 |         str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "openai_api_key"])
27 |     ]
28 |     google_api_key: Annotated[
29 |         str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key"])
30 |     ]
31 |     example_json: str
32 | 
33 | 
34 | class DatabaseAgentOutputs(TypedDict):  # output keys; both are required (TypedDict is total by default)
35 |     request_tokens: int
36 |     response_tokens: int
37 | 


--------------------------------------------------------------------------------
/patchwork/steps/PreparePrompt/README.md:
--------------------------------------------------------------------------------
 1 | ## Code Documentation
 2 | 
 3 | ### Inputs
 4 | - This code defines a class `PreparePrompt` that inherits from the `Step` class.
 5 | - The class takes a dictionary `inputs` as a parameter in its constructor, expecting keys `prompt_template_file`, `prompt_id`, `prompt_value_file`, and `prompt_values`.
 6 | - The `inputs` dictionary should contain the required keys specified in the `required_keys` set.
 7 | 
 8 | ### Outputs
 9 | - The `run` method of the `PreparePrompt` class generates prompt data based on template files and values provided in `inputs`.
10 | - It processes the template file to create prompt data with values substituted from the given value files or values directly provided.
11 | - The generated prompt data is then saved into a JSON file, and its path is returned as an output in a dictionary format.
12 | 
13 | ### Usage
14 | - This code is likely to be used to generate prompt data based on a template and specific values.
15 | - One would need to create an instance of the `PreparePrompt` class, passing the required inputs like `prompt_template_file`, `prompt_id`, `prompt_value_file`, and `prompt_values`.
16 | - The `run` method can be called to process the inputs and generate the prompt file.
17 | - The output dictionary containing the path to the generated prompt file can be used for further processing or logging.


--------------------------------------------------------------------------------
/patchwork/steps/CreatePR/README.md:
--------------------------------------------------------------------------------
 1 | # Create Pull Request Module
 2 | 
 3 | ## Inputs
 4 | - `inputs: dict`: Required data for creating a pull request including:
 5 |   - `target_branch`: Target branch for the pull request
 6 |   - `github_api_key` or `gitlab_api_key`: API key for the corresponding platform
 7 |   - `scm_url`: URL for the source control management platform
 8 |   - `disable_pr`: Flag to disable pull request creation
 9 |   - `pr_body`: Body content for the pull request
10 |   - `pr_title`: Title for the pull request
11 |   - `force_pr_creation`: Flag to force creation of the pull request
12 |   - `base_branch`: Base branch for the pull request
13 | 
14 | ## Outputs
15 | - `run() -> dict`: Method to run the pull request creation process with the following output:
16 |   - `pr_url`: URL of the created pull request
17 | 
18 | This module provides functionality to create a pull request on a source control management platform (GitHub or GitLab) based on the input data provided. It includes methods for checking required data, handling platform-specific API keys, setting up the pull request parameters, and executing the pull request creation process. The `create_pr` method within the module helps in finding or creating a pull request with necessary details and descriptions. The module also logs information throughout the process for tracking and verification purposes.


--------------------------------------------------------------------------------
/patchwork/common/tools/db_query_tool.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Any, Union
 2 | 
 3 | from patchwork.common.tools import Tool
 4 | from patchwork.steps import CallSQL
 5 | 
 6 | 
 7 | class DatabaseQueryTool(Tool, tool_name="db_query_tool"):  # tool that runs a query through the CallSQL step and returns its results
 8 |     def __init__(self, inputs: dict[str, Any]) -> None:  # inputs are kept and later passed to CallSQL
 9 |         super().__init__()
10 |         self.db_settings = inputs.copy()  # shallow copy, so rebinding keys on the caller's dict later does not affect the tool
11 |         self.db_dialect = inputs.get("db_dialect", "SQL")  # interpolated into the schema description; falls back to "SQL"
12 | 
13 |     @property
14 |     def json_schema(self) -> dict:  # schema dict: tool name, description, and one required string parameter "query"
15 |         return {
16 |             "name": "db_query_tool",
17 |             "description": f"""\
18 | Run SQL Query on current {self.db_dialect} database.
19 | """,
20 |             "input_schema": {
21 |                 "type": "object",
22 |                 "properties": {
23 |                     "query": {
24 |                         "type": "string",
25 |                         "description": "Database query to run.",
26 |                     }
27 |                 },
28 |                 "required": ["query"],
29 |             },
30 |         }
31 | 
32 |     def execute(self, query: str) -> Union[list[dict[str, Any]], str]:  # returns the "results" list on success, or the error text on failure
33 |         db_settings = self.db_settings.copy()  # per-call copy so db_query is never written into the stored settings
34 |         db_settings["db_query"] = query
35 |         try:
36 |             return CallSQL(db_settings).run().get("results", [])  # a missing "results" key yields an empty list
37 |         except Exception as e:  # any failure is reported as its message string instead of being raised
38 |             return str(e)
39 | 


--------------------------------------------------------------------------------
/patchwork/steps/PR/README.md:
--------------------------------------------------------------------------------
 1 | ## File: patchwork/steps/PR/PR.py
 2 | 
 3 | ### Inputs
 4 | - `inputs`: Dictionary containing various parameters required for the PR process.
 5 | 
 6 | ### Outputs
 7 | - `run`: Executes the PR process which involves calling other steps to commit changes, prepare PR, and create PR. Returns a dictionary containing details like base branch, target branch, PR URL, PR number, PR title, and PR body.
 8 | 
 9 | ### Description
10 | This Python script defines a class `PR` that represents the Pull Request (PR) step in a software release process. It ensures that all required input keys are provided, runs the commit changes, prepare PR, and create PR steps, and finally returns relevant details about the PR process.
11 | 
12 | ---
13 | 
14 | ## File: patchwork/steps/PR/typed.py
15 | 
16 | ### Description
17 | This Python script defines the data types for inputs and outputs related to the Pull Request (PR) step. It includes definitions for required inputs like modified code files and optional inputs for different stages of the PR process. The outputs are defined as TypedDict specifying the expected keys and their associated data types.
18 | 
19 | ---
20 | 
21 | ## File: patchwork/steps/PR/__init__.py
22 | 
23 | ### Description
24 | This file is empty and acts as an initialization module for the PR step in the software release process. There is no specific code in this file.


--------------------------------------------------------------------------------
/tests/cicd/generate_docstring/kotlin_test_file.kt:
--------------------------------------------------------------------------------
 1 | package org.example
 2 | 
 3 | import java.sql.Connection
 4 | import java.sql.ResultSet
 5 | import kotlin.random.Random
 6 | 
 7 | 
 8 | fun <T : Number> aPlusB(a: T, b: T): Double = a.toDouble() + b.toDouble()  // converts both operands to Double and returns their sum
 9 | 
10 | 
11 | fun sqlite(db: Connection, query: String): List<List<Any?>> {  // runs query on db and returns every row as a list of column values
12 |     db.createStatement().use { statement ->  // use {} closes the statement when the block exits
13 |         statement.executeQuery(query).use { resultSet ->  // the result set is closed the same way
14 |             val results = mutableListOf<List<Any?>>()
15 |             val columnCount = resultSet.metaData.columnCount
16 | 
17 |             while (resultSet.next()) {
18 |                 val row = mutableListOf<Any?>()
19 |                 for (i in 1..columnCount) {  // JDBC column indexes start at 1
20 |                     row.add(resultSet.getObject(i))
21 |                 }
22 |                 results.add(row)
23 |             }
24 |             return results  // returns from sqlite() itself, not only from the lambda
25 |         }
26 |     }
27 | }
28 | 
29 | 
30 | fun <T, R : Comparable<R>> compare(keyMap: (T) -> R, item1: T, item2: T): Int {  // three-way comparison of two items by the key keyMap extracts
31 |     return when {
32 |         keyMap(item1) < keyMap(item2) -> -1
33 |         keyMap(item1) > keyMap(item2) -> 1
34 |         else -> 0  // neither key is smaller, so the items are treated as equal
35 |     }
36 | }
37 | 
38 | 
39 | fun randomAlphabets(length: Int): String {  // builds a string of `length` random ASCII letters
40 |     val charPool = ('a'..'z') + ('A'..'Z')  // 52 letters, lowercase first
41 |     return (1..length)  // empty range when length < 1, which yields ""
42 |         .map { charPool[Random.nextInt(0, charPool.size)] }  // upper bound of nextInt is exclusive
43 |         .joinToString("")
44 | }


--------------------------------------------------------------------------------
/patchwork/steps/ExtractModelResponse/README.md:
--------------------------------------------------------------------------------
 1 | ## ExtractModelResponse.py
 2 | 
 3 | ### Inputs:
 4 | - The code defines a class `ExtractModelResponse` that inherits from `Step`.
 5 | - The `__init__` method of the class takes a dictionary `inputs` as a parameter.
 6 | - The required data keys in the `inputs` dictionary are checked to raise a `ValueError` if any key is missing.
 7 | - The `openai_responses` key from the `inputs` dictionary is assigned to `self.openai_responses`.
 8 | - The `response_partitions` key from the `inputs` dictionary is assigned to `self.partitions`.
 9 |   
10 | ### Outputs:
11 | - The `run` method is defined to extract responses based on specified partitions from the `openai_responses`.
12 | - If no partitions are specified, a log is generated and an empty response dictionary is returned.
13 | - For each `openai_response`, it partitions the response based on the specified partitions and stores the extracted response in the `outputs`.
14 | - Finally, a dictionary containing the extracted responses is returned.
15 | 
16 | ### Usage:
17 | - The `ExtractModelResponse` class is intended to extract model responses based on specified partitions.
18 | - Users can instantiate the class by passing the required inputs; the `openai_responses` key is mandatory.
19 | - After instantiation, the `run` method can be called to extract responses based on the specified partitions and get the extracted responses as output.


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRs/README.md:
--------------------------------------------------------------------------------
 1 | # Documentation for ReadPRs Module
 2 | 
 3 | ## Inputs
 4 | - **File**: patchwork/steps/ReadPRs/typed.py
 5 |   - `repo_slug`: A required string input for the repository slug.
 6 |   - `pr_ids`: An optional string input for PR IDs.
 7 |   - `pr_state`: An optional string input for the state of PRs.
 8 |   - `scm_url`: An annotated string input for the SCM URL, configured for step type.
 9 |   - `gitlab_api_key`: An annotated string input for the GitLab API key, configured for step type.
10 |   - `github_api_key`: An annotated string input for the GitHub API key, configured for step type.
11 | 
12 | ## Outputs
13 | - **File**: patchwork/steps/ReadPRs/typed.py
14 |   - `title`: Title of the PR.
15 |   - `body`: Body content of the PR.
16 |   - `comments`: List of comments on the PR.
17 |   - `diffs`: List of dictionaries containing file path and related diff content.
18 | 
19 | ## Usage
20 | 1. The `ReadPRs` class in `ReadPRs.py` creates a step for reading PRs from GitLab or GitHub.
21 | 2. Construct the step object by passing `DataPoint` inputs for repository slug, PR IDs, PR state, SCM URL, and API keys.
22 | 3. Run the step using the `run()` method to fetch PRs based on the provided inputs.
23 | 4. The step filters PRs based on ID if specified, and generates data points with PR information like title, body, comments, and diffs.
24 | 5. Use the generated data points for further processing or analysis related to the PRs.


--------------------------------------------------------------------------------
/patchwork/steps/Combine/README.md:
--------------------------------------------------------------------------------
 1 | # Code Documentation: Patchwork Combine Module
 2 | 
 3 | This document provides an overview of the `Combine` module within the Patchwork project. It consists of three files - `typed.py`, `Combine.py`, and `__init__.py`.
 4 | 
 5 | ## `patchwork/steps/Combine/typed.py`
 6 | 
 7 | This file defines two typed dictionaries using the `typing_extensions` package:
 8 | - `CombineInputs` containing keys `base_json` and `update_json` of type List of Dictionaries or Dictionary.
 9 | - `CombineOutputs` containing a key `result_json` of type List of Dictionaries or Dictionary.
10 | 
11 | ## `patchwork/steps/Combine/Combine.py`
12 | 
13 | This file contains a `Combine` class that extends a `Step` class and is initialized with inputs. The class checks for required data keys, compares input lists, and performs combining operations based on the input data type and structure.
14 | 
15 | ### Inputs:
16 | - `inputs`: Required data inputs for the combine operation.
17 | 
18 | ### Outputs:
19 | - `run()`: Method that executes the combining logic and returns a dictionary with the combined result based on the provided inputs.
20 | 
21 | ## `patchwork/steps/Combine/__init__.py`
22 | 
23 | This empty file serves as the initialization module for the `Combine` package.
24 | 
25 | The `Combine` module is designed to facilitate data combination operations, handling various cases of input data structures, and producing the combined output in a structured format.


--------------------------------------------------------------------------------
/patchwork/steps/AgenticLLM/AgenticLLM.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from patchwork.common.client.llm.aio import AioLlmClient
 4 | from patchwork.common.multiturn_strategy.agentic_strategy import AgenticStrategy
 5 | from patchwork.common.tools import Tool
 6 | from patchwork.step import Step
 7 | from patchwork.steps.AgenticLLM.typed import AgenticLLMInputs, AgenticLLMOutputs
 8 | 
 9 | 
10 | class AgenticLLM(Step, input_class=AgenticLLMInputs, output_class=AgenticLLMOutputs):  # Step that runs an AgenticStrategy and returns its history, tool records and usage.
11 |     def __init__(self, inputs):
12 |         super().__init__(inputs)
13 |         base_path = inputs.get("base_path")
14 |         if base_path is None:
15 |             base_path = str(Path.cwd())  # default to the current working directory
16 |         self.conversation_limit = int(int(inputs.get("max_llm_calls", 2)) / 2)  # half of max_llm_calls (default 2), truncated to an int
17 |         self.agentic_strategy = AgenticStrategy(
18 |             llm_client=AioLlmClient.create_aio_client(inputs),
19 |             tool_set=Tool.get_tools(path=base_path),  # tools are looked up for base_path
20 |             template_data=inputs.get("prompt_value"),
21 |             system_prompt_template=inputs.get("system_prompt"),
22 |             user_prompt_template=inputs.get("user_prompt"),
23 |         )
24 | 
25 |     def run(self) -> dict:
26 |         self.agentic_strategy.execute(limit=self.conversation_limit)  # run with the limit computed in __init__
27 |         return dict(
28 |             conversation_history=self.agentic_strategy.history,
29 |             tool_records=self.agentic_strategy.tool_records,
30 |             **self.agentic_strategy.usage(),  # usage() keys are merged into the top level of the result
31 |         )
32 | 


--------------------------------------------------------------------------------
/patchwork/steps/JoinList/JoinList.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from patchwork.step import Step, StepStatus
 4 | from patchwork.steps.JoinList.typed import JoinListInputs, JoinListOutputs
 5 | 
 6 | 
 7 | class JoinList(Step, input_class=JoinListInputs, output_class=JoinListOutputs):  # Step that joins the items of inputs["list"] into one string using inputs["delimiter"].
 8 |     def __init__(self, inputs):
 9 |         super().__init__(inputs)
10 | 
11 |         self.list = inputs["list"]
12 |         self.delimiter = inputs["delimiter"]
13 |         self.possible_keys = ["body", "text"]  # dict keys tried, in order, when an item is a dict
14 |         if inputs.get("key") is not None:
15 |             self.possible_keys.insert(0, inputs.get("key"))  # a caller-supplied key is tried before the defaults
16 | 
17 |     def run(self):
18 |         if len(self.list) == 0:
19 |             self.set_status(StepStatus.SKIPPED, "List is empty")
20 |             return dict()  # nothing to join: empty result, step marked as skipped
21 | 
22 |         items = []
23 |         for item in self.list:
24 |             if isinstance(item, str):
25 |                 items.append(item)
26 |             elif isinstance(item, dict):
27 |                 is_added = False
28 |                 for possible_key in self.possible_keys:
29 |                     if possible_key in item.keys():
30 |                         items.append(item.get(possible_key))  # NOTE(review): value is not coerced to str; join() below raises TypeError if it is not a string
31 |                         is_added = True
32 |                         break  # only the first matching key is used
33 |                 if not is_added:
34 |                     items.append(json.dumps(item))  # no known key present: fall back to the dict's JSON form
35 |             else:
36 |                 items.append(str(item))  # any other type is stringified
37 | 
38 |         return dict(text=self.delimiter.join(items))
39 | 


--------------------------------------------------------------------------------
/patchwork/steps/GitHubAgent/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, Optional, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __GitHubAgentRequiredInputs(TypedDict):  # keys that must always be present (TypedDict default total=True)
 7 |     github_api_key: str
 8 |     task: str
 9 | 
10 | 
11 | class GitHubAgentInputs(__GitHubAgentRequiredInputs, total=False):  # adds optional keys (total=False) on top of the required ones
12 |     base_path: str
13 |     prompt_value: Dict[str, Any]
14 |     max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)]
15 |     example_json: str
16 |     openai_api_key: Annotated[  # or_op presumably names alternative keys that satisfy this one; confirm in StepTypeConfig
17 |         str,
18 |         StepTypeConfig(
19 |             is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]
20 |         ),
21 |     ]
22 |     anthropic_api_key: Annotated[
23 |         str,
24 |         StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]),
25 |     ]
26 |     google_api_key: Annotated[
27 |         str,
28 |         StepTypeConfig(
29 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "client_is_gcp", "anthropic_api_key"]
30 |         ),
31 |     ]
32 |     client_is_gcp: Annotated[  # NOTE(review): typed as str although the name reads like a flag; confirm intended type
33 |         str,
34 |         StepTypeConfig(
35 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]
36 |         ),
37 |     ]
38 | 
39 | 
40 | class GitHubAgentOutputs(TypedDict):  # output keys of the step; presumably LLM token usage counts, confirm against the step implementation
41 |     request_tokens: int
42 |     response_tokens: int
43 | 


--------------------------------------------------------------------------------
/patchwork/steps/AgenticLLMV2/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class AgenticLLMV2Inputs(TypedDict, total=False):  # every key is optional at the type level (total=False)
 7 |     base_path: str
 8 |     prompt_value: Dict[str, Any]
 9 |     system_prompt: str
10 |     user_prompt: str
11 |     max_agent_calls: Annotated[int, StepTypeConfig(is_config=True)]
12 |     strategy_model: str
13 |     agent_model: str
14 |     agent_system_prompt: str
15 |     example_json: str
16 |     openai_api_key: Annotated[  # or_op presumably names alternative keys that satisfy this one; confirm in StepTypeConfig
17 |         str,
18 |         StepTypeConfig(
19 |             is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]
20 |         ),
21 |     ]
22 |     anthropic_api_key: Annotated[
23 |         str,
24 |         StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]),
25 |     ]
26 |     google_api_key: Annotated[
27 |         str,
28 |         StepTypeConfig(
29 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "client_is_gcp", "anthropic_api_key"]
30 |         ),
31 |     ]
32 |     client_is_gcp: Annotated[  # NOTE(review): typed as str although the name reads like a flag; confirm intended type
33 |         str,
34 |         StepTypeConfig(
35 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]
36 |         ),
37 |     ]
38 | 
39 | 
40 | class AgenticLLMV2Outputs(TypedDict):  # output keys of the step; presumably LLM token usage counts, confirm against the step implementation
41 |     request_tokens: int
42 |     response_tokens: int
43 | 


--------------------------------------------------------------------------------
/patchwork/common/server.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI, Header, Request, Response
 2 | from fastapi.exceptions import HTTPException
 3 | from openai.types.chat import ChatCompletion
 4 | from typing_extensions import Annotated
 5 | 
 6 | from patchwork.common.client.llm.aio import AioLlmClient
 7 | from patchwork.common.client.llm.anthropic import AnthropicLlmClient
 8 | from patchwork.common.client.llm.google_ import GoogleLlmClient
 9 | from patchwork.common.client.llm.openai_ import OpenAiLlmClient
10 | 
11 | app = FastAPI()
12 | 
13 | 
14 | @app.post("/v1/chat/completions")
15 | async def handle_openai(  # Forwards the posted JSON body to AioLlmClient.chat_completion and returns its result.
16 |     authorization: Annotated[str, Header()],
17 |     request: Request,
18 |     response: Response,
19 | ) -> ChatCompletion:
20 |     _, _, api_key = authorization.partition("Bearer ")  # text after "Bearer "; empty string when the prefix is absent
21 |     body = await request.json()
22 | 
23 |     openai_client = OpenAiLlmClient(api_key=api_key)  # the same key is handed to all three provider clients
24 |     google_client = GoogleLlmClient(api_key=api_key)
25 |     anthropic_client = AnthropicLlmClient(api_key=api_key)
26 |     aio_client = AioLlmClient(openai_client, google_client, anthropic_client)
27 |     try:
28 |         return aio_client.chat_completion(**body)  # request body keys become keyword arguments
29 |     except Exception as e:
30 |         status_code = getattr(e, "status_code", 500)  # reuse the exception's status_code when it has one, else 500
31 |         body = getattr(e, "body", {"error_message": str(e)})  # likewise its body, else the exception message
32 |         raise HTTPException(status_code=status_code, detail=body)
33 | 
34 | 
35 | if __name__ == "__main__":  # when run as a script, serve the app on 127.0.0.1:8080
36 |     import uvicorn
37 | 
38 |     uvicorn.run(app, host="127.0.0.1", port=8080)
39 | 


--------------------------------------------------------------------------------
/patchwork/steps/FilterBySimilarity/README.md:
--------------------------------------------------------------------------------
 1 | ## Input and Output Data Handling for FilterBySimilarity Step
 2 | 
 3 | This documentation provides an overview of the content and structure of three Python files that make up the `FilterBySimilarity` step in `patchwork/steps`.
 4 | 
 5 | ### Inputs
 6 | - `FilterBySimilarityInputs` class defines the expected input structure for the step, including:
 7 |   - `list`: A list of dictionaries.
 8 |   - `keywords`: A string annotated as configuration data.
 9 |   - `keys`: A string annotated as configuration data.
10 |   - `top_k`: An integer annotated as configuration data.
11 | 
12 | ### Outputs
13 | - `FilterBySimilarityOutputs` class defines the output structure for the step, including:
14 |   - `result_list`: A list of dictionaries containing filtered items based on similarity.
15 | 
16 | ### Code Functionality
17 | - The code within `FilterBySimilarity.py` file implements the logic for the `FilterBySimilarity` step.
18 | - It utilizes TF-IDF vectorization and cosine similarity to calculate the similarity between provided keywords and text items in the input list of dictionaries.
19 | - The step function processes the input data, calculates similarity scores, and returns a filtered list of items based on similarity.
20 | - A logger and several helper functions are used for processing input data and performing necessary calculations.
21 | - The file `__init__.py` is empty; it serves as the initialization file for the package and contains no code logic.


--------------------------------------------------------------------------------
/patchwork/common/tools/github_tool.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import os
 4 | import subprocess
 5 | 
 6 | from patchwork.common.tools.tool import Tool
 7 | 
 8 | 
 9 | class GitHubTool(Tool, tool_name="github_tool",  abc_register=False):  # Tool that runs the GitHub CLI (`gh`) in self.path with GH_TOKEN set.
10 |     def __init__(self, path: str, gh_token: str):
11 |         super().__init__()
12 |         self.path = path  # working directory for every gh invocation
13 |         self.gh_token = gh_token  # exported to the child process as GH_TOKEN
14 | 
15 |     @property
16 |     def json_schema(self) -> dict:  # schema with a single required "args" array of strings
17 |         return {
18 |             "name": "github_tool",
19 |             "description": """\
20 | Access to the GitHub CLI, the command is also `gh` all args provided are used as is
21 | """,
22 |             "input_schema": {
23 |                 "type": "object",
24 |                 "properties": {
25 |                     "args": {
26 |                         "type": "array",
27 |                         "items": {"type": "string"},
28 |                         "description": "The args to run `gh` command with.",
29 |                     }
30 |                 },
31 |                 "required": ["args"],
32 |             },
33 |         }
34 | 
35 |     def execute(self, args: list[str]) -> str:
36 |         env = os.environ.copy()  # copy, so the token is set for the child only and os.environ is untouched
37 |         env["GH_TOKEN"] = self.gh_token
38 |         p = subprocess.run(
39 |             ["gh", *args],  # argument list, no shell involved
40 |             env=env,
41 |             cwd=self.path,
42 |             text=True,
43 |             stdout=subprocess.PIPE,
44 |             stderr=subprocess.STDOUT,  # stderr is merged into the captured output
45 |         )
46 |         return p.stdout  # exit status is not checked; failures only show up in this text
47 | 


--------------------------------------------------------------------------------
/tests/steps/test_ReadIssues.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from patchwork.steps import ReadIssues
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(  # one case per SCM client: GitHub, then GitLab
 7 |     "inputs_extra,method_path,issue_texts",
 8 |     [
 9 |         (
10 |             {"github_api_key": "key"},
11 |             "patchwork.common.client.scm.GithubClient.find_issue_by_url",
12 |             dict(
13 |                 title="",
14 |                 body="github pr body",
15 |                 comments=["nothing", "there"],
16 |                 description="Title:\n\n\nDescription:\ngithub pr body\n",
17 |             ),
18 |         ),
19 |         (
20 |             {"gitlab_api_key": "key"},
21 |             "patchwork.common.client.scm.GitlabClient.find_issue_by_url",
22 |             dict(
23 |                 title="gitlab pr title",
24 |                 body="",
25 |                 comments=["something", "here"],
26 |                 description="Title:\ngitlab pr title\n\nDescription:\n\n",
27 |             ),
28 |         ),
29 |     ],
30 | )
31 | def test_read_issues(mocker, inputs_extra, method_path, issue_texts):
32 |     # Set up
33 |     base_inputs = {"issue_url": "https://example.com/issue"}
34 |     inputs = {**base_inputs, **inputs_extra}
35 | 
36 |     mocked_scm_client = mocker.patch(method_path)  # replace find_issue_by_url with a mock
37 |     mocked_scm_client.return_value = issue_texts  # the mock hands back the parametrized issue dict
38 | 
39 |     # Actual Run
40 |     read_issues = ReadIssues(inputs)
41 |     results = read_issues.run()
42 | 
43 |     # Assertions
44 |     assert results == {f"issue_{key}": value for key, value in issue_texts.items()}  # every field is expected back under an "issue_" prefix
45 | 


--------------------------------------------------------------------------------
/patchwork/steps/ModifyCodeOnce/ModifyCodeOnce.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from patchwork.step import Step, StepStatus
 4 | from patchwork.steps.ModifyCode.ModifyCode import ModifyCode
 5 | from patchwork.steps.ModifyCodeOnce.typed import (
 6 |     ModifyCodeOnceInputs,
 7 |     ModifyCodeOnceOutputs,
 8 | )
 9 | 
10 | 
11 | class ModifyCodeOnce(Step, input_class=ModifyCodeOnceInputs, output_class=ModifyCodeOnceOutputs):  # Applies one patch to one file by delegating to the ModifyCode step.
12 |     def __init__(self, inputs: dict):
13 |         super().__init__(inputs)
14 |         self.file_path = inputs["file_path"]  # the only key read with [] (KeyError if absent)
15 |         self.start_line = inputs.get("start_line")
16 |         self.end_line = inputs.get("end_line")
17 |         self.patch = inputs.get("new_code")  # None means there is nothing to apply
18 | 
19 |     def run(self) -> dict:
20 |         if self.patch is None:
21 |             self.set_status(StepStatus.SKIPPED, "No patch provided")
22 |             return {}
23 | 
24 |         modify_code = ModifyCode(  # wrap the single file/patch in the list-shaped inputs ModifyCode is given here
25 |             {
26 |                 "files_to_patch": [
27 |                     dict(
28 |                         uri=self.file_path,
29 |                         startLine=self.start_line,
30 |                         endLine=self.end_line,
31 |                     )
32 |                 ],
33 |                 "extracted_responses": [
34 |                     dict(
35 |                         patch=self.patch,
36 |                     )
37 |                 ],
38 |             }
39 |         )
40 |         modified_code_files = modify_code.run()
41 |         return modified_code_files.get("modified_code_files", [{}])[0]  # first entry, or {} when the key is missing; NOTE(review): IndexError if the list is present but empty
42 | 


--------------------------------------------------------------------------------
/tests/steps/test_ExtractModelResponse.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from patchwork.steps.ExtractModelResponse.ExtractModelResponse import (
 4 |     ExtractModelResponse,
 5 | )
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def sample_inputs():  # two raw responses plus the partition markers configured per output key
10 |     return {
11 |         "openai_responses": ["partition1response1partition2", "response2partition3"],
12 |         "response_partitions": {"key1": ["partition1", "partition2"], "key2": ["partition3"]},
13 |     }
14 | 
15 | 
16 | def test_init_required_keys(sample_inputs):  # the constructor must store both inputs unchanged
17 |     step = ExtractModelResponse(sample_inputs)
18 |     assert step.openai_responses == sample_inputs["openai_responses"]
19 |     assert step.partitions == sample_inputs["response_partitions"]
20 | 
21 | 
22 | def test_init_missing_required_keys():  # an empty inputs dict must be rejected with ValueError
23 |     with pytest.raises(ValueError):
24 |         ExtractModelResponse({})
25 | 
26 | 
27 | def test_run_no_partitions(sample_inputs):  # with no partitions, any key lookup is expected to yield the whole response
28 |     step = ExtractModelResponse({**sample_inputs, "response_partitions": {}})
29 |     output = step.run()
30 |     assert len(output["extracted_responses"]) == 2
31 |     assert output["extracted_responses"][0]["anyKeyHere"] == "partition1response1partition2"  # arbitrary key on purpose
32 |     assert output["extracted_responses"][1]["kEy"] == "response2partition3"
33 | 
34 | 
35 | def test_run_with_partitions(sample_inputs):  # each key should hold the text left after its partition markers are cut away
36 |     step = ExtractModelResponse(sample_inputs)
37 |     output = step.run()
38 |     assert len(output["extracted_responses"]) == 2
39 |     assert output["extracted_responses"][0]["key1"] == "response1"  # text between "partition1" and "partition2"
40 |     assert output["extracted_responses"][1]["key2"] == "response2"  # text before "partition3"
41 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSonar/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class __ScanSonarRequiredInputs(TypedDict):  # keys that must always be present (TypedDict default total=True)
 7 |     sonarqube_api_key: Annotated[
 8 |         str,
 9 |         StepTypeConfig(  # msg reads as the error text for a missing key; confirm how StepTypeConfig uses it
10 |             is_config=True,
11 |             msg="""SonarQube access token not found.
12 | Please generate an access token in your SonarQube instance and add `--sonarqube_api_key=<token>` to the command line.""",
13 |         ),
14 |     ]
15 |     sonarqube_project_key: Annotated[
16 |         str,
17 |         StepTypeConfig(
18 |             is_config=True,
19 |             msg="""SonarQube project key not found.
20 | Please provide your project key using `--sonarqube_project_key=<key>`.
21 | You can find this in your SonarQube project settings.""",
22 |         ),
23 |     ]
24 | 
25 | 
26 | class ScanSonarInputs(__ScanSonarRequiredInputs, total=False):  # adds the optional base URL (total=False) to the required keys
27 |     sonarqube_base_url: Annotated[
28 |         str,
29 |         StepTypeConfig(
30 |             is_config=True,
31 |             msg="""SonarQube base URL not found.
32 | Please provide the URL of your SonarQube instance using `--sonarqube_base_url=<url>`.
33 | For SonarCloud, this would be https://sonarcloud.io""",
34 |         ),
35 |     ]
36 | 
37 | 
38 | class SonarVulnerability(TypedDict):  # shape of one entry in ScanSonarOutputs.files_to_patch
39 |     uri: str
40 |     startLine: int  # presumably the first line of the affected range; confirm whether 0- or 1-based
41 |     endLine: int
42 |     affectedCode: str
43 |     messageText: str
44 | 
45 | 
46 | class ScanSonarOutputs(TypedDict):  # the step's single output key
47 |     files_to_patch: List[SonarVulnerability]
48 | 


--------------------------------------------------------------------------------
/patchwork/common/tools/git_tool.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import os
 4 | import subprocess
 5 | 
 6 | from patchwork.common.tools.tool import Tool
 7 | 
 8 | 
 9 | class GitTool(Tool, tool_name="git_tool",  abc_register=False):  # Tool that runs the Git CLI (`git`) in self.path.
10 |     def __init__(self, path: str):
11 |         super().__init__()
12 |         self.path = path  # working directory for every git invocation
13 | 
14 |     @property
15 |     def json_schema(self) -> dict:  # schema with a single required "args" array of strings
16 |         return {
17 |             "name": "git_tool",
18 |             "description": """\
19 | Access to the Git CLI, the command is also `git` all args provided are used as is.
20 | """,
21 |             "input_schema": {
22 |                 "type": "object",
23 |                 "properties": {
24 |                     "args": {
25 |                         "type": "array",
26 |                         "items": {"type": "string"},
27 |                         "description": """
28 | The args to run `git` command with. 
29 | E.g. 
30 | [\"commit\", \"-m\", \"A commit message\"] to commit changes with a commit message.
31 | [\"add\", \".\"] to stage all changed files.
32 | """,
33 |                     }
34 |                 },
35 |                 "required": ["args"],
36 |             },
37 |         }
38 | 
39 |     def execute(self, args: list[str]) -> str:
40 |         env = os.environ.copy()  # child gets a copy of the current environment, unmodified
41 |         p = subprocess.run(
42 |             ["git", *args],  # argument list, no shell involved
43 |             env=env,
44 |             cwd=self.path,
45 |             text=True,
46 |             stdout=subprocess.PIPE,
47 |             stderr=subprocess.STDOUT,  # stderr is merged into the captured output
48 |         )
49 |         return p.stdout  # exit status is not checked; failures only show up in this text
50 | 


--------------------------------------------------------------------------------
/patchwork/common/context_strategy/kotlin.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.context_strategy.languages import JavaLanguage
 2 | from patchwork.common.context_strategy.protocol import TreeSitterStrategy
 3 | 
 4 | 
 5 | class KotlinStrategy(TreeSitterStrategy):
 6 |     def __init__(self, query: str):
 7 |         """
 8 |         Initialize the Kotlin strategy with the given query.
 9 | 
10 |         Args:
11 |         query (str): The query string applied to Kotlin (`.kt`) files.
12 |         """
13 |         super().__init__("kotlin", query, ["kt"], JavaLanguage())  # NOTE(review): JavaLanguage() is passed for Kotlin; confirm this reuse is intended
14 |         self.query = query
15 | 
16 | 
17 | class KotlinClassStrategy(KotlinStrategy):
18 |     def __init__(self):
19 |         """
20 |         Initialize the strategy with a fixed query that captures every
21 |         class_declaration as @node.
22 |         """
23 |         super().__init__(
24 |             """
25 |             (class_declaration) @node
26 |             """.strip()
27 |         )
28 | 
29 | 
30 | class KotlinMethodStrategy(KotlinStrategy):
31 |     def __init__(self):
32 |         """
33 |         Initialize the strategy with a fixed query that matches
34 |         function declarations and multiline comments.
35 | 
36 |         Captures:
37 |         - @node: every function_declaration
38 |         - @comment: every multiline_comment
39 | 
40 |         Takes no arguments; the query is hard-coded below.
41 |         """
42 |         super().__init__(
43 |             """
44 |         [
45 |             (multiline_comment) @comment
46 |             (function_declaration) @node
47 |         ]
48 |         """.strip()
49 |         )
50 | 


--------------------------------------------------------------------------------
/patchwork/steps/SlackMessage/README.md:
--------------------------------------------------------------------------------
 1 | ## Overview
 2 | 
 3 | This documentation provides an overview of three Python files related to a module for sending messages to Slack in a development environment.
 4 | 
 5 | ### Inputs
 6 | - `SlackMessageInputs`
 7 |   - `slack_channel`: A Slack channel ID or name for sending the message.
 8 |   - `slack_token`: Authentication token for sending messages to Slack.
 9 |   - `slack_message_template_file`: Optional file path for a template message.
10 |   - `slack_message_template`: Optional template message if file path is not provided.
11 |   - `slack_message_values`: Optional key-value pairs to insert into the message template.
12 |   
13 | ### Outputs
14 | - `SlackMessageOutputs`
15 |   - `is_slack_message_sent`: Boolean indicating if the Slack message was successfully sent.
16 | 
17 | ### Files
18 | 1. **SlackMessage.py**: Contains a class `SlackMessage` that initializes with necessary inputs, validates Slack configurations, fetches the appropriate Slack channel, processes the message template, and sends the message to the Slack channel.
19 |    
20 | 2. **typed.py**: Defines the `SlackMessageInputs` and `SlackMessageOutputs` typed dictionary classes for type hinting and validation of input and output parameters.
21 | 
22 | 3. **__init__.py**: An empty file for package initialization.
23 | 
24 | The code is likely used as a step in a larger workflow or automation process to send messages to a specific Slack channel with defined templates and content. The input parameters ensure that required details for Slack communication are provided, and the outputs indicate the success of message delivery.


--------------------------------------------------------------------------------
/patchwork/steps/LLM/LLM.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.utils.utils import exclude_none_dict
 2 | from patchwork.step import Step
 3 | from patchwork.steps.CallLLM.CallLLM import CallLLM
 4 | from patchwork.steps.ExtractModelResponse.ExtractModelResponse import (
 5 |     ExtractModelResponse,
 6 | )
 7 | from patchwork.steps.LLM.typed import LLMInputs, LLMOutputs
 8 | from patchwork.steps.PreparePrompt.PreparePrompt import PreparePrompt
 9 | 
10 | 
11 | class LLM(Step, input_class=LLMInputs, output_class=LLMOutputs):  # Composite step: PreparePrompt -> CallLLM -> ExtractModelResponse, each fed the same inputs.
12 |     def __init__(self, inputs):
13 |         super().__init__(inputs)
14 |         self.inputs = inputs  # kept whole; forwarded to each sub-step in run()
15 | 
16 |     def run(self) -> dict:
17 |         prepare_prompt_outputs = PreparePrompt(self.inputs).run()
18 |         call_llm_outputs = CallLLM(
19 |             dict(
20 |                 prompts=prepare_prompt_outputs.get("prompts"),
21 |                 **self.inputs,  # NOTE(review): dict() raises TypeError if inputs already contains "prompts"
22 |             )
23 |         ).run()
24 |         extract_model_response_outputs = ExtractModelResponse(
25 |             dict(
26 |                 openai_responses=call_llm_outputs.get("openai_responses"),
27 |                 **self.inputs,  # NOTE(review): same TypeError if inputs already contains "openai_responses"
28 |             )
29 |         ).run()
30 |         return exclude_none_dict(  # presumably drops keys whose value is None; confirm in patchwork.common.utils.utils
31 |             dict(
32 |                 prompts=prepare_prompt_outputs.get("prompts"),
33 |                 openai_responses=call_llm_outputs.get("openai_responses"),
34 |                 extracted_responses=extract_model_response_outputs.get("extracted_responses"),
35 |                 request_tokens=call_llm_outputs.get("request_tokens"),
36 |                 response_tokens=call_llm_outputs.get("response_tokens"),
37 |             )
38 |         )
39 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadIssues/ReadIssues.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.client.scm import (
 2 |     GithubClient,
 3 |     GitlabClient,
 4 |     ScmPlatformClientProtocol,
 5 | )
 6 | from patchwork.step import Step
 7 | 
 8 | 
 9 | class ReadIssues(Step):  # Step that fetches one issue by URL from GitHub or GitLab and returns its fields under issue_* keys.
10 |     required_keys = {"issue_url"}
11 | 
12 |     def __init__(self, inputs: dict):
13 |         super().__init__(inputs)
14 |         if not all(key in inputs.keys() for key in self.required_keys):  # reject inputs lacking any required key
15 |             raise ValueError(f'Missing required data: "{self.required_keys}"')
16 | 
17 |         self.scm_client: ScmPlatformClientProtocol
18 |         if "github_api_key" in inputs.keys():  # GitHub takes precedence when both keys are supplied
19 |             self.scm_client = GithubClient(inputs["github_api_key"])
20 |         elif "gitlab_api_key" in inputs.keys():
21 |             self.scm_client = GitlabClient(inputs["gitlab_api_key"])
22 |         else:
23 |             raise ValueError(f'Missing required input data: "github_api_key" or "gitlab_api_key"')
24 | 
25 |         if "scm_url" in inputs.keys():
26 |             self.scm_client.set_url(inputs["scm_url"])  # optional override of the client's URL
27 | 
28 |         self.issue = self.scm_client.find_issue_by_url(inputs["issue_url"])  # fetched at construction time, not in run()
29 |         if not self.issue:
30 |             raise ValueError(f"Could not find issue with url: {inputs['issue_url']}")
31 | 
32 |     def run(self) -> dict:  # returns title, body, comments, plus a description built from title and body
33 |         return dict(
34 |             issue_title=self.issue.get("title"),
35 |             issue_body=self.issue.get("body"),
36 |             issue_comments=self.issue.get("comments"),
37 |             issue_description=f"""\
38 | Title:
39 | {self.issue.get("title")}
40 | 
41 | Description:
42 | {self.issue.get("body")}
43 | """,
44 |         )
45 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssueComment/CreateIssueComment.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.client.scm import (
 2 |     GithubClient,
 3 |     GitlabClient,
 4 |     ScmPlatformClientProtocol,
 5 | )
 6 | from patchwork.step import Step, StepStatus
 7 | 
 8 | 
 9 | class CreateIssueComment(Step):
10 |     required_keys = {"issue_url", "issue_text"}
11 | 
12 |     def __init__(self, inputs: dict):
13 |         super().__init__(inputs)
14 |         if not all(key in inputs.keys() for key in self.required_keys):
15 |             raise ValueError(f'Missing required data: "{self.required_keys}"')
16 | 
17 |         self.scm_client: ScmPlatformClientProtocol
18 |         if "github_api_key" in inputs.keys():
19 |             self.scm_client = GithubClient(inputs["github_api_key"])
20 |         elif "gitlab_api_key" in inputs.keys():
21 |             self.scm_client = GitlabClient(inputs["gitlab_api_key"])
22 |         else:
23 |             raise ValueError(f'Missing required input data: "github_api_key" or "gitlab_api_key"')
24 | 
25 |         if "scm_url" in inputs.keys():
26 |             self.scm_client.set_url(inputs["scm_url"])
27 | 
28 |         self.issue_text = inputs["issue_text"]
29 |         self.issue_url = inputs["issue_url"]
30 | 
31 |     def run(self) -> dict:
32 |         try:
33 |             slug, issue_id = self.scm_client.get_slug_and_id_from_url(self.issue_url)
34 |             url = self.scm_client.create_issue_comment(slug, self.issue_text, issue_id=issue_id)
35 |         except Exception as e:
36 |             self.set_status(StepStatus.FAILED, f"Failed to create issue comment")
37 |             raise e
38 | 
39 |         return dict(issue_comment_url=url)
40 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanDepscan/README.md:
--------------------------------------------------------------------------------
 1 | The provided files contain Python code for scanning dependencies and generating software bill of materials (SBOM) reports.
 2 | 
 3 | ### Inputs:
 4 | - The `ScanDepscan` class takes a dictionary of inputs in its constructor. The specific content of this dictionary is not detailed and may vary based on the class requirements.
 5 | - The `test_run` function in the test file does not directly take any inputs, but it creates a temporary package lock file for testing purposes.
 6 | 
 7 | ### Outputs:
 8 | - The `ScanDepscan` class has a `run` method that executes the `depscan` tool to generate a software bill of materials report. It returns a dictionary containing the path to the generated report file.
 9 | - The `test_run` function in the test file executes the `run` method of the `ScanDepscan` class and verifies the existence and validity of the generated software bill of materials report.
10 | - The main output of interest in this context is the path to the SBOM report file produced by the `ScanDepscan` class.
11 | 
12 | ### Usage:
13 | - The `ScanDepscan` class is designed to check for the presence of the `cdxgen` tool, install it if necessary, and then run the `depscan` tool to generate SBOM reports based on the specified inputs.
14 | - The `test_run` function in the test file serves to validate the functionality of the `ScanDepscan` class by creating a mock environment, running the `ScanDepscan` class, and verifying the generated SBOM report.
15 | 
16 | The code is intended to be used in a larger system or workflow where scanning dependencies and generating SBOM reports are part of the development or security processes.


--------------------------------------------------------------------------------
/patchwork/common/context_strategy/position.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from attrs import Factory, define
 4 | 
 5 | from patchwork.common.context_strategy.languages import LanguageProtocol
 6 | 
 7 | 
 8 | @define
 9 | class Position:  # attrs value class; NOTE(review): appears to mark a line/column span in source text - confirm
10 |     start: int  # presumably the first line of the span - TODO confirm base and inclusivity
11 |     end: int  # presumably the last line of the span - TODO confirm inclusivity
12 |     start_col: int  # presumably the column on the start line - TODO confirm
13 |     end_col: int  # presumably the column on the end line - TODO confirm
14 |     language: LanguageProtocol  # language object associated with this position
15 |     meta_positions: dict[str, "Position"] = Factory(dict)  # nested positions by name; Factory gives each instance its own dict
16 | 
17 |     # def extract_lines(self, src: list[str]) -> list[str]:
18 |     #     return src[self.start : self.end]
19 |     #
20 |     # def extract_text(self, src: list[str]) -> list[str]:
21 |     #     lines = self.extract_lines(src)
22 |     #     lines[0] = lines[0][self.start_col :]
23 |     #     lines[-1] = lines[-1][: self.end_col]
24 |     #     return lines
25 |     #
26 |     # @contextlib.contextmanager
27 |     # def replace_text(self, src: list[str]) -> list[str]:
28 |     #     container = self.extract_text(src)
29 |     #     yield container
30 |     #     src[self.start] = src[self.start][: self.start_col + 1] + container[0]
31 |     #     src[self.start + 1 : self.end - 2] = container[1:-1]
32 |     #     src[self.end - 1] = src[self.end - 1][self.end_col - 1 :] + container[-1]
33 |     #     return
34 | 
35 | 
36 | # @dataclasses.dataclass(slots=True, frozen=True)
37 | # class FileSource:
38 | #     filepath: Path
39 | #     src: list[str]
40 | #
41 | #     @contextlib.contextmanager
42 | #     def replace_text(self, position: Position) -> list[str]:
43 | #         with position.replace_text(self.src) as container:
44 | #             yield container
45 | #         return
46 | #
47 | #     def write(self):
48 | #         self.filepath.write_text("".join(self.src))
49 | 


--------------------------------------------------------------------------------
/patchwork/steps/SimplifiedLLMOnce/SimplifiedLLMOnce.py:
--------------------------------------------------------------------------------
 1 | from patchwork.step import Step
 2 | from patchwork.steps.SimplifiedLLM.SimplifiedLLM import SimplifiedLLM
 3 | from patchwork.steps.SimplifiedLLMOnce.typed import (
 4 |     SimplifiedLLMOnceInputs,
 5 |     SimplifiedLLMOnceOutputs,
 6 | )
 7 | 
 8 | 
 9 | class SimplifiedLLMOnce(Step, input_class=SimplifiedLLMOnceInputs, output_class=SimplifiedLLMOnceOutputs):
10 |     def __init__(self, inputs):
11 |         super().__init__(inputs)
12 | 
13 |         self.user = inputs["user_prompt"]
14 |         self.system = inputs.get("system_prompt")
15 |         self.prompt_value = inputs["prompt_value"]
16 |         self.json_example = inputs["json_schema"]
17 |         self.inputs = inputs
18 | 
19 |     def run(self) -> dict:
20 |         if self.system is not None:
21 |             prompt_dict = dict(
22 |                 prompt_system=self.system,
23 |                 prompt_user=self.user,
24 |             )
25 |         else:
26 |             prompt_dict = dict(
27 |                 prompt_user=self.user,
28 |             )
29 | 
30 |         llm = SimplifiedLLM(
31 |             {
32 |                 **self.inputs,
33 |                 **prompt_dict,
34 |                 "prompt_values": [self.prompt_value],
35 |                 "json": True,
36 |                 "json_example": self.json_example,
37 |             }
38 |         )
39 |         llm_output = llm.run()
40 |         self.set_status(llm.status, llm.status_message)
41 | 
42 |         return {
43 |             **llm_output.get("extracted_responses")[0],
44 |             "request_tokens": llm_output.get("request_tokens")[0],
45 |             "response_tokens": llm_output.get("response_tokens")[0],
46 |         }
47 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreateIssue/CreateIssue.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import git
 4 | 
 5 | from patchwork.common.client.scm import (
 6 |     GithubClient,
 7 |     GitlabClient,
 8 |     ScmPlatformClientProtocol,
 9 |     get_slug_from_remote_url,
10 | )
11 | from patchwork.step import Step
12 | 
13 | 
14 | class CreateIssue(Step):
15 |     required_keys = {"issue_title", "issue_text"}
16 | 
17 |     def __init__(self, inputs: dict):
18 |         super().__init__(inputs)
19 |         if not all(key in inputs.keys() for key in self.required_keys):
20 |             raise ValueError(f'Missing required data: "{self.required_keys}"')
21 | 
22 |         self.scm_client: ScmPlatformClientProtocol
23 |         if "github_api_key" in inputs.keys():
24 |             self.scm_client = GithubClient(inputs["github_api_key"])
25 |         elif "gitlab_api_key" in inputs.keys():
26 |             self.scm_client = GitlabClient(inputs["gitlab_api_key"])
27 |         else:
28 |             raise ValueError(f'Missing required input data: "github_api_key" or "gitlab_api_key"')
29 | 
30 |         if "scm_url" in inputs.keys():
31 |             self.scm_client.set_url(inputs["scm_url"])
32 | 
33 |         self.issue_title = inputs["issue_title"]
34 |         self.issue_text = inputs["issue_text"]
35 | 
36 |     def run(self) -> dict:
37 |         repo = git.Repo(Path.cwd(), search_parent_directories=True)
38 | 
39 |         original_remote_name = "origin"
40 |         original_remote_url = repo.remotes[original_remote_name].url
41 |         slug = get_slug_from_remote_url(original_remote_url)
42 |         url = self.scm_client.create_issue_comment(slug, self.issue_text, title=self.issue_title)
43 | 
44 |         return dict(issue_url=url)
45 | 


--------------------------------------------------------------------------------
/patchwork/steps/Combine/Combine.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | 
 3 | from patchwork.step import Step
 4 | from patchwork.steps.Combine.typed import CombineInputs
 5 | 
 6 | 
 7 | class Combine(Step):
 8 |     def __init__(self, inputs):
 9 |         super().__init__(inputs)
10 |         missing_keys = CombineInputs.__required_keys__.difference(inputs.keys())
11 |         if len(missing_keys) > 0:
12 |             raise ValueError(f"Missing required data: {missing_keys}")
13 | 
14 |         self.base = inputs["base_json"]
15 |         self.update = inputs["update_json"]
16 | 
17 |     def run(self):
18 |         base_list = isinstance(self.base, list)
19 |         update_list = isinstance(self.update, list)
20 |         if not base_list and not update_list:
21 |             return {**self.base, **self.update}
22 | 
23 |         if base_list and update_list:
24 |             final_output = []
25 |             for item_1, item_2 in itertools.zip_longest(self.base, self.update):
26 |                 if item_1 is None:
27 |                     final_output.append(item_2)
28 |                 elif item_2 is None:
29 |                     final_output.append(item_1)
30 |                 else:
31 |                     final_output.append({**item_1, **item_2})
32 |             return dict(result_json=final_output)
33 | 
34 |         if base_list:
35 |             list_json = self.base
36 |             additional_json = self.update
37 |             combiner = lambda base, update: {**base, **update}
38 |         else:
39 |             list_json = self.update
40 |             additional_json = self.base
41 |             combiner = lambda update, base: {**base, **update}
42 | 
43 |         return dict(result_json=[combiner(item, additional_json) for item in list_json])
44 | 


--------------------------------------------------------------------------------
/patchwork/steps/PR/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | 
 5 | 
 6 | class ModifiedCodeFile(TypedDict):  # element type of PRInputs.modified_code_files
 7 |     path: str  # presumably the path of the changed file - verify against producers
 8 |     commit_message: str
 9 |     patch_message: str
10 | 
11 | 
12 | class PRInputs(TypedDict, total=False):  # total=False: every key below is optional
13 |     # CommitChangesInputs & PreparePRInputs
14 |     modified_code_files: List["ModifiedCodeFile"]
15 |     # Alternative to modified_code_files (presumably read via the *_key names below - confirm)
16 |     modified_files: List[Dict]
17 |     path_key: str
18 |     comment_title_key: str
19 |     comment_message_key: str
20 |     # CommitChangesInputs (keys annotated with StepTypeConfig(is_config=True) are flagged as config)
21 |     disable_branch: Annotated[bool, StepTypeConfig(is_config=True)]
22 |     force_branch_creation: Annotated[bool, StepTypeConfig(is_config=True)]
23 |     branch_prefix: Annotated[str, StepTypeConfig(is_config=True)]
24 |     branch_suffix: Annotated[str, StepTypeConfig(is_config=True)]
25 |     # PreparePRInputs
26 |     pr_header: Annotated[str, StepTypeConfig(is_config=True)]
27 |     # CreatePRInputs
28 |     pr_title: Annotated[str, StepTypeConfig(is_config=True)]
29 |     force_pr_creation: Annotated[bool, StepTypeConfig(is_config=True)]
30 |     disable_pr: Annotated[bool, StepTypeConfig(is_config=True)]
31 |     scm_url: Annotated[str, StepTypeConfig(is_config=True)]
32 |     gitlab_api_key: Annotated[str, StepTypeConfig(is_config=True)]
33 |     github_api_key: Annotated[str, StepTypeConfig(is_config=True)]
34 |     issue_url: Annotated[str, StepTypeConfig(is_config=True)]
35 | 
36 | 
37 | class PROutputs(TypedDict):  # TypedDict default total=True: all four keys are required
38 |     # CommitChangesOutputs
39 |     base_branch: str
40 |     target_branch: str
41 |     # PreparePROutputs
42 |     pr_body: str
43 |     # CreatePROutputs
44 |     pr_url: str
45 | 


--------------------------------------------------------------------------------
/tests/steps/test_CreateIssue.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from patchwork.steps.CreateIssue.CreateIssue import CreateIssue
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(
 7 |     "inputs",
 8 |     [
 9 |         {"issue_title": "my issue", "issue_text": "my issue text", "scm_url": "https://github.com/my/repo"},
10 |         {"issue_title": "my issue", "scm_url": "https://github.com/my/repo", "github_api_key": "my api key"},
11 |         {"issue_text": "my issue text", "scm_url": "https://github.com/my/repo", "github_api_key": "my api key"},
12 |     ],
13 | )
14 | def test_init_missing_required_keys(inputs):
15 |     with pytest.raises(ValueError) as e:
16 |         CreateIssue(inputs)
17 | 
18 | 
19 | def test_init_required_keys():
20 |     inputs = {
21 |         "issue_title": "my issue",
22 |         "issue_text": "my issue text",
23 |         "scm_url": "https://github.com/my/repo",
24 |         "github_api_key": "my api key",
25 |     }
26 |     create_issue = CreateIssue(inputs)
27 |     assert create_issue.issue_title == "my issue"
28 |     assert create_issue.issue_text == "my issue text"
29 | 
30 | 
31 | def test_run(mocker):
32 |     inputs = {
33 |         "issue_title": "my issue",
34 |         "issue_text": "my issue text",
35 |         "scm_url": "https://github.com/my/repo",
36 |         "github_api_key": "my api key",
37 |     }
38 |     mocked_create_issue_comment = mocker.patch("patchwork.common.client.scm.GithubClient.create_issue_comment")
39 |     mocked_create_issue_comment.return_value = "https://github.com/my/repo/issues/1"
40 | 
41 |     create_issue = CreateIssue(inputs)
42 |     output = create_issue.run()
43 |     assert output["issue_url"] == "https://github.com/my/repo/issues/1"
44 |     assert create_issue.scm_client is not None
45 | 


--------------------------------------------------------------------------------
/tests/steps/test_ScanSonar.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from unittest.mock import patch
 3 | 
 4 | import pytest
 5 | 
 6 | from patchwork.common.client.sonar import SonarClient, SonarVuln
 7 | from patchwork.steps.ScanSonar.ScanSonar import ScanSonar
 8 | 
 9 | 
10 | def test_scan_sonar():
11 |     inputs = {
12 |         "sonarqube_project_key": "test-project",
13 |         "sonarqube_api_key": "test-token",
14 |         "sonarqube_base_url": "https://sonarcloud.io",
15 |     }
16 | 
17 |     path_to_resource_file = Path(__file__).parent.parent / "cicd" / "generate_docstring" / "python_test_file.py"
18 |     mock_vulns = {str(path_to_resource_file): [SonarVuln(start=13, end=14, cwe="CWE-79", bug_msg="Test vulnerability")]}
19 | 
20 |     with patch.object(SonarClient, "find_vulns", return_value=mock_vulns):
21 |         step = ScanSonar(inputs)
22 |         result = step.run()
23 | 
24 |         assert "files_to_patch" in result
25 |         vulns = result["files_to_patch"]
26 |         assert len(vulns) == 1
27 | 
28 |         vuln = vulns[0]
29 |         assert vuln["uri"] == str(path_to_resource_file)
30 |         assert vuln["startLine"] == 0
31 |         assert vuln["endLine"] == 24
32 |         assert vuln["messageText"] == "Test vulnerability"
33 | 
34 | 
35 | def test_scan_sonar_error():
36 |     inputs = {
37 |         "sonarqube_project_key": "test-project",
38 |         "sonarqube_api_key": "test-token",
39 |         "sonarqube_base_url": "https://sonarcloud.io",
40 |     }
41 | 
42 |     with patch.object(SonarClient, "find_vulns", side_effect=Exception("Test error")):
43 |         step = ScanSonar(inputs)
44 |         with pytest.raises(Exception) as exc_info:
45 |             step.run()
46 |         assert str(exc_info.value) == "Test error"
47 | 


--------------------------------------------------------------------------------
/tests/steps/test_PreparePR.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from patchwork.steps import PreparePR
 4 | 
 5 | 
 6 | @pytest.fixture
 7 | def prepare_pr_instance():
 8 |     inputs = {
 9 |         "modified_code_files": [
10 |             {"path": "file1", "start_line": 1, "end_line": 2, "commit_message": "commit msg"},
11 |             {"path": "file2", "patch_message": "patch msg"},
12 |             {"path": "file1", "start_line": 3, "end_line": 4, "commit_message": "commit msg"},
13 |         ]
14 |     }
15 |     return PreparePR(inputs)
16 | 
17 | 
18 | def test_init_inputs(prepare_pr_instance):
19 |     assert prepare_pr_instance.modified_code_files == [
20 |         {"path": "file1", "start_line": 1, "end_line": 2, "commit_message": "commit msg"},
21 |         {"path": "file2", "patch_message": "patch msg"},
22 |         {"path": "file1", "start_line": 3, "end_line": 4, "commit_message": "commit msg"},
23 |     ]
24 | 
25 | 
26 | def test_run(prepare_pr_instance):
27 |     result = prepare_pr_instance.run()
28 |     assert "pr_body" in result
29 |     assert result["pr_body"].startswith(prepare_pr_instance.header)
30 | 
31 | 
32 | def test_run_no_modified_files():
33 |     inputs = {"modified_code_files": []}
34 |     prepare_pr_instance = PreparePR(inputs)
35 |     result = prepare_pr_instance.run()
36 |     assert result["pr_body"] == ""
37 |     assert prepare_pr_instance.status.name == "SKIPPED"
38 | 
39 | 
40 | def test_init_missing_required_keys():
41 |     with pytest.raises(ValueError):
42 |         PreparePR({})
43 | 
44 | 
45 | def test_run_pr_header_override():
46 |     inputs = {
47 |         "modified_code_files": [{"path": "file1"}],
48 |         "pr_header": "Custom PR header",
49 |     }
50 |     prepare_pr_instance = PreparePR(inputs)
51 |     result = prepare_pr_instance.run()
52 |     assert result["pr_body"].startswith("Custom PR header")
53 | 


--------------------------------------------------------------------------------
/patchwork/steps/CreatePRComment/CreatePRComment.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.client.scm import AzureDevopsClient, GithubClient, GitlabClient
 2 | from patchwork.logger import logger
 3 | from patchwork.step import Step, StepStatus
 4 | 
 5 | 
 6 | class CreatePRComment(Step):
 7 |     required_keys = {"pr_url", "pr_comment"}
 8 | 
 9 |     def __init__(self, inputs: dict):
10 |         super().__init__(inputs)
11 |         if not all(key in inputs.keys() for key in self.required_keys):
12 |             raise ValueError(f'Missing required data: "{self.required_keys}"')
13 | 
14 |         if "github_api_key" in inputs.keys():
15 |             self.scm_client = GithubClient(inputs["github_api_key"])
16 |         elif "gitlab_api_key" in inputs.keys():
17 |             self.scm_client = GitlabClient(inputs["gitlab_api_key"])
18 |         elif "azuredevops_api_key" in inputs.keys():
19 |             self.scm_client = AzureDevopsClient(inputs["azuredevops_api_key"])
20 |         else:
21 |             raise ValueError(f'Missing required input data: "github_api_key" or "gitlab_api_key"')
22 | 
23 |         if "scm_url" in inputs.keys():
24 |             self.scm_client.set_url(inputs["scm_url"])
25 | 
26 |         self.pr = self.scm_client.get_pr_by_url(inputs["pr_url"])
27 |         self.pr_comment = inputs["pr_comment"]
28 |         self.noisy = bool(inputs.get("noisy_comments", False))
29 | 
30 |     def run(self) -> dict:
31 |         if not self.noisy:
32 |             self.pr.reset_comments()
33 | 
34 |         comment = self.pr.create_comment(body=self.pr_comment)
35 |         if comment is None:
36 |             self.set_status(StepStatus.FAILED)
37 |             logger.error(f"Failed to create comment: {self.pr_comment}")
38 |         else:
39 |             logger.info(f"Comment created for PR: {self.pr.url()}")
40 | 
41 |         return dict(pr_url=self.pr.url())
42 | 


--------------------------------------------------------------------------------
/patchwork/steps/AgenticLLMV2/AgenticLLMV2.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from patchwork.common.client.llm.aio import AioLlmClient
 4 | from patchwork.common.multiturn_strategy.agentic_strategy_v2 import (
 5 |     AgentConfig,
 6 |     AgenticStrategyV2,
 7 | )
 8 | from patchwork.common.tools import Tool
 9 | from patchwork.step import Step
10 | from patchwork.steps.AgenticLLMV2.typed import AgenticLLMV2Inputs, AgenticLLMV2Outputs
11 | 
12 | 
13 | class AgenticLLMV2(Step, input_class=AgenticLLMV2Inputs, output_class=AgenticLLMV2Outputs):
14 |     def __init__(self, inputs):
15 |         super().__init__(inputs)
16 |         base_path = inputs.get("base_path")
17 |         if base_path is None:
18 |             base_path = str(Path.cwd())
19 |         self.conversation_limit = int(inputs.get("max_agent_calls", 1))
20 |         self.agentic_strategy = AgenticStrategyV2(
21 |             model=inputs.get("strategy_model", "claude-3-5-sonnet-latest"),
22 |             llm_client=AioLlmClient.create_aio_client(inputs),
23 |             template_data=inputs.get("prompt_value", {}),
24 |             system_prompt_template=inputs.get("system_prompt", "Summarise from our previous conversation"),
25 |             user_prompt_template=inputs.get("user_prompt"),
26 |             agent_configs=[
27 |                 AgentConfig(
28 |                     name="Assistant",
29 |                     model=inputs.get("agent_model", "claude-3-7-sonnet-latest"),
30 |                     tool_set=Tool.get_tools(path=base_path),
31 |                     system_prompt=inputs.get("agent_system_prompt"),
32 |                 )
33 |             ],
34 |             example_json=inputs.get("example_json"),
35 |         )
36 | 
37 |     def run(self) -> dict:
38 |         result = self.agentic_strategy.execute(limit=self.conversation_limit)
39 |         return {**result, **self.agentic_strategy.usage()}
40 | 


--------------------------------------------------------------------------------
/patchwork/steps/README.md:
--------------------------------------------------------------------------------
 1 | # Creating a Step in Patchwork
 2 | 
 3 | A step's name is ideally expected to be a verb with two words, e.g. "Run Example".
 4 | 
 5 | To create a step to "Run example":
 6 | 
 7 | 1. Create a file `patchwork/steps/RunExample/RunExample.py`.
 8 |     Any additional files required for the `RunExample` should be placed in the same folder.
 9 | 
10 | 2.  In `RunExample.py`, implement the class `RunExample` which inherits the `Step` from the `patchwork.step` module. The class `RunExample` is expected to have two methods:
11 |     - `__init__`: The constructor of the class. It should accept an `inputs` dictionary as an argument. This dictionary contains the input data for the step. Checking of the presence and validity of the input data should be done here.
12 |     - `run`: The main method of the class. This method should contain the logic of the step and return a dictionary with the results.
13 | 
14 | 3. Update `patchwork/steps/__init__.py` to include the new step in the list of available steps.
15 | 
16 | ## Example
17 | 
18 | ### Path: `patchwork/steps/RunExample/RunExample.py`
19 | 
20 | ```python
21 | from patchwork.step import Step
22 | 
23 | 
24 | class RunExample(Step):
25 |     required_keys = ['input1', 'input2']
26 | 
27 |     def __init__(self, inputs):
28 |         super().__init__(inputs)
29 |         for key in self.required_keys:
30 |             if key not in inputs:
31 |                 raise ValueError(f"Missing required input: {key}")
32 | 
33 |         self.input1 = inputs['input1']
34 |         self.input2 = inputs['input2']
35 | 
36 |     def run(self) -> dict:
37 |         return {
38 |             'output1': "example output",
39 |         }
40 | ```
41 | 
42 | ### Path: `patchwork/steps/__init__.py`
43 | 
44 | ```python
45 | from .RunExample.RunExample import RunExample
46 | 
47 | __all__ = [
48 |     ...
49 |     'RunExample',
50 | ]
51 | ```
52 | 


--------------------------------------------------------------------------------
/tests/steps/test_ReadPRDiffs.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from patchwork.common.client.scm import PullRequestProtocol
 4 | from patchwork.steps.ReadPRDiffs.ReadPRDiffs import _IGNORED_EXTENSIONS, ReadPRDiffs
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     "inputs_extra,method_path,texts,expected",
 9 |     [
10 |         (
11 |             {"github_api_key": "key"},
12 |             "patchwork.common.client.scm.GithubClient.get_pr_by_url",
13 |             dict(title="this", body="", comments=[], diffs=dict(path="diff")),
14 |             dict(title="this", body="", comments=[], diffs=[dict(path="path", diff="diff")]),
15 |         ),
16 |         (
17 |             {"gitlab_api_key": "key"},
18 |             "patchwork.common.client.scm.GitlabClient.get_pr_by_url",
19 |             dict(title="", body="that", comments=[], diffs=dict(path="diff")),
20 |             dict(title="", body="that", comments=[], diffs=[dict(path="path", diff="diff")]),
21 |         ),
22 |         (
23 |             {"github_api_key": "key"},
24 |             "patchwork.common.client.scm.GithubClient.get_pr_by_url",
25 |             dict(title="", body="", comments=[], diffs={f"path{ext}": "diff" for ext in _IGNORED_EXTENSIONS}),
26 |             dict(title="", body="", comments=[], diffs=[]),
27 |         ),
28 |     ],
29 | )
30 | def test_read_prdiffs(mocker, inputs_extra, method_path, texts, expected):
31 |     # Set up
32 |     base_inputs = {"pr_url": "https://example.com/pr"}
33 |     inputs = {**base_inputs, **inputs_extra}
34 | 
35 |     mocked_pr = mocker.Mock(spec=PullRequestProtocol)
36 |     mocked_pr.texts.return_value = texts
37 |     mocked_scm_client = mocker.patch(method_path)
38 |     mocked_scm_client.return_value = mocked_pr
39 | 
40 |     # Actual Run
41 |     read_pr_diffs = ReadPRDiffs(inputs)
42 |     results = read_pr_diffs.run()
43 | 
44 |     # Assertions
45 |     assert results == expected
46 | 


--------------------------------------------------------------------------------
/patchwork/steps/ExtractPackageManagerFile/README.md:
--------------------------------------------------------------------------------
 1 | # Extract Package Manager File
 2 | 
 3 | ## Inputs
 4 | 
 5 | - **Package Manager File Extraction**:
 6 |     - Directory: Root directory of the project.
 7 |     - Package URL: The Package URL of the dependency.
 8 | 
 9 | ## Outputs
10 | 
11 | - A list of paths to package manager files relevant to the PURL's type found in the specified directory.
12 | - A dictionary containing paths to the generated prompt value and code files.
13 | - Log messages during the extraction process.
14 | 
15 | ### Description
16 | 
17 | This Python code consists of three files:
18 | 1. **__init__.py**: Empty file.
19 | 2. **ExtractPackageManagerFile.py**: Defines functions for extracting package manager files based on PackageURL types, transforming version strings to Semantic Versioning format, and extracting relevant data from SBOM VDR files.
20 | 3. **TestExtractPackageManagerFile.py**: Contains unit tests for the `ExtractPackageManagerFile` class.
21 | 
22 | The `ExtractPackageManagerFile` class initializes input parameters, validates required keys, and processes SBOM VDR data to extract component and vulnerability information. It associates PURLs with source file paths, identifies affected and unaffected versions, and compiles this data. It further saves the extracted data as a temporary JSON file and logs execution status.
23 | 
24 | The `run()` method of the `ExtractPackageManagerFile` class loads SBOM VDR data, maps PURLs to source files, processes vulnerabilities, reads source file contents, and compiles the data structure. It generates message updates, prepares update information, and saves data to a JSON file.
25 | 
26 | The test cases in `TestExtractPackageManagerFile.py` validate the proper functioning of the `ExtractPackageManagerFile` class by creating a temporary SBOM VDR file, executing the extraction process, and checking the generated JSON files for validity.


--------------------------------------------------------------------------------
/tests/steps/test_ScanSemgrep.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | import pytest
 4 | 
 5 | from patchwork.steps import ScanSemgrep
 6 | 
 7 | MockedCompletedProcessClass = namedtuple("CompletedProcess", ["stdout"])  # stand-in for the mocked subprocess.run result; only stdout is modelled
 8 | 
 9 | 
def test_scan_semgrep_enabled(mocker):
    """
    Run the step without a `sarif_file_path` key while `subprocess.run` is
    patched, and check that the mocked stdout ("{}") surfaces as an empty
    `sarif_values` dict.
    """

    # Arrange: only an unrelated key is supplied, and the subprocess is faked
    stdout_text = "{}"
    step_inputs = {"_sarif_file_path": "not_present"}

    run_mock = mocker.patch("subprocess.run")
    run_mock.return_value = MockedCompletedProcessClass(stdout=stdout_text)

    # Act
    outputs = ScanSemgrep(step_inputs).run()

    # Assert
    assert outputs.get("sarif_values") is not None
    assert outputs["sarif_values"] == {}
29 | 
30 | 
def test_scan_semgrep_file(mocker, tmp_path):
    """
    Test when sarif_file_path is present and points at an existing file:
    the step must return the file's parsed content without ever calling
    `subprocess.run`.
    """

    # Arrange: an on-disk SARIF file holding an empty JSON object
    sarif_file = tmp_path / "sarif.json"
    sarif_file.write_text("{}")
    step_inputs = {"sarif_file_path": sarif_file}

    run_mock = mocker.patch("subprocess.run")

    # Act
    outputs = ScanSemgrep(step_inputs).run()

    # Assert
    assert run_mock.call_count == 0
    assert outputs.get("sarif_values") is not None
    assert outputs["sarif_values"] == {}
53 | 
54 | 
def test_scan_semgrep_raises():
    """
    Test when sarif_file_path is given but names a file that does not exist:
    constructing the step is expected to raise ValueError.
    """
    # Arrange: a path string with no matching file
    missing_file_inputs = {"sarif_file_path": "already_present"}

    # Act + Assert: the failure happens at construction time, before run()
    with pytest.raises(ValueError):
        ScanSemgrep(missing_file_inputs)
66 | 


--------------------------------------------------------------------------------
/patchwork/common/context_strategy/java.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.context_strategy.languages import JavaLanguage
 2 | from patchwork.common.context_strategy.protocol import TreeSitterStrategy
 3 | 
 4 | 
class JavaStrategy(TreeSitterStrategy):
    def __init__(self, query: str):
        """
        Initialize a context strategy for Java source files.

        Forwards the language name "java", the given query, the [".java"]
        extension list and a fresh JavaLanguage instance to the
        TreeSitterStrategy initializer.

        Args:
        query (str): The query string this strategy is built around
            (presumably a tree-sitter query; the subclasses in this module
            pass S-expression patterns such as "(block) @node").
        """
        super().__init__("java", query, [".java"], JavaLanguage())
        # NOTE(review): `query` was already handed to the base initializer above;
        # this assignment may be redundant -- confirm against TreeSitterStrategy.
        self.query = query
15 | 
16 | 
class JavaClassStrategy(JavaStrategy):
    def __init__(self):
        """
        Build a Java strategy whose query captures each `class_declaration`
        under the `@node` capture name.
        """
        class_query = """
            (class_declaration) @node
            """.strip()
        super().__init__(class_query)
28 | 
29 | 
class JavaMethodStrategy(JavaStrategy):
    def __init__(self):
        """
        Build a Java strategy whose query matches either a `block_comment`
        (captured as `@comment`) or a `method_declaration` (captured as
        `@node`).
        """
        method_query = """
        [
            (block_comment) @comment
            (method_declaration) @node
        ]
        """.strip()
        super().__init__(method_query)
50 | 
51 | 
class JavaBlockStrategy(JavaStrategy):
    def __init__(self):
        """
        Build a Java strategy whose query captures each `block` node under
        the `@node` capture name.
        """
        block_query = """
            (block) @node
        """.strip()
        super().__init__(block_query)
65 | 


--------------------------------------------------------------------------------
/tests/steps/test_ScanDepscan.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | from pathlib import Path
 4 | 
 5 | import pytest
 6 | 
 7 | from patchwork.steps import ScanDepscan
 8 | 
 9 | 
@pytest.mark.skip(reason="Seeing F in CI but not locally")
def test_run():
    """
    Run ScanDepscan inside a temporary directory that contains only a
    package-lock.json pinning jquery 3.0.0-alpha1, and expect a non-empty
    `sbom_vdr_values` output.
    """
    # Lock file content written into the scratch project
    lockfile_text = """{
    "name": "example-javascript",
    "version": "0.0.1",
    "lockfileVersion": 3,
    "requires": true,
    "packages": {
        "": {
          "name": "example-javascript",
          "version": "0.0.1",
          "dependencies": {
            "jquery": "3.0.0-alpha1"
          }
        },
        "node_modules/jquery": {
          "version": "3.0.0-alpha1",
          "resolved": "https://registry.npmjs.org/jquery/-/jquery-3.0.0-alpha1.tgz",
          "integrity": "sha512-agCHkB3RtPYzPifHRYPuxAoWFX+t09VtJKAzPOjUvts/qq5P/1SULEbdoY8hFUSS3eTY/03CMlSfaRAip0T36A=="
        }
    }
}"""
    step_inputs = {}

    with tempfile.TemporaryDirectory() as workdir:
        (Path(workdir) / "package-lock.json").write_text(lockfile_text)

        # The step is run from inside the scratch directory; always restore cwd afterwards
        previous_cwd = os.getcwd()
        try:
            os.chdir(workdir)

            outputs = ScanDepscan(step_inputs).run()

            vdr_values = outputs.get("sbom_vdr_values")
            assert vdr_values is not None
            # The scan must have produced at least one entry
            assert len(vdr_values) > 0
        finally:
            os.chdir(previous_cwd)
59 | 


--------------------------------------------------------------------------------
/tests/steps/test_CallAPI.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import pytest
 4 | 
 5 | from patchwork.steps.CallAPI.CallAPI import CallAPI
 6 | 
 7 | 
 8 | @pytest.mark.parametrize(
 9 |     "method, path, headers, body, return_code",
10 |     [
11 |         ["GET", "/", None, None, 200],
12 |         ["POST", "/", None, None, 200],
13 |         ["POST", "/", None, None, 404],
14 |         ["POST", "/something", None, None, 200],
15 |         ["POST", "/something", {"header1": "value"}, None, 200],
16 |         ["POST", "/something", '{"header1": "value"}', None, 200],
17 |         ["POST", "/something", {"Content-Type": "text/plain"}, "something", 200],
18 |         ["POST", "/something", {"Content-Type": "text/plain"}, {"key": "value"}, 200],
19 |         ["POST", "/something", {"Content-Type": "text/plain"}, '{"key": "value"}', 200],
20 |     ],
21 | )
22 | def test_call_api_outputs(httpserver, path, method, headers, body, return_code):
23 |     response_body = "some data"
24 |     response_headers = {"Content-Type": "text/plain"}
25 |     httpserver.serve_content(content=response_body, headers=response_headers, code=return_code)
26 | 
27 |     inputs = {"url": f"{httpserver.url}{path}", "method": method}
28 |     if headers:
29 |         inputs["headers"] = headers
30 |     if body:
31 |         inputs["body"] = body
32 | 
33 |     expected_headers = None
34 |     if headers is not None:
35 |         expected_headers = headers if isinstance(headers, dict) else json.loads(headers)
36 | 
37 |     result = CallAPI(inputs).run()
38 | 
39 |     request = httpserver.requests[-1]
40 |     assert request.method == method
41 |     assert request.path == path
42 |     if expected_headers is not None:
43 |         for key, value in expected_headers.items():
44 |             assert request.headers[key] == value
45 | 
46 |     assert result["status_code"] == return_code
47 |     assert result["body"] == response_body
48 |     assert result["headers"]["Content-Type"] == response_headers["Content-Type"]
49 | 


--------------------------------------------------------------------------------
/tests/cicd/generate_docstring/cpp_test_file.cpp:
--------------------------------------------------------------------------------
 1 | #include <string>
 2 | #include <vector>
 3 | #include <random>
 4 | #include <algorithm>
 5 | #include <sqlite3.h>
 6 | 
 7 | 
 8 | template<typename T>
 9 | T a_plus_b(T a, T b) {
10 |     return a + b;
11 | }
12 | 
13 | 
14 | std::vector<std::vector<std::string>> sqlite(sqlite3* db, const std::string& query) {
15 |     std::vector<std::vector<std::string>> results;
16 |     sqlite3_stmt* stmt;
17 | 
18 |     if (sqlite3_prepare_v2(db, query.c_str(), -1, &stmt, nullptr) != SQLITE_OK) {
19 |         return results;
20 |     }
21 | 
22 |     while (sqlite3_step(stmt) == SQLITE_ROW) {
23 |         std::vector<std::string> row;
24 |         for (int i = 0; i < sqlite3_column_count(stmt); i++) {
25 |             const unsigned char* text = sqlite3_column_text(stmt, i);
26 |             if (text) {
27 |                 row.push_back(std::string(reinterpret_cast<const char*>(text)));
28 |             } else {
29 |                 row.push_back("");
30 |             }
31 |         }
32 |         results.push_back(row);
33 |     }
34 | 
35 |     sqlite3_finalize(stmt);
36 |     return results;
37 | }
38 | 
39 | 
// Three-way comparison of two items by a derived key.
// Applies `key_map` to each item and returns -1 if the first key is smaller,
// 1 if it is greater, and 0 otherwise.
template <typename T, typename F>
int compare(F key_map, const T& item1, const T& item2) {
    auto lhs_key = key_map(item1);
    auto rhs_key = key_map(item2);

    if (lhs_key < rhs_key) {
        return -1;
    }
    return (lhs_key > rhs_key) ? 1 : 0;
}
49 | 
50 | 
// Returns a string of `length` characters drawn uniformly at random from the
// ASCII letters a-z and A-Z. A non-positive `length` yields an empty string.
// The generator is seeded once per process from std::random_device.
std::string random_alphabets(int length) {
    static const std::string chars =
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    static std::random_device rd;
    static std::mt19937 generator(rd());
    static std::uniform_int_distribution<> distribution(0, static_cast<int>(chars.size()) - 1);

    std::string result;
    // A negative int would convert to a huge size_t in reserve() and throw
    // std::length_error, so bail out before reserving.
    if (length <= 0) {
        return result;
    }
    result.reserve(static_cast<std::string::size_type>(length));

    for (int i = 0; i < length; ++i) {
        result += chars[distribution(generator)];
    }

    return result;
}


--------------------------------------------------------------------------------
/patchwork/patchflows/PRReview/pr_review_prompt.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "id": "diffreview",
 4 |     "prompts": [
 5 |       {
 6 |         "role": "system",
 7 |         "content": "You are a senior software engineer reviewing a pull request. You have the following information:\nTITLE:\n```\n{{title}}\n```\nBODY:\n```\n{{body}}\n```\n\nFollow the stated instruction carefully.\nINSTRUCTION:\n```\nDo one or more of the following:\nIdentify potential bugs.\nCheck if the code modifications could possibly introduce new security vulnerabilities.\nPoint out if the new code does not adhere to original coding standards in the pull request.\n```\n\nReply only in this json schema: \n{\n  \"review\": \"<Pull request review following INSTRUCTION>\"\n}"
 8 |       },
 9 |       {
10 |         "role": "user",
11 |         "content": "PATH:\n```\n{{path}}\n```\n\nDIFF:\n```\n{{diff}}\n```"
12 |       }
13 |     ]
14 |   },
15 |   {
16 |     "id": "diffreview-suggestion",
17 |     "prompts": [
18 |       {
19 |         "role": "system",
20 |         "content": "You are a senior software engineer reviewing a pull request. You have the following information:\nTITLE:\n```\n{{title}}\n```\nBODY:\n```\n{{body}}\n```\n\nFollow the stated instruction carefully.\nINSTRUCTION:\n```\nDo one or more of the following:\nIdentify potential bugs.\nCheck if the code modifications could possibly introduce new security vulnerabilities.\nPoint out if the new code does not adhere to original coding standards in the pull request.\n```\n\nReply only in this json schema: \n{\n  \"review\": \"<Pull request review following INSTRUCTION>\",\n  \"suggestion\": \"<Suggestions on improvements based on the review>\"\n}"
21 |       },
22 |       {
23 |         "role": "user",
24 |         "content": "PATH:\n```\n{{path}}\n```\n\nDIFF:\n```\n{{diff}}\n```"
25 |       }
26 |     ]
27 |   },
28 |   {
29 |     "id": "diffreview_summary",
30 |     "prompts": [
31 |       {
32 |         "role": "user",
33 |         "content": "Summarize the following pull request review in 1 paragraph. {{diffreviews}}"
34 |       }
35 |     ]
36 |   }
37 | ]


--------------------------------------------------------------------------------
/patchwork/steps/AgenticLLM/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | from patchwork.steps.CallLLM.CallLLM import TOKEN_URL
 5 | 
 6 | 
class AgenticLLMInputs(TypedDict, total=False):
    """Input keys accepted by the AgenticLLM step.

    Declared with ``total=False``, so no key is required at the type level.
    Keys wrapped in ``Annotated[..., StepTypeConfig(is_config=True)]`` are
    flagged as configuration values. The five API-key style entries each list
    the other four in ``or_op`` -- presumably meaning any one of them
    satisfies the requirement (confirm against StepTypeConfig).
    """

    base_path: str
    prompt_value: Dict[str, Any]
    system_prompt: str
    user_prompt: str
    max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)]
    openai_api_key: Annotated[
        str,
        StepTypeConfig(
            is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]
        ),
    ]
    anthropic_api_key: Annotated[
        str,
        StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]),
    ]
    # Only this entry carries a custom `msg`; it points the user at TOKEN_URL
    # and the --patched_api_key / --openai_api_key command-line flags.
    patched_api_key: Annotated[
        str,
        StepTypeConfig(
            is_config=True,
            or_op=["openai_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"],
            msg=f"""\
Model API key not found.
Please login at: "{TOKEN_URL}"
Please go to the Integration's tab and generate an API key.
Please copy the access token that is generated, and add `--patched_api_key=<token>` to the command line.

If you are using a OpenAI API Key, please set `--openai_api_key=<token>`.""",
        ),
    ]
    google_api_key: Annotated[
        str,
        StepTypeConfig(
            is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]
        ),
    ]
    # NOTE(review): typed as `str` although the name reads like a flag -- confirm intended type.
    client_is_gcp: Annotated[
        str,
        StepTypeConfig(
            is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]
        ),
    ]
49 | 
50 | 
class AgenticLLMOutputs(TypedDict):
    """Output keys declared for the AgenticLLM step; all four are required."""

    conversation_history: List[Dict]
    tool_records: List[Dict]
    # Token counters -- presumably totals for the LLM requests/responses of the run (confirm in AgenticLLM.py).
    request_tokens: int
    response_tokens: int
56 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateDiagram/GenerateDiagram.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import yaml
 4 | 
 5 | from patchwork.common.utils.step_typing import validate_steps_with_inputs
 6 | from patchwork.step import Step
 7 | from patchwork.steps import LLM, PR, CallCode2Prompt, ModifyCode
 8 | 
 9 | _DEFAULT_INPUT_FILE = Path(__file__).parent / "defaults.yml"
10 | _DEFAULT_PROMPT_JSON = Path(__file__).parent / "default_prompt.json"
11 | 
12 | 
class GenerateDiagram(Step):
    """Patchflow that chains CallCode2Prompt -> LLM -> ModifyCode -> PR.

    The shared `self.inputs` dict is threaded through every step and is
    updated with each step's outputs.
    """

    def __init__(self, inputs):
        """Merge defaults with `inputs` and validate them against the steps.

        Values loaded from `defaults.yml` are overridden by `inputs`.
        `prompt_id`, `pr_title` and `branch_prefix` are always set here
        (overwriting any caller-supplied value), while `prompt_template_file`
        is only filled in when absent.

        Args:
            inputs: Mapping of user-provided patchflow inputs. After merging
                with the defaults it must contain `base_path`.

        Raises:
            KeyError: If `base_path` is missing from the merged inputs.
        """
        super().__init__(inputs)

        # An empty defaults file parses to None, so fall back to an empty dict
        final_inputs = yaml.safe_load(_DEFAULT_INPUT_FILE.read_text())
        if final_inputs is None:
            final_inputs = {}

        final_inputs.update(inputs)

        final_inputs["prompt_id"] = "GenerateDiagram"

        if "prompt_template_file" not in final_inputs:
            final_inputs["prompt_template_file"] = _DEFAULT_PROMPT_JSON

        final_inputs["pr_title"] = "PatchWork System Architecture Diagram"
        final_inputs["branch_prefix"] = f"{self.__class__.__name__.lower()}-"

        # `prompt_values` and `files_to_patch` are only produced inside run(),
        # so they are added to the key set for validation purposes.
        validate_steps_with_inputs(
            set(final_inputs.keys()).union({"prompt_values", "files_to_patch"}), LLM, CallCode2Prompt, ModifyCode, PR
        )

        self.base_path = final_inputs["base_path"]
        self.inputs = final_inputs

    def run(self):
        """Execute the steps in order and return the accumulated inputs dict."""
        outputs = CallCode2Prompt(self.inputs).run()
        outputs["uri"] = self.base_path
        self.inputs["response_partitions"] = {"patch": ["```", "\n", "```"]}
        # The same single-element list serves as prompt values and as the files to patch
        self.inputs["files_to_patch"] = self.inputs["prompt_values"] = [outputs]
        outputs = LLM(self.inputs).run()
        self.inputs.update(outputs)
        outputs = ModifyCode(self.inputs).run()
        self.inputs.update(outputs)
        self.inputs["pr_header"] = "This pull request from patchwork generates system architecture diagram."
        outputs = PR(self.inputs).run()
        self.inputs.update(outputs)

        return self.inputs
52 | 


--------------------------------------------------------------------------------
/patchwork/steps/ReadPRDiffs/ReadPRDiffs.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import List
 2 | 
 3 | from patchwork.common.client.scm import AzureDevopsClient, GithubClient, GitlabClient
 4 | from patchwork.step import Step
 5 | from patchwork.steps.ReadPRDiffs.typed import ReadPRDiffsInputs, ReadPRDiffsOutputs
 6 | 
# File-name suffixes (images, office documents, archives, lock files) used to
# decide which PR diff paths are skipped when collecting diffs.
_IGNORED_EXTENSIONS = [
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".svg",
    ".pdf",
    ".docx",
    ".xlsx",
    ".pptx",
    ".zip",
    ".tar",
    ".gz",
    ".lock",
]
22 | 
23 | 
def filter_by_extension(file, extensions):
    """Return True when `file` ends with any of the given `extensions`.

    Despite its name this function filters nothing itself; it is a predicate
    that callers use to decide whether a path should be skipped.

    Args:
        file: Path or file name as a string.
        extensions: Iterable of suffix strings (e.g. ".png").

    Returns:
        bool: True if at least one suffix matches, False otherwise
        (including when `extensions` is empty).
    """
    # str.endswith accepts a tuple of suffixes, so a single call covers all of them
    return file.endswith(tuple(extensions))
26 | 
27 | 
class ReadPRDiffs(Step, input_class=ReadPRDiffsInputs, output_class=ReadPRDiffsOutputs):
    """Step that reads the title, body, comments and per-file diffs of a pull request."""

    def __init__(self, inputs: dict):
        """Select the SCM client from the provided API key and resolve the PR.

        The first key found wins, checked in this order: `github_api_key`,
        `gitlab_api_key`, `azuredevops_api_key`. An optional `scm_url`
        overrides the client's URL.

        Args:
            inputs: Step inputs; must contain `pr_url` and one of the API keys.

        Raises:
            ValueError: If none of the supported API keys is present.
            KeyError: If `pr_url` is missing.
        """
        super().__init__(inputs)

        if "github_api_key" in inputs:
            self.scm_client = GithubClient(inputs["github_api_key"])
        elif "gitlab_api_key" in inputs:
            self.scm_client = GitlabClient(inputs["gitlab_api_key"])
        elif "azuredevops_api_key" in inputs:
            self.scm_client = AzureDevopsClient(inputs["azuredevops_api_key"])
        else:
            raise ValueError(
                'Missing required input data: "github_api_key", "gitlab_api_key" or "azuredevops_api_key"'
            )

        if "scm_url" in inputs:
            self.scm_client.set_url(inputs["scm_url"])

        self.pr = self.scm_client.get_pr_by_url(inputs["pr_url"])

    def run(self) -> dict:
        """Collect the PR texts, dropping diffs of files with ignored extensions.

        Returns:
            dict with `title`, `body`, `comments` and `diffs`, where `diffs`
            is a list of `{"path": ..., "diff": ...}` entries.
        """
        pr_texts = self.pr.texts()
        # Skip paths whose suffix is listed in _IGNORED_EXTENSIONS
        diffs: List[dict] = [
            dict(path=path, diff=diff_text)
            for path, diff_text in pr_texts.get("diffs", {}).items()
            if not filter_by_extension(path, _IGNORED_EXTENSIONS)
        ]

        return dict(
            title=pr_texts.get("title", ""),
            body=pr_texts.get("body", ""),
            comments=pr_texts.get("comments", []),
            diffs=diffs,
        )
58 | 


--------------------------------------------------------------------------------
/patchwork/steps/DatabaseAgent/DatabaseAgent.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.client.llm.aio import AioLlmClient
 2 | from patchwork.common.multiturn_strategy.agentic_strategy_v2 import (
 3 |     AgentConfig,
 4 |     AgenticStrategyV2,
 5 | )
 6 | from patchwork.common.tools.db_query_tool import DatabaseQueryTool
 7 | from patchwork.common.utils.utils import mustache_render
 8 | from patchwork.step import Step
 9 | from patchwork.steps.DatabaseAgent.typed import (
10 |     DatabaseAgentInputs,
11 |     DatabaseAgentOutputs,
12 | )
13 | 
14 | 
class DatabaseAgent(Step, input_class=DatabaseAgentInputs, output_class=DatabaseAgentOutputs):
    """Step that lets an LLM agent with a database query tool work on a task."""

    def __init__(self, inputs):
        """
        Render the task template and configure the agentic strategy.

        `inputs["task"]` is mustache-rendered with `inputs["prompt_value"]`
        (default: empty dict). A single agent named "Assistant" is configured
        with a `db_tool` built from the same inputs, and `inputs["db_dialect"]`
        is interpolated into its system prompt.
        """
        super().__init__(inputs)
        template_values = inputs.get("prompt_value", {})
        rendered_task = mustache_render(inputs["task"], template_values)
        dialect = inputs["db_dialect"]

        summary_system_prompt = (
            "Please summarise the conversation given and provide the result in the structure that is asked of you.\n"
        )
        task_user_prompt = (
            "Please take note of any requirements to the data required to fetch.\n"
            "\n"
            f"{rendered_task}\n"
        )
        assistant_system_prompt = (
            f"You are a {dialect} database query execution assistant. Assist me in completing a task.\n"
            "Before you begin you should first try to know all tables currently available.\n"
            "Then find out what data is held in the relevant tables.\n"
        )

        self.agentic_strategy = AgenticStrategyV2(
            model="gemini-2.0-flash",
            llm_client=AioLlmClient.create_aio_client(inputs),
            template_data=dict(),
            system_prompt_template=summary_system_prompt,
            user_prompt_template=task_user_prompt,
            agent_configs=[
                AgentConfig(
                    model="gemini-2.0-flash",
                    name="Assistant",
                    tool_set=dict(db_tool=DatabaseQueryTool(inputs)),
                    system_prompt=assistant_system_prompt,
                )
            ],
            example_json=inputs.get("example_json"),
        )

    def run(self) -> dict:
        """Execute the strategy with `limit=10` and merge its usage data into the result."""
        outcome = self.agentic_strategy.execute(limit=10)
        usage = self.agentic_strategy.usage()
        return {**outcome, **usage}
51 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/AutoFix/default_prompt.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "id": "fixprompt",
4 |     "prompts": [
5 |       {"role": "system", "content": "You are a senior software engineer who is best in the world at fixing vulnerabilities. Users will give you vulnerable code and you will generate a fix based on the provided INSTRUCTION.\n\nINSTRUCTION:\n\nOnly respond with the fixed code, do not add any comments or change the indentation.\n\nMake sure you respond with the full code and not only the parts that are changed.\n\nThe code should have no errors, it should compile and user should be able to use the fixed code as a direct replacement of the vulnerable code. The code should not introduce any new dependencies or use API calls that are not present in the code.\n\nBefore you generate a fix, do a vulnerability triage and analyse if the vulnerability can indeed to be exploited in the given code.\n\nIf the vulnerability cannot be exploited, respond with <NOT VULNERABLE>.\n\nelse, If you cannot generate an exact fix for the vulnerability, respond with <NO FIX POSSIBLE>.\n\nelse, If you can generate a fix for the vulnerability, do a brief change impact analysis to assess how these modifications might affect the overall system, considering both immediate and potential long-term compatibility issues.\n\nLow: Code diff will be applied to the code base and automatically merged without review.\n\nMedium: Code diff will be applied and a pull request will be sent to the developer to merge, but there are no indirect changes expected to be done in other parts of the system.\n\nHigh: Code diff will be offered as a suggestion to the developer to review and then apply to the code base. There are likely other changes that need to be done by the developer before the change can be implemented.\n\nPlease provide a response only in the following format:\n\nA. Commit message:\n<brief summary of the diff>\n\nB. Change summary:\n<description of the changes made in the diff>\n\nC. Compatibility Risk:\n<Low, Medium, High> \n\nD. 
Fixed Code:\n<original code with the vulnerability now fixed>\n\nFix vulnerability with the following details.\n\n{{messageText}}."},
6 |       {"role": "user", "content": "```\n{{affectedCode}}\n```"}
7 |     ]
8 |   }
9 | ]


--------------------------------------------------------------------------------
/patchwork/patchflows/SonarFix/default_prompt.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "id": "fixprompt",
4 |     "prompts": [
5 |       {"role": "system", "content": "You are a senior software engineer who is best in the world at fixing vulnerabilities. Users will give you vulnerable code and you will generate a fix based on the provided INSTRUCTION.\n\nINSTRUCTION:\n\nOnly respond with the fixed code, do not add any comments or change the indentation.\n\nMake sure you respond with the full code and not only the parts that are changed.\n\nThe code should have no errors, it should compile and user should be able to use the fixed code as a direct replacement of the vulnerable code. The code should not introduce any new dependencies or use API calls that are not present in the code.\n\nBefore you generate a fix, do a vulnerability triage and analyse if the vulnerability can indeed to be exploited in the given code.\n\nIf the vulnerability cannot be exploited, respond with <NOT VULNERABLE>.\n\nelse, If you cannot generate an exact fix for the vulnerability, respond with <NO FIX POSSIBLE>.\n\nelse, If you can generate a fix for the vulnerability, do a brief change impact analysis to assess how these modifications might affect the overall system, considering both immediate and potential long-term compatibility issues.\n\nLow: Code diff will be applied to the code base and automatically merged without review.\n\nMedium: Code diff will be applied and a pull request will be sent to the developer to merge, but there are no indirect changes expected to be done in other parts of the system.\n\nHigh: Code diff will be offered as a suggestion to the developer to review and then apply to the code base. There are likely other changes that need to be done by the developer before the change can be implemented.\n\nPlease provide a response only in the following format:\n\nA. Commit message:\n<brief summary of the diff>\n\nB. Change summary:\n<description of the changes made in the diff>\n\nC. Compatibility Risk:\n<Low, Medium, High> \n\nD. 
Fixed Code:\n<original code with the vulnerability now fixed>\n\nFix vulnerability with the following details.\n\n{{messageText}}."},
6 |       {"role": "user", "content": "```\n{{affectedCode}}\n```"}
7 |     ]
8 |   }
9 | ]


--------------------------------------------------------------------------------
/patchwork/common/tools/bash_tool.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import subprocess
 4 | from pathlib import Path
 5 | 
 6 | from typing_extensions import Optional
 7 | 
 8 | from patchwork.common.tools.tool import Tool
 9 | 
10 | 
class BashTool(Tool, tool_name="bash"):
    """Tool that runs shell commands with a fixed working directory."""

    def __init__(self, path: Path):
        """Store the working directory used for every command.

        Args:
            path: Directory in which commands are executed; coerced to `Path`.
        """
        super().__init__()
        self.path = Path(path)
        self.modified_files = []

    @property
    def json_schema(self) -> dict:
        """Return the tool description and input schema (a single required `command` string)."""
        return {
            "name": "bash",
            "description": f"""Run commands in a bash shell

* When invoking this tool, the contents of the "command" parameter does NOT need to be XML-escaped.
* You don't have access to the internet via this tool.
* You do have access to a mirror of common linux and python packages via apt and pip.
* State is persistent across command calls and discussions with the user.
* To inspect a particular line range of a file, e.g. lines 10-25, try 'sed -n 10,25p /path/to/the/file'.
* Please avoid commands that may produce a very large amount of output.
* Please run long lived commands in the background, e.g. 'sleep 10 &' or start a server in the background.
* The working directory is always {self.path}""",
            "input_schema": {
                "type": "object",
                "properties": {"command": {"type": "string", "description": "The bash command to run."}},
                "required": ["command"],
            },
        }

    def execute(
        self,
        command: Optional[str] = None,
    ) -> str:
        """Run `command` through the shell in `self.path` and return its output.

        Args:
            command: Shell command line to execute.

        Returns:
            The captured stdout when the command exits with status 0;
            otherwise a string starting with "Error: " that carries the
            captured stderr, a timeout notice (60 seconds), or the text of
            any other exception. A missing or blank command also yields an
            "Error: " string. No exception is propagated to the caller.
        """
        # Reject blank commands too, as the error message promises
        if command is None or (isinstance(command, str) and not command.strip()):
            return "Error: `command` parameter must be set and cannot be empty"

        try:
            # NOTE(review): shell=True runs the model-supplied string verbatim;
            # that is this tool's purpose, but it must only be exposed in a sandbox.
            result = subprocess.run(
                command, shell=True, cwd=self.path, capture_output=True, text=True, timeout=60  # Add timeout for safety
            )
            return result.stdout if result.returncode == 0 else f"Error: {result.stderr}"
        except subprocess.TimeoutExpired:
            return "Error: Command timed out after 60 seconds"
        except Exception as e:
            return f"Error: {str(e)}"
55 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSonar/ScanSonar.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import List
 2 | 
 3 | from patchwork.common.client.sonar import SonarClient
 4 | from patchwork.step import Step, StepStatus
 5 | from patchwork.steps.ExtractCode.ExtractCode import read_and_get_source_code_context
 6 | from patchwork.steps.ScanSonar.typed import (
 7 |     ScanSonarInputs,
 8 |     ScanSonarOutputs,
 9 |     SonarVulnerability,
10 | )
11 | 
12 | 
class ScanSonar(Step, input_class=ScanSonarInputs, output_class=ScanSonarOutputs):
    """Step that turns SonarQube findings into `files_to_patch` entries."""

    def __init__(self, inputs: dict):
        """
        Read the step configuration and build the Sonar client.

        `context_length` defaults to 1000. The client's `url` argument is only
        passed when `sonarqube_base_url` is provided.
        """
        super().__init__(inputs)

        self.context_length = inputs.get("context_length", 1000)
        self.project_key = inputs.get("sonarqube_project_key")

        client_kwargs = {"access_token": inputs.get("sonarqube_api_key")}
        base_url = inputs.get("sonarqube_base_url")
        if base_url is not None:
            client_kwargs["url"] = base_url
        self.client = SonarClient(**client_kwargs)

    def run(self) -> dict:
        """
        Fetch the project's findings and attach source context to each one.

        Findings for which no source context could be read are skipped. On any
        exception the step status is set to FAILED and the exception is re-raised.
        """
        try:
            findings_by_file = self.client.find_vulns(self.project_key)

            collected: List[SonarVulnerability] = []

            for file_path, findings in findings_by_file.items():
                for finding in findings:
                    context = read_and_get_source_code_context(
                        file_path, finding.start, finding.end, self.context_length
                    )
                    if context is None:
                        continue
                    snippet, first_line, last_line = context

                    collected.append(
                        SonarVulnerability(
                            uri=file_path,
                            startLine=first_line,
                            endLine=last_line,
                            affectedCode=snippet,
                            messageText=finding.bug_msg,
                        )
                    )

            self.set_status(StepStatus.COMPLETED, "Successfully collected SonarQube results")
            return dict(files_to_patch=collected)

        except Exception as e:
            self.set_status(StepStatus.FAILED, f"Failed to collect SonarQube results: {str(e)}")
            raise
54 | 


--------------------------------------------------------------------------------
/patchwork/steps/SimplifiedLLMOnce/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Any, Dict, TypedDict
 2 | 
 3 | from patchwork.common.utils.step_typing import StepTypeConfig
 4 | from patchwork.steps.CallLLM.CallLLM import TOKEN_URL
 5 | 
 6 | 
 7 | class __SimplifiedLLMOncePBInputsRequired(TypedDict):  # TypedDict is total by default: every key below is required
 8 |     json_schema: Annotated[Dict[str, Any], StepTypeConfig(is_config=True)]
 9 |     # PreparePromptInputs
10 |     user_prompt: Annotated[str, StepTypeConfig(is_config=True)]
11 |     prompt_value: Dict[str, Any]
12 | 
13 | 
14 | class SimplifiedLLMOnceInputs(__SimplifiedLLMOncePBInputsRequired, total=False):  # total=False: keys declared in this body are optional
15 |     system_prompt: Annotated[str, StepTypeConfig(is_config=True)]
16 |     # CallLLMInputs
17 |     model: Annotated[str, StepTypeConfig(is_config=True)]
18 |     openai_api_key: Annotated[  # NOTE(review): or_op presumably lists keys that can stand in for this one - confirm in StepTypeConfig
19 |         str,
20 |         StepTypeConfig(
21 |             is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]
22 |         ),
23 |     ]
24 |     anthropic_api_key: Annotated[
25 |         str,
26 |         StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]),
27 |     ]
28 |     patched_api_key: Annotated[  # the only key in this group that carries a custom `msg`
29 |         str,
30 |         StepTypeConfig(
31 |             is_config=True,
32 |             or_op=["openai_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"],
33 |             msg=f"""\
34 | Model API key not found.
35 | Please login at: "{TOKEN_URL}"
36 | Please go to the Integration's tab and generate an API key.
37 | Please copy the access token that is generated, and add `--patched_api_key=<token>` to the command line.
38 | 
39 | If you are using a OpenAI API Key, please set `--openai_api_key=<token>`.""",
40 |         ),
41 |     ]
42 |     google_api_key: Annotated[
43 |         str,
44 |         StepTypeConfig(
45 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]
46 |         ),
47 |     ]
48 |     client_is_gcp: Annotated[
49 |         str,
50 |         StepTypeConfig(
51 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]
52 |         ),
53 |     ]
54 |     file: Annotated[str, StepTypeConfig(is_path=True)]
55 | 
56 | 
57 | class SimplifiedLLMOnceOutputs(TypedDict):  # output keys declared for the step; both are single ints
58 |     request_tokens: int
59 |     response_tokens: int
60 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallLLM/typed.py:
--------------------------------------------------------------------------------
 1 | from typing_extensions import Annotated, Dict, List, TypedDict
 2 | 
 3 | from patchwork.common.constants import TOKEN_URL
 4 | from patchwork.common.utils.step_typing import StepTypeConfig
 5 | 
 6 | 
 7 | class CallLLMInputs(TypedDict, total=False):  # total=False: no key is required by the TypedDict itself
 8 |     max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)]
 9 |     prompt_file: Annotated[str, StepTypeConfig(is_config=True, or_op=["prompts"])]  # paired with `prompts` through or_op
10 |     prompts: Annotated[List[Dict], StepTypeConfig(or_op=["prompt_file"])]
11 |     model: Annotated[str, StepTypeConfig(is_config=True)]
12 |     allow_truncated: Annotated[bool, StepTypeConfig(is_config=True)]
13 |     model_args: Annotated[str, StepTypeConfig(is_config=True)]
14 |     client_args: Annotated[str, StepTypeConfig(is_config=True)]
15 |     openai_api_key: Annotated[  # NOTE(review): or_op presumably lists keys that can stand in for this one - confirm in StepTypeConfig
16 |         str,
17 |         StepTypeConfig(
18 |             is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]
19 |         ),
20 |     ]
21 |     anthropic_api_key: Annotated[
22 |         str,
23 |         StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]),
24 |     ]
25 |     patched_api_key: Annotated[  # the only key in this group that carries a custom `msg`
26 |         str,
27 |         StepTypeConfig(
28 |             is_config=True,
29 |             or_op=["openai_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"],
30 |             msg=f"""\
31 | Model API key not found.
32 | Please login at: "{TOKEN_URL}"
33 | Please go to the Integration's tab and generate an API key.
34 | Please copy the access token that is generated, and add `--patched_api_key=<token>` to the command line.
35 | 
36 | If you are using a OpenAI API Key, please set `--openai_api_key=<token>`.""",
37 |         ),
38 |     ]
39 |     google_api_key: Annotated[
40 |         str,
41 |         StepTypeConfig(
42 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]
43 |         ),
44 |     ]
45 |     client_is_gcp: Annotated[
46 |         str,
47 |         StepTypeConfig(
48 |             is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]
49 |         ),
50 |     ]
51 |     file: Annotated[str, StepTypeConfig(is_path=True)]
52 | 
53 | 
54 | class CallLLMOutputs(TypedDict):  # every output is a list; NOTE(review): presumably one entry per LLM call - confirm
55 |     openai_responses: List[str]
56 |     request_tokens: List[int]
57 |     response_tokens: List[int]
58 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/README.md:
--------------------------------------------------------------------------------
 1 | # Creating a Patchflow
 2 | 
 3 | A patchflow is a collection of steps that are executed in a specific order. The steps are executed in a sequence and the output of one step is used as the input of the next step.
 4 | 
 5 | To create a patchflow "my patchflow", do the following:
 6 | 
 7 | 1. Create `patchwork/patchflows/MyPatchflow/MyPatchflow.py`
 8 |    Any additional files required for the `MyPatchflow` should be placed in the same folder.
 9 | 
10 | 2. `MyPatchflow.py` should implement the class `MyPatchflow`, which inherits from `Step` in the `patchwork.step` module. The class `MyPatchflow` is expected to have two methods:
11 |     - `__init__`: The constructor of the class. It should accept an `inputs` dictionary as an argument, which contains user specified inputs for the patchflow. In the absence of user specified inputs, default inputs should be specified where feasible. Validation and processing of the inputs should be done here.
12 |     - `run`: The main method of the class. This method should contain the logic of the patchflow which executes a sequence of `Steps` in a specific order and returns a dictionary with the results. Each step's output should be updated in the `inputs` dictionary and passed to the next step.
13 | 
14 | 3. Update `patchwork/patchflows/__init__.py` to include the new patchflow by importing the class `MyPatchflow` and adding it to `__all__`.
15 | 
16 | ## Example
17 | 
18 | ### Path: `patchwork/patchflows/MyPatchflow/MyPatchflow.py`
19 | 
20 | ```python
21 | from patchwork.step import Step
22 | from patchwork.steps import (
23 |     Step1,
24 |     Step2,
25 |     Step3,
26 | )
27 | 
28 | 
29 | 
30 | 
31 | class MyPatchflow(Step):
32 |     def __init__(self, inputs):
33 |         super().__init__(inputs)
34 | 
35 |         if 'input1' not in inputs:
36 |             raise ValueError("Missing required input: input1")
37 | 
38 |         self.inputs = inputs
39 | 
40 |     def run(self) -> dict:
41 |         out1 = Step1(self.inputs).run()
42 |         self.inputs.update(out1)
43 | 
44 |         out2 = Step2(self.inputs).run()
45 |         self.inputs.update(out2)
46 | 
47 |         out3 = Step3(self.inputs).run()
48 |         self.inputs.update(out3)
49 | 
50 |         return self.inputs
51 | ```
52 | 
53 | ### Path: `patchwork/patchflows/__init__.py`
54 | 
55 | ```python
56 | from .MyPatchflow.MyPatchflow import MyPatchflow
57 | 
58 | __all__ = [
59 |     ...
60 |     'MyPatchflow',
61 | ]
62 | ```
63 | 


--------------------------------------------------------------------------------
/style.md:
--------------------------------------------------------------------------------
 1 | # Code Style Guidelines
 2 | 
 3 | ## 1. Naming Conventions
 4 | - Use snake_case for function names in Python
 5 | - Use camelCase for function names in JavaScript and method names in Java
 6 | - Use clear and descriptive naming conventions
 7 | 
 8 | ## 2. Code Structure
 9 | - Follow a modular structure with separate files for different components
10 | - Use class inheritance for organizing related components
11 | - Use separate files for typed inputs/outputs, main functionality, and initialization
12 | - Include an __init__.py file in each module, even if empty
13 | - Import new modules in __init__.py and update the __all__ list
14 | 
15 | ## 3. Documentation
16 | - Include a README.md file for each module or component, providing:
17 |   - Brief overview of contents and purpose
18 |   - Table of contents for easy navigation
19 |   - Usage instructions and context
20 |   - Clear input and output specifications
21 | - Use markdown formatting for documentation, including headers and lists
22 | - Add docstrings to functions with detailed descriptions, including:
23 |   - Parameters and their types
24 |   - Return values and their types
25 |   - Brief description of function purpose
26 | - Use consistent docstring formats for each language (e.g., Google-style for Python, JSDoc for JavaScript, Javadoc for Java)
27 | 
28 | ## 4. Error Handling
29 | - Handle exceptions properly when importing modules
30 | - Provide specific error messages for missing dependencies, including installation instructions
31 | 
32 | ## 5. Security
33 | - Use spec_from_file_location() and module_from_spec() instead of import_module() for dynamic imports
34 | - Implement a whitelist approach for allowed module imports
35 | - Avoid using f-strings with untrusted user input
36 | 
37 | ## 6. Typing
38 | - Use TypedDict for defining input and output structures
39 | - Use Annotated for StepTypeConfig
40 | - Use type annotations for improved code clarity
41 | - Use type hints for function parameters and return values
42 | 
43 | ## 7. Dependency Management
44 | - Keep dependencies up-to-date with specified version ranges
45 | - Use semantic versioning for dependencies (e.g., ^1.5.0, ~2.32.0)
46 | - Pin transitive dependencies
47 | 
48 | ## 8. Code Formatting
49 | - Use Black for code formatting
50 | - Use isort for import sorting
51 | - Use consistent double quotes for dictionary keys
52 | 
53 | ## 9. Version Control
54 | - Follow versioning conventions for development versions (e.g., X.X.X.devX)
55 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateUnitTests/GenerateUnitTests.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import yaml
 4 | 
 5 | from patchwork.common.utils.step_typing import validate_steps_with_inputs
 6 | from patchwork.step import Step
 7 | from patchwork.steps import LLM, PR, CallCode2Prompt, ModifyCode
 8 | 
 9 | _DEFAULT_INPUT_FILE = Path(__file__).parent / "defaults.yml"
10 | _DEFAULT_PROMPT_JSON = Path(__file__).parent / "default_prompt.json"
11 | 
12 | 
13 | class GenerateUnitTests(Step):
14 |     """
15 |     Patchflow that runs CallCode2Prompt, LLM, ModifyCode and PR to open a pull request with generated tests.
16 |     """
17 | 
18 |     def __init__(self, inputs):
19 |         """
20 |         Layer the caller's inputs over the defaults in defaults.yml and validate the result.
21 | 
22 |         Args:
23 |             inputs: Caller supplied inputs; on key clashes they override the YAML defaults.
24 |         """
25 |         super().__init__(inputs)
26 | 
27 |         final_inputs = yaml.safe_load(_DEFAULT_INPUT_FILE.read_text())
28 |         if final_inputs is None:
29 |             # Fall back to an empty mapping when the defaults file yields nothing.
30 |             final_inputs = {}
31 | 
32 |         final_inputs.update(inputs)
33 | 
34 |         # Set after the merge, so a caller supplied "prompt_id" is always replaced.
35 |         final_inputs["prompt_id"] = "GenerateUnitTests"
36 |         if "folder_path" not in final_inputs.keys():
37 |             final_inputs["folder_path"] = Path.cwd()
38 |         else:
39 |             final_inputs["folder_path"] = Path(final_inputs["folder_path"])
40 | 
41 |         if "prompt_template_file" not in final_inputs:
42 |             final_inputs["prompt_template_file"] = _DEFAULT_PROMPT_JSON
43 | 
44 |         # Like "prompt_id", these two always replace any caller supplied value.
45 |         final_inputs["pr_title"] = f"PatchWork Unit Tests generated"
46 |         final_inputs["branch_prefix"] = f"{self.__class__.__name__.lower()}-"
47 | 
48 |         # "prompt_values" and "files_to_patch" are only assigned in run(), so they are added
49 |         # to the key set handed to the validator here.
50 |         validate_steps_with_inputs(
51 |             set(final_inputs.keys()).union({"prompt_values", "files_to_patch"}), LLM, CallCode2Prompt, ModifyCode, PR
52 |         )
53 |         self.inputs = final_inputs
54 | 
55 |     def run(self):
56 |         """
57 |         Execute the steps in order, feeding each step's outputs into the shared inputs.
58 | 
59 |         Returns:
60 |             dict: The inputs dictionary updated with the outputs of the LLM, ModifyCode and PR steps.
61 |         """
62 |         outputs = CallCode2Prompt(self.inputs).run()
63 |         # Rename the file at the returned "uri" on disk to test_file.<test_file_extension>.
64 |         new_file_name = f"test_file.{self.inputs['test_file_extension']}"
65 |         new_file_path = Path(outputs["uri"]).with_name(new_file_name)
66 |         Path(outputs["uri"]).rename(new_file_path)
67 |         outputs["uri"] = str(new_file_path)
68 |         self.inputs["response_partitions"] = {"patch": ["```", "\n", "```"]}
69 |         self.inputs["files_to_patch"] = self.inputs["prompt_values"] = [outputs]
70 |         outputs = LLM(self.inputs).run()
71 |         self.inputs.update(outputs)
72 |         outputs = ModifyCode(self.inputs).run()
73 |         self.inputs.update(outputs)
74 |         # NOTE(review): `number` is never read afterwards.
75 |         number = len(self.inputs["modified_code_files"])
76 |         self.inputs["pr_header"] = f"This pull request from patchwork adds tests."
77 |         outputs = PR(self.inputs).run()
78 |         self.inputs.update(outputs)
79 | 
80 |         return self.inputs
81 | 


--------------------------------------------------------------------------------
/patchwork/common/context_strategy/cpp.py:
--------------------------------------------------------------------------------
 1 | from patchwork.common.context_strategy.languages import CppLanguage
 2 | from patchwork.common.context_strategy.protocol import TreeSitterStrategy
 3 | 
 4 | 
 5 | class CppStrategy(TreeSitterStrategy):
 6 |     def __init__(self, query: str):
 7 |         """
 8 |         Initialize the Cpp searcher instance.
 9 | 
10 |         Args:
11 |         query (str): The tree-sitter query string to be used for C++ file search.
12 |         """
13 | 
14 |         # exts from https://gcc.gnu.org/onlinedocs/gcc-4.4.1/gcc/Overall-Options.html#index-file-name-suffix-71
15 |         exts = [
16 |             ".ii",
17 |             ".h",
18 |             ".cc",
19 |             ".cp",
20 |             ".cxx",
21 |             ".cpp",
22 |             ".CPP",
23 |             ".c++",
24 |             ".C",
25 |             ".hh",
26 |             ".H",
27 |             ".hp",
28 |             ".hxx",
29 |             ".hpp",
30 |             ".HPP",
31 |             ".h++",
32 |             ".tcc",
33 |         ]
34 |         super().__init__("cpp", query, exts, CppLanguage())
35 |         self.query = query
36 | 
37 | 
38 | class CppClassStrategy(CppStrategy):
39 |     def __init__(self):
40 |         """
41 |         Initialize the strategy by calling the parent class's __init__ method
42 |         with a query that captures each class_specifier node as @node.
43 |         """
44 |         super().__init__(
45 |             """
46 |             (class_specifier) @node
47 |             """.strip()
48 |         )
49 | 
50 | 
51 | class CppMethodStrategy(CppStrategy):
52 |     def __init__(self):
53 |         """
54 |         Initialize the strategy with a query that captures comment nodes as
55 |         @comment and function_definition nodes as @node.
56 | 
57 |         Parameters:
58 |         - self: instance of the class
59 | 
60 |         Returns:
61 |         - None
62 |         """
63 |         super().__init__(
64 |             """
65 |         [
66 |             (comment) @comment
67 |             (function_definition) @node
68 |         ]
69 |         """.strip()
70 |         )
71 | 
72 | 
73 | class CppBlockStrategy(CppStrategy):
74 |     def __init__(self):
75 |         """
76 |         Initialize the strategy with a query that captures compound_statement nodes as @node.
77 | 
78 |         Parameters:
79 |         - self: The object instance.
80 |         """
81 |         super().__init__(
82 |             """
83 |             (compound_statement) @node
84 |         """.strip()
85 |         )
86 | 


--------------------------------------------------------------------------------
/patchwork/patchflows/GenerateCodeUsageExample/GenerateCodeUsageExample.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import yaml
 4 | 
 5 | from patchwork.common.utils.step_typing import validate_steps_with_inputs
 6 | from patchwork.step import Step
 7 | from patchwork.steps import LLM, PR, CallCode2Prompt, ModifyCode
 8 | 
 9 | _DEFAULT_INPUT_FILE = Path(__file__).parent / "defaults.yml"
10 | _DEFAULT_PROMPT_JSON = Path(__file__).parent / "default_prompt.json"
11 | 
12 | 
13 | class GenerateCodeUsageExample(Step):
14 |     """
15 |     Patchflow that runs CallCode2Prompt, LLM, ModifyCode and PR to open a pull request with a usage example.
16 |     """
17 | 
18 |     def __init__(self, inputs):
19 |         """
20 |         Layer the caller's inputs over the defaults in defaults.yml and validate the result.
21 | 
22 |         Args:
23 |             inputs: Caller supplied inputs; on key clashes they override the YAML defaults.
24 |         """
25 |         super().__init__(inputs)
26 | 
27 |         final_inputs = yaml.safe_load(_DEFAULT_INPUT_FILE.read_text())
28 |         if final_inputs is None:
29 |             # Fall back to an empty mapping when the defaults file yields nothing.
30 |             final_inputs = {}
31 | 
32 |         final_inputs.update(inputs)
33 | 
34 |         # Set after the merge, so a caller supplied "prompt_id" is always replaced.
35 |         final_inputs["prompt_id"] = "GenerateUsageExample"
36 |         if "folder_path" not in final_inputs.keys():
37 |             final_inputs["folder_path"] = Path.cwd()
38 |         else:
39 |             final_inputs["folder_path"] = Path(final_inputs["folder_path"])
40 | 
41 |         if "prompt_template_file" not in final_inputs:
42 |             final_inputs["prompt_template_file"] = _DEFAULT_PROMPT_JSON
43 | 
44 |         # Like "prompt_id", these two always replace any caller supplied value.
45 |         final_inputs["pr_title"] = f"PatchWork Usage Example generated"
46 |         final_inputs["branch_prefix"] = f"{self.__class__.__name__.lower()}-"
47 | 
48 |         # "prompt_values" and "files_to_patch" are only assigned in run(), so they are added
49 |         # to the key set handed to the validator here.
50 |         validate_steps_with_inputs(
51 |             set(final_inputs.keys()).union({"prompt_values", "files_to_patch"}), LLM, CallCode2Prompt, ModifyCode, PR
52 |         )
53 |         self.inputs = final_inputs
54 | 
55 |     def run(self):
56 |         """
57 |         Execute the steps in order, feeding each step's outputs into the shared inputs.
58 | 
59 |         Returns:
60 |             dict: The inputs dictionary updated with the outputs of the LLM, ModifyCode and PR steps.
61 |         """
62 |         outputs = CallCode2Prompt(self.inputs).run()
63 |         # Rename the file at the returned "uri" on disk to usage_example.<test_file_extension>.
64 |         new_file_name = f"usage_example.{self.inputs['test_file_extension']}"
65 |         new_file_path = Path(outputs["uri"]).with_name(new_file_name)
66 |         Path(outputs["uri"]).rename(new_file_path)
67 |         outputs["uri"] = str(new_file_path)
68 |         self.inputs["response_partitions"] = {"patch": ["```", "\n", "```"]}
69 |         self.inputs["files_to_patch"] = self.inputs["prompt_values"] = [outputs]
70 |         outputs = LLM(self.inputs).run()
71 |         self.inputs.update(outputs)
72 |         outputs = ModifyCode(self.inputs).run()
73 |         self.inputs.update(outputs)
74 |         # NOTE(review): `number` is never read afterwards.
75 |         number = len(self.inputs["modified_code_files"])
76 |         self.inputs["pr_header"] = f"This pull request adds usage example."
77 |         outputs = PR(self.inputs).run()
78 |         self.inputs.update(outputs)
79 | 
80 |         return self.inputs
81 | 


--------------------------------------------------------------------------------
/patchwork/steps/ScanSemgrep/ScanSemgrep.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import subprocess
 3 | from pathlib import Path
 4 | 
 5 | from patchwork.common.utils.dependency import import_with_dependency_group
 6 | from patchwork.common.utils.input_parsing import parse_to_list
 7 | from patchwork.logger import logger
 8 | from patchwork.step import Step, StepStatus
 9 | from patchwork.steps.ScanSemgrep.typed import ScanSemgrepInputs, ScanSemgrepOutputs
10 | 
11 | 
12 | class ScanSemgrep(Step, input_class=ScanSemgrepInputs, output_class=ScanSemgrepOutputs):
13 |     def __init__(self, inputs: dict):
14 |         super().__init__(inputs)
15 | 
16 |         self.extra_args = inputs.get("semgrep_extra_args", "")
17 |         sarif_file_path = inputs.get("sarif_file_path")
18 |         if sarif_file_path is not None:
19 |             sarif_file_path = Path(sarif_file_path)
20 |             if not sarif_file_path.is_file():
21 |                 raise ValueError(f'Unable to find input file: "{sarif_file_path}"')
22 |             with open(sarif_file_path, "r") as fp:
23 |                 self.sarif_values = json.load(fp, strict=False)
24 |         elif inputs.get("sarif_values") is not None:
25 |             sarif_values = inputs.get("sarif_values")
26 |             if isinstance(sarif_values, str):
27 |                 sarif_values = json.loads(sarif_values, strict=False)
28 |             self.sarif_values = sarif_values
29 |         else:
30 |             self.sarif_values = None
31 | 
32 |         path_key = inputs.get("path_key", "path")
33 |         self.paths = parse_to_list(inputs.get("paths", ""), possible_delimiters=[",", None], possible_keys=[path_key])
34 | 
35 |     def run(self) -> dict:
36 |         if self.sarif_values is not None:
37 |             self.set_status(StepStatus.SKIPPED, "Using provided SARIF")
38 |             return dict(sarif_values=self.sarif_values)
39 | 
40 |         import_with_dependency_group("semgrep")
41 |         cwd = Path.cwd()
42 | 
43 |         cmd = [
44 |             "semgrep",
45 |             "scan",
46 |             *self.paths,
47 |             *self.extra_args.split(),
48 |             "--sarif",
49 |         ]
50 | 
51 |         p = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd)
52 |         try:
53 |             sarif_values = json.loads(p.stdout)
54 |             return dict(sarif_values=sarif_values)
55 |         except json.JSONDecodeError as e:
56 |             logger.debug(f"Error parsing semgrep output: {p.stdout}", e)
57 |             self.set_status(StepStatus.FAILED, f"Error parsing semgrep output")
58 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallSQL/CallSQL.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from sqlalchemy import URL, create_engine, exc, text
 4 | 
 5 | from patchwork.common.utils.input_parsing import parse_to_dict
 6 | from patchwork.common.utils.utils import mustache_render
 7 | from patchwork.logger import logger
 8 | from patchwork.step import Step, StepStatus
 9 | from patchwork.steps.CallSQL.typed import CallSQLInputs, CallSQLOutputs
10 | 
11 | 
12 | class CallSQL(Step, input_class=CallSQLInputs, output_class=CallSQLOutputs):
13 |     """
14 |     Step that runs a mustache-rendered SQL query through SQLAlchemy and returns the rows as dictionaries.
15 |     """
16 | 
17 |     def __init__(self, inputs: dict):
18 |         """
19 |         Render the query text and build the database engine.
20 | 
21 |         Args:
22 |             inputs (dict): Step inputs. `db_query` and `db_dialect` are read unconditionally;
23 |                 `db_query_template_values` and the other `db_*` keys are optional.
24 |         """
25 |         super().__init__(inputs)
26 |         query_template_data = inputs.get("db_query_template_values", {})
27 |         # NOTE(review): values are rendered into the SQL text rather than bound as parameters;
28 |         # confirm the template values come from a trusted source.
29 |         self.query = mustache_render(inputs["db_query"], query_template_data)
30 |         self.__build_engine(inputs)
31 | 
32 |     def __build_engine(self, inputs: dict):
33 |         """
34 |         Create the SQLAlchemy engine from the `db_*` inputs and check that it can connect.
35 | 
36 |         Args:
37 |             inputs (dict): Step inputs holding the connection settings.
38 | 
39 |         Returns:
40 |             The engine, which is also stored on `self.engine`.
41 |         """
42 |         dialect = inputs["db_dialect"]
43 |         driver = inputs.get("db_driver")
44 |         dialect_plus_driver = f"{dialect}+{driver}" if driver is not None else dialect
45 |         kwargs = dict(
46 |             username=inputs.get("db_username"),
47 |             host=inputs.get("db_host", "localhost"),
48 |             port=inputs.get("db_port", 5432),
49 |             password=inputs.get("db_password"),
50 |             database=inputs.get("db_database"),
51 |             query=parse_to_dict(inputs.get("db_params")),
52 |         )
53 |         connection_url = URL.create(
54 |             dialect_plus_driver,
55 |             # Leave out unset fields instead of passing None for them.
56 |             **{k: v for k, v in kwargs.items() if v is not None},
57 |         )
58 | 
59 |         connect_args = dict()
60 |         if inputs.get("db_driver_args") is not None:
61 |             connect_args = parse_to_dict(inputs.get("db_driver_args"))
62 | 
63 |         self.engine = create_engine(connection_url, connect_args=connect_args)
64 |         # Run a trivial statement now so connection problems surface during construction.
65 |         with self.engine.connect() as conn:
66 |             conn.execute(text("SELECT 1"))
67 |         return self.engine
68 | 
69 |     def run(self) -> dict:
70 |         """
71 |         Execute the rendered query and collect every row.
72 | 
73 |         Returns:
74 |             dict: `results` holding one dictionary per row, or an empty list when SQLAlchemy
75 |             raises InvalidRequestError (the step is marked FAILED in that case).
76 |         """
77 |         try:
78 |             rv = []
79 |             with self.engine.begin() as conn:
80 |                 cursor = conn.exec_driver_sql(self.query)
81 |                 for row in cursor:
82 |                     result = row._asdict()
83 |                     rv.append(result)
84 |             logger.info(f"Retrieved {len(rv)} rows!")
85 |             return dict(results=rv)
86 |         except exc.InvalidRequestError as e:
87 |             self.set_status(StepStatus.FAILED, f"`{self.query}` failed with message:\n{e}")
88 |             return dict(results=[])
89 | 


--------------------------------------------------------------------------------
/patchwork/steps/CallShell/CallShell.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import os
 4 | import shlex
 5 | import subprocess
 6 | from pathlib import Path
 7 | 
 8 | from patchwork.common.utils.utils import mustache_render
 9 | from patchwork.logger import logger
10 | from patchwork.step import Step, StepStatus
11 | from patchwork.steps.CallShell.typed import CallShellInputs, CallShellOutputs
12 | 
13 | 
14 | class CallShell(Step, input_class=CallShellInputs, output_class=CallShellOutputs):
15 |     """
16 |     Step that runs a mustache-rendered script through the shell and returns its stdout and stderr.
17 |     """
18 | 
19 |     def __init__(self, inputs: dict):
20 |         """
21 |         Render the script and resolve the working directory and environment.
22 | 
23 |         Args:
24 |             inputs (dict): Step inputs. `script` is required; `script_template_values`,
25 |                 `working_dir` (defaults to the current directory) and `env` are optional.
26 |         """
27 |         super().__init__(inputs)
28 |         script_template_values = inputs.get("script_template_values", {})
29 |         self.script = mustache_render(inputs["script"], script_template_values)
30 |         self.working_dir = inputs.get("working_dir", Path.cwd())
31 |         self.env = self.__parse_env_text(inputs.get("env", ""))
32 | 
33 |     @staticmethod
34 |     def __parse_env_text(env_text: str) -> dict[str, str]:
35 |         """
36 |         Build the process environment from a string of NAME=value assignments.
37 | 
38 |         Assignments are separated by whitespace or ";". Empty entries are skipped silently;
39 |         entries with no value, or that split into more than two parts on "=", are logged as
40 |         errors and skipped.
41 | 
42 |         Args:
43 |             env_text (str): The assignments to parse.
44 | 
45 |         Returns:
46 |             dict[str, str]: A copy of os.environ with the parsed assignments applied on top.
47 |         """
48 |         env_spliter = shlex.shlex(env_text, posix=True)
49 |         env_spliter.whitespace_split = True
50 |         env_spliter.whitespace += ";"
51 | 
52 |         env: dict[str, str] = os.environ.copy()
53 |         for env_assign in env_spliter:
54 |             env_assign_spliter = shlex.shlex(env_assign, posix=True)
55 |             env_assign_spliter.whitespace_split = True
56 |             env_assign_spliter.whitespace += "="
57 |             env_parts = list(env_assign_spliter)
58 |             if len(env_parts) < 1:
59 |                 continue
60 | 
61 |             env_assign_target = env_parts[0]
62 |             if len(env_parts) < 2:
63 |                 logger.error(f"{env_assign_target} is not assigned anything, skipping...")
64 |                 continue
65 |             if len(env_parts) > 2:
66 |                 logger.error(f"{env_assign_target} has more than 1 assignment, skipping...")
67 |                 continue
68 |             env[env_assign_target] = env_parts[1]
69 | 
70 |         return env
71 | 
72 |     def run(self) -> dict:
73 |         """
74 |         Run the script with `shell=True` and capture its output.
75 | 
76 |         A non-zero exit code marks the step FAILED, but the captured output is still returned.
77 | 
78 |         Returns:
79 |             dict: `stdout_output` and `stderr_output` of the process.
80 |         """
81 |         p = subprocess.run(self.script, shell=True, capture_output=True, text=True, cwd=self.working_dir, env=self.env)
82 |         try:
83 |             p.check_returncode()
84 |         except subprocess.CalledProcessError as e:
85 |             self.set_status(
86 |                 StepStatus.FAILED,
87 |                 f"Script failed.",
88 |             )
89 |         logger.info(f"stdout: \n{p.stdout}")
90 |         logger.info(f"stderr:\n{p.stderr}")
91 |         return dict(stdout_output=p.stdout, stderr_output=p.stderr)
92 | 


--------------------------------------------------------------------------------
/patchwork/steps/GitHubAgent/GitHubAgent.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from patchwork.common.client.llm.aio import AioLlmClient
 4 | from patchwork.common.multiturn_strategy.agentic_strategy_v2 import (
 5 |     AgentConfig,
 6 |     AgenticStrategyV2,
 7 | )
 8 | from patchwork.common.tools.git_tool import GitTool
 9 | from patchwork.common.tools.github_tool import GitHubTool
10 | from patchwork.common.utils.utils import mustache_render
11 | from patchwork.step import Step
12 | from patchwork.steps.GitHubAgent.typed import GitHubAgentInputs, GitHubAgentOutputs
13 | 
14 | 
15 | class GitHubAgent(Step, input_class=GitHubAgentInputs, output_class=GitHubAgentOutputs):
16 |     """
17 |     Step that hands a task to an agent equipped with the GitHubTool and GitTool tools.
18 |     """
19 | 
20 |     def __init__(self, inputs):
21 |         """
22 |         Render the task and set up the agentic strategy.
23 | 
24 |         Args:
25 |             inputs: Step inputs. `task` and `github_api_key` are required; `base_path` (defaults
26 |                 to the current directory), `prompt_value` and `example_json` are optional.
27 |         """
28 |         super().__init__(inputs)
29 |         base_path = inputs.get("base_path", str(Path.cwd()))
30 |         data = inputs.get("prompt_value", {})
31 |         task = mustache_render(inputs["task"], data)
32 |         self.agentic_strategy = AgenticStrategyV2(
33 |             model="gemini-2.0-flash",
34 |             llm_client=AioLlmClient.create_aio_client(inputs),
35 |             template_data=dict(),
36 |             system_prompt_template="""\
37 | Please summarise the conversation given and provide the result in the structure that is asked of you.
38 | """,
39 |             # `task` was already mustache-rendered above and is embedded verbatim by the f-string.
40 |             user_prompt_template=f"""\
41 | Please help me with the following task using the GitHub CLI. You should not do anything extra.
42 | Please take note of any requirements to the data required to fetch.
43 | 
44 | {task}
45 | """,
46 |             agent_configs=[
47 |                 AgentConfig(
48 |                     name="Assistant",
49 |                     model="gemini-2.0-flash",
50 |                     tool_set=dict(
51 |                         github_tool=GitHubTool(base_path, inputs["github_api_key"]),
52 |                         git_tool=GitTool(base_path),
53 |                     ),
54 |                     system_prompt="""\
55 | You are a senior software developer helping the program manager to obtain some data from GitHub.
56 | You can access github through the `gh` CLI app through the `github_tool`, and `git` through the `git_tool`.
57 | Your `gh` app has already been authenticated.
58 | """,
59 |                 )
60 |             ],
61 |             example_json=inputs.get(
62 |                 "example_json", '{"summary_of_actions": "1. Retrieved the list of repositories. 2. ..."}'
63 |             ),
64 |         )
65 | 
66 |     def run(self) -> dict:
67 |         """
68 |         Execute the strategy with `limit=10` and merge its result with the usage data.
69 | 
70 |         Returns:
71 |             dict: The entries of the execution result plus those of `usage()`; on key clashes
72 |             the usage value wins.
73 |         """
74 |         result = self.agentic_strategy.execute(limit=10)
75 |         return {**result, **self.agentic_strategy.usage()}
76 | 


--------------------------------------------------------------------------------
/patchwork/steps/FilterBySimilarity/FilterBySimilarity.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from sklearn.feature_extraction.text import TfidfVectorizer
 4 | from sklearn.metrics.pairwise import cosine_similarity
 5 | 
 6 | from patchwork.logger import logger
 7 | from patchwork.step import Step, StepStatus
 8 | from patchwork.steps.FilterBySimilarity.typed import (
 9 |     FilterBySimilarityInputs,
10 |     FilterBySimilarityOutputs,
11 | )
12 | 
13 | 
14 | class FilterBySimilarity(Step, input_class=FilterBySimilarityInputs, output_class=FilterBySimilarityOutputs):
15 |     """
16 |     Step that ranks list items by TF-IDF cosine similarity to the keywords and keeps the top `top_k`.
17 |     """
18 | 
19 |     def __init__(self, inputs):
20 |         """
21 |         Read the required `list` and `keywords` inputs and the optional `keys` and `top_k` (default 10).
22 |         """
23 |         super().__init__(inputs)
24 | 
25 |         self.list = inputs["list"]
26 |         self.keywords = inputs["keywords"]
27 |         self.keys = self.__parse_keys(inputs.get("keys", None))
28 |         self.top_k = inputs.get("top_k", 10)
29 | 
30 |     @staticmethod
31 |     def __parse_keys(keys: list[str] | str | None) -> list[str] | None:
32 |         """
33 |         Normalize the `keys` input.
34 | 
35 |         Args:
36 |             keys: None, a list of key names, or a string of key names.
37 | 
38 |         Returns:
39 |             None or the list unchanged; a string is split on "," when it contains one,
40 |             otherwise on whitespace, and every part is stripped.
41 |         """
42 |         if keys is None:
43 |             return None
44 | 
45 |         if isinstance(keys, str):
46 |             delimiter = None
47 |             if "," in keys:
48 |                 delimiter = ","
49 |             return [key.strip() for key in keys.split(delimiter)]
50 | 
51 |         return keys
52 | 
53 |     def run(self):
54 |         """
55 |         Score every item against the keywords and return the highest scoring ones.
56 | 
57 |         Returns:
58 |             dict: `result_list` with at most `top_k` items, best score first. An empty dict is
59 |             returned, and the step marked SKIPPED, when the input list is empty.
60 |         """
61 |         if len(self.list) == 0:
62 |             self.set_status(StepStatus.SKIPPED, "List is empty")
63 |             return dict()
64 | 
65 |         items_with_score = []
66 |         for item in self.list:
67 |             if self.keys is not None:
68 |                 texts = [str(item[key]) for key in self.keys if item.get(key) is not None]
69 |             else:
70 |                 texts = [value for value in item.values() if value is not None and isinstance(value, str)]
71 |             if len(texts) == 0:
72 |                 logger.warning(f"No text found in item: {item}")
73 |                 continue
74 | 
75 |             # A fresh vectorizer is fitted on this item's texts only.
76 |             vectorizer = TfidfVectorizer()
77 |             vectorizer.fit(texts)
78 |             keyword_vectors = vectorizer.transform([self.keywords])
79 | 
80 |             similarity_scores = []
81 |             for text in texts:
82 |                 text_vector = vectorizer.transform([text])
83 |                 similarity = cosine_similarity(text_vector, keyword_vectors)[0][0]
84 |                 similarity_scores.append(similarity)
85 | 
86 |             avg_similarity = sum(similarity_scores) / len(similarity_scores)
87 |             items_with_score.append((item, avg_similarity))
88 | 
89 |         items_with_score.sort(key=lambda x: x[1], reverse=True)
90 |         return dict(result_list=[item for item, _ in items_with_score[: self.top_k]])
91 | 


--------------------------------------------------------------------------------