├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml └── web2sdk ├── __init__.py ├── console_util.py ├── main.py ├── swagger2sdk ├── __init__.py ├── generate_function.py ├── generate_types.py ├── main.py └── utils.py ├── tests └── __init__.py └── web2swagger ├── .DS_Store ├── __init__.py ├── har_capture_reader.py ├── main.py ├── mitmproxy_capture_reader.py └── swagger_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .DS_Store 6 | 7 | # Request logs and generated files 8 | */generated/ 9 | *.har 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 115 | .pdm.toml 116 | .pdm-python 117 | .pdm-build/ 118 | 119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 120 | __pypackages__/ 121 | 122 | # Celery stuff 123 | celerybeat-schedule 124 | celerybeat.pid 125 | 126 | # SageMath parsed files 127 | *.sage.py 128 | 129 | # Environments 130 | .env 131 | .venv 132 | env/ 133 | venv/ 134 | ENV/ 135 | env.bak/ 136 | venv.bak/ 137 | 138 | # Spyder project settings 139 | .spyderproject 140 | .spyproject 141 | 142 | # Rope project settings 143 | .ropeproject 144 | 145 | # mkdocs documentation 146 | /site 147 | 148 | # mypy 149 | .mypy_cache/ 150 | .dmypy.json 151 | dmypy.json 152 | 153 | # Pyre type checker 154 | .pyre/ 155 | 156 | # pytype static type analyzer 157 | .pytype/ 158 | 159 | # Cython debug symbols 160 | cython_debug/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
167 | #.idea/ 168 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Jason Fan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 🦊 web2sdk 3 |

4 | 5 |

6 |

Automatically turn third party APIs into Python SDKs

7 |

8 |

9 | 10 | License 11 | 12 | 13 | Issues 14 | 15 |

16 | 17 | Web2sdk is a set of tools for reverse engineering APIs by intercepting network requests. It processes HAR files exported from Chrome devtools into an OpenAPI schema, then automatically generates a python SDK based on the schema. Each method in the python SDK corresponds to an endpoint, and includes strongly typed arguments, requests, and responses. 18 | 19 | https://github.com/user-attachments/assets/5a7f477d-76ab-46f2-9884-62dfc9f2715b 20 | 21 | 22 | ### Features 23 | - Generates an OpenAPI/Swagger yaml schema from any web-based flow 24 | - Automatically merges requests to the same endpoint 25 | - Generates pydantic classes based on OpenAPI request and response schemas 26 | - Supports `basic` and `bearer` auth schemes 27 | - Supports overriding default headers 28 | 29 | ### Example output 30 | ```python 31 | import json 32 | import http.client 33 | from urllib.parse import urlparse 34 | from pydantic import BaseModel 35 | from typing import Optional, Dict, List, Any 36 | 37 | class GetConversationsRequestParameters(BaseModel): 38 | offset: Optional[float] = None 39 | limit: Optional[float] = None 40 | order: Optional[str] = None 41 | 42 | class GetConversationsResponse(BaseModel): 43 | items: Optional[List] = None 44 | total: Optional[float] = None 45 | limit: Optional[float] = None 46 | offset: Optional[float] = None 47 | has_missing_conversations: Optional[bool] = None 48 | 49 | class ChatGPTAPI(BaseModel): 50 | hostname: str 51 | token: str 52 | 53 | def get_conversations(self, request_parameters: 54 | GetConversationsRequestParameters, *, override_headers: dict={} 55 | ) ->GetConversationsResponse: 56 | conn = http.client.HTTPSConnection(self.hostname) 57 | params = '&'.join([(k + '=' + v) for k, v in request_parameters. 58 | items()]) 59 | headers = {'User-Agent': 'Web2sdk/1.0', 'Authorization': 'Bearer ' + 60 | self.token} 61 | headers.update(override_headers) 62 | conn.request('GET', '/backend-api/conversations?' 
+ params + '',
63 |             headers=headers)
64 |         res = conn.getresponse()
65 |         data = res.read().decode('utf-8')
66 |         return json.loads(data)
67 | 
68 |     def post_conversation(self, request_body: PostConversationRequestBody,
69 |         *, override_headers: dict={}) ->Any
70 |         ### ...etc
71 | ```
72 | 
73 | ## Usage
74 | **1. Export HAR file**
75 | * Open Chrome devtools and go to "Network".
76 | * Go through a flow on a website that triggers the requests you want to capture and reverse engineer. The more varied the requests the better, as a single request might not capture all the possible request and response schemas for a particular endpoint.
77 | * Click the button shown below to export the HAR file. Don't worry about filtering out requests, that happens later.
78 | * Also compatible with [mitmweb](https://mitmproxy.org/) exports.
79 | 
80 | ![CleanShot 2024-08-27 at 21 11 53](https://github.com/user-attachments/assets/3453f33b-686b-476e-80e3-bd7df8c63f50)
81 | 
82 | **2. Install web2sdk**
83 | ```
84 | $ pip install web2sdk
85 | ```
86 | 
87 | **3. Generate an OpenAPI spec and SDK**
88 | ```sh
89 | $ web2sdk --requests-path <path/to/requests.har> --base-url <base url> --sdk-name FinicSDK --auth-type bearer
90 | ```
91 | * `base-url` filters out requests that don't start with the url provided. This should include everything up until the endpoints you want to reverse engineer.
92 |   * For example, `https://finic.ai/api/v1` will match only requests to the v1 endpoint, but `https://finic.ai/api` will match requests from v1, v2, and any other paths after `/api`.
93 | * Generated files will be saved to `generated/<sdk name>.yaml` and `generated/<sdk name>.py` in the current directory by default.
94 | 
95 | **4.
Run your python SDK.**
96 | ```python
97 | from generated.FinicSDK import FinicSDK
98 | 
99 | finic = FinicSDK(hostname="finic.ai", token="your_token_here")
100 | finic.get_connectors({})
101 | finic.post_message({ "message": "hi there" }, override_headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" })
102 | ```
103 | * Each method in the generated SDK corresponds to an endpoint
104 | * You can pass in any headers you want. By default, only `Authorization` and `User-Agent` headers are included.
105 | * Some methods accept parameters and/or request bodies. Inspect the function to see what arguments it takes.
106 | 
107 | ### Other Options
108 | ```--auth-type```
109 | * Optional, defaults to `none`. If set, the generated SDK class will expect a username and password for basic auth or a token for bearer auth.
110 | 
111 | ```--output```
112 | * Optional, defaults to `generated/` in the current directory. Specify a directory for the generated `.yaml` and `.py` files to be saved.
113 | 
114 | ```--interactive```
115 | * Run in interactive mode. Not well supported.
116 | 
117 | ## 🚧 Planned Improvements
118 | - Support for OAuth and custom auth schemes. In the meantime, only `basic` and `bearer` auth are supported.
119 | - Automatic auth token refresh
120 | - Support for templated API paths (e.g. `https://api.claude.ai/api/organizations/{organization_id}/chat_conversations`)
121 | - Use LLMs to generate more readable class names, example request payloads, and other tasks that require fuzzy reasoning
122 | - Include a linter/formatter to make generated SDK more readable
123 | 
124 | ### Acknowledgements
125 | Web2sdk includes a modified version of [mitmproxy2swagger](https://github.com/alufers/mitmproxy2swagger).
126 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "web2sdk" 3 | version = "0.0.2" 4 | description = "Reverse engineer third party APIs into python SDKs" 5 | authors = ["jasonwcfan "] 6 | readme = "README.md" 7 | packages = [ 8 | { include = "web2sdk" } 9 | ] 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.10" 13 | mitmproxy = "^10.1.1" 14 | "ruamel.yaml" = ">=0.17.32,<0.19.0" 15 | json-stream = "^2.3.2" 16 | msgpack = "^1.0.7" 17 | astor = "^0.8.1" 18 | pydantic = "^2.8.2" 19 | pyyaml = "^6.0.2" 20 | requests = "^2.32.3" 21 | python-dotenv = "^1.0.1" 22 | 23 | [tool.poetry.scripts] 24 | web2sdk = "web2sdk.main:main" 25 | 26 | 27 | [build-system] 28 | requires = ["poetry-core"] 29 | build-backend = "poetry.core.masonry.api" 30 | -------------------------------------------------------------------------------- /web2sdk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/__init__.py -------------------------------------------------------------------------------- /web2sdk/console_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | 4 | ANSI_RGB = "\033[38;2;{};{};{}m" 5 | ANSI_RGB_BG = "\033[48;2;{};{};{}m" 6 | ANSI_RED = "\033[31m" 7 | ANSI_RESET = "\033[0m" 8 | 9 | RAINBOW_COLORS = [ 10 | (255, 0, 0), 11 | (255, 127, 0), 12 | (255, 255, 0), 13 | (127, 255, 0), 14 | (0, 255, 0), 15 | (0, 255, 127), 16 | (0, 255, 255), 17 | (0, 127, 255), 18 | (0, 0, 255), 19 | (127, 0, 255), 20 | (255, 0, 255), 21 | (255, 0, 127), 22 | ] 23 | 24 | 25 | def rgb_interpolate(start, end, progress): 26 | return tuple(int(start[i] + (end[i] - start[i]) * progress) for i in range(3)) 27 | 28 | 29 | # 
take a value from 0 to 1 and return an interpolated color from the rainbow 30 | def rainbow_at_position(progress): 31 | idx_a = int(progress * float(len(RAINBOW_COLORS) - 1)) 32 | idx_b = idx_a + 1 33 | return rgb_interpolate( 34 | RAINBOW_COLORS[idx_a], 35 | RAINBOW_COLORS[idx_b], 36 | progress * float(len(RAINBOW_COLORS) - 1) - idx_a, 37 | ) 38 | 39 | 40 | def print_progress_bar(progress=0.0, label=""): 41 | sys.stdout.write("\r") 42 | progress_bar_contents = "" 43 | PROGRESS_LENGTH = 30 44 | blocks = ["▉", "▊", "▋", "▌", "▍", "▎", "▏"] 45 | 46 | for i in range(PROGRESS_LENGTH): 47 | interpolated = rainbow_at_position(i / PROGRESS_LENGTH) 48 | # check if should print a full block 49 | if i < int(progress * PROGRESS_LENGTH): 50 | interpolated_2nd_half = rainbow_at_position((i + 0.5) / PROGRESS_LENGTH) 51 | progress_bar_contents += ANSI_RGB.format(*interpolated) 52 | progress_bar_contents += ANSI_RGB_BG.format(*interpolated_2nd_half) 53 | progress_bar_contents += "▌" 54 | # check if should print a non-full block 55 | elif i < int((progress * PROGRESS_LENGTH) + 0.5): 56 | progress_bar_contents += ANSI_RESET 57 | progress_bar_contents += ANSI_RGB.format(*interpolated) 58 | progress_bar_contents += blocks[ 59 | int((progress * PROGRESS_LENGTH) + 0.5) - i - 1 60 | ] 61 | # otherwise, print a space 62 | else: 63 | progress_bar_contents += ANSI_RESET 64 | progress_bar_contents += " " 65 | 66 | progress_bar_contents += ANSI_RESET 67 | sys.stdout.write("{} [{}] {:.1f}%".format(label, progress_bar_contents, progress * 100)) 68 | sys.stdout.flush() -------------------------------------------------------------------------------- /web2sdk/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | from typing import Any, Optional, Sequence, Union 5 | from web2sdk.web2swagger.main import main as web2swagger_main 6 | from web2sdk.swagger2sdk.main import construct_sdk 7 | from web2sdk import 
console_util 8 | 9 | def progress_callback(progress): 10 | console_util.print_progress_bar(progress, "Generating SDK... ") 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser( 14 | description="Converts a mitmproxy dump file or HAR to a swagger schema." 15 | ) 16 | parser.add_argument( 17 | "-i", 18 | "--interactive", 19 | help="Run in interactive mode", 20 | action="store_true", 21 | required=False, 22 | ) 23 | 24 | parser.add_argument( 25 | "-r", 26 | "--requests-path", 27 | help="Path to a mitmproxy dump file or HAR", 28 | required=False, 29 | ) 30 | 31 | parser.add_argument( 32 | "-b", 33 | "--base-url", 34 | help="Base url for the API to reverse engineer", 35 | required=False, 36 | ) 37 | 38 | parser.add_argument( 39 | "-a", 40 | "--auth-type", 41 | help="Auth type to determine how the SDK should handle auth. Possible values: basic, bearer, none.", 42 | default="none", 43 | required=False, 44 | ) 45 | 46 | parser.add_argument( 47 | "-s", 48 | "--sdk-name", 49 | help="Name for the SDK class. 
Will also be used as the filename for the OpenAPI schema.", 50 | required=False, 51 | ) 52 | 53 | parser.add_argument( 54 | "-o", 55 | "--output", 56 | help="Path to the directory where generated files should be saved", 57 | default="generated", 58 | required=False, 59 | ) 60 | 61 | args = parser.parse_args() 62 | output_path = args.output.rstrip("/") 63 | 64 | if not args.interactive: 65 | if not args.requests_path or not args.sdk_name or not args.base_url: 66 | parser.error("--requests-path, --sdk-name, and --base-url are required when not running in --interactive mode.") 67 | if args.auth_type and args.auth_type not in ["basic", "bearer", "none"]: 68 | parser.error("--auth-type must be one of 'basic', 'bearer', or 'none.") 69 | 70 | openapi_path = f"{output_path}/{args.sdk_name}.yaml" 71 | sdk_path = f"{output_path}/{args.sdk_name}.py" 72 | os.makedirs(output_path, exist_ok=True) 73 | 74 | print("\n") 75 | web2swagger_main(args.sdk_name, ["--input", args.requests_path, "--output", openapi_path, "--api-prefix", args.base_url]) 76 | print("OpenAPI schema generated successfully at: ", openapi_path) 77 | print("\n") 78 | construct_sdk(openapi_path, args.sdk_name, output_path, auth_type=args.auth_type, progress_callback=progress_callback) 79 | print(" Done!") 80 | sys.stdout.write(f"SDK generated successfully at: {sdk_path}") 81 | else: 82 | while True: 83 | requests_path = input("Enter the path to the mitmproxy dump file or HAR: ") 84 | base_url = input("Enter the base URL for the API (e.g. https://api.finic.ai/v1): ") 85 | sdk_name = input("Enter a name for the generated SDK (e.g. FinicAPI): ") 86 | 87 | use_auth = input("Does this API require authentication? (y/n): ") 88 | while use_auth.lower() not in ["y", "n", "yes", "no"]: 89 | print("Invalid input. Please enter 'y' or 'n'.") 90 | use_auth = input("Does this API require authentication? 
(y/n): ") 91 | 92 | 93 | if use_auth.lower() in ["y", "yes", ""]: 94 | auth_type = input("What type of authentication does this API use? (basic/bearer): ") 95 | while auth_type not in ["basic", "bearer"]: 96 | print("Invalid auth type. Please enter 'basic' or 'bearer'.") 97 | auth_type = input("What type of authentication does this API use? (basic/bearer): ") 98 | elif use_auth.lower() in ["n", "no"]: 99 | auth_type = "none" 100 | 101 | openapi_path = f"generated/{sdk_name}.yaml" 102 | os.makedirs("generated", exist_ok=True) 103 | web2swagger_main(sdk_name, ["--input", requests_path, "--output", openapi_path, "--api-prefix", base_url]) 104 | print("OpenAPI schema generated successfully at: ", openapi_path) 105 | print("\n") 106 | construct_sdk(openapi_path, args.sdk_name, output_path, auth_type=args.auth_type, progress_callback=progress_callback) 107 | print(" Done!") 108 | sys.stdout.write(f"SDK generated successfully at: {sdk_path}") 109 | 110 | if __name__ == "__main__": 111 | main() 112 | -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/swagger2sdk/__init__.py -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/generate_function.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from web2sdk.swagger2sdk.utils import AuthType, check_content_type, dash_to_snake 3 | from typing import Tuple, List 4 | from urllib.parse import urlparse 5 | 6 | def content_type_to_ast_node(content_type: str, return_type: str) -> ast.Call: 7 | if check_content_type(content_type, ['application/json', 'application/x-www-form-urlencoded']): 8 | # json.loads(data) 9 | result_node = ast.Call( 10 | func=ast.Attribute( 11 | 
value=ast.Name(id='json', ctx=ast.Load()), 12 | attr='loads', 13 | ctx=ast.Load() 14 | ), 15 | args=[ast.Name(id='data', ctx=ast.Load())], 16 | keywords=[] 17 | ) 18 | else: 19 | result_node = ast.Name(id='data', ctx=ast.Load()) 20 | return result_node 21 | 22 | # Fallback in case a class could not be created for a particular endpoint. Return a primitive type instead. 23 | def get_return_type(content_type: str) -> ast.Name: 24 | if check_content_type(content_type, ['application/json']): 25 | return 'dict' 26 | elif check_content_type(content_type, ['text/html', 'text/plain']): 27 | return 'str' 28 | else: 29 | return 'Any' 30 | 31 | def generate_function_for_endpoint(endpoint: dict, base_url: str, auth_type: AuthType, types: Tuple[ast.ClassDef]) -> ast.FunctionDef: 32 | # Extract endpoint details 33 | request_path: str = endpoint['path'] 34 | request_name: str = endpoint['name'] 35 | request_parameters: dict = endpoint['parameters'] 36 | request_method: str = endpoint['method'] 37 | request_schema: dict = endpoint['request_body'] 38 | request_content_type: str = next(iter(request_schema['content'].keys()), None) if request_schema else None 39 | response_content: dict = endpoint['responses'].get('200', {}).get('content', {}) 40 | response_content_type: str = next(iter(response_content.keys()), "") 41 | 42 | request_parameters_class_name = types[0].name if types[0] else 'dict' 43 | request_body_class_name = types[1].name if types[1] else 'dict' 44 | response_class_name = types[2].name if types[2] else get_return_type(response_content_type) 45 | 46 | # Construct the function arguments 47 | args = ast.arguments( 48 | args=[arg for arg in [ 49 | ast.arg(arg='self', annotation=None), 50 | ast.arg(arg='request_parameters', annotation=ast.Name(id=request_parameters_class_name, ctx=ast.Load())) if request_parameters else None, 51 | ast.arg(arg='request_body', annotation=ast.Name(id=request_body_class_name, ctx=ast.Load())) if request_schema else None 52 | ] if arg is not 
None], 53 | vararg=None, 54 | kwonlyargs=[ast.arg(arg='override_headers', annotation=ast.Name(id='dict', ctx=ast.Load()))], 55 | kw_defaults=[ast.Dict(keys=[], values=[])], 56 | kwarg=None, 57 | defaults=[] 58 | ) 59 | 60 | # Return annotation 61 | return_annotation = ast.Name(id=response_class_name, ctx=ast.Load()) 62 | 63 | # Set up http.client connection 64 | # conn = http.client.HTTPSConnection(self.hostname) 65 | http_conn_assign = ast.Assign( 66 | targets=[ast.Name(id='conn', ctx=ast.Store())], 67 | value=ast.Call( 68 | func=ast.Attribute( 69 | value=ast.Name(id='http.client', ctx=ast.Load()), 70 | attr='HTTPSConnection', 71 | ctx=ast.Load() 72 | ), 73 | args=[ast.Attribute( 74 | value=ast.Name(id='self', ctx=ast.Load()), 75 | attr='hostname', 76 | ctx=ast.Load() 77 | )], 78 | keywords=[] 79 | ) 80 | ) 81 | 82 | # Prepare the payload, depending on the request content type 83 | if request_schema: 84 | if check_content_type(request_content_type, ['application/json', 'application/x-www-form-urlencoded']): 85 | payload_assign = ast.Assign( 86 | targets=[ast.Name(id='payload', ctx=ast.Store())], 87 | value=ast.Call( 88 | func=ast.Attribute( 89 | value=ast.Name(id='json', ctx=ast.Load()), 90 | attr='dumps', 91 | ctx=ast.Load() 92 | ), 93 | args=[ast.Name(id='request_body', ctx=ast.Load())], 94 | keywords=[] 95 | ) 96 | ) 97 | else: 98 | payload_assign = ast.Assign( 99 | targets=[ast.Name(id='payload', ctx=ast.Store())], 100 | value=ast.Name(id='request_body', ctx=ast.Load()) 101 | ) 102 | 103 | # Prepare headers 104 | header_keys = [ast.Constant(value='User-Agent')] 105 | header_values = [ast.Constant(value='Web2sdk/1.0')] 106 | if auth_type == AuthType.BASIC.value: 107 | header_keys.append(ast.Constant(value='Authorization')) 108 | header_values.append( 109 | ast.BinOp( 110 | left=ast.Constant(value='Basic '), 111 | op=ast.Add(), 112 | right=ast.Call( 113 | func=ast.Name(id='base64.b64encode', ctx=ast.Load()), 114 | args=[ast.BinOp( 115 | left=ast.BinOp( 116 | 
left=ast.Name(id='self.username', ctx=ast.Load()), 117 | op=ast.Add(), 118 | right=ast.Constant(value=':') 119 | ), 120 | op=ast.Add(), 121 | right=ast.Name(id='self.password', ctx=ast.Load()) 122 | )], 123 | keywords=[] 124 | ) 125 | ) 126 | ) 127 | elif auth_type == AuthType.BEARER.value: 128 | header_keys.append(ast.Constant(value='Authorization')) 129 | header_values.append(ast.BinOp( 130 | left=ast.Constant(value='Bearer '), 131 | op=ast.Add(), 132 | right=ast.Name(id='self.token', ctx=ast.Load()) 133 | )) 134 | 135 | headers_assign = ast.Assign( 136 | targets=[ast.Name(id='headers', ctx=ast.Store())], 137 | value=ast.Dict( 138 | keys=header_keys, 139 | values=header_values 140 | ) 141 | ) 142 | 143 | # Update headers with override headers 144 | headers_update = ast.Expr( 145 | value=ast.Call( 146 | func=ast.Attribute( 147 | value=ast.Name(id='headers', ctx=ast.Load()), 148 | attr='update', 149 | ctx=ast.Load() 150 | ), 151 | args=[ast.Name(id='override_headers', ctx=ast.Load())], 152 | keywords=[] 153 | ) 154 | ) 155 | 156 | # Prepare the request params 157 | if request_parameters: 158 | parameter_assign = ast.Assign( 159 | targets=[ast.Name(id='params', ctx=ast.Store())], 160 | value=ast.Call( 161 | func=ast.Attribute( 162 | value=ast.Str(s="&"), 163 | attr="join", 164 | ctx=ast.Load() 165 | ), 166 | args=[ast.ListComp( 167 | elt=ast.BinOp( 168 | left=ast.BinOp( 169 | left=ast.Name(id='k', ctx=ast.Load()), 170 | op=ast.Add(), 171 | right=ast.Constant(value="=") 172 | ), 173 | op=ast.Add(), 174 | right=ast.Name(id='v', ctx=ast.Load()) 175 | ), 176 | generators=[ 177 | ast.comprehension( 178 | target=ast.Tuple(elts=[ 179 | ast.Name(id='k', ctx=ast.Store()), 180 | ast.Name(id='v', ctx=ast.Store())], ctx=ast.Store()), 181 | iter=ast.Call( 182 | func=ast.Attribute( 183 | value=ast.Name(id='request_parameters', ctx=ast.Load()), 184 | attr='items', 185 | ctx=ast.Load() 186 | ), 187 | args=[], keywords=[] 188 | ), 189 | ifs=[], is_async=0 190 | ) 191 | ] 192 | )], 
193 | keywords=[] 194 | ) 195 | ) 196 | else: 197 | parameter_assign = ast.Assign( 198 | targets=[ast.Name(id='params', ctx=ast.Store())], 199 | value=ast.Constant(value="") 200 | ) 201 | 202 | # Call the connection request 203 | # conn.request("GET", "/backend-api/conversations" + "?" + params, body=payload, headers=headers) 204 | full_url = urlparse(base_url + request_path) 205 | http_path = full_url.path + "?" if request_parameters else full_url.path 206 | conn_request = ast.Expr( 207 | value=ast.Call( 208 | func=ast.Attribute( 209 | value=ast.Name(id='conn', ctx=ast.Load()), 210 | attr='request', 211 | ctx=ast.Load() 212 | ), 213 | args=[ 214 | ast.Constant(value=request_method.upper()), 215 | ast.BinOp( 216 | left=ast.BinOp( 217 | left=ast.Constant(value=http_path), 218 | op=ast.Add(), 219 | right=ast.Name(id='params', ctx=ast.Load()) 220 | ), 221 | op=ast.Add(), 222 | right=ast.Constant(value="") 223 | ) 224 | ], 225 | keywords=[kw for kw in [ 226 | ast.keyword(arg='body', value=ast.Name(id='payload', ctx=ast.Load())) if request_schema else None, 227 | ast.keyword(arg='headers', value=ast.Name(id='headers', ctx=ast.Load())) 228 | ] if kw is not None] 229 | ) 230 | ) 231 | 232 | # Get the response 233 | # res = conn.getresponse() 234 | response_assign = ast.Assign( 235 | targets=[ast.Name(id='res', ctx=ast.Store())], 236 | value=ast.Call( 237 | func=ast.Attribute( 238 | value=ast.Name(id='conn', ctx=ast.Load()), 239 | attr='getresponse', 240 | ctx=ast.Load() 241 | ), 242 | args=[], 243 | keywords=[] 244 | ) 245 | ) 246 | 247 | # Read the response 248 | # data = res.read().decode("utf-8") 249 | data_assign = ast.Assign( 250 | targets=[ast.Name(id='data', ctx=ast.Store())], 251 | value=ast.Call( 252 | func=ast.Attribute( 253 | value=ast.Call( 254 | func=ast.Attribute( 255 | value=ast.Name(id='res', ctx=ast.Load()), 256 | attr='read', 257 | ctx=ast.Load() 258 | ), 259 | args=[], 260 | keywords=[] 261 | ), 262 | attr='decode', 263 | ctx=ast.Load() 264 | ), 265 | 
args=[ast.Str(s='utf-8')], 266 | keywords=[] 267 | ) 268 | ) 269 | 270 | # Return the decoded data 271 | return_stmt = ast.Return( 272 | value=content_type_to_ast_node(response_content_type, response_class_name) 273 | ) 274 | 275 | # Construct function body with the URL and response assignments 276 | function_body = [ 277 | http_conn_assign, 278 | parameter_assign, 279 | headers_assign, 280 | headers_update, 281 | payload_assign if request_schema else None, 282 | conn_request, 283 | response_assign, 284 | data_assign, 285 | return_stmt 286 | ] 287 | 288 | # Remove any None values from the function body 289 | function_body = [stmt for stmt in function_body if stmt is not None] 290 | 291 | # Create the function definition 292 | function_def = ast.FunctionDef( 293 | name=dash_to_snake(request_name), 294 | args=args, 295 | body=function_body, 296 | decorator_list=[], 297 | returns=return_annotation 298 | ) 299 | 300 | return function_def -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/generate_types.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from pydantic import BaseModel, ConfigDict, Field, ValidationError 3 | from abc import ABC, abstractmethod 4 | from typing import List, Optional, Dict, Any, Tuple, Type, Union 5 | from enum import Enum 6 | from web2sdk.swagger2sdk.utils import YAMLToPydanticType, check_content_type, snake_to_pascal, dash_to_snake, strip_special_chars 7 | 8 | class ClassField(BaseModel): 9 | field_name: str 10 | field_type: str 11 | required: bool 12 | 13 | def path_to_class_name(path: str) -> str: 14 | """ 15 | Converts a URL path to a class name. 16 | """ 17 | return strip_special_chars(snake_to_pascal(dash_to_snake(path))) 18 | 19 | 20 | def generate_class_def(class_name: str, fields: List[ClassField]) -> ast.ClassDef: 21 | """ 22 | Generates a Pydantic class definition with the given class name and fields. 
23 | """ 24 | # Create the class definition 25 | class_def = ast.ClassDef( 26 | name=class_name, 27 | bases=[ast.Name(id='BaseModel', ctx=ast.Load())], # Inherit from Pydantic's BaseModel 28 | body=[], 29 | decorator_list=[] 30 | ) 31 | 32 | # Add each field as a class attribute 33 | for field in fields: 34 | field_name, field_type, required = strip_special_chars(dash_to_snake(field.field_name)), field.field_type, field.required 35 | 36 | # If the field is not required, wrap the type in Optional 37 | if not required: 38 | field_annotation = ast.Subscript( 39 | value=ast.Name(id='Optional', ctx=ast.Load()), 40 | slice=ast.Index(value=ast.Name(id=field_type, ctx=ast.Load())), 41 | ctx=ast.Load() 42 | ) 43 | else: 44 | field_annotation = ast.Name(id=field_type, ctx=ast.Load()) 45 | 46 | # Add the field to the class body. If the field is not required, assign a default value of None. 47 | field_node = ast.AnnAssign( 48 | target=ast.Name(id=field_name, ctx=ast.Store()), 49 | annotation=field_annotation, 50 | value=None if required else ast.Constant(value=None), 51 | simple=True 52 | ) 53 | 54 | class_def.body.append(field_node) 55 | 56 | return class_def 57 | 58 | def parse_request_body(request_body: dict) -> List[ClassField]: 59 | if not request_body: 60 | return [] 61 | fields = [] 62 | content: dict = request_body['content'] 63 | required: bool = request_body.get('required', False) 64 | 65 | for content_type, schema in content.items(): 66 | if check_content_type(content_type, ['application/json', 'application/x-www-form-urlencoded']): 67 | schema = schema['schema'] 68 | schema_type = YAMLToPydanticType[schema.get('type')] 69 | if schema.get('type') == 'object': 70 | required_properties: List[str] = schema.get('required', []) 71 | properties: dict = schema.get('properties', {}) 72 | for name, prop in properties.items(): 73 | field_type: str = YAMLToPydanticType[prop['type']] 74 | field_required: bool = name in required_properties 75 | 
fields.append(ClassField(field_name=name, field_type=field_type, required=field_required)) 76 | else: 77 | fields.append(ClassField(field_name='data', field_type=schema_type, required=required)) 78 | return fields 79 | 80 | def parse_response_body(response_body: dict) -> List[ClassField]: 81 | if not response_body: 82 | return [] 83 | fields = [] 84 | content: dict = response_body['content'] 85 | for content_type, schema in content.items(): 86 | if content_type == 'application/json': 87 | schema = schema['schema'] 88 | schema_type = schema.get('type') 89 | if schema_type == 'object': 90 | required_properties: List[str] = schema.get('required', []) 91 | properties: dict = schema.get('properties', {}) 92 | for name, prop in properties.items(): 93 | field_type: str = YAMLToPydanticType[prop['type']] 94 | is_required: bool = name in required_properties 95 | fields.append(ClassField(field_name=name, field_type=field_type, required=is_required)) 96 | elif schema_type == 'array': 97 | item_type = schema['items'].get('type', 'unknown') 98 | fields.append(ClassField(field_name='data', field_type=f'List[{YAMLToPydanticType[item_type]}]', required=True)) 99 | else: 100 | fields.append(ClassField(field_name='data', field_type=YAMLToPydanticType[schema_type], required=True)) 101 | return fields 102 | 103 | def generate_types(endpoint: dict) -> Tuple[ast.ClassDef]: 104 | # Extract endpoint details 105 | request_path: str = endpoint['path'] 106 | request_name: str = endpoint['name'] 107 | request_method: str = endpoint['method'] 108 | request_parameters: dict = endpoint['parameters'] 109 | request_body: dict = endpoint['request_body'] 110 | responses: dict = endpoint['responses'] 111 | 112 | 113 | # Generate Pydantic class for request parameters 114 | request_parameters_class = None 115 | if request_parameters: 116 | request_parameters_fields = [] 117 | for param in request_parameters: 118 | field_type = YAMLToPydanticType[param['schema']['type']] 119 | field_name = param['name'] 
120 | required = param.get('required', False) 121 | request_parameters_fields.append(ClassField(field_name=field_name, field_type=field_type, required=required)) 122 | if len(request_parameters_fields) > 0: 123 | request_parameters_class = generate_class_def(f'{path_to_class_name(request_name)}RequestParameters', request_parameters_fields) 124 | 125 | # Generate Pydantic class for request body 126 | request_body_class = None 127 | if request_body: 128 | request_body_fields = parse_request_body(request_body) 129 | if len(request_body_fields) > 0: 130 | request_body_class = generate_class_def(f'{path_to_class_name(request_name)}RequestBody', request_body_fields) 131 | 132 | # Generate Pydantic classes for responses 133 | successful_response = responses.get('200') 134 | response_class = None 135 | if successful_response: 136 | response_fields = parse_response_body(successful_response) 137 | if len(response_fields) > 0: 138 | response_class = generate_class_def(f'{path_to_class_name(request_name)}Response', response_fields) 139 | 140 | 141 | return (request_parameters_class, request_body_class, response_class) -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/main.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import ast 3 | import astor 4 | import yaml 5 | from pydantic import BaseModel, ConfigDict, Field, ValidationError 6 | from abc import ABC, abstractmethod 7 | from typing import List, Optional, Dict, Any, Tuple, Type, Union, Callable 8 | from enum import Enum 9 | from web2sdk.swagger2sdk.generate_function import generate_function_for_endpoint 10 | from web2sdk.swagger2sdk.generate_types import generate_types, generate_class_def, ClassField 11 | from web2sdk.swagger2sdk.utils import AuthType, HTTPMethod 12 | 13 | swagger_path = '/Users/jasonfan/Documents/code/web2sdk/web2sdk/specs.yml' 14 | 15 | def load_yaml(file_path): 16 | with open(file_path, 'r') as 
def generate_sdk_class(sdk_name: str, auth_type: AuthType) -> ast.ClassDef:
  """Build the AST for the top-level SDK class.

  The class always carries a 'hostname' field; basic auth adds
  username/password and bearer auth adds a token field.
  """
  # SDK should accept different arguments depending on the auth type.
  # (FIX: removed unused local `auth_arguments`.)
  fields = [
    ClassField(field_name='hostname', field_type='str', required=True)
  ]
  # NOTE(review): auth_type is annotated AuthType but compared against the enum
  # *values*, so callers appear to pass plain strings — confirm before tightening.
  if auth_type == AuthType.BASIC.value:
    fields.extend([ClassField(field_name='username', field_type='str', required=True), ClassField(field_name='password', field_type='str', required=True)])
  elif auth_type == AuthType.BEARER.value:
    fields.extend([ClassField(field_name='token', field_type='str', required=True)])

  class_def = generate_class_def(sdk_name, fields)

  return class_def

def save_class_to_file(module: ast.Module, file_path: str) -> None:
  """Render an ast.Module to source with astor and write it to file_path."""
  code = astor.to_source(module)
  with open(file_path, 'w') as file:
    file.write(code)

def generate_imports() -> List[ast.Import]:
  """Return the import statements emitted at the top of every generated SDK."""
  imports = [
    ast.Import(names=[ast.alias(name='json', asname=None)]),
    ast.Import(names=[ast.alias(name='http.client', asname=None)]),
    ast.ImportFrom(module='urllib.parse', names=[ast.alias(name='urlparse', asname=None)], level=0),
    ast.ImportFrom(module='pydantic', names=[ast.alias(name='BaseModel', asname=None)], level=0),
    ast.ImportFrom(module='typing', names=[
      ast.alias(name='Optional', asname=None),
      ast.alias(name='Dict', asname=None),
      ast.alias(name='List', asname=None),
      ast.alias(name='Any', asname=None)], level=0),
  ]
  return imports

def construct_sdk(swagger_path: str,
                  sdk_name: str,
                  output_path: str,
                  base_url: str = None,
                  auth_type: AuthType = AuthType.NONE,
                  progress_callback: Callable[[float], None] = None) -> None:
  """Generate a Python SDK from an OpenAPI spec and write it to disk.

  Raises ValueError when no base URL is available from either the spec's
  `servers` entry or the `base_url` argument.
  """
  swagger = load_yaml(swagger_path)
  base_url = swagger.get('servers', [{}])[0].get('url') if not base_url else base_url
  if not base_url:
    raise ValueError('Base URL is required, but was not provided in the OpenAPI spec or as an argument.')

  paths = swagger.get('paths', {})
  imports = generate_imports()
  class_def = generate_sdk_class(sdk_name, auth_type)
  types: List[ast.ClassDef] = []

  # Iterate through each path and method. Generate functions to call each endpoint,
  # and types to validate request/response bodies.
  for index, (path, methods) in enumerate(paths.items()):
    for method, details in methods.items():
      endpoint = {
        'path': path,
        'method': method,
        'name': f"{method.lower()}{path.replace('/', '_').replace('{', '').replace('}', '')}",
        'parameters': details.get('parameters', None),
        'request_body': details.get('requestBody', None),
        'responses': details.get('responses', None)
      }
      _types = generate_types(endpoint)
      _function = generate_function_for_endpoint(endpoint, base_url, auth_type, _types)
      class_def.body.append(_function)
      types.extend([t for t in _types if t is not None])
    if progress_callback:
      progress_callback(float(index+1) / len(paths))

  # Combine the imports, the SDK class, and generated types into a single module
  body = imports + types + [class_def]
  class_module = ast.Module(body=body, type_ignores=[])
  save_class_to_file(class_module, f'{output_path}/{sdk_name}.py')


# ---------------- web2sdk/swagger2sdk/utils.py ----------------
import re
from pydantic import BaseModel, ConfigDict, Field, ValidationError
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any, Tuple, Type, Union
from enum import Enum

class AuthType(Enum):
  """Authentication schemes supported by the generated SDKs."""
  BASIC = 'basic'
  BEARER = 'bearer'
  NONE = 'none'

class HTTPMethod(Enum):
  """HTTP verbs recognized by the generator."""
  GET = 'GET'
  POST = 'POST'
  PUT = 'PUT'
  PATCH = 'PATCH'
  DELETE = 'DELETE'
YAMLToPydanticType = { 20 | 'string': 'str', 21 | 'number': 'float', 22 | 'integer': 'int', 23 | 'boolean': 'bool', 24 | 'array': 'List', 25 | 'object': 'Dict', 26 | 'unknown': 'Any' 27 | } 28 | 29 | def check_content_type(input_string: str, patterns: List[str]): 30 | regex_pattern = "|".join(re.escape(pattern) for pattern in patterns) 31 | if re.search(regex_pattern, input_string): 32 | return True 33 | return False 34 | 35 | def snake_to_pascal(snake_str: str) -> str: 36 | components = snake_str.split('_') 37 | return ''.join(x.capitalize() for x in components) 38 | 39 | def dash_to_snake(dash_str: str) -> str: 40 | return dash_str.replace('-', '_') 41 | 42 | def strip_special_chars(input_string: str) -> str: 43 | return re.sub(r'[^\w_]+', '', input_string) -------------------------------------------------------------------------------- /web2sdk/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/tests/__init__.py -------------------------------------------------------------------------------- /web2sdk/web2swagger/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/web2swagger/.DS_Store -------------------------------------------------------------------------------- /web2sdk/web2swagger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/web2swagger/__init__.py -------------------------------------------------------------------------------- /web2sdk/web2swagger/har_capture_reader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from base64 import 
# -*- coding: utf-8 -*-
import os
from base64 import b64decode
from typing import Iterator, Union


# a heuristic to determine if a file is a har archive
def har_archive_heuristic(file_path: str) -> int:
    """Score how likely *file_path* is a HAR archive (higher = more likely)."""
    val = 0
    # if has the har extension
    if file_path.endswith(".har"):
        val += 25
    # read the first 2048 bytes
    with open(file_path, "rb") as f:
        data = f.read(2048)
        # if file contains only ascii characters after removing EOL characters
        if (
            data.decode("utf-8", "ignore")
            .replace("\r", "")
            .replace("\n", "")
            .isprintable()
            is True
        ):
            val += 25
        # sign of a JSON file
        if data[0:1] == b"{":
            val += 23
        # sign of Chrome OR Firefox export
        if b'"WebInspector"' in data or b'"Firefox"' in data:
            val += 15
        if b'"entries"' in data:
            val += 15
        if b'"version"' in data:
            val += 15
    return val


class HarFlowWrapper:
    """Adapts one HAR 'entry' dict to the common capture-reader interface."""

    def __init__(self, flow: dict):
        self.flow = flow

    def get_url(self):
        return self.flow["request"]["url"]

    def get_matching_url(self, prefix) -> Union[str, None]:
        """Get the requests URL if the prefix matches the URL, None otherwise."""
        if self.flow["request"]["url"].startswith(prefix):
            return self.flow["request"]["url"]
        return None

    def get_method(self):
        return self.flow["request"]["method"]

    def get_request_headers(self):
        """Return request headers as {name: [values...]}."""
        headers = {}
        for kv in self.flow["request"]["headers"]:
            k = kv["name"]
            v = kv["value"]
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        # FIX: the dict was built but never returned (method always yielded None).
        return headers

    def get_request_body(self):
        if (
            "request" in self.flow
            and "postData" in self.flow["request"]
            and "text" in self.flow["request"]["postData"]
        ):
            return self.flow["request"]["postData"]["text"]
        return None

    def get_response_status_code(self):
        return self.flow["response"]["status"]

    def get_response_reason(self):
        return self.flow["response"]["statusText"]

    def get_response_headers(self):
        """Return response headers as {name: [values...]}."""
        headers = {}
        for kv in self.flow["response"]["headers"]:
            k = kv["name"]
            v = kv["value"]
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        return headers

    def get_response_body(self):
        """Return the response text, base64-decoding it when HAR says so.

        Returns None when the body is absent or cannot be decoded.
        """
        if (
            "response" in self.flow
            and "content" in self.flow["response"]
            and "text" in self.flow["response"]["content"]
        ):
            try:
                if (
                    "encoding" in self.flow["response"]["content"]
                    and self.flow["response"]["content"]["encoding"] == "base64"
                ):
                    return b64decode(self.flow["response"]["content"]["text"]).decode()
            except UnicodeDecodeError:
                return None
            return self.flow["response"]["content"]["text"]
        return None


class HarCaptureReader:
    """Streams HAR entries from a file, reporting read progress."""

    def __init__(self, file_path: str, progress_callback=None):
        self.file_path = file_path
        self.progress_callback = progress_callback

    def captured_requests(self) -> Iterator[HarFlowWrapper]:
        # FIX: json_stream is imported lazily so importing this module does not
        # require the dependency to be installed.
        import json_stream

        har_file_size = os.path.getsize(self.file_path)
        with open(self.file_path, "r", encoding="utf-8") as f:
            data = json_stream.load(f)
            for entry in data["log"]["entries"].persistent():
                if self.progress_callback:
                    self.progress_callback(f.tell() / har_file_size)
                yield HarFlowWrapper(entry)

    def name(self):
        return "har"
Sequence, Union 12 | 13 | import msgpack 14 | import ruamel.yaml 15 | from mitmproxy.exceptions import FlowReadException 16 | 17 | from web2sdk import console_util 18 | from web2sdk.web2swagger import swagger_util 19 | from web2sdk.web2swagger.har_capture_reader import HarCaptureReader, har_archive_heuristic 20 | from web2sdk.web2swagger.mitmproxy_capture_reader import ( 21 | MitmproxyCaptureReader, 22 | mitmproxy_dump_file_huristic, 23 | ) 24 | 25 | 26 | def path_to_regex(path): 27 | # replace the path template with a regex 28 | path = re.escape(path) 29 | path = path.replace(r"\{", "(?P<") 30 | path = path.replace(r"\}", ">[^/]+)") 31 | path = path.replace(r"\*", ".*") 32 | return "^" + path + "$" 33 | 34 | 35 | def strip_query_string(path): 36 | # remove the query string from the path 37 | return path.split("?")[0] 38 | 39 | 40 | def set_key_if_not_exists(dict, key, value): 41 | if key not in dict: 42 | dict[key] = value 43 | 44 | 45 | def progress_callback(progress): 46 | console_util.print_progress_bar(progress, "Generating OpenAPI Schema...") 47 | 48 | 49 | def detect_input_format(file_path): 50 | har_score = har_archive_heuristic(file_path) 51 | mitmproxy_score = mitmproxy_dump_file_huristic(file_path) 52 | if "MITMPROXY2SWAGGER_DEBUG" in os.environ: 53 | print("har score: " + str(har_score)) 54 | print("mitmproxy score: " + str(mitmproxy_score)) 55 | if har_score > mitmproxy_score: 56 | return HarCaptureReader(file_path, progress_callback) 57 | return MitmproxyCaptureReader(file_path, progress_callback) 58 | 59 | def main(sdk_name: str, override_args: Optional[Sequence[str]] = None): 60 | parser = argparse.ArgumentParser( 61 | description="Converts a mitmproxy dump file or HAR to a swagger schema." 
def main(sdk_name: str = "", override_args: Optional[Sequence[str]] = None):
    """CLI entry point: convert a mitmproxy/HAR capture into an OpenAPI YAML file.

    sdk_name is appended to the generated spec title. FIX: it now defaults to ""
    so the `__main__` guard's bare main() call no longer raises TypeError;
    existing positional callers are unaffected.
    """
    parser = argparse.ArgumentParser(
        description="Converts a mitmproxy dump file or HAR to a swagger schema."
    )
    parser.add_argument(
        "-i",
        "--input",
        help="The input mitmproxy dump file or HAR dump file (from DevTools)",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--output",
        help="The output swagger schema file (yaml). If it exists, new endpoints will be added",
        required=True,
    )
    parser.add_argument("-p", "--api-prefix", help="The api prefix", required=True)
    parser.add_argument(
        "-e",
        "--examples",
        action="store_true",
        help="Include examples in the schema. This might expose sensitive information.",
    )
    parser.add_argument(
        "-hd",
        "--headers",
        action="store_true",
        help="Include headers in the schema. This might expose sensitive information.",
    )
    parser.add_argument(
        "-f",
        "--format",
        choices=["flow", "har"],
        help="Override the input file format auto-detection.",
    )
    parser.add_argument(
        "-r",
        "--param-regex",
        default="[0-9]+",
        help="Regex to match parameters in the API paths. Path segments that match this regex will be turned into parameter placeholders.",
    )
    parser.add_argument(
        "-s",
        "--suppress-params",
        action="store_true",
        help="Do not include API paths that have the original parameter values, only the ones with placeholders.",
    )
    args = parser.parse_args(override_args)
    try:
        args.param_regex = re.compile("^" + args.param_regex + "$")
    except re.error as e:
        print(
            f"{console_util.ANSI_RED}Invalid path parameter regex: {e}{console_util.ANSI_RESET}"
        )
        sys.exit(1)

    yaml = ruamel.yaml.YAML()

    capture_reader: Union[MitmproxyCaptureReader, HarCaptureReader]
    # NOTE(review): "mitmproxy" can never be reached since argparse restricts
    # choices to ["flow", "har"] — confirm whether it should be added to choices.
    if args.format == "flow" or args.format == "mitmproxy":
        capture_reader = MitmproxyCaptureReader(args.input, progress_callback)
    elif args.format == "har":
        capture_reader = HarCaptureReader(args.input, progress_callback)
    else:
        capture_reader = detect_input_format(args.input)

    swagger = None

    # try loading the existing swagger file
    try:
        base_dir = os.getcwd()
        relative_path = args.output
        abs_path = os.path.join(base_dir, relative_path)
        with open(abs_path, "r") as f:
            swagger = yaml.load(f)
    except FileNotFoundError:
        print("No existing OpenAPI file found. Creating new one.")
    if swagger is None:
        swagger = ruamel.yaml.comments.CommentedMap(
            {
                "openapi": "3.0.0",
                "info": {
                    # FIX: the key was "title " (trailing space), producing an
                    # invalid OpenAPI Info object.
                    "title": args.input + sdk_name,
                    "version": "1.0.0",
                },
            }
        )
    # strip the trailing slash from the api prefix
    args.api_prefix = args.api_prefix.rstrip("/")

    if "servers" not in swagger or swagger["servers"] is None:
        swagger["servers"] = []

    # add the server if it doesn't exist
    if not any(server["url"] == args.api_prefix for server in swagger["servers"]):
        swagger["servers"].append(
            {"url": args.api_prefix, "description": "The default server"}
        )

    if "paths" not in swagger or swagger["paths"] is None:
        swagger["paths"] = {}

    # Add the component/securitySchemes section if it doesn't exist
    if "components" not in swagger or swagger["components"] is None:
        swagger["components"] = {}

    # add existing path templates
    path_templates = []
    for path in swagger["paths"]:
        path_templates.append(path)

    path_template_regexes = [re.compile(path_to_regex(path)) for path in path_templates]

    try:
        for req in capture_reader.captured_requests():
            # strip the api prefix from the url
            url = req.get_matching_url(args.api_prefix)

            if url is None:
                continue
            method = req.get_method().lower()
            path = strip_query_string(url).removeprefix(args.api_prefix)
            status = req.get_response_status_code()

            # check if the path matches any of the path templates, and save the index
            path_template_index = None
            for i, path_template_regex in enumerate(path_template_regexes):
                if path_template_regex.match(path):
                    path_template_index = i
                    break
            if path_template_index is None:
                path_template_to_set = path
            else:
                path_template_to_set = path_templates[path_template_index]

            set_key_if_not_exists(swagger["paths"], path_template_to_set, {})

            set_key_if_not_exists(
                swagger["paths"][path_template_to_set],
                method,
                {
                    "summary": swagger_util.path_template_to_endpoint_name(
                        method, path_template_to_set
                    ),
                    "responses": {},
                },
            )

            params = swagger_util.url_to_params(url, path_template_to_set)
            # NOTE(review): when --headers is set, header parameters win the
            # set-if-absent race and path/query params are never recorded for
            # that method — confirm whether the two lists should be merged.
            if args.headers:
                headers_request = swagger_util.request_to_headers(
                    req.get_request_headers()
                )
                if headers_request is not None and len(headers_request) > 0:
                    set_key_if_not_exists(
                        swagger["paths"][path_template_to_set][method],
                        "parameters",
                        headers_request,
                    )
            if params is not None and len(params) > 0:
                set_key_if_not_exists(
                    swagger["paths"][path_template_to_set][method], "parameters", params
                )

            if method not in ["get", "head"]:
                body = req.get_request_body()
                if body is not None:
                    body_val = None
                    content_type = None
                    # try to parse the body as json
                    try:
                        body_val = json.loads(req.get_request_body())
                        content_type = "application/json"
                    except UnicodeDecodeError:
                        pass
                    except json.decoder.JSONDecodeError:
                        pass

                    # try to parse the body as msgpack, if it's not json
                    if body_val is None:
                        try:
                            body_val = msgpack.loads(req.get_request_body())
                            content_type = "application/msgpack"
                        except Exception:
                            pass

                    if content_type is None:
                        # try to parse the body as form data
                        try:
                            parsed_form: Any = dict(
                                urllib.parse.parse_qsl(
                                    body, encoding="utf-8", keep_blank_values=True
                                )
                            )
                            body_val = {}
                            did_find_anything = False
                            for key, value in parsed_form.items():
                                did_find_anything = True
                                # FIX: parse_qsl yields str pairs for HAR (str)
                                # bodies and bytes pairs for mitmproxy (bytes)
                                # bodies; unconditionally calling .decode()
                                # raised AttributeError for str input.
                                if isinstance(key, bytes):
                                    key = key.decode("utf-8")
                                if isinstance(value, bytes):
                                    value = value.decode("utf-8")
                                body_val[key] = value
                            if did_find_anything:
                                content_type = "application/x-www-form-urlencoded"
                            else:
                                body_val = None
                        except UnicodeDecodeError:
                            pass

                    if body_val is not None:
                        content_to_set = {
                            "content": {
                                content_type: {
                                    "schema": swagger_util.value_to_schema(body_val)
                                }
                            }
                        }
                        if args.examples:
                            content_to_set["content"][content_type][
                                "example"
                            ] = swagger_util.limit_example_size(body_val)
                        set_key_if_not_exists(
                            swagger["paths"][path_template_to_set][method],
                            "requestBody",
                            content_to_set,
                        )

            response_body = req.get_response_body()
            if response_body is not None:
                # try parsing the response as json
                try:
                    response_parsed = json.loads(response_body)
                    response_content_type = "application/json"
                except UnicodeDecodeError:
                    response_parsed = None
                except json.decoder.JSONDecodeError:
                    response_parsed = None

                if response_parsed is None:
                    # try parsing the response as msgpack, if it's not json
                    try:
                        response_parsed = msgpack.loads(response_body)
                        response_content_type = "application/msgpack"
                    except Exception:
                        response_parsed = None

                if response_parsed is None:
                    # fall back to treating the response as text
                    if type(response_body) is str:
                        response_parsed = response_body
                    else:
                        response_parsed = response_body.decode("utf-8", "ignore")
                    response_content_type = req.get_response_headers().get("content-type")
                    if type(response_content_type) is list:
                        response_content_type = response_content_type[0]
                    elif response_content_type is None:
                        response_content_type = "text/plain"

                if response_parsed is not None:
                    resp_data_to_set = {
                        "description": req.get_response_reason(),
                        "content": {
                            response_content_type: {
                                "schema": swagger_util.value_to_schema(response_parsed)
                            }
                        },
                    }
                    if args.examples:
                        resp_data_to_set["content"][response_content_type][
                            "example"
                        ] = swagger_util.limit_example_size(response_parsed)
                    if args.headers:
                        resp_data_to_set["headers"] = swagger_util.response_to_headers(
                            req.get_response_headers()
                        )

                    set_key_if_not_exists(
                        swagger["paths"][path_template_to_set][method]["responses"],
                        str(status),
                        resp_data_to_set,
                    )

            if (
                "responses" in swagger["paths"][path_template_to_set][method]
                and len(swagger["paths"][path_template_to_set][method]["responses"])
                == 0
            ):
                # add a default response if there were no responses detected,
                # this is for compliance with the OpenAPI spec
                # (FIX: removed a dead content_type computation that was never used)
                swagger["paths"][path_template_to_set][method]["responses"]["200"] = {
                    "description": "OK",
                    "content": {},
                }

    except FlowReadException as e:
        print(f"Flow file corrupted: {e}")
        traceback.print_exception(*sys.exc_info())
        print(
            f"{console_util.ANSI_RED}Failed to parse the input file as '{capture_reader.name()}'. "
        )
        if not args.format:
            # FIX: "format as incorrectly" -> "format was incorrectly"
            print(
                f"It might happen that the input format was incorrectly detected. Please try using '--format flow' or '--format har' to specify the input format.{console_util.ANSI_RESET}"
            )
        sys.exit(1)
    except ValueError as e:
        print(f"ValueError: {e}")
        # print stack trace
        traceback.print_exception(*sys.exc_info())
        print(
            f"{console_util.ANSI_RED}Failed to parse the input file as '{capture_reader.name()}'. "
        )
        if not args.format:
            # FIX: "format as incorrectly" -> "format was incorrectly"
            print(
                f"It might happen that the input format was incorrectly detected. Please try using '--format flow' or '--format har' to specify the input format.{console_util.ANSI_RESET}"
            )
        sys.exit(1)

    # save the swagger file
    with open(args.output, "w") as f:
        yaml.dump(swagger, f)
    print(" Done!")


if __name__ == "__main__":
    main()


# ---------------- web2sdk/web2swagger/mitmproxy_capture_reader.py ----------------
# -*- coding: utf-8 -*-
import os
import typing
from typing import Iterator
from urllib.parse import urlparse

from mitmproxy import http
from mitmproxy import io as iom
from mitmproxy.exceptions import FlowReadException


def mitmproxy_dump_file_huristic(file_path: str) -> int:
    """Score how likely *file_path* is a mitmproxy dump (higher = more likely).

    NOTE(review): 'huristic' typo is part of the public name and is imported
    elsewhere — renaming would be a breaking change.
    """
    val = 0
    if "flow" in file_path:
        val += 1
    if "mitmproxy" in file_path:
        val += 1
    # read the first 2048 bytes
    with open(file_path, "rb") as f:
        data = f.read(2048)
        # if file contains non-ascii characters after removing EOL characters
        if (
            data.decode("utf-8", "ignore")
            .replace("\r", "")
            .replace("\n", "")
            .isprintable()
            is False
        ):
            val += 50
        # if first character of the byte array is a digit
        if data[0:1].decode("utf-8", "ignore").isdigit() is True:
            val += 5
        # if it contains the word status_code
        if b"status_code" in data:
            val += 5
        if b"regular" in data:
            val += 10
    return val


class MitmproxyFlowWrapper:
    """Adapts a mitmproxy HTTPFlow to the common capture-reader interface."""

    def __init__(self, flow: http.HTTPFlow):
        self.flow = flow

    def get_url(self) -> str:
        return self.flow.request.url

    def get_matching_url(self, prefix) -> typing.Union[str, None]:
        """Get the requests URL if the prefix matches the URL, None otherwise.

        This takes into account a quirk of mitmproxy where it sometimes
        puts the raw IP address in the URL instead of the hostname. Then
        the hostname is in the Host header.
        """
        if self.flow.request.url.startswith(prefix):
            return self.flow.request.url
        # All the stuff where the real hostname could be
        replacement_hostnames = [
            self.flow.request.headers.get("Host", ""),
            self.flow.request.host_header,
            self.flow.request.host,
        ]
        for replacement_hostname in replacement_hostnames:
            if replacement_hostname is not None and replacement_hostname != "":
                fixed_url = (
                    urlparse(self.flow.request.url)
                    ._replace(netloc=replacement_hostname)
                    .geturl()
                )
                if fixed_url.startswith(prefix):
                    return fixed_url
        return None

    def get_method(self) -> str:
        return self.flow.request.method

    def get_request_headers(self) -> dict[str, typing.List[str]]:
        """Return request headers as {name: [values...]}."""
        headers: dict[str, typing.List[str]] = {}
        for k, v in self.flow.request.headers.items(multi=True):
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        return headers

    def get_request_body(self):
        # Bytes, unlike the HAR reader which yields str — callers handle both.
        return self.flow.request.content

    def get_response_status_code(self):
        return self.flow.response.status_code

    def get_response_reason(self):
        return self.flow.response.reason

    def get_response_headers(self):
        """Return response headers as {name: [values...]}."""
        headers = {}
        for k, v in self.flow.response.headers.items(multi=True):
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        return headers

    def get_response_body(self):
        return self.flow.response.content
class MitmproxyCaptureReader:
    """Streams captured HTTP flows from a mitmproxy dump file."""

    def __init__(self, file_path, progress_callback=None):
        self.file_path = file_path
        self.progress_callback = progress_callback

    def captured_requests(self) -> "Iterator[MitmproxyFlowWrapper]":
        # FIX: string annotation so the annotation is resolved lazily.
        with open(self.file_path, "rb") as logfile:
            logfile_size = os.path.getsize(self.file_path)
            freader = iom.FlowReader(logfile)
            try:
                for f in freader.stream():
                    if self.progress_callback:
                        self.progress_callback(logfile.tell() / logfile_size)
                    if isinstance(f, http.HTTPFlow):
                        # Flows without a response carry nothing usable; warn and skip.
                        if f.response is None:
                            print(
                                "[warn] flow without response: {}".format(f.request.url)
                            )
                            continue
                        yield MitmproxyFlowWrapper(f)
            except FlowReadException as e:
                print(f"Flow file corrupted: {e}")

    def name(self):
        return "flow"


# ---------------- web2sdk/web2swagger/swagger_util.py ----------------
# -*- coding: utf-8 -*-
import urllib
import urllib.parse  # FIX: explicit submodule import; bare `import urllib` does not bind urllib.parse
import uuid
from typing import Any, List

VERBS = [
    "add",
    "create",
    "delete",
    "get",
    "attach",
    "detach",
    "update",
    "push",
    "extendedcreate",
    "activate",
]


# generate a name for the endpoint from the path template, e.g.
# POST /api/v1/things/{id}/create -> "POST things create by id"
# (FIX: previous example comment didn't match the actual output order)
def path_template_to_endpoint_name(method, path_template):
    path_template = path_template.strip("/")
    segments = path_template.split("/")
    # pull the parameters out into a separate array
    params = []
    for idx, segment in enumerate(segments):
        if segment.startswith("{") and segment.endswith("}"):
            params.append(segment)
            segments[idx] = "{}"
    # remove them from the segments
    segments = [segment for segment in segments if segment != "{}"]
    # reverse the segments
    segments.reverse()
    name_parts = []
    # walk backwards, collecting trailing verbs plus one noun segment
    for segment in segments:
        if segment in VERBS:
            # prepend to the name_parts
            name_parts.insert(0, segment.lower())
        else:
            name_parts.insert(0, segment.lower())
            break
    # only the first parameter contributes a "by <name>" suffix
    for param in params:
        name_parts.append("by " + param.replace("{", "").replace("}", ""))
        break
    return method.upper() + " " + " ".join(name_parts)
# when given an url and its path template, generates the parameters section of the request
def url_to_params(url, path_template):
    """Build OpenAPI parameter objects for path placeholders and query keys.

    NOTE(review): the path-segment index is taken from the template but looked
    up in the (possibly longer) URL's segments, so the number/string guess can
    read the wrong segment when the URL still carries a prefix — confirm with
    callers before changing.
    """
    template_segments = path_template.strip("/").split("/")
    url_segments = url.split("?")[0].strip("/").split("/")
    params = []
    for idx, segment in enumerate(template_segments):
        if not (segment.startswith("{") and segment.endswith("}")):
            continue
        params.append(
            {
                "name": segment.replace("{", "").replace("}", ""),
                "in": "path",
                "required": True,
                "schema": {
                    "type": "number" if url_segments[idx].isdigit() else "string"
                },
            }
        )
    query_string = urllib.parse.urlparse(url).query
    if query_string:
        for key, values in urllib.parse.parse_qs(query_string).items():
            params.append(
                {
                    "name": key,
                    "in": "query",
                    "required": False,
                    "schema": {
                        "type": "number" if values[0].isdigit() else "string"
                    },
                }
            )
    return params


def request_to_headers(headers: dict[str, List[Any]], add_example: bool = False):
    """When given an url and its path template, generates the parameters section of the
    request."""
    params = []
    for key in (headers or {}):
        entry = {
            "name": key,
            "in": "header",
            "required": False,
            "schema": {"type": "number" if headers[key][0].isdigit() else "string"},
        }
        if add_example:
            entry["example"] = headers[key][0]
        params.append(entry)
    return params


def response_to_headers(headers):
    """Map response headers to OpenAPI header objects keyed by header name."""
    if not headers:
        return {}
    return {
        key: {
            "description": values[0],
            "schema": {"type": "number" if values[0].isdigit() else "string"},
        }
        for key, values in headers.items()
    }


def value_to_schema(value):
    """Infer an OpenAPI schema fragment from a sample Python value.

    Check order matters: bool is tested with isinstance only after the exact
    int/float type test, so booleans map to "boolean" rather than "number".
    Unhandled types fall through and yield None.
    """
    if type(value) is int or type(value) is float:
        return {"type": "number"}
    if isinstance(value, bool):
        return {"type": "boolean"}
    if isinstance(value, str):
        return {"type": "string"}
    if isinstance(value, list):
        # infer the item schema from the first element only
        item_schema = value_to_schema(value[0]) if value else {}
        return {"type": "array", "items": item_schema}
    if isinstance(value, dict):
        # dicts whose keys are all numeric or all UUIDs are treated as maps
        keys_are_generic = all(is_numeric_string(key) for key in value) or all(
            is_uuid(key) for key in value
        )
        if keys_are_generic and value:
            first_value = next(iter(value.values()))
            return {
                "type": "object",
                "additionalProperties": value_to_schema(first_value),
            }
        return {
            "type": "object",
            "properties": {key: value_to_schema(val) for key, val in value.items()},
        }
    if value is None:
        return {"type": "object", "nullable": True}


def is_uuid(key):
    """True when *key* is a string parseable as a UUID."""
    return isinstance(key, str) and is_valid_uuid(key)


def is_numeric_string(key):
    """True when *key* is a string composed of numeric characters."""
    return isinstance(key, str) and key.isnumeric()


def is_valid_uuid(val):
    """True when str(val) parses as a UUID, False otherwise."""
    try:
        uuid.UUID(str(val))
    except ValueError:
        return False
    return True


MAX_EXAMPLE_ARRAY_ELEMENTS = 10
MAX_EXAMPLE_OBJECT_PROPERTIES = 150


# recursively scan an example value and limit the number of elements and properties
def limit_example_size(example):
    if isinstance(example, list):
        new_list = []
        for element in example:
            if len(new_list) >= MAX_EXAMPLE_ARRAY_ELEMENTS:
                break
176 | new_list.append(limit_example_size(element)) 177 | return new_list 178 | elif isinstance(example, dict): 179 | new_dict = {} 180 | for key in example: 181 | if len(new_dict) >= MAX_EXAMPLE_OBJECT_PROPERTIES: 182 | break 183 | new_dict[key] = limit_example_size(example[key]) 184 | return new_dict 185 | else: 186 | return example 187 | --------------------------------------------------------------------------------