├── .gitignore
├── LICENSE
├── README.md
├── poetry.lock
├── pyproject.toml
└── web2sdk
    ├── __init__.py
    ├── console_util.py
    ├── main.py
    ├── swagger2sdk
    │   ├── __init__.py
    │   ├── generate_function.py
    │   ├── generate_types.py
    │   ├── main.py
    │   └── utils.py
    ├── tests
    │   └── __init__.py
    └── web2swagger
        ├── .DS_Store
        ├── __init__.py
        ├── har_capture_reader.py
        ├── main.py
        ├── mitmproxy_capture_reader.py
        └── swagger_util.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .DS_Store
6 |
7 | # Request logs and generated files
8 | */generated/
9 | *.har
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 | cover/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 | db.sqlite3-journal
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | .pybuilder/
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | # For a library or package, you might want to ignore these files since the code is
92 | # intended to run in multiple environments; otherwise, check them in:
93 | # .python-version
94 |
95 | # pipenv
96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
99 | # install all needed dependencies.
100 | #Pipfile.lock
101 |
102 | # poetry
103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104 | # This is especially recommended for binary packages to ensure reproducibility, and is more
105 | # commonly ignored for libraries.
106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107 | #poetry.lock
108 |
109 | # pdm
110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
111 | #pdm.lock
112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
113 | # in version control.
114 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
115 | .pdm.toml
116 | .pdm-python
117 | .pdm-build/
118 |
119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
120 | __pypackages__/
121 |
122 | # Celery stuff
123 | celerybeat-schedule
124 | celerybeat.pid
125 |
126 | # SageMath parsed files
127 | *.sage.py
128 |
129 | # Environments
130 | .env
131 | .venv
132 | env/
133 | venv/
134 | ENV/
135 | env.bak/
136 | venv.bak/
137 |
138 | # Spyder project settings
139 | .spyderproject
140 | .spyproject
141 |
142 | # Rope project settings
143 | .ropeproject
144 |
145 | # mkdocs documentation
146 | /site
147 |
148 | # mypy
149 | .mypy_cache/
150 | .dmypy.json
151 | dmypy.json
152 |
153 | # Pyre type checker
154 | .pyre/
155 |
156 | # pytype static type analyzer
157 | .pytype/
158 |
159 | # Cython debug symbols
160 | cython_debug/
161 |
162 | # PyCharm
163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
165 | # and can be added to the global gitignore or merged into this file. For a more nuclear
166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
167 | #.idea/
168 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Jason Fan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # 🦊 web2sdk
3 |
4 | Automatically turn third party APIs into Python SDKs
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | Web2sdk is a set of tools for reverse engineering APIs by intercepting network requests. It processes HAR files exported from Chrome devtools into an OpenAPI schema, then automatically generates a python SDK based on the schema. Each method in the python SDK corresponds to an endpoint, and includes strongly typed arguments, requests, and responses.
18 |
19 | https://github.com/user-attachments/assets/5a7f477d-76ab-46f2-9884-62dfc9f2715b
20 |
21 |
22 | ### Features
23 | - Generates an OpenAPI/Swagger yaml schema from any web-based flow
24 | - Automatically merges requests to the same endpoint
25 | - Generates pydantic classes based on OpenAPI request and response schemas
26 | - Supports `basic` and `bearer` auth schemes
27 | - Supports overriding default headers
28 |
29 | ### Example output
30 | ```python
31 | import json
32 | import http.client
33 | from urllib.parse import urlparse
34 | from pydantic import BaseModel
35 | from typing import Optional, Dict, List, Any
36 |
37 | class GetConversationsRequestParameters(BaseModel):
38 | offset: Optional[float] = None
39 | limit: Optional[float] = None
40 | order: Optional[str] = None
41 |
42 | class GetConversationsResponse(BaseModel):
43 | items: Optional[List] = None
44 | total: Optional[float] = None
45 | limit: Optional[float] = None
46 | offset: Optional[float] = None
47 | has_missing_conversations: Optional[bool] = None
48 |
49 | class ChatGPTAPI(BaseModel):
50 | hostname: str
51 | token: str
52 |
53 | def get_conversations(self, request_parameters:
54 | GetConversationsRequestParameters, *, override_headers: dict={}
55 | ) ->GetConversationsResponse:
56 | conn = http.client.HTTPSConnection(self.hostname)
57 | params = '&'.join([(k + '=' + v) for k, v in request_parameters.
58 | items()])
59 | headers = {'User-Agent': 'Web2sdk/1.0', 'Authorization': 'Bearer ' +
60 | self.token}
61 | headers.update(override_headers)
62 | conn.request('GET', '/backend-api/conversations?' + params + '',
63 | headers=headers)
64 | res = conn.getresponse()
65 | data = res.read().decode('utf-8')
66 | return json.loads(data)
67 |
68 | def post_conversation(self, request_body: PostConversationRequestBody,
69 |     *, override_headers: dict={}) ->Any:
70 | ### ...etc
71 | ```
72 |
73 | ## Usage
74 | **1. Export HAR file**
75 | * Open Chrome devtools and go to "Network".
76 | * Go through a flow on a website that triggers the requests you want to capture and reverse engineer. The more varied the requests the better, as a single request might not capture all the possible request and response schemas for a particular endpoint.
77 | * Click the button shown below to export the HAR file. Don't worry about filtering out requests; that happens in a later step.
78 | * Also compatible with [mitmweb](https://mitmproxy.org/) exports.
79 |
80 | 
81 |
82 | **2. Install web2sdk**
83 | ```
84 | $ pip install web2sdk
85 | ```
86 |
87 | **3. Generate an OpenAPI spec and SDK**
88 | ```sh
89 | $ web2sdk --requests-path <path/to/requests.har> --base-url <base_url> --sdk-name FinicSDK --auth-type bearer
90 | ```
91 | * `base-url` filters out requests whose URLs don't start with the value provided, so it should include everything up until the endpoints you want to reverse engineer (see the sketch below).
92 | * For example, `https://finic.ai/api/v1` will match only requests to v1 endpoints, while `https://finic.ai/api` will match requests to v1, v2, and any other paths after `/api`.
93 | * Generated files will be saved to `generated/<sdk_name>.yaml` and `generated/<sdk_name>.py` in the current directory by default.
94 |
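The filter is a plain string-prefix match on each captured request URL (this is how `get_matching_url` works in the capture readers); a sketch with illustrative URLs:

```python
# Illustrative sketch: base-url filtering is a string-prefix match.
url = "https://finic.ai/api/v1/connectors"
print(url.startswith("https://finic.ai/api/v1"))  # True  -> request is kept
print(url.startswith("https://finic.ai/api/v2"))  # False -> request is skipped
```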
95 | **4. Run your python SDK.**
96 | ```python
97 | from generated.FinicSDK import FinicSDK
98 |
99 | finic = FinicSDK(hostname="finic.ai", token="your_token_here")
100 | finic.get_connectors({})
101 | finic.post_message({ "message": "hi there" }, override_headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" })
102 | ```
103 | * Each method in the generated SDK corresponds to an endpoint.
104 | * You can pass in any headers you want. By default, only `Authorization` and `User-Agent` headers are included.
105 | * Some methods accept query parameters and/or request bodies. Inspect the generated function to see what arguments it takes (see the sketch below).
106 |
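A minimal sketch of passing query parameters, reusing the `FinicSDK` example above; the `get_connectors` method and its `limit`/`offset` parameters are illustrative and depend on what your capture contains:

```python
from generated.FinicSDK import FinicSDK

finic = FinicSDK(hostname="finic.ai", token="your_token_here")
# Query parameters are joined as key=value pairs, so pass values as strings.
connectors = finic.get_connectors({"limit": "10", "offset": "0"})
print(connectors)
```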
107 | ### Other Options
108 | ```--auth-type```
109 | * Optional, defaults to `none`. If set, the generated SDK class will expect a username and password for basic auth, or a token for bearer auth.
110 |
111 | ```--output```
112 | * Optional, defaults to `generated/` in the current directory. Specify a directory for the generated `.yaml` and `.py` files to be saved.
113 |
114 | ```--interactive```
115 | * Run in interactive mode. Not well supported.
116 |
117 | ## 🚧 Planned Improvements
118 | - Support for oauth and custom auth schemes
119 | - Automatic auth token refresh
120 | - Support for templated API paths (e.g. `https://api.claude.ai/api/organizations/{organization_id}/chat_conversations`)
121 | - Use LLMs to generate more readable class names, example request payloads, and other tasks that require fuzzy reasoning
122 | - Include a linter/formatter to make the generated SDK more readable
123 |
124 | ### Acknowledgements
125 | Web2sdk includes a modified version of [mitmproxy2swagger](https://github.com/alufers/mitmproxy2swagger).
126 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "web2sdk"
3 | version = "0.0.2"
4 | description = "Reverse engineer third party APIs into python SDKs"
5 | authors = ["jasonwcfan"]
6 | readme = "README.md"
7 | packages = [
8 | { include = "web2sdk" }
9 | ]
10 |
11 | [tool.poetry.dependencies]
12 | python = "^3.10"
13 | mitmproxy = "^10.1.1"
14 | "ruamel.yaml" = ">=0.17.32,<0.19.0"
15 | json-stream = "^2.3.2"
16 | msgpack = "^1.0.7"
17 | astor = "^0.8.1"
18 | pydantic = "^2.8.2"
19 | pyyaml = "^6.0.2"
20 | requests = "^2.32.3"
21 | python-dotenv = "^1.0.1"
22 |
23 | [tool.poetry.scripts]
24 | web2sdk = "web2sdk.main:main"
25 |
26 |
27 | [build-system]
28 | requires = ["poetry-core"]
29 | build-backend = "poetry.core.masonry.api"
30 |
--------------------------------------------------------------------------------
/web2sdk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/__init__.py
--------------------------------------------------------------------------------
/web2sdk/console_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 |
4 | ANSI_RGB = "\033[38;2;{};{};{}m"
5 | ANSI_RGB_BG = "\033[48;2;{};{};{}m"
6 | ANSI_RED = "\033[31m"
7 | ANSI_RESET = "\033[0m"
8 |
9 | RAINBOW_COLORS = [
10 | (255, 0, 0),
11 | (255, 127, 0),
12 | (255, 255, 0),
13 | (127, 255, 0),
14 | (0, 255, 0),
15 | (0, 255, 127),
16 | (0, 255, 255),
17 | (0, 127, 255),
18 | (0, 0, 255),
19 | (127, 0, 255),
20 | (255, 0, 255),
21 | (255, 0, 127),
22 | ]
23 |
24 |
25 | def rgb_interpolate(start, end, progress):
26 | return tuple(int(start[i] + (end[i] - start[i]) * progress) for i in range(3))
27 |
28 |
29 | # take a value from 0 to 1 and return an interpolated color from the rainbow
30 | def rainbow_at_position(progress):
31 | idx_a = int(progress * float(len(RAINBOW_COLORS) - 1))
32 | idx_b = idx_a + 1
33 | return rgb_interpolate(
34 | RAINBOW_COLORS[idx_a],
35 | RAINBOW_COLORS[idx_b],
36 | progress * float(len(RAINBOW_COLORS) - 1) - idx_a,
37 | )
38 |
39 |
40 | def print_progress_bar(progress=0.0, label=""):
41 | sys.stdout.write("\r")
42 | progress_bar_contents = ""
43 | PROGRESS_LENGTH = 30
44 | blocks = ["▉", "▊", "▋", "▌", "▍", "▎", "▏"]
45 |
46 | for i in range(PROGRESS_LENGTH):
47 | interpolated = rainbow_at_position(i / PROGRESS_LENGTH)
48 | # check if should print a full block
49 | if i < int(progress * PROGRESS_LENGTH):
50 | interpolated_2nd_half = rainbow_at_position((i + 0.5) / PROGRESS_LENGTH)
51 | progress_bar_contents += ANSI_RGB.format(*interpolated)
52 | progress_bar_contents += ANSI_RGB_BG.format(*interpolated_2nd_half)
53 | progress_bar_contents += "▌"
54 | # check if should print a non-full block
55 | elif i < int((progress * PROGRESS_LENGTH) + 0.5):
56 | progress_bar_contents += ANSI_RESET
57 | progress_bar_contents += ANSI_RGB.format(*interpolated)
58 | progress_bar_contents += blocks[
59 | int((progress * PROGRESS_LENGTH) + 0.5) - i - 1
60 | ]
61 | # otherwise, print a space
62 | else:
63 | progress_bar_contents += ANSI_RESET
64 | progress_bar_contents += " "
65 |
66 | progress_bar_contents += ANSI_RESET
67 | sys.stdout.write("{} [{}] {:.1f}%".format(label, progress_bar_contents, progress * 100))
68 | sys.stdout.flush()
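A minimal smoke test for this progress bar, assuming the package is installed; the label and step count are arbitrary:

```python
import time

from web2sdk import console_util

# Redraws the 30-character rainbow bar in place on one terminal line.
for step in range(100):
    console_util.print_progress_bar(step / 100, "Demo")
    time.sleep(0.01)
print()
```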
--------------------------------------------------------------------------------
/web2sdk/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | from typing import Any, Optional, Sequence, Union
5 | from web2sdk.web2swagger.main import main as web2swagger_main
6 | from web2sdk.swagger2sdk.main import construct_sdk
7 | from web2sdk import console_util
8 |
9 | def progress_callback(progress):
10 | console_util.print_progress_bar(progress, "Generating SDK... ")
11 |
12 | def main():
13 | parser = argparse.ArgumentParser(
14 |         description="Converts a mitmproxy dump file or HAR into an OpenAPI schema and a generated python SDK."
15 | )
16 | parser.add_argument(
17 | "-i",
18 | "--interactive",
19 | help="Run in interactive mode",
20 | action="store_true",
21 | required=False,
22 | )
23 |
24 | parser.add_argument(
25 | "-r",
26 | "--requests-path",
27 | help="Path to a mitmproxy dump file or HAR",
28 | required=False,
29 | )
30 |
31 | parser.add_argument(
32 | "-b",
33 | "--base-url",
34 | help="Base url for the API to reverse engineer",
35 | required=False,
36 | )
37 |
38 | parser.add_argument(
39 | "-a",
40 | "--auth-type",
41 | help="Auth type to determine how the SDK should handle auth. Possible values: basic, bearer, none.",
42 | default="none",
43 | required=False,
44 | )
45 |
46 | parser.add_argument(
47 | "-s",
48 | "--sdk-name",
49 | help="Name for the SDK class. Will also be used as the filename for the OpenAPI schema.",
50 | required=False,
51 | )
52 |
53 | parser.add_argument(
54 | "-o",
55 | "--output",
56 | help="Path to the directory where generated files should be saved",
57 | default="generated",
58 | required=False,
59 | )
60 |
61 | args = parser.parse_args()
62 | output_path = args.output.rstrip("/")
63 |
64 | if not args.interactive:
65 | if not args.requests_path or not args.sdk_name or not args.base_url:
66 | parser.error("--requests-path, --sdk-name, and --base-url are required when not running in --interactive mode.")
67 | if args.auth_type and args.auth_type not in ["basic", "bearer", "none"]:
68 |             parser.error("--auth-type must be one of 'basic', 'bearer', or 'none'.")
69 |
70 | openapi_path = f"{output_path}/{args.sdk_name}.yaml"
71 | sdk_path = f"{output_path}/{args.sdk_name}.py"
72 | os.makedirs(output_path, exist_ok=True)
73 |
74 | print("\n")
75 | web2swagger_main(args.sdk_name, ["--input", args.requests_path, "--output", openapi_path, "--api-prefix", args.base_url])
76 | print("OpenAPI schema generated successfully at: ", openapi_path)
77 | print("\n")
78 | construct_sdk(openapi_path, args.sdk_name, output_path, auth_type=args.auth_type, progress_callback=progress_callback)
79 | print(" Done!")
80 | sys.stdout.write(f"SDK generated successfully at: {sdk_path}")
81 | else:
82 | while True:
83 | requests_path = input("Enter the path to the mitmproxy dump file or HAR: ")
84 | base_url = input("Enter the base URL for the API (e.g. https://api.finic.ai/v1): ")
85 | sdk_name = input("Enter a name for the generated SDK (e.g. FinicAPI): ")
86 |
87 | use_auth = input("Does this API require authentication? (y/n): ")
88 | while use_auth.lower() not in ["y", "n", "yes", "no"]:
89 | print("Invalid input. Please enter 'y' or 'n'.")
90 | use_auth = input("Does this API require authentication? (y/n): ")
91 |
92 |
93 | if use_auth.lower() in ["y", "yes", ""]:
94 | auth_type = input("What type of authentication does this API use? (basic/bearer): ")
95 | while auth_type not in ["basic", "bearer"]:
96 | print("Invalid auth type. Please enter 'basic' or 'bearer'.")
97 | auth_type = input("What type of authentication does this API use? (basic/bearer): ")
98 | elif use_auth.lower() in ["n", "no"]:
99 | auth_type = "none"
100 |
101 |             openapi_path = f"{output_path}/{sdk_name}.yaml"
102 |             sdk_path = f"{output_path}/{sdk_name}.py"
103 |             os.makedirs(output_path, exist_ok=True)
104 |             web2swagger_main(sdk_name, ["--input", requests_path, "--output", openapi_path, "--api-prefix", base_url])
105 |             print("OpenAPI schema generated successfully at: ", openapi_path)
106 |             construct_sdk(openapi_path, sdk_name, output_path, auth_type=auth_type, progress_callback=progress_callback)
107 |             print(" Done!")
108 |             sys.stdout.write(f"SDK generated successfully at: {sdk_path}")
109 |
110 | if __name__ == "__main__":
111 | main()
112 |
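Both stages can also be driven programmatically, mirroring how `main()` wires them together; a sketch where `capture.har` and the finic.ai URL are illustrative:

```python
import os

from web2sdk.swagger2sdk.main import construct_sdk
from web2sdk.web2swagger.main import main as web2swagger_main

# Generate the OpenAPI schema, then the SDK, into ./generated.
os.makedirs("generated", exist_ok=True)
web2swagger_main("FinicSDK", [
    "--input", "capture.har",
    "--output", "generated/FinicSDK.yaml",
    "--api-prefix", "https://finic.ai/api/v1",
])
construct_sdk("generated/FinicSDK.yaml", "FinicSDK", "generated", auth_type="bearer")
```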
--------------------------------------------------------------------------------
/web2sdk/swagger2sdk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/swagger2sdk/__init__.py
--------------------------------------------------------------------------------
/web2sdk/swagger2sdk/generate_function.py:
--------------------------------------------------------------------------------
1 | import ast
2 | from web2sdk.swagger2sdk.utils import AuthType, check_content_type, dash_to_snake
3 | from typing import Tuple, List
4 | from urllib.parse import urlparse
5 |
6 | def content_type_to_ast_node(content_type: str, return_type: str) -> ast.Call:
7 | if check_content_type(content_type, ['application/json', 'application/x-www-form-urlencoded']):
8 | # json.loads(data)
9 | result_node = ast.Call(
10 | func=ast.Attribute(
11 | value=ast.Name(id='json', ctx=ast.Load()),
12 | attr='loads',
13 | ctx=ast.Load()
14 | ),
15 | args=[ast.Name(id='data', ctx=ast.Load())],
16 | keywords=[]
17 | )
18 | else:
19 | result_node = ast.Name(id='data', ctx=ast.Load())
20 | return result_node
21 |
22 | # Fallback in case a class could not be created for a particular endpoint. Return a primitive type instead.
23 | def get_return_type(content_type: str) -> str:
24 | if check_content_type(content_type, ['application/json']):
25 | return 'dict'
26 | elif check_content_type(content_type, ['text/html', 'text/plain']):
27 | return 'str'
28 | else:
29 | return 'Any'
30 |
31 | def generate_function_for_endpoint(endpoint: dict, base_url: str, auth_type: AuthType, types: Tuple[ast.ClassDef]) -> ast.FunctionDef:
32 | # Extract endpoint details
33 | request_path: str = endpoint['path']
34 | request_name: str = endpoint['name']
35 | request_parameters: dict = endpoint['parameters']
36 | request_method: str = endpoint['method']
37 | request_schema: dict = endpoint['request_body']
38 | request_content_type: str = next(iter(request_schema['content'].keys()), None) if request_schema else None
39 | response_content: dict = endpoint['responses'].get('200', {}).get('content', {})
40 | response_content_type: str = next(iter(response_content.keys()), "")
41 |
42 | request_parameters_class_name = types[0].name if types[0] else 'dict'
43 | request_body_class_name = types[1].name if types[1] else 'dict'
44 | response_class_name = types[2].name if types[2] else get_return_type(response_content_type)
45 |
46 | # Construct the function arguments
47 | args = ast.arguments(
48 | args=[arg for arg in [
49 | ast.arg(arg='self', annotation=None),
50 | ast.arg(arg='request_parameters', annotation=ast.Name(id=request_parameters_class_name, ctx=ast.Load())) if request_parameters else None,
51 | ast.arg(arg='request_body', annotation=ast.Name(id=request_body_class_name, ctx=ast.Load())) if request_schema else None
52 | ] if arg is not None],
53 | vararg=None,
54 | kwonlyargs=[ast.arg(arg='override_headers', annotation=ast.Name(id='dict', ctx=ast.Load()))],
55 | kw_defaults=[ast.Dict(keys=[], values=[])],
56 | kwarg=None,
57 | defaults=[]
58 | )
59 |
60 | # Return annotation
61 | return_annotation = ast.Name(id=response_class_name, ctx=ast.Load())
62 |
63 | # Set up http.client connection
64 | # conn = http.client.HTTPSConnection(self.hostname)
65 | http_conn_assign = ast.Assign(
66 | targets=[ast.Name(id='conn', ctx=ast.Store())],
67 | value=ast.Call(
68 | func=ast.Attribute(
69 | value=ast.Name(id='http.client', ctx=ast.Load()),
70 | attr='HTTPSConnection',
71 | ctx=ast.Load()
72 | ),
73 | args=[ast.Attribute(
74 | value=ast.Name(id='self', ctx=ast.Load()),
75 | attr='hostname',
76 | ctx=ast.Load()
77 | )],
78 | keywords=[]
79 | )
80 | )
81 |
82 | # Prepare the payload, depending on the request content type
83 | if request_schema:
84 | if check_content_type(request_content_type, ['application/json', 'application/x-www-form-urlencoded']):
85 | payload_assign = ast.Assign(
86 | targets=[ast.Name(id='payload', ctx=ast.Store())],
87 | value=ast.Call(
88 | func=ast.Attribute(
89 | value=ast.Name(id='json', ctx=ast.Load()),
90 | attr='dumps',
91 | ctx=ast.Load()
92 | ),
93 | args=[ast.Name(id='request_body', ctx=ast.Load())],
94 | keywords=[]
95 | )
96 | )
97 | else:
98 | payload_assign = ast.Assign(
99 | targets=[ast.Name(id='payload', ctx=ast.Store())],
100 | value=ast.Name(id='request_body', ctx=ast.Load())
101 | )
102 |
103 | # Prepare headers
104 | header_keys = [ast.Constant(value='User-Agent')]
105 | header_values = [ast.Constant(value='Web2sdk/1.0')]
106 | if auth_type == AuthType.BASIC.value:
107 | header_keys.append(ast.Constant(value='Authorization'))
108 | header_values.append(
109 | ast.BinOp(
110 | left=ast.Constant(value='Basic '),
111 | op=ast.Add(),
112 | right=ast.Call(
113 | func=ast.Name(id='base64.b64encode', ctx=ast.Load()),
114 | args=[ast.BinOp(
115 | left=ast.BinOp(
116 | left=ast.Name(id='self.username', ctx=ast.Load()),
117 | op=ast.Add(),
118 | right=ast.Constant(value=':')
119 | ),
120 | op=ast.Add(),
121 | right=ast.Name(id='self.password', ctx=ast.Load())
122 | )],
123 | keywords=[]
124 | )
125 | )
126 | )
127 | elif auth_type == AuthType.BEARER.value:
128 | header_keys.append(ast.Constant(value='Authorization'))
129 | header_values.append(ast.BinOp(
130 | left=ast.Constant(value='Bearer '),
131 | op=ast.Add(),
132 | right=ast.Name(id='self.token', ctx=ast.Load())
133 | ))
134 |
135 | headers_assign = ast.Assign(
136 | targets=[ast.Name(id='headers', ctx=ast.Store())],
137 | value=ast.Dict(
138 | keys=header_keys,
139 | values=header_values
140 | )
141 | )
142 |
143 | # Update headers with override headers
144 | headers_update = ast.Expr(
145 | value=ast.Call(
146 | func=ast.Attribute(
147 | value=ast.Name(id='headers', ctx=ast.Load()),
148 | attr='update',
149 | ctx=ast.Load()
150 | ),
151 | args=[ast.Name(id='override_headers', ctx=ast.Load())],
152 | keywords=[]
153 | )
154 | )
155 |
156 | # Prepare the request params
157 | if request_parameters:
158 | parameter_assign = ast.Assign(
159 | targets=[ast.Name(id='params', ctx=ast.Store())],
160 | value=ast.Call(
161 | func=ast.Attribute(
162 | value=ast.Str(s="&"),
163 | attr="join",
164 | ctx=ast.Load()
165 | ),
166 | args=[ast.ListComp(
167 | elt=ast.BinOp(
168 | left=ast.BinOp(
169 | left=ast.Name(id='k', ctx=ast.Load()),
170 | op=ast.Add(),
171 | right=ast.Constant(value="=")
172 | ),
173 | op=ast.Add(),
174 | right=ast.Name(id='v', ctx=ast.Load())
175 | ),
176 | generators=[
177 | ast.comprehension(
178 | target=ast.Tuple(elts=[
179 | ast.Name(id='k', ctx=ast.Store()),
180 | ast.Name(id='v', ctx=ast.Store())], ctx=ast.Store()),
181 | iter=ast.Call(
182 | func=ast.Attribute(
183 | value=ast.Name(id='request_parameters', ctx=ast.Load()),
184 | attr='items',
185 | ctx=ast.Load()
186 | ),
187 | args=[], keywords=[]
188 | ),
189 | ifs=[], is_async=0
190 | )
191 | ]
192 | )],
193 | keywords=[]
194 | )
195 | )
196 | else:
197 | parameter_assign = ast.Assign(
198 | targets=[ast.Name(id='params', ctx=ast.Store())],
199 | value=ast.Constant(value="")
200 | )
201 |
202 | # Call the connection request
203 | # conn.request("GET", "/backend-api/conversations" + "?" + params, body=payload, headers=headers)
204 | full_url = urlparse(base_url + request_path)
205 | http_path = full_url.path + "?" if request_parameters else full_url.path
206 | conn_request = ast.Expr(
207 | value=ast.Call(
208 | func=ast.Attribute(
209 | value=ast.Name(id='conn', ctx=ast.Load()),
210 | attr='request',
211 | ctx=ast.Load()
212 | ),
213 | args=[
214 | ast.Constant(value=request_method.upper()),
215 | ast.BinOp(
216 | left=ast.BinOp(
217 | left=ast.Constant(value=http_path),
218 | op=ast.Add(),
219 | right=ast.Name(id='params', ctx=ast.Load())
220 | ),
221 | op=ast.Add(),
222 | right=ast.Constant(value="")
223 | )
224 | ],
225 | keywords=[kw for kw in [
226 | ast.keyword(arg='body', value=ast.Name(id='payload', ctx=ast.Load())) if request_schema else None,
227 | ast.keyword(arg='headers', value=ast.Name(id='headers', ctx=ast.Load()))
228 | ] if kw is not None]
229 | )
230 | )
231 |
232 | # Get the response
233 | # res = conn.getresponse()
234 | response_assign = ast.Assign(
235 | targets=[ast.Name(id='res', ctx=ast.Store())],
236 | value=ast.Call(
237 | func=ast.Attribute(
238 | value=ast.Name(id='conn', ctx=ast.Load()),
239 | attr='getresponse',
240 | ctx=ast.Load()
241 | ),
242 | args=[],
243 | keywords=[]
244 | )
245 | )
246 |
247 | # Read the response
248 | # data = res.read().decode("utf-8")
249 | data_assign = ast.Assign(
250 | targets=[ast.Name(id='data', ctx=ast.Store())],
251 | value=ast.Call(
252 | func=ast.Attribute(
253 | value=ast.Call(
254 | func=ast.Attribute(
255 | value=ast.Name(id='res', ctx=ast.Load()),
256 | attr='read',
257 | ctx=ast.Load()
258 | ),
259 | args=[],
260 | keywords=[]
261 | ),
262 | attr='decode',
263 | ctx=ast.Load()
264 | ),
265 | args=[ast.Str(s='utf-8')],
266 | keywords=[]
267 | )
268 | )
269 |
270 | # Return the decoded data
271 | return_stmt = ast.Return(
272 | value=content_type_to_ast_node(response_content_type, response_class_name)
273 | )
274 |
275 | # Construct function body with the URL and response assignments
276 | function_body = [
277 | http_conn_assign,
278 | parameter_assign,
279 | headers_assign,
280 | headers_update,
281 | payload_assign if request_schema else None,
282 | conn_request,
283 | response_assign,
284 | data_assign,
285 | return_stmt
286 | ]
287 |
288 | # Remove any None values from the function body
289 | function_body = [stmt for stmt in function_body if stmt is not None]
290 |
291 | # Create the function definition
292 | function_def = ast.FunctionDef(
293 | name=dash_to_snake(request_name),
294 | args=args,
295 | body=function_body,
296 | decorator_list=[],
297 | returns=return_annotation
298 | )
299 |
300 | return function_def
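A sketch of rendering a single endpoint with this module, using a toy endpoint dict in the shape `construct_sdk` builds from the OpenAPI paths (the path and hostname are illustrative):

```python
import ast
import astor
from web2sdk.swagger2sdk.generate_function import generate_function_for_endpoint
from web2sdk.swagger2sdk.generate_types import generate_types

# Toy endpoint with no query parameters or request body.
endpoint = {
    'path': '/conversations',
    'name': 'get_conversations',
    'method': 'get',
    'parameters': None,
    'request_body': None,
    'responses': {'200': {'content': {'application/json': {'schema': {'type': 'object'}}}}},
}
types = generate_types(endpoint)
fn = generate_function_for_endpoint(endpoint, 'https://finic.ai/api', 'bearer', types)
print(astor.to_source(ast.Module(body=[fn], type_ignores=[])))
```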
--------------------------------------------------------------------------------
/web2sdk/swagger2sdk/generate_types.py:
--------------------------------------------------------------------------------
1 | import ast
2 | from pydantic import BaseModel, ConfigDict, Field, ValidationError
3 | from abc import ABC, abstractmethod
4 | from typing import List, Optional, Dict, Any, Tuple, Type, Union
5 | from enum import Enum
6 | from web2sdk.swagger2sdk.utils import YAMLToPydanticType, check_content_type, snake_to_pascal, dash_to_snake, strip_special_chars
7 |
8 | class ClassField(BaseModel):
9 | field_name: str
10 | field_type: str
11 | required: bool
12 |
13 | def path_to_class_name(path: str) -> str:
14 | """
15 | Converts a URL path to a class name.
16 | """
17 | return strip_special_chars(snake_to_pascal(dash_to_snake(path)))
18 |
19 |
20 | def generate_class_def(class_name: str, fields: List[ClassField]) -> ast.ClassDef:
21 | """
22 | Generates a Pydantic class definition with the given class name and fields.
23 | """
24 | # Create the class definition
25 | class_def = ast.ClassDef(
26 | name=class_name,
27 | bases=[ast.Name(id='BaseModel', ctx=ast.Load())], # Inherit from Pydantic's BaseModel
28 | body=[],
29 | decorator_list=[]
30 | )
31 |
32 | # Add each field as a class attribute
33 | for field in fields:
34 | field_name, field_type, required = strip_special_chars(dash_to_snake(field.field_name)), field.field_type, field.required
35 |
36 | # If the field is not required, wrap the type in Optional
37 | if not required:
38 | field_annotation = ast.Subscript(
39 | value=ast.Name(id='Optional', ctx=ast.Load()),
40 | slice=ast.Index(value=ast.Name(id=field_type, ctx=ast.Load())),
41 | ctx=ast.Load()
42 | )
43 | else:
44 | field_annotation = ast.Name(id=field_type, ctx=ast.Load())
45 |
46 | # Add the field to the class body. If the field is not required, assign a default value of None.
47 | field_node = ast.AnnAssign(
48 | target=ast.Name(id=field_name, ctx=ast.Store()),
49 | annotation=field_annotation,
50 | value=None if required else ast.Constant(value=None),
51 | simple=True
52 | )
53 |
54 | class_def.body.append(field_node)
55 |
56 | return class_def
57 |
58 | def parse_request_body(request_body: dict) -> List[ClassField]:
59 | if not request_body:
60 | return []
61 | fields = []
62 | content: dict = request_body['content']
63 | required: bool = request_body.get('required', False)
64 |
65 | for content_type, schema in content.items():
66 | if check_content_type(content_type, ['application/json', 'application/x-www-form-urlencoded']):
67 | schema = schema['schema']
68 |             schema_type = YAMLToPydanticType[schema.get('type', 'unknown')]
69 | if schema.get('type') == 'object':
70 | required_properties: List[str] = schema.get('required', [])
71 | properties: dict = schema.get('properties', {})
72 | for name, prop in properties.items():
73 |                     field_type: str = YAMLToPydanticType[prop.get('type', 'unknown')]
74 | field_required: bool = name in required_properties
75 | fields.append(ClassField(field_name=name, field_type=field_type, required=field_required))
76 | else:
77 | fields.append(ClassField(field_name='data', field_type=schema_type, required=required))
78 | return fields
79 |
80 | def parse_response_body(response_body: dict) -> List[ClassField]:
81 | if not response_body:
82 | return []
83 | fields = []
84 | content: dict = response_body['content']
85 | for content_type, schema in content.items():
86 | if content_type == 'application/json':
87 | schema = schema['schema']
88 |             schema_type = schema.get('type', 'unknown')
89 | if schema_type == 'object':
90 | required_properties: List[str] = schema.get('required', [])
91 | properties: dict = schema.get('properties', {})
92 | for name, prop in properties.items():
93 |                     field_type: str = YAMLToPydanticType[prop.get('type', 'unknown')]
94 | is_required: bool = name in required_properties
95 | fields.append(ClassField(field_name=name, field_type=field_type, required=is_required))
96 | elif schema_type == 'array':
97 | item_type = schema['items'].get('type', 'unknown')
98 | fields.append(ClassField(field_name='data', field_type=f'List[{YAMLToPydanticType[item_type]}]', required=True))
99 | else:
100 | fields.append(ClassField(field_name='data', field_type=YAMLToPydanticType[schema_type], required=True))
101 | return fields
102 |
103 | def generate_types(endpoint: dict) -> Tuple[ast.ClassDef]:
104 | # Extract endpoint details
105 | request_path: str = endpoint['path']
106 | request_name: str = endpoint['name']
107 | request_method: str = endpoint['method']
108 | request_parameters: dict = endpoint['parameters']
109 | request_body: dict = endpoint['request_body']
110 | responses: dict = endpoint['responses']
111 |
112 |
113 | # Generate Pydantic class for request parameters
114 | request_parameters_class = None
115 | if request_parameters:
116 | request_parameters_fields = []
117 | for param in request_parameters:
118 | field_type = YAMLToPydanticType[param['schema']['type']]
119 | field_name = param['name']
120 | required = param.get('required', False)
121 | request_parameters_fields.append(ClassField(field_name=field_name, field_type=field_type, required=required))
122 | if len(request_parameters_fields) > 0:
123 | request_parameters_class = generate_class_def(f'{path_to_class_name(request_name)}RequestParameters', request_parameters_fields)
124 |
125 | # Generate Pydantic class for request body
126 | request_body_class = None
127 | if request_body:
128 | request_body_fields = parse_request_body(request_body)
129 | if len(request_body_fields) > 0:
130 | request_body_class = generate_class_def(f'{path_to_class_name(request_name)}RequestBody', request_body_fields)
131 |
132 | # Generate Pydantic classes for responses
133 | successful_response = responses.get('200')
134 | response_class = None
135 | if successful_response:
136 | response_fields = parse_response_body(successful_response)
137 | if len(response_fields) > 0:
138 | response_class = generate_class_def(f'{path_to_class_name(request_name)}Response', response_fields)
139 |
140 |
141 | return (request_parameters_class, request_body_class, response_class)
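A small sketch of `generate_class_def` in isolation, rendered with `astor` the same way `construct_sdk` renders the final module; the class and field names are arbitrary:

```python
import ast
import astor
from web2sdk.swagger2sdk.generate_types import ClassField, generate_class_def

fields = [
    ClassField(field_name='id', field_type='str', required=True),
    ClassField(field_name='limit', field_type='float', required=False),
]
class_def = generate_class_def('ExampleResponse', fields)
print(astor.to_source(ast.Module(body=[class_def], type_ignores=[])))
# class ExampleResponse(BaseModel):
#     id: str
#     limit: Optional[float] = None
```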
--------------------------------------------------------------------------------
/web2sdk/swagger2sdk/main.py:
--------------------------------------------------------------------------------
1 |
2 | import ast
3 | import astor
4 | import yaml
5 | from pydantic import BaseModel, ConfigDict, Field, ValidationError
6 | from abc import ABC, abstractmethod
7 | from typing import List, Optional, Dict, Any, Tuple, Type, Union, Callable
8 | from enum import Enum
9 | from web2sdk.swagger2sdk.generate_function import generate_function_for_endpoint
10 | from web2sdk.swagger2sdk.generate_types import generate_types, generate_class_def, ClassField
11 | from web2sdk.swagger2sdk.utils import AuthType, HTTPMethod
12 |
13 |
14 |
15 | def load_yaml(file_path):
16 | with open(file_path, 'r') as file:
17 | return yaml.safe_load(file)
18 |
19 |
20 | def generate_sdk_class(sdk_name: str, auth_type: AuthType) -> ast.ClassDef:
21 | # SDK should accept different arguments depending on the auth type
22 | auth_arguments = []
23 | fields = [
24 | ClassField(field_name='hostname', field_type='str', required=True)
25 | ]
26 | if auth_type == AuthType.BASIC.value:
27 | fields.extend([ClassField(field_name='username', field_type='str', required=True), ClassField(field_name='password', field_type='str', required=True)])
28 | elif auth_type == AuthType.BEARER.value:
29 | fields.extend([ClassField(field_name='token', field_type='str', required=True)])
30 |
31 | class_def = generate_class_def(sdk_name, fields)
32 |
33 | return class_def
34 |
35 | def save_class_to_file(module: ast.Module, file_path: str) -> None:
36 | code = astor.to_source(module)
37 | with open(file_path, 'w') as file:
38 | file.write(code)
39 |
40 | def generate_imports() -> List[ast.Import]:
41 | imports = [
42 |         ast.Import(names=[ast.alias(name='json', asname=None), ast.alias(name='base64', asname=None)]),  # base64 is used by generated basic-auth headers
43 | ast.Import(names=[ast.alias(name='http.client', asname=None)]),
44 | ast.ImportFrom(module='urllib.parse', names=[ast.alias(name='urlparse', asname=None)], level=0),
45 | ast.ImportFrom(module='pydantic', names=[ast.alias(name='BaseModel', asname=None)], level=0),
46 | ast.ImportFrom(module='typing', names=[
47 | ast.alias(name='Optional', asname=None),
48 | ast.alias(name='Dict', asname=None),
49 | ast.alias(name='List', asname=None),
50 | ast.alias(name='Any', asname=None)], level=0),
51 | ]
52 | return imports
53 |
54 | def construct_sdk(swagger_path: str,
55 | sdk_name: str,
56 | output_path: str,
57 | base_url: str = None,
58 | auth_type: AuthType = AuthType.NONE,
59 | progress_callback: Callable[[float], None] = None) -> None:
60 | swagger = load_yaml(swagger_path)
61 | base_url = swagger.get('servers', [{}])[0].get('url') if not base_url else base_url
62 | if not base_url:
63 | raise ValueError('Base URL is required, but was not provided in the OpenAPI spec or as an argument.')
64 |
65 | paths = swagger.get('paths', {})
66 | imports = generate_imports()
67 | class_def = generate_sdk_class(sdk_name, auth_type)
68 | types: List[ast.ClassDef] = []
69 |
70 | # Iterate through each path and method. Generate functions to call each endpoint, and types to validate request/response bodies
71 | for index, (path, methods) in enumerate(paths.items()):
72 | for method, details in methods.items():
73 | endpoint = {
74 | 'path': path,
75 | 'method': method,
76 | 'name': f"{method.lower()}{path.replace('/', '_').replace('{', '').replace('}', '')}",
77 | 'parameters': details.get('parameters', None),
78 | 'request_body': details.get('requestBody', None),
79 | 'responses': details.get('responses', None)
80 | }
81 | _types = generate_types(endpoint)
82 | _function = generate_function_for_endpoint(endpoint, base_url, auth_type, _types)
83 | class_def.body.append(_function)
84 | types.extend([t for t in _types if t is not None])
85 | if progress_callback:
86 | progress_callback(float(index+1) / len(paths))
87 |
88 | # Combine the imports, the SDK class, and generated types into a single module
89 | body = imports + types + [class_def]
90 | class_module = ast.Module(body=body, type_ignores=[])
91 | save_class_to_file(class_module, f'{output_path}/{sdk_name}.py')
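A sketch that renders just the SDK shell (imports plus the auth-aware class) without a full spec; `FinicSDK` is the name used in the README examples:

```python
import ast
import astor
from web2sdk.swagger2sdk.main import generate_imports, generate_sdk_class

body = generate_imports() + [generate_sdk_class('FinicSDK', 'bearer')]
print(astor.to_source(ast.Module(body=body, type_ignores=[])))
# Prints the import block followed by:
# class FinicSDK(BaseModel):
#     hostname: str
#     token: str
```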
--------------------------------------------------------------------------------
/web2sdk/swagger2sdk/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | from pydantic import BaseModel, ConfigDict, Field, ValidationError
3 | from abc import ABC, abstractmethod
4 | from typing import List, Optional, Dict, Any, Tuple, Type, Union
5 | from enum import Enum
6 |
7 | class AuthType(Enum):
8 | BASIC = 'basic'
9 | BEARER = 'bearer'
10 | NONE = 'none'
11 |
12 | class HTTPMethod(Enum):
13 | GET = 'GET'
14 | POST = 'POST'
15 | PUT = 'PUT'
16 | PATCH = 'PATCH'
17 | DELETE = 'DELETE'
18 |
19 | YAMLToPydanticType = {
20 | 'string': 'str',
21 | 'number': 'float',
22 | 'integer': 'int',
23 | 'boolean': 'bool',
24 | 'array': 'List',
25 | 'object': 'Dict',
26 | 'unknown': 'Any'
27 | }
28 |
29 | def check_content_type(input_string: str, patterns: List[str]) -> bool:
30 | regex_pattern = "|".join(re.escape(pattern) for pattern in patterns)
31 | if re.search(regex_pattern, input_string):
32 | return True
33 | return False
34 |
35 | def snake_to_pascal(snake_str: str) -> str:
36 | components = snake_str.split('_')
37 | return ''.join(x.capitalize() for x in components)
38 |
39 | def dash_to_snake(dash_str: str) -> str:
40 | return dash_str.replace('-', '_')
41 |
42 | def strip_special_chars(input_string: str) -> str:
43 | return re.sub(r'[^\w_]+', '', input_string)
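A quick sketch of how these helpers compose; the inputs are arbitrary:

```python
from web2sdk.swagger2sdk.utils import (
    check_content_type,
    dash_to_snake,
    snake_to_pascal,
    strip_special_chars,
)

print(check_content_type('application/json; charset=utf-8', ['application/json']))  # True
print(snake_to_pascal(dash_to_snake('get-conversations')))  # GetConversations
print(strip_special_chars('user.id[0]'))  # userid0
```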
--------------------------------------------------------------------------------
/web2sdk/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/tests/__init__.py
--------------------------------------------------------------------------------
/web2sdk/web2swagger/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/web2swagger/.DS_Store
--------------------------------------------------------------------------------
/web2sdk/web2swagger/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/web2swagger/__init__.py
--------------------------------------------------------------------------------
/web2sdk/web2swagger/har_capture_reader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | from base64 import b64decode
4 | from typing import Iterator, Union
5 |
6 | import json_stream
7 |
8 |
9 | # a heuristic to determine if a file is a har archive
10 | def har_archive_heuristic(file_path: str) -> int:
11 | val = 0
12 | # if has the har extension
13 | if file_path.endswith(".har"):
14 | val += 25
15 | # read the first 2048 bytes
16 | with open(file_path, "rb") as f:
17 | data = f.read(2048)
18 |     # if the file contains only ascii characters after removing EOL characters
19 | if (
20 | data.decode("utf-8", "ignore")
21 | .replace("\r", "")
22 | .replace("\n", "")
23 | .isprintable()
24 | is True
25 | ):
26 | val += 25
27 | # sign of a JSON file
28 | if data[0:1] == b"{":
29 | val += 23
30 | # sign of Chrome OR Firefox export
31 | if b'"WebInspector"' in data or b'"Firefox"' in data:
32 | val += 15
33 | if b'"entries"' in data:
34 | val += 15
35 | if b'"version"' in data:
36 | val += 15
37 | return val
38 |
39 |
40 | class HarFlowWrapper:
41 | def __init__(self, flow: dict):
42 | self.flow = flow
43 |
44 | def get_url(self):
45 | return self.flow["request"]["url"]
46 |
47 | def get_matching_url(self, prefix) -> Union[str, None]:
48 | """Get the requests URL if the prefix matches the URL, None otherwise."""
49 | if self.flow["request"]["url"].startswith(prefix):
50 | return self.flow["request"]["url"]
51 | return None
52 |
53 | def get_method(self):
54 | return self.flow["request"]["method"]
55 |
56 | def get_request_headers(self):
57 | headers = {}
58 | for kv in self.flow["request"]["headers"]:
59 | k = kv["name"]
60 | v = kv["value"]
61 | # create list on key if it does not exist
62 | headers[k] = headers.get(k, [])
63 | headers[k].append(v)
64 |         return headers
65 | def get_request_body(self):
66 | if (
67 | "request" in self.flow
68 | and "postData" in self.flow["request"]
69 | and "text" in self.flow["request"]["postData"]
70 | ):
71 | return self.flow["request"]["postData"]["text"]
72 | return None
73 |
74 | def get_response_status_code(self):
75 | return self.flow["response"]["status"]
76 |
77 | def get_response_reason(self):
78 | return self.flow["response"]["statusText"]
79 |
80 | def get_response_headers(self):
81 | headers = {}
82 | for kv in self.flow["response"]["headers"]:
83 | k = kv["name"]
84 | v = kv["value"]
85 | # create list on key if it does not exist
86 | headers[k] = headers.get(k, [])
87 | headers[k].append(v)
88 | return headers
89 |
90 | def get_response_body(self):
91 | if (
92 | "response" in self.flow
93 | and "content" in self.flow["response"]
94 | and "text" in self.flow["response"]["content"]
95 | ):
96 | try:
97 | if (
98 | "encoding" in self.flow["response"]["content"]
99 | and self.flow["response"]["content"]["encoding"] == "base64"
100 | ):
101 | return b64decode(self.flow["response"]["content"]["text"]).decode()
102 | except UnicodeDecodeError:
103 | return None
104 | return self.flow["response"]["content"]["text"]
105 | return None
106 |
107 |
108 | class HarCaptureReader:
109 | def __init__(self, file_path: str, progress_callback=None):
110 | self.file_path = file_path
111 | self.progress_callback = progress_callback
112 |
113 | def captured_requests(self) -> Iterator[HarFlowWrapper]:
114 | har_file_size = os.path.getsize(self.file_path)
115 | with open(self.file_path, "r", encoding="utf-8") as f:
116 | data = json_stream.load(f)
117 | for entry in data["log"]["entries"].persistent():
118 | if self.progress_callback:
119 | self.progress_callback(f.tell() / har_file_size)
120 | yield HarFlowWrapper(entry)
121 |
122 | def name(self):
123 | return "har"
124 |
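A sketch of reading a HAR capture directly; `capture.har` is an illustrative path:

```python
from web2sdk.web2swagger.har_capture_reader import (
    HarCaptureReader,
    har_archive_heuristic,
)

print(har_archive_heuristic('capture.har'))  # score compared by detect_input_format

reader = HarCaptureReader('capture.har')
for flow in reader.captured_requests():
    print(flow.get_method(), flow.get_url())
```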
--------------------------------------------------------------------------------
/web2sdk/web2swagger/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """Converts a mitmproxy dump file to a swagger schema."""
4 | import argparse
5 | import json
6 | import os
7 | import re
8 | import sys
9 | import traceback
10 | import urllib
11 | from typing import Any, Optional, Sequence, Union
12 |
13 | import msgpack
14 | import ruamel.yaml
15 | from mitmproxy.exceptions import FlowReadException
16 |
17 | from web2sdk import console_util
18 | from web2sdk.web2swagger import swagger_util
19 | from web2sdk.web2swagger.har_capture_reader import HarCaptureReader, har_archive_heuristic
20 | from web2sdk.web2swagger.mitmproxy_capture_reader import (
21 | MitmproxyCaptureReader,
22 | mitmproxy_dump_file_huristic,
23 | )
24 |
25 |
26 | def path_to_regex(path):
27 | # replace the path template with a regex
28 | path = re.escape(path)
29 | path = path.replace(r"\{", "(?P<")
30 | path = path.replace(r"\}", ">[^/]+)")
31 | path = path.replace(r"\*", ".*")
32 | return "^" + path + "$"
33 |
34 |
35 | def strip_query_string(path):
36 | # remove the query string from the path
37 | return path.split("?")[0]
38 |
39 |
40 | def set_key_if_not_exists(dict, key, value):
41 | if key not in dict:
42 | dict[key] = value
43 |
44 |
45 | def progress_callback(progress):
46 | console_util.print_progress_bar(progress, "Generating OpenAPI Schema...")
47 |
48 |
49 | def detect_input_format(file_path):
50 | har_score = har_archive_heuristic(file_path)
51 | mitmproxy_score = mitmproxy_dump_file_huristic(file_path)
52 | if "MITMPROXY2SWAGGER_DEBUG" in os.environ:
53 | print("har score: " + str(har_score))
54 | print("mitmproxy score: " + str(mitmproxy_score))
55 | if har_score > mitmproxy_score:
56 | return HarCaptureReader(file_path, progress_callback)
57 | return MitmproxyCaptureReader(file_path, progress_callback)
58 |
59 | def main(sdk_name: str, override_args: Optional[Sequence[str]] = None):
60 | parser = argparse.ArgumentParser(
61 | description="Converts a mitmproxy dump file or HAR to a swagger schema."
62 | )
63 | parser.add_argument(
64 | "-i",
65 | "--input",
66 | help="The input mitmproxy dump file or HAR dump file (from DevTools)",
67 | required=True,
68 | )
69 | parser.add_argument(
70 | "-o",
71 | "--output",
72 | help="The output swagger schema file (yaml). If it exists, new endpoints will be added",
73 | required=True,
74 | )
75 | parser.add_argument("-p", "--api-prefix", help="The api prefix", required=True)
76 | parser.add_argument(
77 | "-e",
78 | "--examples",
79 | action="store_true",
80 | help="Include examples in the schema. This might expose sensitive information.",
81 | )
82 | parser.add_argument(
83 | "-hd",
84 | "--headers",
85 | action="store_true",
86 | help="Include headers in the schema. This might expose sensitive information.",
87 | )
88 | parser.add_argument(
89 | "-f",
90 | "--format",
91 | choices=["flow", "har"],
92 | help="Override the input file format auto-detection.",
93 | )
94 | parser.add_argument(
95 | "-r",
96 | "--param-regex",
97 | default="[0-9]+",
98 | help="Regex to match parameters in the API paths. Path segments that match this regex will be turned into parameter placeholders.",
99 | )
100 | parser.add_argument(
101 | "-s",
102 | "--suppress-params",
103 | action="store_true",
104 | help="Do not include API paths that have the original parameter values, only the ones with placeholders.",
105 | )
106 | args = parser.parse_args(override_args)
107 | try:
108 | args.param_regex = re.compile("^" + args.param_regex + "$")
109 | except re.error as e:
110 | print(
111 | f"{console_util.ANSI_RED}Invalid path parameter regex: {e}{console_util.ANSI_RESET}"
112 | )
113 | sys.exit(1)
114 |
115 | yaml = ruamel.yaml.YAML()
116 |
117 | capture_reader: Union[MitmproxyCaptureReader, HarCaptureReader]
118 | if args.format == "flow" or args.format == "mitmproxy":
119 | capture_reader = MitmproxyCaptureReader(args.input, progress_callback)
120 | elif args.format == "har":
121 | capture_reader = HarCaptureReader(args.input, progress_callback)
122 | else:
123 | capture_reader = detect_input_format(args.input)
124 |
125 | swagger = None
126 |
127 | # try loading the existing swagger file
128 | try:
129 | base_dir = os.getcwd()
130 | relative_path = args.output
131 | abs_path = os.path.join(base_dir, relative_path)
132 | with open(abs_path, "r") as f:
133 | swagger = yaml.load(f)
134 | except FileNotFoundError:
135 |         print("No existing OpenAPI file found. Creating a new one.")
136 | if swagger is None:
137 | swagger = ruamel.yaml.comments.CommentedMap(
138 | {
139 | "openapi": "3.0.0",
140 | "info": {
141 |                     "title": args.input + " " + sdk_name,
142 | "version": "1.0.0",
143 | },
144 | }
145 | )
146 | # strip the trailing slash from the api prefix
147 | args.api_prefix = args.api_prefix.rstrip("/")
148 |
149 | if "servers" not in swagger or swagger["servers"] is None:
150 | swagger["servers"] = []
151 |
152 | # add the server if it doesn't exist
153 | if not any(server["url"] == args.api_prefix for server in swagger["servers"]):
154 | swagger["servers"].append(
155 | {"url": args.api_prefix, "description": "The default server"}
156 | )
157 |
158 | if "paths" not in swagger or swagger["paths"] is None:
159 | swagger["paths"] = {}
160 |
161 | # Add the component/securitySchemes section if it doesn't exist
162 | if "components" not in swagger or swagger["components"] is None:
163 | swagger["components"] = {}
164 |
165 | # add existing path templates
166 | path_templates = []
167 | for path in swagger["paths"]:
168 | path_templates.append(path)
169 |
170 | path_template_regexes = [re.compile(path_to_regex(path)) for path in path_templates]
171 |
172 | try:
173 | for req in capture_reader.captured_requests():
174 | # strip the api prefix from the url
175 | url = req.get_matching_url(args.api_prefix)
176 |
177 | if url is None:
178 | continue
179 | method = req.get_method().lower()
180 | path = strip_query_string(url).removeprefix(args.api_prefix)
181 | status = req.get_response_status_code()
182 |
183 | # check if the path matches any of the path templates, and save the index
184 | path_template_index = None
185 | for i, path_template_regex in enumerate(path_template_regexes):
186 | if path_template_regex.match(path):
187 | path_template_index = i
188 | break
189 | if path_template_index is None:
190 | path_template_to_set = path
191 | else:
192 | path_template_to_set = path_templates[path_template_index]
193 |
194 | set_key_if_not_exists(swagger["paths"], path_template_to_set, {})
195 |
196 | set_key_if_not_exists(
197 | swagger["paths"][path_template_to_set],
198 | method,
199 | {
200 | "summary": swagger_util.path_template_to_endpoint_name(
201 | method, path_template_to_set
202 | ),
203 | "responses": {},
204 | },
205 | )
206 |
207 | params = swagger_util.url_to_params(url, path_template_to_set)
208 | if args.headers:
209 | headers_request = swagger_util.request_to_headers(
210 | req.get_request_headers()
211 | )
212 | if headers_request is not None and len(headers_request) > 0:
213 | set_key_if_not_exists(
214 | swagger["paths"][path_template_to_set][method],
215 | "parameters",
216 | headers_request,
217 | )
218 | if params is not None and len(params) > 0:
219 | set_key_if_not_exists(
220 | swagger["paths"][path_template_to_set][method], "parameters", params
221 | )
222 |
223 | if method not in ["get", "head"]:
224 | body = req.get_request_body()
225 | if body is not None:
226 | body_val = None
227 | content_type = None
228 | # try to parse the body as json
229 | try:
230 | body_val = json.loads(req.get_request_body())
231 | content_type = "application/json"
232 | except UnicodeDecodeError:
233 | pass
234 | except json.decoder.JSONDecodeError:
235 | pass
236 |
237 | # try to parse the body as msgpack, if it's not json
238 | if body_val is None:
239 | try:
240 | body_val = msgpack.loads(req.get_request_body())
241 | content_type = "application/msgpack"
242 | except Exception:
243 | pass
244 |
245 | if content_type is None:
246 | # try to parse the body as form data
247 | try:
248 |                             body_val_pairs: Any = dict(
249 |                                 urllib.parse.parse_qsl(
250 |                                     body, encoding="utf-8", keep_blank_values=True
251 |                                 )
252 |                             )
253 |                             body_val = {}
254 |                             did_find_anything = False
255 |                             for key, value in body_val_pairs.items():
256 |                                 did_find_anything = True
257 |                                 body_val[key if isinstance(key, str) else key.decode("utf-8")] = value if isinstance(value, str) else value.decode("utf-8")
258 | if did_find_anything:
259 | content_type = "application/x-www-form-urlencoded"
260 | else:
261 | body_val = None
262 | except UnicodeDecodeError:
263 | pass
264 |
265 | if body_val is not None:
266 | content_to_set = {
267 | "content": {
268 | content_type: {
269 | "schema": swagger_util.value_to_schema(body_val)
270 | }
271 | }
272 | }
273 | if args.examples:
274 | content_to_set["content"][content_type][
275 | "example"
276 | ] = swagger_util.limit_example_size(body_val)
277 | set_key_if_not_exists(
278 | swagger["paths"][path_template_to_set][method],
279 | "requestBody",
280 | content_to_set,
281 | )
282 |
283 | response_body = req.get_response_body()
284 | if response_body is not None:
285 | # try parsing the response as json
286 | try:
287 | response_parsed = json.loads(response_body)
288 | response_content_type = "application/json"
289 | except UnicodeDecodeError:
290 | response_parsed = None
291 | except json.decoder.JSONDecodeError:
292 | response_parsed = None
293 |
294 | if response_parsed is None:
295 | # try parsing the response as msgpack, if it's not json
296 | try:
297 | response_parsed = msgpack.loads(response_body)
298 | response_content_type = "application/msgpack"
299 | except Exception:
300 | response_parsed = None
301 |
302 | if response_parsed is None:
303 | # try parsing the response as text
304 | if type(response_body) is str:
305 | response_parsed = response_body
306 | else:
307 | response_parsed = response_body.decode("utf-8", "ignore")
308 | response_content_type = req.get_response_headers().get("content-type")
309 | if type(response_content_type) is list:
310 | response_content_type = response_content_type[0]
311 | elif response_content_type is None:
312 | response_content_type = "text/plain"
313 |
314 | if response_parsed is not None:
315 | resp_data_to_set = {
316 | "description": req.get_response_reason(),
317 | "content": {
318 | response_content_type: {
319 | "schema": swagger_util.value_to_schema(response_parsed)
320 | }
321 | },
322 | }
323 | if args.examples:
324 | resp_data_to_set["content"][response_content_type][
325 | "example"
326 | ] = swagger_util.limit_example_size(response_parsed)
327 | if args.headers:
328 | resp_data_to_set["headers"] = swagger_util.response_to_headers(
329 | req.get_response_headers()
330 | )
331 |
332 | set_key_if_not_exists(
333 | swagger["paths"][path_template_to_set][method]["responses"],
334 | str(status),
335 | resp_data_to_set,
336 | )
337 |
338 | if (
339 | "responses" in swagger["paths"][path_template_to_set][method]
340 | and len(swagger["paths"][path_template_to_set][method]["responses"])
341 | == 0
342 | ):
343 | # add a default response if no responses were detected;
344 | # this keeps the document compliant with the OpenAPI spec.
345 | # no response body was recorded for this method, so the
346 | # placeholder below deliberately carries an empty content
347 | # map instead of a guessed media type.
348 |
349 | swagger["paths"][path_template_to_set][method]["responses"]["200"] = {
350 | "description": "OK",
351 | "content": {},
352 | }
353 |
354 | except FlowReadException as e:
355 | print(f"Flow file corrupted: {e}")
356 | traceback.print_exception(*sys.exc_info())
357 | print(
358 | f"{console_util.ANSI_RED}Failed to parse the input file as '{capture_reader.name()}'. "
359 | )
360 | if not args.format:
361 | print(
362 | f"The input format may have been detected incorrectly. Try '--format flow' or '--format har' to specify it explicitly.{console_util.ANSI_RESET}"
363 | )
364 | sys.exit(1)
365 | except ValueError as e:
366 | print(f"ValueError: {e}")
367 | # print stack trace
368 | traceback.print_exception(*sys.exc_info())
369 | print(
370 | f"{console_util.ANSI_RED}Failed to parse the input file as '{capture_reader.name()}'. "
371 | )
372 | if not args.format:
373 | print(
374 | f"The input format may have been detected incorrectly. Try '--format flow' or '--format har' to specify it explicitly.{console_util.ANSI_RESET}"
375 | )
376 | sys.exit(1)
377 |
378 | # save the swagger file
379 | with open(args.output, "w") as f:
380 | yaml.dump(swagger, f)
381 | print(" Done!")
382 |
383 |
384 | if __name__ == "__main__":
385 | main()
386 |
--------------------------------------------------------------------------------
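
Note: the request/response handling above follows the same sniffing cascade in both directions: try JSON first, fall back to msgpack, then (for requests) form-encoded data, and record whichever content type actually parsed. A minimal standalone sketch of that cascade, assuming the same json/msgpack dependencies the module already imports (the helper name sniff_body is illustrative, not part of the codebase):

import json
import urllib.parse

import msgpack


def sniff_body(raw: bytes):
    # returns (parsed_value, content_type), or (None, None) if nothing matched
    try:
        return json.loads(raw), "application/json"
    except (UnicodeDecodeError, json.JSONDecodeError):
        pass
    try:
        return msgpack.loads(raw), "application/msgpack"
    except Exception:
        pass
    pairs = urllib.parse.parse_qsl(raw, keep_blank_values=True)
    if pairs:
        decoded = {k.decode("utf-8"): v.decode("utf-8") for k, v in pairs}
        return decoded, "application/x-www-form-urlencoded"
    return None, None

Ordering matters here: msgpack happily decodes many short byte strings (single bytes are valid fixints), so the stricter JSON parse has to run first.
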
/web2sdk/web2swagger/mitmproxy_capture_reader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import typing
4 | from typing import Iterator
5 | from urllib.parse import urlparse
6 |
7 | from mitmproxy import http
8 | from mitmproxy import io as iom
9 | from mitmproxy.exceptions import FlowReadException
10 |
11 |
12 | def mitmproxy_dump_file_huristic(file_path: str) -> int:
13 | val = 0
14 | if "flow" in file_path:
15 | val += 1
16 | if "mitmproxy" in file_path:
17 | val += 1
18 | # read the first 2048 bytes
19 | with open(file_path, "rb") as f:
20 | data = f.read(2048)
21 | # if the decoded data contains non-printable characters once EOL
22 | # characters are removed, the file is almost certainly binary
23 | if not (
24 | data.decode("utf-8", "ignore")
25 | .replace("\r", "")
26 | .replace("\n", "")
27 | .isprintable()
28 | ):
29 | val += 50
30 | # if the first byte is an ASCII digit
31 | if data[0:1].isdigit():
32 | val += 5
33 | # if it contains the word status_code
34 | if b"status_code" in data:
35 | val += 5
36 | if b"regular" in data:
37 | val += 10
38 | return val
39 |
40 |
41 | class MitmproxyFlowWrapper:
42 | def __init__(self, flow: http.HTTPFlow):
43 | self.flow = flow
44 |
45 | def get_url(self) -> str:
46 | return self.flow.request.url
47 |
48 | def get_matching_url(self, prefix) -> typing.Union[str, None]:
49 | """Get the requests URL if the prefix matches the URL, None otherwise.
50 |
51 | This takes into account a quirk of mitmproxy where it sometimes
52 | puts the raw IP address in the URL instead of the hostname. Then
53 | the hostname is in the Host header.
54 | """
55 | if self.flow.request.url.startswith(prefix):
56 | return self.flow.request.url
57 | # All the places where the real hostname could be
58 | replacement_hostnames = [
59 | self.flow.request.headers.get("Host", ""),
60 | self.flow.request.host_header,
61 | self.flow.request.host,
62 | ]
63 | for replacement_hostname in replacement_hostnames:
64 | if replacement_hostname is not None and replacement_hostname != "":
65 | fixed_url = (
66 | urlparse(self.flow.request.url)
67 | ._replace(netloc=replacement_hostname)
68 | .geturl()
69 | )
70 | if fixed_url.startswith(prefix):
71 | return fixed_url
72 | return None
73 |
74 | def get_method(self) -> str:
75 | return self.flow.request.method
76 |
77 | def get_request_headers(self) -> dict[str, typing.List[str]]:
78 | headers: dict[str, typing.List[str]] = {}
79 | for k, v in self.flow.request.headers.items(multi=True):
80 | # create list on key if it does not exist
81 | headers[k] = headers.get(k, [])
82 | headers[k].append(v)
83 | return headers
84 |
85 | def get_request_body(self):
86 | return self.flow.request.content
87 |
88 | def get_response_status_code(self):
89 | return self.flow.response.status_code
90 |
91 | def get_response_reason(self):
92 | return self.flow.response.reason
93 |
94 | def get_response_headers(self):
95 | headers = {}
96 | for k, v in self.flow.response.headers.items(multi=True):
97 | # create list on key if it does not exist
98 | headers[k] = headers.get(k, [])
99 | headers[k].append(v)
100 | return headers
101 |
102 | def get_response_body(self):
103 | return self.flow.response.content
104 |
105 |
106 | class MitmproxyCaptureReader:
107 | def __init__(self, file_path, progress_callback=None):
108 | self.file_path = file_path
109 | self.progress_callback = progress_callback
110 |
111 | def captured_requests(self) -> Iterator[MitmproxyFlowWrapper]:
112 | with open(self.file_path, "rb") as logfile:
113 | logfile_size = os.path.getsize(self.file_path)
114 | freader = iom.FlowReader(logfile)
115 | try:
116 | for f in freader.stream():
117 | if self.progress_callback:
118 | self.progress_callback(logfile.tell() / logfile_size)
119 | if isinstance(f, http.HTTPFlow):
120 | if f.response is None:
121 | print(
122 | "[warn] flow without response: {}".format(f.request.url)
123 | )
124 | continue
125 | yield MitmproxyFlowWrapper(f)
126 | except FlowReadException as e:
127 | print(f"Flow file corrupted: {e}")
128 |
129 | def name(self):
130 | return "flow"
131 |
--------------------------------------------------------------------------------
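
Note: taken together, MitmproxyCaptureReader streams flows out of a dump file and wraps each HTTP flow in the accessor interface the generator consumes (the same interface the HAR reader exposes). A short usage sketch, assuming the package-relative import path and a placeholder dump file:

from web2sdk.web2swagger.mitmproxy_capture_reader import MitmproxyCaptureReader

# "capture.flow" is a placeholder path to a mitmproxy dump
reader = MitmproxyCaptureReader("capture.flow")
for req in reader.captured_requests():
    # flows without a response were already skipped by the reader
    print(req.get_method(), req.get_url(), req.get_response_status_code())
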
/web2sdk/web2swagger/swagger_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import urllib.parse
3 | import uuid
4 | from typing import List
5 |
6 | VERBS = [
7 | "add",
8 | "create",
9 | "delete",
10 | "get",
11 | "attach",
12 | "detach",
13 | "update",
14 | "push",
15 | "extendedcreate",
16 | "activate",
17 | ]
18 |
19 |
20 | # generate a name for the endpoint from the path template, e.g.
21 | # POST /api/v1/things/{id}/create -> "POST things create by id"
22 | def path_template_to_endpoint_name(method, path_template):
23 | path_template = path_template.strip("/")
24 | segments = path_template.split("/")
25 | # move path parameters into a separate list
26 | params = []
27 | for idx, segment in enumerate(segments):
28 | if segment.startswith("{") and segment.endswith("}"):
29 | params.append(segment)
30 | segments[idx] = "{}"
31 | # remove them from the segments
32 | segments = [segment for segment in segments if segment != "{}"]
33 | # reverse the segments
34 | segments.reverse()
35 | name_parts = []
36 | for segment in segments:
37 | # walking from the end of the path, collect verb segments and
38 | # stop after the first non-verb segment, which is treated as
39 | # the resource name
40 | name_parts.insert(0, segment.lower())
41 | if segment not in VERBS:
42 | break
43 | # only the first path parameter contributes to the name
44 | if params:
45 | name_parts.append("by " + params[0].replace("{", "").replace("}", ""))
46 | return method.upper() + " " + " ".join(name_parts)
47 |
48 |
49 | # given a URL and its path template, generate the parameters section of the request
50 | def url_to_params(url, path_template):
51 | path_template = path_template.strip("/")
52 | segments = path_template.split("/")
53 | url_segments = url.split("?")[0].strip("/").split("/")
54 | params = []
55 | for idx, segment in enumerate(segments):
56 | if segment.startswith("{") and segment.endswith("}"):
57 | params.append(
58 | {
59 | "name": segment.replace("{", "").replace("}", ""),
60 | "in": "path",
61 | "required": True,
62 | "schema": {
63 | "type": "number" if url_segments[idx].isdigit() else "string"
64 | },
65 | }
66 | )
67 | query_string = urllib.parse.urlparse(url).query
68 | if query_string:
69 | query_params = urllib.parse.parse_qs(query_string)
70 | for key in query_params:
71 | params.append(
72 | {
73 | "name": key,
74 | "in": "query",
75 | "required": False,
76 | "schema": {
77 | "type": "number" if query_params[key][0].isdigit() else "string"
78 | },
79 | }
80 | )
81 | return params
82 |
83 |
84 | def request_to_headers(headers: dict[str, List[str]], add_example: bool = False):
85 | """Convert captured request headers into OpenAPI header parameters,
86 | optionally attaching the first observed value as an example."""
87 | params = []
88 | if headers:
89 | for key in headers:
90 | h = {
91 | "name": key,
92 | "in": "header",
93 | "required": False,
94 | "schema": {"type": "number" if headers[key][0].isdigit() else "string"},
95 | }
96 | if add_example:
97 | h["example"] = headers[key][0]
98 | params.append(h)
99 | return params
100 |
101 |
102 | def response_to_headers(headers):
103 | header = {}
104 | if headers:
105 | for key in headers:
106 | header[key] = {
107 | "description": headers[key][0],
108 | "schema": {"type": "number" if headers[key][0].isdigit() else "string"},
109 | }
110 | return header
111 |
112 |
113 | def value_to_schema(value):
114 | # check if value is a number; type() rather than isinstance, since bool subclasses int and must not match here
115 | if type(value) is int or type(value) is float:
116 | return {"type": "number"}
117 | # check if value is a boolean
118 | elif isinstance(value, bool):
119 | return {"type": "boolean"}
120 | # check if value is a string
121 | elif isinstance(value, str):
122 | return {"type": "string"}
123 | # check if value is a list
124 | elif isinstance(value, list):
125 | if len(value) == 0:
126 | return {"type": "array", "items": {}}
127 |
128 | return {"type": "array", "items": value_to_schema(value[0])}
129 | # check if value is a dict
130 | elif isinstance(value, dict):
131 | all_keys_are_numeric = all(is_numeric_string(key) for key in value)
132 | all_keys_are_uuid = all(is_uuid(key) for key in value)
133 | keys_are_generic = all_keys_are_numeric or all_keys_are_uuid
134 |
135 | if keys_are_generic and len(value) > 0:
136 | return {
137 | "type": "object",
138 | "additionalProperties": value_to_schema(list(value.values())[0]),
139 | }
140 | return {
141 | "type": "object",
142 | "properties": {key: value_to_schema(value[key]) for key in value},
143 | }
144 | # None maps to a nullable object schema
145 | elif value is None:
146 | return {"type": "object", "nullable": True}
147 |
148 |
149 | def is_uuid(key):
150 | return isinstance(key, str) and is_valid_uuid(key)
151 |
152 |
153 | def is_numeric_string(key):
154 | return isinstance(key, str) and key.isnumeric()
155 |
156 |
157 | def is_valid_uuid(val):
158 | try:
159 | uuid.UUID(str(val))
160 | return True
161 | except ValueError:
162 | return False
163 |
164 |
165 | MAX_EXAMPLE_ARRAY_ELEMENTS = 10
166 | MAX_EXAMPLE_OBJECT_PROPERTIES = 150
167 |
168 |
169 | # recursively scan an example value and limit the number of elements and properties
170 | def limit_example_size(example):
171 | if isinstance(example, list):
172 | new_list = []
173 | for element in example:
174 | if len(new_list) >= MAX_EXAMPLE_ARRAY_ELEMENTS:
175 | break
176 | new_list.append(limit_example_size(element))
177 | return new_list
178 | elif isinstance(example, dict):
179 | new_dict = {}
180 | for key in example:
181 | if len(new_dict) >= MAX_EXAMPLE_OBJECT_PROPERTIES:
182 | break
183 | new_dict[key] = limit_example_size(example[key])
184 | return new_dict
185 | else:
186 | return example
187 |
--------------------------------------------------------------------------------
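
Note: to make value_to_schema's recursion concrete, here is a worked example; the expected output below is traced by hand from the rules above (leaf types map directly, arrays infer from their first element, objects recurse over their keys):

from web2sdk.web2swagger.swagger_util import value_to_schema

payload = {"id": 7, "tags": ["a", "b"], "meta": {"active": True}}
print(value_to_schema(payload))
# {'type': 'object', 'properties': {
#   'id': {'type': 'number'},
#   'tags': {'type': 'array', 'items': {'type': 'string'}},
#   'meta': {'type': 'object', 'properties': {'active': {'type': 'boolean'}}}}}

A payload keyed entirely by numeric strings or UUIDs would instead collapse into a single additionalProperties schema, which keeps generated documents from exploding when an API returns maps keyed by ids.
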