├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── pyproject.toml └── web2sdk ├── __init__.py ├── console_util.py ├── main.py ├── swagger2sdk ├── __init__.py ├── generate_function.py ├── generate_types.py ├── main.py └── utils.py ├── tests └── __init__.py └── web2swagger ├── .DS_Store ├── __init__.py ├── har_capture_reader.py ├── main.py ├── mitmproxy_capture_reader.py └── swagger_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .DS_Store 6 | 7 | # Request logs and generated files 8 | */generated/ 9 | *.har 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 115 | .pdm.toml 116 | .pdm-python 117 | .pdm-build/ 118 | 119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 120 | __pypackages__/ 121 | 122 | # Celery stuff 123 | celerybeat-schedule 124 | celerybeat.pid 125 | 126 | # SageMath parsed files 127 | *.sage.py 128 | 129 | # Environments 130 | .env 131 | .venv 132 | env/ 133 | venv/ 134 | ENV/ 135 | env.bak/ 136 | venv.bak/ 137 | 138 | # Spyder project settings 139 | .spyderproject 140 | .spyproject 141 | 142 | # Rope project settings 143 | .ropeproject 144 | 145 | # mkdocs documentation 146 | /site 147 | 148 | # mypy 149 | .mypy_cache/ 150 | .dmypy.json 151 | dmypy.json 152 | 153 | # Pyre type checker 154 | .pyre/ 155 | 156 | # pytype static type analyzer 157 | .pytype/ 158 | 159 | # Cython debug symbols 160 | cython_debug/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
167 | #.idea/ 168 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Jason Fan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 🦊 web2sdk 3 |

4 | 5 |

6 |

Automatically turn third party APIs into Python SDKs

7 |

8 |

9 | 10 | License 11 | 12 | 13 | Issues 14 | 15 |

16 | 17 | Web2sdk is a set of tools for reverse engineering APIs by intercepting network requests. It processes HAR files exported from Chrome devtools into an OpenAPI schema, then automatically generates a python SDK based on the schema. Each method in the python SDK corresponds to an endpoint, and includes strongly typed arguments, requests, and responses. 18 | 19 | https://github.com/user-attachments/assets/5a7f477d-76ab-46f2-9884-62dfc9f2715b 20 | 21 | 22 | ### Features 23 | - Generates an OpenAPI/Swagger yaml schema from any web-based flow 24 | - Automatically merges requests to the same endpoint 25 | - Generates pydantic classes based on OpenAPI request and response schemas 26 | - Supports `basic` and `bearer` auth schemes 27 | - Supports overriding default headers 28 | 29 | ### Example output 30 | ```python 31 | import json 32 | import http.client 33 | from urllib.parse import urlparse 34 | from pydantic import BaseModel 35 | from typing import Optional, Dict, List, Any 36 | 37 | class GetConversationsRequestParameters(BaseModel): 38 | offset: Optional[float] = None 39 | limit: Optional[float] = None 40 | order: Optional[str] = None 41 | 42 | class GetConversationsResponse(BaseModel): 43 | items: Optional[List] = None 44 | total: Optional[float] = None 45 | limit: Optional[float] = None 46 | offset: Optional[float] = None 47 | has_missing_conversations: Optional[bool] = None 48 | 49 | class ChatGPTAPI(BaseModel): 50 | hostname: str 51 | token: str 52 | 53 | def get_conversations(self, request_parameters: 54 | GetConversationsRequestParameters, *, override_headers: dict={} 55 | ) ->GetConversationsResponse: 56 | conn = http.client.HTTPSConnection(self.hostname) 57 | params = '&'.join([(k + '=' + v) for k, v in request_parameters. 58 | items()]) 59 | headers = {'User-Agent': 'Web2sdk/1.0', 'Authorization': 'Bearer ' + 60 | self.token} 61 | headers.update(override_headers) 62 | conn.request('GET', '/backend-api/conversations?' 
+ params + '',
63 |             headers=headers)
64 |         res = conn.getresponse()
65 |         data = res.read().decode('utf-8')
66 |         return json.loads(data)
67 | 
68 |     def post_conversation(self, request_body: PostConversationRequestBody,
69 |         *, override_headers: dict={}) ->Any
70 |         ### ...etc
71 | ```
72 | 
73 | ## Usage
74 | **1. Export HAR file**
75 | * Open Chrome devtools and go to "Network".
76 | * Go through a flow on a website that triggers the requests you want to capture and reverse engineer. The more varied the requests the better, as a single request might not capture all the possible request and response schemas for a particular endpoint.
77 | * Click the button shown below to export the HAR file. Don't worry about filtering out requests, that happens later.
78 | * Also compatible with [mitmweb](https://mitmproxy.org/) exports.
79 | 
80 | ![CleanShot 2024-08-27 at 21 11 53](https://github.com/user-attachments/assets/3453f33b-686b-476e-80e3-bd7df8c63f50)
81 | 
82 | **2. Install web2sdk**
83 | ```
84 | $ pip install web2sdk
85 | ```
86 | 
87 | **3. Generate an OpenAPI spec and SDK**
88 | ```sh
89 | $ web2sdk --requests-path <path/to/requests.har> --base-url <base url> --sdk-name FinicSDK --auth-type bearer
90 | ```
91 | * `base-url` filters out requests that don't start with the url provided. This should include everything up until the endpoints you want to reverse engineer.
92 |   * For example, `https://finic.ai/api/v1` will match only requests to the v1 endpoint, but `https://finic.ai/api` will match requests from v1, v2, and any other paths after `/api`.
93 | * Generated files will be saved to `generated/<sdk name>.yaml` and `generated/<sdk name>.py` in the current directory by default.
94 | 
95 | **4.
Run your python SDK.**
96 | ```python
97 | from generated.FinicSDK import FinicSDK
98 | 
99 | finic = FinicSDK(hostname="finic.ai", token="your_token_here")
100 | finic.get_connectors({})
101 | finic.post_message({ "message": "hi there" }, override_headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" })
102 | ```
103 | * Each method in the generated SDK corresponds to an endpoint
104 | * You can pass in any headers you want. By default, only `Authorization` and `User-Agent` headers are included.
105 | * Some methods accept parameters and/or request bodies. Inspect the function to see what arguments it takes.
106 | 
107 | ### Other Options
108 | ```--auth-type```
109 | * Optional, defaults to `none`. If set, the generated SDK class will expect a username and password for basic auth or a token for bearer auth.
110 | 
111 | ```--output```
112 | * Optional, defaults to `generated/` in the current directory. Specify a directory for the generated `.yaml` and `.py` files to be saved.
113 | 
114 | ```--interactive```
115 | * Run in interactive mode. Not well supported.
116 | 
117 | ## 🚧 Planned Improvements
118 | - Support for OAuth and custom auth schemes. In the meantime, only `basic` and `bearer` auth are supported.
119 | - Automatic auth token refresh
120 | - Support for templated API paths (e.g. `https://api.claude.ai/api/organizations/{organization_id}/chat_conversations`)
121 | - Use LLMs to generate more readable class names, example request payloads, and other tasks that require fuzzy reasoning
122 | - Include a linter/formatter to make generated SDK more readable
123 | 
124 | ### Acknowledgements
125 | Web2sdk includes a modified version of [mitmproxy2swagger](https://github.com/alufers/mitmproxy2swagger).
126 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "web2sdk" 3 | version = "0.0.2" 4 | description = "Reverse engineer third party APIs into python SDKs" 5 | authors = ["jasonwcfan "] 6 | readme = "README.md" 7 | packages = [ 8 | { include = "web2sdk" } 9 | ] 10 | 11 | [tool.poetry.dependencies] 12 | python = "^3.10" 13 | mitmproxy = "^10.1.1" 14 | "ruamel.yaml" = ">=0.17.32,<0.19.0" 15 | json-stream = "^2.3.2" 16 | msgpack = "^1.0.7" 17 | astor = "^0.8.1" 18 | pydantic = "^2.8.2" 19 | pyyaml = "^6.0.2" 20 | requests = "^2.32.3" 21 | python-dotenv = "^1.0.1" 22 | 23 | [tool.poetry.scripts] 24 | web2sdk = "web2sdk.main:main" 25 | 26 | 27 | [build-system] 28 | requires = ["poetry-core"] 29 | build-backend = "poetry.core.masonry.api" 30 | -------------------------------------------------------------------------------- /web2sdk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/__init__.py -------------------------------------------------------------------------------- /web2sdk/console_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | 4 | ANSI_RGB = "\033[38;2;{};{};{}m" 5 | ANSI_RGB_BG = "\033[48;2;{};{};{}m" 6 | ANSI_RED = "\033[31m" 7 | ANSI_RESET = "\033[0m" 8 | 9 | RAINBOW_COLORS = [ 10 | (255, 0, 0), 11 | (255, 127, 0), 12 | (255, 255, 0), 13 | (127, 255, 0), 14 | (0, 255, 0), 15 | (0, 255, 127), 16 | (0, 255, 255), 17 | (0, 127, 255), 18 | (0, 0, 255), 19 | (127, 0, 255), 20 | (255, 0, 255), 21 | (255, 0, 127), 22 | ] 23 | 24 | 25 | def rgb_interpolate(start, end, progress): 26 | return tuple(int(start[i] + (end[i] - start[i]) * progress) for i in range(3)) 27 | 28 | 29 | # 
take a value from 0 to 1 and return an interpolated color from the rainbow 30 | def rainbow_at_position(progress): 31 | idx_a = int(progress * float(len(RAINBOW_COLORS) - 1)) 32 | idx_b = idx_a + 1 33 | return rgb_interpolate( 34 | RAINBOW_COLORS[idx_a], 35 | RAINBOW_COLORS[idx_b], 36 | progress * float(len(RAINBOW_COLORS) - 1) - idx_a, 37 | ) 38 | 39 | 40 | def print_progress_bar(progress=0.0, label=""): 41 | sys.stdout.write("\r") 42 | progress_bar_contents = "" 43 | PROGRESS_LENGTH = 30 44 | blocks = ["▉", "▊", "▋", "▌", "▍", "▎", "▏"] 45 | 46 | for i in range(PROGRESS_LENGTH): 47 | interpolated = rainbow_at_position(i / PROGRESS_LENGTH) 48 | # check if should print a full block 49 | if i < int(progress * PROGRESS_LENGTH): 50 | interpolated_2nd_half = rainbow_at_position((i + 0.5) / PROGRESS_LENGTH) 51 | progress_bar_contents += ANSI_RGB.format(*interpolated) 52 | progress_bar_contents += ANSI_RGB_BG.format(*interpolated_2nd_half) 53 | progress_bar_contents += "▌" 54 | # check if should print a non-full block 55 | elif i < int((progress * PROGRESS_LENGTH) + 0.5): 56 | progress_bar_contents += ANSI_RESET 57 | progress_bar_contents += ANSI_RGB.format(*interpolated) 58 | progress_bar_contents += blocks[ 59 | int((progress * PROGRESS_LENGTH) + 0.5) - i - 1 60 | ] 61 | # otherwise, print a space 62 | else: 63 | progress_bar_contents += ANSI_RESET 64 | progress_bar_contents += " " 65 | 66 | progress_bar_contents += ANSI_RESET 67 | sys.stdout.write("{} [{}] {:.1f}%".format(label, progress_bar_contents, progress * 100)) 68 | sys.stdout.flush() -------------------------------------------------------------------------------- /web2sdk/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | from typing import Any, Optional, Sequence, Union 5 | from web2sdk.web2swagger.main import main as web2swagger_main 6 | from web2sdk.swagger2sdk.main import construct_sdk 7 | from web2sdk import 
console_util 8 | 9 | def progress_callback(progress): 10 | console_util.print_progress_bar(progress, "Generating SDK... ") 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser( 14 | description="Converts a mitmproxy dump file or HAR to a swagger schema." 15 | ) 16 | parser.add_argument( 17 | "-i", 18 | "--interactive", 19 | help="Run in interactive mode", 20 | action="store_true", 21 | required=False, 22 | ) 23 | 24 | parser.add_argument( 25 | "-r", 26 | "--requests-path", 27 | help="Path to a mitmproxy dump file or HAR", 28 | required=False, 29 | ) 30 | 31 | parser.add_argument( 32 | "-b", 33 | "--base-url", 34 | help="Base url for the API to reverse engineer", 35 | required=False, 36 | ) 37 | 38 | parser.add_argument( 39 | "-a", 40 | "--auth-type", 41 | help="Auth type to determine how the SDK should handle auth. Possible values: basic, bearer, none.", 42 | default="none", 43 | required=False, 44 | ) 45 | 46 | parser.add_argument( 47 | "-s", 48 | "--sdk-name", 49 | help="Name for the SDK class. 
Will also be used as the filename for the OpenAPI schema.", 50 | required=False, 51 | ) 52 | 53 | parser.add_argument( 54 | "-o", 55 | "--output", 56 | help="Path to the directory where generated files should be saved", 57 | default="generated", 58 | required=False, 59 | ) 60 | 61 | args = parser.parse_args() 62 | output_path = args.output.rstrip("/") 63 | 64 | if not args.interactive: 65 | if not args.requests_path or not args.sdk_name or not args.base_url: 66 | parser.error("--requests-path, --sdk-name, and --base-url are required when not running in --interactive mode.") 67 | if args.auth_type and args.auth_type not in ["basic", "bearer", "none"]: 68 | parser.error("--auth-type must be one of 'basic', 'bearer', or 'none.") 69 | 70 | openapi_path = f"{output_path}/{args.sdk_name}.yaml" 71 | sdk_path = f"{output_path}/{args.sdk_name}.py" 72 | os.makedirs(output_path, exist_ok=True) 73 | 74 | print("\n") 75 | web2swagger_main(args.sdk_name, ["--input", args.requests_path, "--output", openapi_path, "--api-prefix", args.base_url]) 76 | print("OpenAPI schema generated successfully at: ", openapi_path) 77 | print("\n") 78 | construct_sdk(openapi_path, args.sdk_name, output_path, auth_type=args.auth_type, progress_callback=progress_callback) 79 | print(" Done!") 80 | sys.stdout.write(f"SDK generated successfully at: {sdk_path}") 81 | else: 82 | while True: 83 | requests_path = input("Enter the path to the mitmproxy dump file or HAR: ") 84 | base_url = input("Enter the base URL for the API (e.g. https://api.finic.ai/v1): ") 85 | sdk_name = input("Enter a name for the generated SDK (e.g. FinicAPI): ") 86 | 87 | use_auth = input("Does this API require authentication? (y/n): ") 88 | while use_auth.lower() not in ["y", "n", "yes", "no"]: 89 | print("Invalid input. Please enter 'y' or 'n'.") 90 | use_auth = input("Does this API require authentication? 
(y/n): ") 91 | 92 | 93 | if use_auth.lower() in ["y", "yes", ""]: 94 | auth_type = input("What type of authentication does this API use? (basic/bearer): ") 95 | while auth_type not in ["basic", "bearer"]: 96 | print("Invalid auth type. Please enter 'basic' or 'bearer'.") 97 | auth_type = input("What type of authentication does this API use? (basic/bearer): ") 98 | elif use_auth.lower() in ["n", "no"]: 99 | auth_type = "none" 100 | 101 | openapi_path = f"generated/{sdk_name}.yaml" 102 | os.makedirs("generated", exist_ok=True) 103 | web2swagger_main(sdk_name, ["--input", requests_path, "--output", openapi_path, "--api-prefix", base_url]) 104 | print("OpenAPI schema generated successfully at: ", openapi_path) 105 | print("\n") 106 | construct_sdk(openapi_path, args.sdk_name, output_path, auth_type=args.auth_type, progress_callback=progress_callback) 107 | print(" Done!") 108 | sys.stdout.write(f"SDK generated successfully at: {sdk_path}") 109 | 110 | if __name__ == "__main__": 111 | main() 112 | -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/swagger2sdk/__init__.py -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/generate_function.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from web2sdk.swagger2sdk.utils import AuthType, check_content_type, dash_to_snake 3 | from typing import Tuple, List 4 | from urllib.parse import urlparse 5 | 6 | def content_type_to_ast_node(content_type: str, return_type: str) -> ast.Call: 7 | if check_content_type(content_type, ['application/json', 'application/x-www-form-urlencoded']): 8 | # json.loads(data) 9 | result_node = ast.Call( 10 | func=ast.Attribute( 11 | 
value=ast.Name(id='json', ctx=ast.Load()), 12 | attr='loads', 13 | ctx=ast.Load() 14 | ), 15 | args=[ast.Name(id='data', ctx=ast.Load())], 16 | keywords=[] 17 | ) 18 | else: 19 | result_node = ast.Name(id='data', ctx=ast.Load()) 20 | return result_node 21 | 22 | # Fallback in case a class could not be created for a particular endpoint. Return a primitive type instead. 23 | def get_return_type(content_type: str) -> ast.Name: 24 | if check_content_type(content_type, ['application/json']): 25 | return 'dict' 26 | elif check_content_type(content_type, ['text/html', 'text/plain']): 27 | return 'str' 28 | else: 29 | return 'Any' 30 | 31 | def generate_function_for_endpoint(endpoint: dict, base_url: str, auth_type: AuthType, types: Tuple[ast.ClassDef]) -> ast.FunctionDef: 32 | # Extract endpoint details 33 | request_path: str = endpoint['path'] 34 | request_name: str = endpoint['name'] 35 | request_parameters: dict = endpoint['parameters'] 36 | request_method: str = endpoint['method'] 37 | request_schema: dict = endpoint['request_body'] 38 | request_content_type: str = next(iter(request_schema['content'].keys()), None) if request_schema else None 39 | response_content: dict = endpoint['responses'].get('200', {}).get('content', {}) 40 | response_content_type: str = next(iter(response_content.keys()), "") 41 | 42 | request_parameters_class_name = types[0].name if types[0] else 'dict' 43 | request_body_class_name = types[1].name if types[1] else 'dict' 44 | response_class_name = types[2].name if types[2] else get_return_type(response_content_type) 45 | 46 | # Construct the function arguments 47 | args = ast.arguments( 48 | args=[arg for arg in [ 49 | ast.arg(arg='self', annotation=None), 50 | ast.arg(arg='request_parameters', annotation=ast.Name(id=request_parameters_class_name, ctx=ast.Load())) if request_parameters else None, 51 | ast.arg(arg='request_body', annotation=ast.Name(id=request_body_class_name, ctx=ast.Load())) if request_schema else None 52 | ] if arg is not 
None], 53 | vararg=None, 54 | kwonlyargs=[ast.arg(arg='override_headers', annotation=ast.Name(id='dict', ctx=ast.Load()))], 55 | kw_defaults=[ast.Dict(keys=[], values=[])], 56 | kwarg=None, 57 | defaults=[] 58 | ) 59 | 60 | # Return annotation 61 | return_annotation = ast.Name(id=response_class_name, ctx=ast.Load()) 62 | 63 | # Set up http.client connection 64 | # conn = http.client.HTTPSConnection(self.hostname) 65 | http_conn_assign = ast.Assign( 66 | targets=[ast.Name(id='conn', ctx=ast.Store())], 67 | value=ast.Call( 68 | func=ast.Attribute( 69 | value=ast.Name(id='http.client', ctx=ast.Load()), 70 | attr='HTTPSConnection', 71 | ctx=ast.Load() 72 | ), 73 | args=[ast.Attribute( 74 | value=ast.Name(id='self', ctx=ast.Load()), 75 | attr='hostname', 76 | ctx=ast.Load() 77 | )], 78 | keywords=[] 79 | ) 80 | ) 81 | 82 | # Prepare the payload, depending on the request content type 83 | if request_schema: 84 | if check_content_type(request_content_type, ['application/json', 'application/x-www-form-urlencoded']): 85 | payload_assign = ast.Assign( 86 | targets=[ast.Name(id='payload', ctx=ast.Store())], 87 | value=ast.Call( 88 | func=ast.Attribute( 89 | value=ast.Name(id='json', ctx=ast.Load()), 90 | attr='dumps', 91 | ctx=ast.Load() 92 | ), 93 | args=[ast.Name(id='request_body', ctx=ast.Load())], 94 | keywords=[] 95 | ) 96 | ) 97 | else: 98 | payload_assign = ast.Assign( 99 | targets=[ast.Name(id='payload', ctx=ast.Store())], 100 | value=ast.Name(id='request_body', ctx=ast.Load()) 101 | ) 102 | 103 | # Prepare headers 104 | header_keys = [ast.Constant(value='User-Agent')] 105 | header_values = [ast.Constant(value='Web2sdk/1.0')] 106 | if auth_type == AuthType.BASIC.value: 107 | header_keys.append(ast.Constant(value='Authorization')) 108 | header_values.append( 109 | ast.BinOp( 110 | left=ast.Constant(value='Basic '), 111 | op=ast.Add(), 112 | right=ast.Call( 113 | func=ast.Name(id='base64.b64encode', ctx=ast.Load()), 114 | args=[ast.BinOp( 115 | left=ast.BinOp( 116 | 
left=ast.Name(id='self.username', ctx=ast.Load()), 117 | op=ast.Add(), 118 | right=ast.Constant(value=':') 119 | ), 120 | op=ast.Add(), 121 | right=ast.Name(id='self.password', ctx=ast.Load()) 122 | )], 123 | keywords=[] 124 | ) 125 | ) 126 | ) 127 | elif auth_type == AuthType.BEARER.value: 128 | header_keys.append(ast.Constant(value='Authorization')) 129 | header_values.append(ast.BinOp( 130 | left=ast.Constant(value='Bearer '), 131 | op=ast.Add(), 132 | right=ast.Name(id='self.token', ctx=ast.Load()) 133 | )) 134 | 135 | headers_assign = ast.Assign( 136 | targets=[ast.Name(id='headers', ctx=ast.Store())], 137 | value=ast.Dict( 138 | keys=header_keys, 139 | values=header_values 140 | ) 141 | ) 142 | 143 | # Update headers with override headers 144 | headers_update = ast.Expr( 145 | value=ast.Call( 146 | func=ast.Attribute( 147 | value=ast.Name(id='headers', ctx=ast.Load()), 148 | attr='update', 149 | ctx=ast.Load() 150 | ), 151 | args=[ast.Name(id='override_headers', ctx=ast.Load())], 152 | keywords=[] 153 | ) 154 | ) 155 | 156 | # Prepare the request params 157 | if request_parameters: 158 | parameter_assign = ast.Assign( 159 | targets=[ast.Name(id='params', ctx=ast.Store())], 160 | value=ast.Call( 161 | func=ast.Attribute( 162 | value=ast.Str(s="&"), 163 | attr="join", 164 | ctx=ast.Load() 165 | ), 166 | args=[ast.ListComp( 167 | elt=ast.BinOp( 168 | left=ast.BinOp( 169 | left=ast.Name(id='k', ctx=ast.Load()), 170 | op=ast.Add(), 171 | right=ast.Constant(value="=") 172 | ), 173 | op=ast.Add(), 174 | right=ast.Name(id='v', ctx=ast.Load()) 175 | ), 176 | generators=[ 177 | ast.comprehension( 178 | target=ast.Tuple(elts=[ 179 | ast.Name(id='k', ctx=ast.Store()), 180 | ast.Name(id='v', ctx=ast.Store())], ctx=ast.Store()), 181 | iter=ast.Call( 182 | func=ast.Attribute( 183 | value=ast.Name(id='request_parameters', ctx=ast.Load()), 184 | attr='items', 185 | ctx=ast.Load() 186 | ), 187 | args=[], keywords=[] 188 | ), 189 | ifs=[], is_async=0 190 | ) 191 | ] 192 | )], 
193 | keywords=[] 194 | ) 195 | ) 196 | else: 197 | parameter_assign = ast.Assign( 198 | targets=[ast.Name(id='params', ctx=ast.Store())], 199 | value=ast.Constant(value="") 200 | ) 201 | 202 | # Call the connection request 203 | # conn.request("GET", "/backend-api/conversations" + "?" + params, body=payload, headers=headers) 204 | full_url = urlparse(base_url + request_path) 205 | http_path = full_url.path + "?" if request_parameters else full_url.path 206 | conn_request = ast.Expr( 207 | value=ast.Call( 208 | func=ast.Attribute( 209 | value=ast.Name(id='conn', ctx=ast.Load()), 210 | attr='request', 211 | ctx=ast.Load() 212 | ), 213 | args=[ 214 | ast.Constant(value=request_method.upper()), 215 | ast.BinOp( 216 | left=ast.BinOp( 217 | left=ast.Constant(value=http_path), 218 | op=ast.Add(), 219 | right=ast.Name(id='params', ctx=ast.Load()) 220 | ), 221 | op=ast.Add(), 222 | right=ast.Constant(value="") 223 | ) 224 | ], 225 | keywords=[kw for kw in [ 226 | ast.keyword(arg='body', value=ast.Name(id='payload', ctx=ast.Load())) if request_schema else None, 227 | ast.keyword(arg='headers', value=ast.Name(id='headers', ctx=ast.Load())) 228 | ] if kw is not None] 229 | ) 230 | ) 231 | 232 | # Get the response 233 | # res = conn.getresponse() 234 | response_assign = ast.Assign( 235 | targets=[ast.Name(id='res', ctx=ast.Store())], 236 | value=ast.Call( 237 | func=ast.Attribute( 238 | value=ast.Name(id='conn', ctx=ast.Load()), 239 | attr='getresponse', 240 | ctx=ast.Load() 241 | ), 242 | args=[], 243 | keywords=[] 244 | ) 245 | ) 246 | 247 | # Read the response 248 | # data = res.read().decode("utf-8") 249 | data_assign = ast.Assign( 250 | targets=[ast.Name(id='data', ctx=ast.Store())], 251 | value=ast.Call( 252 | func=ast.Attribute( 253 | value=ast.Call( 254 | func=ast.Attribute( 255 | value=ast.Name(id='res', ctx=ast.Load()), 256 | attr='read', 257 | ctx=ast.Load() 258 | ), 259 | args=[], 260 | keywords=[] 261 | ), 262 | attr='decode', 263 | ctx=ast.Load() 264 | ), 265 | 
args=[ast.Str(s='utf-8')], 266 | keywords=[] 267 | ) 268 | ) 269 | 270 | # Return the decoded data 271 | return_stmt = ast.Return( 272 | value=content_type_to_ast_node(response_content_type, response_class_name) 273 | ) 274 | 275 | # Construct function body with the URL and response assignments 276 | function_body = [ 277 | http_conn_assign, 278 | parameter_assign, 279 | headers_assign, 280 | headers_update, 281 | payload_assign if request_schema else None, 282 | conn_request, 283 | response_assign, 284 | data_assign, 285 | return_stmt 286 | ] 287 | 288 | # Remove any None values from the function body 289 | function_body = [stmt for stmt in function_body if stmt is not None] 290 | 291 | # Create the function definition 292 | function_def = ast.FunctionDef( 293 | name=dash_to_snake(request_name), 294 | args=args, 295 | body=function_body, 296 | decorator_list=[], 297 | returns=return_annotation 298 | ) 299 | 300 | return function_def -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/generate_types.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from pydantic import BaseModel, ConfigDict, Field, ValidationError 3 | from abc import ABC, abstractmethod 4 | from typing import List, Optional, Dict, Any, Tuple, Type, Union 5 | from enum import Enum 6 | from web2sdk.swagger2sdk.utils import YAMLToPydanticType, check_content_type, snake_to_pascal, dash_to_snake, strip_special_chars 7 | 8 | class ClassField(BaseModel): 9 | field_name: str 10 | field_type: str 11 | required: bool 12 | 13 | def path_to_class_name(path: str) -> str: 14 | """ 15 | Converts a URL path to a class name. 16 | """ 17 | return strip_special_chars(snake_to_pascal(dash_to_snake(path))) 18 | 19 | 20 | def generate_class_def(class_name: str, fields: List[ClassField]) -> ast.ClassDef: 21 | """ 22 | Generates a Pydantic class definition with the given class name and fields. 
23 | """ 24 | # Create the class definition 25 | class_def = ast.ClassDef( 26 | name=class_name, 27 | bases=[ast.Name(id='BaseModel', ctx=ast.Load())], # Inherit from Pydantic's BaseModel 28 | body=[], 29 | decorator_list=[] 30 | ) 31 | 32 | # Add each field as a class attribute 33 | for field in fields: 34 | field_name, field_type, required = strip_special_chars(dash_to_snake(field.field_name)), field.field_type, field.required 35 | 36 | # If the field is not required, wrap the type in Optional 37 | if not required: 38 | field_annotation = ast.Subscript( 39 | value=ast.Name(id='Optional', ctx=ast.Load()), 40 | slice=ast.Index(value=ast.Name(id=field_type, ctx=ast.Load())), 41 | ctx=ast.Load() 42 | ) 43 | else: 44 | field_annotation = ast.Name(id=field_type, ctx=ast.Load()) 45 | 46 | # Add the field to the class body. If the field is not required, assign a default value of None. 47 | field_node = ast.AnnAssign( 48 | target=ast.Name(id=field_name, ctx=ast.Store()), 49 | annotation=field_annotation, 50 | value=None if required else ast.Constant(value=None), 51 | simple=True 52 | ) 53 | 54 | class_def.body.append(field_node) 55 | 56 | return class_def 57 | 58 | def parse_request_body(request_body: dict) -> List[ClassField]: 59 | if not request_body: 60 | return [] 61 | fields = [] 62 | content: dict = request_body['content'] 63 | required: bool = request_body.get('required', False) 64 | 65 | for content_type, schema in content.items(): 66 | if check_content_type(content_type, ['application/json', 'application/x-www-form-urlencoded']): 67 | schema = schema['schema'] 68 | schema_type = YAMLToPydanticType[schema.get('type')] 69 | if schema.get('type') == 'object': 70 | required_properties: List[str] = schema.get('required', []) 71 | properties: dict = schema.get('properties', {}) 72 | for name, prop in properties.items(): 73 | field_type: str = YAMLToPydanticType[prop['type']] 74 | field_required: bool = name in required_properties 75 | 
fields.append(ClassField(field_name=name, field_type=field_type, required=field_required)) 76 | else: 77 | fields.append(ClassField(field_name='data', field_type=schema_type, required=required)) 78 | return fields 79 | 80 | def parse_response_body(response_body: dict) -> List[ClassField]: 81 | if not response_body: 82 | return [] 83 | fields = [] 84 | content: dict = response_body['content'] 85 | for content_type, schema in content.items(): 86 | if content_type == 'application/json': 87 | schema = schema['schema'] 88 | schema_type = schema.get('type') 89 | if schema_type == 'object': 90 | required_properties: List[str] = schema.get('required', []) 91 | properties: dict = schema.get('properties', {}) 92 | for name, prop in properties.items(): 93 | field_type: str = YAMLToPydanticType[prop['type']] 94 | is_required: bool = name in required_properties 95 | fields.append(ClassField(field_name=name, field_type=field_type, required=is_required)) 96 | elif schema_type == 'array': 97 | item_type = schema['items'].get('type', 'unknown') 98 | fields.append(ClassField(field_name='data', field_type=f'List[{YAMLToPydanticType[item_type]}]', required=True)) 99 | else: 100 | fields.append(ClassField(field_name='data', field_type=YAMLToPydanticType[schema_type], required=True)) 101 | return fields 102 | 103 | def generate_types(endpoint: dict) -> Tuple[ast.ClassDef]: 104 | # Extract endpoint details 105 | request_path: str = endpoint['path'] 106 | request_name: str = endpoint['name'] 107 | request_method: str = endpoint['method'] 108 | request_parameters: dict = endpoint['parameters'] 109 | request_body: dict = endpoint['request_body'] 110 | responses: dict = endpoint['responses'] 111 | 112 | 113 | # Generate Pydantic class for request parameters 114 | request_parameters_class = None 115 | if request_parameters: 116 | request_parameters_fields = [] 117 | for param in request_parameters: 118 | field_type = YAMLToPydanticType[param['schema']['type']] 119 | field_name = param['name'] 
120 | required = param.get('required', False) 121 | request_parameters_fields.append(ClassField(field_name=field_name, field_type=field_type, required=required)) 122 | if len(request_parameters_fields) > 0: 123 | request_parameters_class = generate_class_def(f'{path_to_class_name(request_name)}RequestParameters', request_parameters_fields) 124 | 125 | # Generate Pydantic class for request body 126 | request_body_class = None 127 | if request_body: 128 | request_body_fields = parse_request_body(request_body) 129 | if len(request_body_fields) > 0: 130 | request_body_class = generate_class_def(f'{path_to_class_name(request_name)}RequestBody', request_body_fields) 131 | 132 | # Generate Pydantic classes for responses 133 | successful_response = responses.get('200') 134 | response_class = None 135 | if successful_response: 136 | response_fields = parse_response_body(successful_response) 137 | if len(response_fields) > 0: 138 | response_class = generate_class_def(f'{path_to_class_name(request_name)}Response', response_fields) 139 | 140 | 141 | return (request_parameters_class, request_body_class, response_class) -------------------------------------------------------------------------------- /web2sdk/swagger2sdk/main.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import ast 3 | import astor 4 | import yaml 5 | from pydantic import BaseModel, ConfigDict, Field, ValidationError 6 | from abc import ABC, abstractmethod 7 | from typing import List, Optional, Dict, Any, Tuple, Type, Union, Callable 8 | from enum import Enum 9 | from web2sdk.swagger2sdk.generate_function import generate_function_for_endpoint 10 | from web2sdk.swagger2sdk.generate_types import generate_types, generate_class_def, ClassField 11 | from web2sdk.swagger2sdk.utils import AuthType, HTTPMethod 12 | 13 | swagger_path = '/Users/jasonfan/Documents/code/web2sdk/web2sdk/specs.yml' 14 | 15 | def load_yaml(file_path): 16 | with open(file_path, 'r') as 
def generate_sdk_class(sdk_name: str, auth_type: AuthType) -> ast.ClassDef:
  """Build the AST for the top-level SDK class.

  The class always carries a 'hostname' field; basic auth adds
  username/password and bearer auth adds a token field.
  """
  # SDK should accept different arguments depending on the auth type.
  # (FIX: removed unused local `auth_arguments`.)
  fields = [
    ClassField(field_name='hostname', field_type='str', required=True)
  ]
  # NOTE(review): auth_type is annotated AuthType but compared against the enum
  # *values*, so callers appear to pass plain strings — confirm before tightening.
  if auth_type == AuthType.BASIC.value:
    fields.extend([ClassField(field_name='username', field_type='str', required=True), ClassField(field_name='password', field_type='str', required=True)])
  elif auth_type == AuthType.BEARER.value:
    fields.extend([ClassField(field_name='token', field_type='str', required=True)])

  class_def = generate_class_def(sdk_name, fields)

  return class_def

def save_class_to_file(module: ast.Module, file_path: str) -> None:
  """Render an ast.Module to source with astor and write it to file_path."""
  code = astor.to_source(module)
  with open(file_path, 'w') as file:
    file.write(code)

def generate_imports() -> List[ast.Import]:
  """Return the import statements emitted at the top of every generated SDK."""
  imports = [
    ast.Import(names=[ast.alias(name='json', asname=None)]),
    ast.Import(names=[ast.alias(name='http.client', asname=None)]),
    ast.ImportFrom(module='urllib.parse', names=[ast.alias(name='urlparse', asname=None)], level=0),
    ast.ImportFrom(module='pydantic', names=[ast.alias(name='BaseModel', asname=None)], level=0),
    ast.ImportFrom(module='typing', names=[
      ast.alias(name='Optional', asname=None),
      ast.alias(name='Dict', asname=None),
      ast.alias(name='List', asname=None),
      ast.alias(name='Any', asname=None)], level=0),
  ]
  return imports

def construct_sdk(swagger_path: str,
                  sdk_name: str,
                  output_path: str,
                  base_url: str = None,
                  auth_type: AuthType = AuthType.NONE,
                  progress_callback: Callable[[float], None] = None) -> None:
  """Generate a Python SDK from an OpenAPI spec and write it to disk.

  Raises ValueError when no base URL is available from either the spec's
  `servers` entry or the `base_url` argument.
  """
  swagger = load_yaml(swagger_path)
  base_url = swagger.get('servers', [{}])[0].get('url') if not base_url else base_url
  if not base_url:
    raise ValueError('Base URL is required, but was not provided in the OpenAPI spec or as an argument.')

  paths = swagger.get('paths', {})
  imports = generate_imports()
  class_def = generate_sdk_class(sdk_name, auth_type)
  types: List[ast.ClassDef] = []

  # Iterate through each path and method. Generate functions to call each endpoint,
  # and types to validate request/response bodies.
  for index, (path, methods) in enumerate(paths.items()):
    for method, details in methods.items():
      endpoint = {
        'path': path,
        'method': method,
        'name': f"{method.lower()}{path.replace('/', '_').replace('{', '').replace('}', '')}",
        'parameters': details.get('parameters', None),
        'request_body': details.get('requestBody', None),
        'responses': details.get('responses', None)
      }
      _types = generate_types(endpoint)
      _function = generate_function_for_endpoint(endpoint, base_url, auth_type, _types)
      class_def.body.append(_function)
      types.extend([t for t in _types if t is not None])
    if progress_callback:
      progress_callback(float(index+1) / len(paths))

  # Combine the imports, the SDK class, and generated types into a single module
  body = imports + types + [class_def]
  class_module = ast.Module(body=body, type_ignores=[])
  save_class_to_file(class_module, f'{output_path}/{sdk_name}.py')


# ---------------- web2sdk/swagger2sdk/utils.py ----------------
import re
from pydantic import BaseModel, ConfigDict, Field, ValidationError
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any, Tuple, Type, Union
from enum import Enum

class AuthType(Enum):
  """Authentication schemes supported by the generated SDKs."""
  BASIC = 'basic'
  BEARER = 'bearer'
  NONE = 'none'

class HTTPMethod(Enum):
  """HTTP verbs recognized by the generator."""
  GET = 'GET'
  POST = 'POST'
  PUT = 'PUT'
  PATCH = 'PATCH'
  DELETE = 'DELETE'
YAMLToPydanticType = { 20 | 'string': 'str', 21 | 'number': 'float', 22 | 'integer': 'int', 23 | 'boolean': 'bool', 24 | 'array': 'List', 25 | 'object': 'Dict', 26 | 'unknown': 'Any' 27 | } 28 | 29 | def check_content_type(input_string: str, patterns: List[str]): 30 | regex_pattern = "|".join(re.escape(pattern) for pattern in patterns) 31 | if re.search(regex_pattern, input_string): 32 | return True 33 | return False 34 | 35 | def snake_to_pascal(snake_str: str) -> str: 36 | components = snake_str.split('_') 37 | return ''.join(x.capitalize() for x in components) 38 | 39 | def dash_to_snake(dash_str: str) -> str: 40 | return dash_str.replace('-', '_') 41 | 42 | def strip_special_chars(input_string: str) -> str: 43 | return re.sub(r'[^\w_]+', '', input_string) -------------------------------------------------------------------------------- /web2sdk/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/tests/__init__.py -------------------------------------------------------------------------------- /web2sdk/web2swagger/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/web2swagger/.DS_Store -------------------------------------------------------------------------------- /web2sdk/web2swagger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonwcfan/web2sdk/10e0633ed25056304addef8e43cfd5668f121050/web2sdk/web2swagger/__init__.py -------------------------------------------------------------------------------- /web2sdk/web2swagger/har_capture_reader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from base64 import 
# -*- coding: utf-8 -*-
import os
from base64 import b64decode
from typing import Iterator, Union


# a heuristic to determine if a file is a har archive
def har_archive_heuristic(file_path: str) -> int:
    """Score how likely *file_path* is a HAR archive (higher = more likely)."""
    val = 0
    # if has the har extension
    if file_path.endswith(".har"):
        val += 25
    # read the first 2048 bytes
    with open(file_path, "rb") as f:
        data = f.read(2048)
        # if file contains only ascii characters after removing EOL characters
        if (
            data.decode("utf-8", "ignore")
            .replace("\r", "")
            .replace("\n", "")
            .isprintable()
            is True
        ):
            val += 25
        # sign of a JSON file
        if data[0:1] == b"{":
            val += 23
        # sign of Chrome OR Firefox export
        if b'"WebInspector"' in data or b'"Firefox"' in data:
            val += 15
        if b'"entries"' in data:
            val += 15
        if b'"version"' in data:
            val += 15
    return val


class HarFlowWrapper:
    """Adapts one HAR 'entry' dict to the common capture-reader interface."""

    def __init__(self, flow: dict):
        self.flow = flow

    def get_url(self):
        return self.flow["request"]["url"]

    def get_matching_url(self, prefix) -> Union[str, None]:
        """Get the requests URL if the prefix matches the URL, None otherwise."""
        if self.flow["request"]["url"].startswith(prefix):
            return self.flow["request"]["url"]
        return None

    def get_method(self):
        return self.flow["request"]["method"]

    def get_request_headers(self):
        """Return request headers as {name: [values...]}."""
        headers = {}
        for kv in self.flow["request"]["headers"]:
            k = kv["name"]
            v = kv["value"]
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        # FIX: the dict was built but never returned (method always yielded None).
        return headers

    def get_request_body(self):
        if (
            "request" in self.flow
            and "postData" in self.flow["request"]
            and "text" in self.flow["request"]["postData"]
        ):
            return self.flow["request"]["postData"]["text"]
        return None

    def get_response_status_code(self):
        return self.flow["response"]["status"]

    def get_response_reason(self):
        return self.flow["response"]["statusText"]

    def get_response_headers(self):
        """Return response headers as {name: [values...]}."""
        headers = {}
        for kv in self.flow["response"]["headers"]:
            k = kv["name"]
            v = kv["value"]
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        return headers

    def get_response_body(self):
        """Return the response text, base64-decoding it when HAR says so.

        Returns None when the body is absent or cannot be decoded.
        """
        if (
            "response" in self.flow
            and "content" in self.flow["response"]
            and "text" in self.flow["response"]["content"]
        ):
            try:
                if (
                    "encoding" in self.flow["response"]["content"]
                    and self.flow["response"]["content"]["encoding"] == "base64"
                ):
                    return b64decode(self.flow["response"]["content"]["text"]).decode()
            except UnicodeDecodeError:
                return None
            return self.flow["response"]["content"]["text"]
        return None


class HarCaptureReader:
    """Streams HAR entries from a file, reporting read progress."""

    def __init__(self, file_path: str, progress_callback=None):
        self.file_path = file_path
        self.progress_callback = progress_callback

    def captured_requests(self) -> Iterator[HarFlowWrapper]:
        # FIX: json_stream is imported lazily so importing this module does not
        # require the dependency to be installed.
        import json_stream

        har_file_size = os.path.getsize(self.file_path)
        with open(self.file_path, "r", encoding="utf-8") as f:
            data = json_stream.load(f)
            for entry in data["log"]["entries"].persistent():
                if self.progress_callback:
                    self.progress_callback(f.tell() / har_file_size)
                yield HarFlowWrapper(entry)

    def name(self):
        return "har"
Sequence, Union 12 | 13 | import msgpack 14 | import ruamel.yaml 15 | from mitmproxy.exceptions import FlowReadException 16 | 17 | from web2sdk import console_util 18 | from web2sdk.web2swagger import swagger_util 19 | from web2sdk.web2swagger.har_capture_reader import HarCaptureReader, har_archive_heuristic 20 | from web2sdk.web2swagger.mitmproxy_capture_reader import ( 21 | MitmproxyCaptureReader, 22 | mitmproxy_dump_file_huristic, 23 | ) 24 | 25 | 26 | def path_to_regex(path): 27 | # replace the path template with a regex 28 | path = re.escape(path) 29 | path = path.replace(r"\{", "(?P<") 30 | path = path.replace(r"\}", ">[^/]+)") 31 | path = path.replace(r"\*", ".*") 32 | return "^" + path + "$" 33 | 34 | 35 | def strip_query_string(path): 36 | # remove the query string from the path 37 | return path.split("?")[0] 38 | 39 | 40 | def set_key_if_not_exists(dict, key, value): 41 | if key not in dict: 42 | dict[key] = value 43 | 44 | 45 | def progress_callback(progress): 46 | console_util.print_progress_bar(progress, "Generating OpenAPI Schema...") 47 | 48 | 49 | def detect_input_format(file_path): 50 | har_score = har_archive_heuristic(file_path) 51 | mitmproxy_score = mitmproxy_dump_file_huristic(file_path) 52 | if "MITMPROXY2SWAGGER_DEBUG" in os.environ: 53 | print("har score: " + str(har_score)) 54 | print("mitmproxy score: " + str(mitmproxy_score)) 55 | if har_score > mitmproxy_score: 56 | return HarCaptureReader(file_path, progress_callback) 57 | return MitmproxyCaptureReader(file_path, progress_callback) 58 | 59 | def main(sdk_name: str, override_args: Optional[Sequence[str]] = None): 60 | parser = argparse.ArgumentParser( 61 | description="Converts a mitmproxy dump file or HAR to a swagger schema." 
def main(sdk_name: str = "", override_args: Optional[Sequence[str]] = None):
    """CLI entry point: convert a mitmproxy/HAR capture into an OpenAPI YAML file.

    sdk_name is appended to the generated spec title. FIX: it now defaults to ""
    so the `__main__` guard's bare main() call no longer raises TypeError;
    existing positional callers are unaffected.
    """
    parser = argparse.ArgumentParser(
        description="Converts a mitmproxy dump file or HAR to a swagger schema."
    )
    parser.add_argument(
        "-i",
        "--input",
        help="The input mitmproxy dump file or HAR dump file (from DevTools)",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--output",
        help="The output swagger schema file (yaml). If it exists, new endpoints will be added",
        required=True,
    )
    parser.add_argument("-p", "--api-prefix", help="The api prefix", required=True)
    parser.add_argument(
        "-e",
        "--examples",
        action="store_true",
        help="Include examples in the schema. This might expose sensitive information.",
    )
    parser.add_argument(
        "-hd",
        "--headers",
        action="store_true",
        help="Include headers in the schema. This might expose sensitive information.",
    )
    parser.add_argument(
        "-f",
        "--format",
        choices=["flow", "har"],
        help="Override the input file format auto-detection.",
    )
    parser.add_argument(
        "-r",
        "--param-regex",
        default="[0-9]+",
        help="Regex to match parameters in the API paths. Path segments that match this regex will be turned into parameter placeholders.",
    )
    parser.add_argument(
        "-s",
        "--suppress-params",
        action="store_true",
        help="Do not include API paths that have the original parameter values, only the ones with placeholders.",
    )
    args = parser.parse_args(override_args)
    try:
        args.param_regex = re.compile("^" + args.param_regex + "$")
    except re.error as e:
        print(
            f"{console_util.ANSI_RED}Invalid path parameter regex: {e}{console_util.ANSI_RESET}"
        )
        sys.exit(1)

    yaml = ruamel.yaml.YAML()

    capture_reader: Union[MitmproxyCaptureReader, HarCaptureReader]
    # NOTE(review): "mitmproxy" can never be reached since argparse restricts
    # choices to ["flow", "har"] — confirm whether it should be added to choices.
    if args.format == "flow" or args.format == "mitmproxy":
        capture_reader = MitmproxyCaptureReader(args.input, progress_callback)
    elif args.format == "har":
        capture_reader = HarCaptureReader(args.input, progress_callback)
    else:
        capture_reader = detect_input_format(args.input)

    swagger = None

    # try loading the existing swagger file
    try:
        base_dir = os.getcwd()
        relative_path = args.output
        abs_path = os.path.join(base_dir, relative_path)
        with open(abs_path, "r") as f:
            swagger = yaml.load(f)
    except FileNotFoundError:
        print("No existing OpenAPI file found. Creating new one.")
    if swagger is None:
        swagger = ruamel.yaml.comments.CommentedMap(
            {
                "openapi": "3.0.0",
                "info": {
                    # FIX: the key was "title " (trailing space), producing an
                    # invalid OpenAPI Info object.
                    "title": args.input + sdk_name,
                    "version": "1.0.0",
                },
            }
        )
    # strip the trailing slash from the api prefix
    args.api_prefix = args.api_prefix.rstrip("/")

    if "servers" not in swagger or swagger["servers"] is None:
        swagger["servers"] = []

    # add the server if it doesn't exist
    if not any(server["url"] == args.api_prefix for server in swagger["servers"]):
        swagger["servers"].append(
            {"url": args.api_prefix, "description": "The default server"}
        )

    if "paths" not in swagger or swagger["paths"] is None:
        swagger["paths"] = {}

    # Add the component/securitySchemes section if it doesn't exist
    if "components" not in swagger or swagger["components"] is None:
        swagger["components"] = {}

    # add existing path templates
    path_templates = []
    for path in swagger["paths"]:
        path_templates.append(path)

    path_template_regexes = [re.compile(path_to_regex(path)) for path in path_templates]

    try:
        for req in capture_reader.captured_requests():
            # strip the api prefix from the url
            url = req.get_matching_url(args.api_prefix)

            if url is None:
                continue
            method = req.get_method().lower()
            path = strip_query_string(url).removeprefix(args.api_prefix)
            status = req.get_response_status_code()

            # check if the path matches any of the path templates, and save the index
            path_template_index = None
            for i, path_template_regex in enumerate(path_template_regexes):
                if path_template_regex.match(path):
                    path_template_index = i
                    break
            if path_template_index is None:
                path_template_to_set = path
            else:
                path_template_to_set = path_templates[path_template_index]

            set_key_if_not_exists(swagger["paths"], path_template_to_set, {})

            set_key_if_not_exists(
                swagger["paths"][path_template_to_set],
                method,
                {
                    "summary": swagger_util.path_template_to_endpoint_name(
                        method, path_template_to_set
                    ),
                    "responses": {},
                },
            )

            params = swagger_util.url_to_params(url, path_template_to_set)
            # NOTE(review): when --headers is set, header parameters win the
            # set-if-absent race and path/query params are never recorded for
            # that method — confirm whether the two lists should be merged.
            if args.headers:
                headers_request = swagger_util.request_to_headers(
                    req.get_request_headers()
                )
                if headers_request is not None and len(headers_request) > 0:
                    set_key_if_not_exists(
                        swagger["paths"][path_template_to_set][method],
                        "parameters",
                        headers_request,
                    )
            if params is not None and len(params) > 0:
                set_key_if_not_exists(
                    swagger["paths"][path_template_to_set][method], "parameters", params
                )

            if method not in ["get", "head"]:
                body = req.get_request_body()
                if body is not None:
                    body_val = None
                    content_type = None
                    # try to parse the body as json
                    try:
                        body_val = json.loads(req.get_request_body())
                        content_type = "application/json"
                    except UnicodeDecodeError:
                        pass
                    except json.decoder.JSONDecodeError:
                        pass

                    # try to parse the body as msgpack, if it's not json
                    if body_val is None:
                        try:
                            body_val = msgpack.loads(req.get_request_body())
                            content_type = "application/msgpack"
                        except Exception:
                            pass

                    if content_type is None:
                        # try to parse the body as form data
                        try:
                            parsed_form: Any = dict(
                                urllib.parse.parse_qsl(
                                    body, encoding="utf-8", keep_blank_values=True
                                )
                            )
                            body_val = {}
                            did_find_anything = False
                            for key, value in parsed_form.items():
                                did_find_anything = True
                                # FIX: parse_qsl yields str pairs for HAR (str)
                                # bodies and bytes pairs for mitmproxy (bytes)
                                # bodies; unconditionally calling .decode()
                                # raised AttributeError for str input.
                                if isinstance(key, bytes):
                                    key = key.decode("utf-8")
                                if isinstance(value, bytes):
                                    value = value.decode("utf-8")
                                body_val[key] = value
                            if did_find_anything:
                                content_type = "application/x-www-form-urlencoded"
                            else:
                                body_val = None
                        except UnicodeDecodeError:
                            pass

                    if body_val is not None:
                        content_to_set = {
                            "content": {
                                content_type: {
                                    "schema": swagger_util.value_to_schema(body_val)
                                }
                            }
                        }
                        if args.examples:
                            content_to_set["content"][content_type][
                                "example"
                            ] = swagger_util.limit_example_size(body_val)
                        set_key_if_not_exists(
                            swagger["paths"][path_template_to_set][method],
                            "requestBody",
                            content_to_set,
                        )

            response_body = req.get_response_body()
            if response_body is not None:
                # try parsing the response as json
                try:
                    response_parsed = json.loads(response_body)
                    response_content_type = "application/json"
                except UnicodeDecodeError:
                    response_parsed = None
                except json.decoder.JSONDecodeError:
                    response_parsed = None

                if response_parsed is None:
                    # try parsing the response as msgpack, if it's not json
                    try:
                        response_parsed = msgpack.loads(response_body)
                        response_content_type = "application/msgpack"
                    except Exception:
                        response_parsed = None

                if response_parsed is None:
                    # fall back to treating the response as text
                    if type(response_body) is str:
                        response_parsed = response_body
                    else:
                        response_parsed = response_body.decode("utf-8", "ignore")
                    response_content_type = req.get_response_headers().get("content-type")
                    if type(response_content_type) is list:
                        response_content_type = response_content_type[0]
                    elif response_content_type is None:
                        response_content_type = "text/plain"

                if response_parsed is not None:
                    resp_data_to_set = {
                        "description": req.get_response_reason(),
                        "content": {
                            response_content_type: {
                                "schema": swagger_util.value_to_schema(response_parsed)
                            }
                        },
                    }
                    if args.examples:
                        resp_data_to_set["content"][response_content_type][
                            "example"
                        ] = swagger_util.limit_example_size(response_parsed)
                    if args.headers:
                        resp_data_to_set["headers"] = swagger_util.response_to_headers(
                            req.get_response_headers()
                        )

                    set_key_if_not_exists(
                        swagger["paths"][path_template_to_set][method]["responses"],
                        str(status),
                        resp_data_to_set,
                    )

            if (
                "responses" in swagger["paths"][path_template_to_set][method]
                and len(swagger["paths"][path_template_to_set][method]["responses"])
                == 0
            ):
                # add a default response if there were no responses detected,
                # this is for compliance with the OpenAPI spec
                # (FIX: removed a dead content_type computation that was never used)
                swagger["paths"][path_template_to_set][method]["responses"]["200"] = {
                    "description": "OK",
                    "content": {},
                }

    except FlowReadException as e:
        print(f"Flow file corrupted: {e}")
        traceback.print_exception(*sys.exc_info())
        print(
            f"{console_util.ANSI_RED}Failed to parse the input file as '{capture_reader.name()}'. "
        )
        if not args.format:
            # FIX: "format as incorrectly" -> "format was incorrectly"
            print(
                f"It might happen that the input format was incorrectly detected. Please try using '--format flow' or '--format har' to specify the input format.{console_util.ANSI_RESET}"
            )
        sys.exit(1)
    except ValueError as e:
        print(f"ValueError: {e}")
        # print stack trace
        traceback.print_exception(*sys.exc_info())
        print(
            f"{console_util.ANSI_RED}Failed to parse the input file as '{capture_reader.name()}'. "
        )
        if not args.format:
            # FIX: "format as incorrectly" -> "format was incorrectly"
            print(
                f"It might happen that the input format was incorrectly detected. Please try using '--format flow' or '--format har' to specify the input format.{console_util.ANSI_RESET}"
            )
        sys.exit(1)

    # save the swagger file
    with open(args.output, "w") as f:
        yaml.dump(swagger, f)
    print(" Done!")


if __name__ == "__main__":
    main()


# ---------------- web2sdk/web2swagger/mitmproxy_capture_reader.py ----------------
# -*- coding: utf-8 -*-
import os
import typing
from typing import Iterator
from urllib.parse import urlparse

from mitmproxy import http
from mitmproxy import io as iom
from mitmproxy.exceptions import FlowReadException


def mitmproxy_dump_file_huristic(file_path: str) -> int:
    """Score how likely *file_path* is a mitmproxy dump (higher = more likely).

    NOTE(review): 'huristic' typo is part of the public name and is imported
    elsewhere — renaming would be a breaking change.
    """
    val = 0
    if "flow" in file_path:
        val += 1
    if "mitmproxy" in file_path:
        val += 1
    # read the first 2048 bytes
    with open(file_path, "rb") as f:
        data = f.read(2048)
        # if file contains non-ascii characters after removing EOL characters
        if (
            data.decode("utf-8", "ignore")
            .replace("\r", "")
            .replace("\n", "")
            .isprintable()
            is False
        ):
            val += 50
        # if first character of the byte array is a digit
        if data[0:1].decode("utf-8", "ignore").isdigit() is True:
            val += 5
        # if it contains the word status_code
        if b"status_code" in data:
            val += 5
        if b"regular" in data:
            val += 10
    return val


class MitmproxyFlowWrapper:
    """Adapts a mitmproxy HTTPFlow to the common capture-reader interface."""

    def __init__(self, flow: http.HTTPFlow):
        self.flow = flow

    def get_url(self) -> str:
        return self.flow.request.url

    def get_matching_url(self, prefix) -> typing.Union[str, None]:
        """Get the requests URL if the prefix matches the URL, None otherwise.

        This takes into account a quirk of mitmproxy where it sometimes
        puts the raw IP address in the URL instead of the hostname. Then
        the hostname is in the Host header.
        """
        if self.flow.request.url.startswith(prefix):
            return self.flow.request.url
        # All the stuff where the real hostname could be
        replacement_hostnames = [
            self.flow.request.headers.get("Host", ""),
            self.flow.request.host_header,
            self.flow.request.host,
        ]
        for replacement_hostname in replacement_hostnames:
            if replacement_hostname is not None and replacement_hostname != "":
                fixed_url = (
                    urlparse(self.flow.request.url)
                    ._replace(netloc=replacement_hostname)
                    .geturl()
                )
                if fixed_url.startswith(prefix):
                    return fixed_url
        return None

    def get_method(self) -> str:
        return self.flow.request.method

    def get_request_headers(self) -> dict[str, typing.List[str]]:
        """Return request headers as {name: [values...]}."""
        headers: dict[str, typing.List[str]] = {}
        for k, v in self.flow.request.headers.items(multi=True):
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        return headers

    def get_request_body(self):
        # Bytes, unlike the HAR reader which yields str — callers handle both.
        return self.flow.request.content

    def get_response_status_code(self):
        return self.flow.response.status_code

    def get_response_reason(self):
        return self.flow.response.reason

    def get_response_headers(self):
        """Return response headers as {name: [values...]}."""
        headers = {}
        for k, v in self.flow.response.headers.items(multi=True):
            # create list on key if it does not exist
            headers[k] = headers.get(k, [])
            headers[k].append(v)
        return headers

    def get_response_body(self):
        return self.flow.response.content
class MitmproxyCaptureReader:
    """Streams captured HTTP flows from a mitmproxy dump file."""

    def __init__(self, file_path, progress_callback=None):
        self.file_path = file_path
        self.progress_callback = progress_callback

    def captured_requests(self) -> "Iterator[MitmproxyFlowWrapper]":
        # FIX: string annotation so the annotation is resolved lazily.
        with open(self.file_path, "rb") as logfile:
            logfile_size = os.path.getsize(self.file_path)
            freader = iom.FlowReader(logfile)
            try:
                for f in freader.stream():
                    if self.progress_callback:
                        self.progress_callback(logfile.tell() / logfile_size)
                    if isinstance(f, http.HTTPFlow):
                        # Flows without a response carry nothing usable; warn and skip.
                        if f.response is None:
                            print(
                                "[warn] flow without response: {}".format(f.request.url)
                            )
                            continue
                        yield MitmproxyFlowWrapper(f)
            except FlowReadException as e:
                print(f"Flow file corrupted: {e}")

    def name(self):
        return "flow"


# ---------------- web2sdk/web2swagger/swagger_util.py ----------------
# -*- coding: utf-8 -*-
import urllib
import urllib.parse  # FIX: explicit submodule import; bare `import urllib` does not bind urllib.parse
import uuid
from typing import Any, List

VERBS = [
    "add",
    "create",
    "delete",
    "get",
    "attach",
    "detach",
    "update",
    "push",
    "extendedcreate",
    "activate",
]


# generate a name for the endpoint from the path template, e.g.
# POST /api/v1/things/{id}/create -> "POST things create by id"
# (FIX: previous example comment didn't match the actual output order)
def path_template_to_endpoint_name(method, path_template):
    path_template = path_template.strip("/")
    segments = path_template.split("/")
    # pull the parameters out into a separate array
    params = []
    for idx, segment in enumerate(segments):
        if segment.startswith("{") and segment.endswith("}"):
            params.append(segment)
            segments[idx] = "{}"
    # remove them from the segments
    segments = [segment for segment in segments if segment != "{}"]
    # reverse the segments
    segments.reverse()
    name_parts = []
    # walk backwards, collecting trailing verbs plus one noun segment
    for segment in segments:
        if segment in VERBS:
            # prepend to the name_parts
            name_parts.insert(0, segment.lower())
        else:
            name_parts.insert(0, segment.lower())
            break
    # only the first parameter contributes a "by <name>" suffix
    for param in params:
        name_parts.append("by " + param.replace("{", "").replace("}", ""))
        break
    return method.upper() + " " + " ".join(name_parts)
# when given an url and its path template, generates the parameters section of the request
def url_to_params(url, path_template):
    """Build OpenAPI parameter objects for path placeholders and query keys.

    NOTE(review): the path-segment index is taken from the template but looked
    up in the (possibly longer) URL's segments, so the number/string guess can
    read the wrong segment when the URL still carries a prefix — confirm with
    callers before changing.
    """
    template_segments = path_template.strip("/").split("/")
    url_segments = url.split("?")[0].strip("/").split("/")
    params = []
    for idx, segment in enumerate(template_segments):
        if not (segment.startswith("{") and segment.endswith("}")):
            continue
        params.append(
            {
                "name": segment.replace("{", "").replace("}", ""),
                "in": "path",
                "required": True,
                "schema": {
                    "type": "number" if url_segments[idx].isdigit() else "string"
                },
            }
        )
    query_string = urllib.parse.urlparse(url).query
    if query_string:
        for key, values in urllib.parse.parse_qs(query_string).items():
            params.append(
                {
                    "name": key,
                    "in": "query",
                    "required": False,
                    "schema": {
                        "type": "number" if values[0].isdigit() else "string"
                    },
                }
            )
    return params


def request_to_headers(headers: dict[str, List[Any]], add_example: bool = False):
    """When given an url and its path template, generates the parameters section of the
    request."""
    params = []
    for key in (headers or {}):
        entry = {
            "name": key,
            "in": "header",
            "required": False,
            "schema": {"type": "number" if headers[key][0].isdigit() else "string"},
        }
        if add_example:
            entry["example"] = headers[key][0]
        params.append(entry)
    return params


def response_to_headers(headers):
    """Map response headers to OpenAPI header objects keyed by header name."""
    if not headers:
        return {}
    return {
        key: {
            "description": values[0],
            "schema": {"type": "number" if values[0].isdigit() else "string"},
        }
        for key, values in headers.items()
    }


def value_to_schema(value):
    """Infer an OpenAPI schema fragment from a sample Python value.

    Check order matters: bool is tested with isinstance only after the exact
    int/float type test, so booleans map to "boolean" rather than "number".
    Unhandled types fall through and yield None.
    """
    if type(value) is int or type(value) is float:
        return {"type": "number"}
    if isinstance(value, bool):
        return {"type": "boolean"}
    if isinstance(value, str):
        return {"type": "string"}
    if isinstance(value, list):
        # infer the item schema from the first element only
        item_schema = value_to_schema(value[0]) if value else {}
        return {"type": "array", "items": item_schema}
    if isinstance(value, dict):
        # dicts whose keys are all numeric or all UUIDs are treated as maps
        keys_are_generic = all(is_numeric_string(key) for key in value) or all(
            is_uuid(key) for key in value
        )
        if keys_are_generic and value:
            first_value = next(iter(value.values()))
            return {
                "type": "object",
                "additionalProperties": value_to_schema(first_value),
            }
        return {
            "type": "object",
            "properties": {key: value_to_schema(val) for key, val in value.items()},
        }
    if value is None:
        return {"type": "object", "nullable": True}


def is_uuid(key):
    """True when *key* is a string parseable as a UUID."""
    return isinstance(key, str) and is_valid_uuid(key)


def is_numeric_string(key):
    """True when *key* is a string composed of numeric characters."""
    return isinstance(key, str) and key.isnumeric()


def is_valid_uuid(val):
    """True when str(val) parses as a UUID, False otherwise."""
    try:
        uuid.UUID(str(val))
    except ValueError:
        return False
    return True


MAX_EXAMPLE_ARRAY_ELEMENTS = 10
MAX_EXAMPLE_OBJECT_PROPERTIES = 150


# recursively scan an example value and limit the number of elements and properties
def limit_example_size(example):
    if isinstance(example, list):
        new_list = []
        for element in example:
            if len(new_list) >= MAX_EXAMPLE_ARRAY_ELEMENTS:
                break
176 | new_list.append(limit_example_size(element)) 177 | return new_list 178 | elif isinstance(example, dict): 179 | new_dict = {} 180 | for key in example: 181 | if len(new_dict) >= MAX_EXAMPLE_OBJECT_PROPERTIES: 182 | break 183 | new_dict[key] = limit_example_size(example[key]) 184 | return new_dict 185 | else: 186 | return example 187 | --------------------------------------------------------------------------------