├── .github ├── dependabot.yml └── workflows │ ├── build.yml │ ├── lint.yml │ └── unit-tests.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── ada_url ├── __init__.py ├── ada.cpp ├── ada.h ├── ada_adapter.py ├── ada_build.py ├── ada_c.h └── py.typed ├── benchmark.py ├── docs ├── Makefile ├── conf.py ├── index.rst ├── make.bat └── requirements.txt ├── pyproject.toml ├── requirements ├── base.txt └── development.txt ├── setup.py ├── tests ├── __init__.py ├── files │ └── urltestdata.json └── test_ada_url.py ├── update-wpt.sh ├── update_ada.py └── update_sdist.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/requirements" 5 | schedule: 6 | interval: "monthly" 7 | - package-ecosystem: "pip" 8 | directory: "/docs" 9 | schedule: 10 | interval: "monthly" 11 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | paths-ignore: 7 | - '**.rst' 8 | - 'docs/**' 9 | push: 10 | branches: 11 | - main 12 | paths-ignore: 13 | - '**.rst' 14 | - 'docs/**' 15 | release: 16 | types: 17 | - published 18 | 19 | env: 20 | PIP_BREAK_SYSTEM_PACKAGES: 1 21 | 22 | concurrency: 23 | group: ${{ github.workflow }}-${{ github.ref }} 24 | cancel-in-progress: true 25 | 26 | jobs: 27 | build_wheels: 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | os: ["ubuntu-latest", "macos-latest", "windows-latest"] 32 | 33 | runs-on: ${{ matrix.os }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | - run: make requirements 37 | - name: Set up QEMU # Needed to build aarch64 wheels 38 | if: runner.os == 'Linux' 39 | uses: docker/setup-qemu-action@v3 40 | with: 41 | platforms: all 
42 | - uses: pypa/cibuildwheel@v2.22.0 43 | - uses: actions/upload-artifact@v4 44 | with: 45 | name: artifact-wheel-${{ matrix.os }} 46 | path: wheelhouse/*.whl 47 | 48 | make_sdist: 49 | runs-on: "ubuntu-latest" 50 | steps: 51 | - uses: actions/checkout@v4 52 | - run: | 53 | make requirements 54 | python -m build --no-isolation --sdist 55 | - uses: actions/upload-artifact@v4 56 | with: 57 | name: artifact-sdist 58 | path: dist/*.tar.gz 59 | 60 | merge: 61 | runs-on: ubuntu-latest 62 | needs: [build_wheels, make_sdist] 63 | steps: 64 | - name: Merge Artifacts 65 | uses: actions/upload-artifact/merge@v4 66 | with: 67 | name: artifact 68 | pattern: artifact-* 69 | delete-merged: true 70 | 71 | upload_all: 72 | needs: merge 73 | runs-on: "ubuntu-latest" 74 | environment: release 75 | if: github.event_name == 'release' && github.event.action == 'published' 76 | permissions: 77 | id-token: write 78 | steps: 79 | - uses: actions/download-artifact@v4 80 | with: 81 | name: artifact 82 | path: dist 83 | - uses: pypa/gh-action-pypi-publish@release/v1 84 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | paths-ignore: 7 | - '**.rst' 8 | - 'docs/**' 9 | push: 10 | branches: 11 | - main 12 | paths-ignore: 13 | - '**.rst' 14 | - 'docs/**' 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | lint: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: "3.9" 29 | - name: Install dependencies 30 | run: | 31 | make requirements 32 | - name: Static analysis 33 | run: | 34 | make check 35 | 
-------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | paths-ignore: 7 | - '**.rst' 8 | - 'docs/**' 9 | push: 10 | branches: 11 | - main 12 | paths-ignore: 13 | - '**.rst' 14 | - 'docs/**' 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref }} 18 | cancel-in-progress: true 19 | 20 | 21 | env: 22 | MACOSX_DEPLOYMENT_TARGET: "10.15" 23 | 24 | jobs: 25 | build_test: 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | os: ["ubuntu-latest", "macos-latest"] 30 | 31 | runs-on: ${{ matrix.os }} 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | - name: Set up Python 3.9 36 | uses: actions/setup-python@v5 37 | with: 38 | python-version: "3.9" 39 | - name: Install dependencies 40 | run: | 41 | make requirements 42 | - name: Build packages 43 | run: | 44 | make package 45 | - name: Run tests 46 | run: | 47 | pip install -e . 48 | make coverage 49 | - name: Check docs 50 | run: | 51 | make docs 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.o 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | 141 | # MacOS stuff 142 | .DS_Store 143 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | jobs: 8 | pre_install: 9 | - make requirements 10 | - make package 11 | - python -m pip install -e . 
12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements.txt 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ada 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ada_url/*.c 2 | include ada_url/*.cpp 3 | include ada_url/*.h 4 | exclude ada_url/*.o 5 | exclude ada_url/_ada_wrapper.* 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: requirements 2 | requirements: 3 | python -m pip install -U -r requirements/development.txt ${req_args} 4 | 5 | .PHONY: check 6 | check: 7 | ruff check . 8 | 9 | .PHONY: format 10 | format: 11 | ruff format . 12 | 13 | .PHONY: coverage 14 | coverage: 15 | coverage run -m unittest 16 | coverage report --show-missing --fail-under 99 17 | 18 | .PHONY: test 19 | test: 20 | python -m unittest -v ${tests} 21 | 22 | .PHONY: docs 23 | docs: 24 | sphinx-build -W -b html docs docs/_build/html 25 | 26 | .PHONY: clean 27 | clean: 28 | rm -rf _build/ 29 | rm -rf _dist/ 30 | rm -rf ada_url.egg-info/ 31 | $(RM) ada_url/_ada_wrapper.abi3.so 32 | $(RM) ada_url/ada.o 33 | 34 | .PHONY: package 35 | package: 36 | python -m build --no-isolation 37 | twine check dist/* 38 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ada-url 2 | ======== 3 | 4 | The `urllib.parse` module in Python does not follow the legacy RFC 3986 standard nor 5 | does it follow the newer WHATWG URL specification. It is also relatively slow. 6 | 7 | This is ``ada_url``, a fast standard-compliant Python library for working with URLs based on the ``Ada`` URL 8 | parser. 9 | 10 | * `Documentation `__ 11 | * `Development `__ 12 | * `Ada `__ 13 | 14 | Installation 15 | ------------ 16 | 17 | Install from `PyPI `__: 18 | 19 | ..
code-block:: sh 20 | 21 | pip install ada_url 22 | 23 | Usage examples 24 | -------------- 25 | 26 | Parsing URLs 27 | ^^^^^^^^^^^^ 28 | 29 | The ``URL`` class is intended to match the one described in the 30 | `WHATWG URL spec `_:. 31 | 32 | .. code-block:: python 33 | 34 | >>> from ada_url import URL 35 | >>> urlobj = URL('https://example.org/path/../file.txt') 36 | >>> urlobj.href 37 | 'https://example.org/path/file.txt' 38 | 39 | The ``parse_url`` function returns a dictionary of all URL elements: 40 | 41 | .. code-block:: python 42 | 43 | >>> from ada_url import parse_url 44 | >>> parse_url('https://user:pass@example.org:80/api?q=1#2') 45 | { 46 | 'href': 'https://user:pass@example.org:80/api?q=1#2', 47 | 'username': 'user', 48 | 'password': 'pass', 49 | 'protocol': 'https:', 50 | 'port': '80', 51 | 'hostname': 'example.org', 52 | 'host': 'example.org:80', 53 | 'pathname': '/api', 54 | 'search': '?q=1', 55 | 'hash': '#2', 56 | 'origin': 'https://example.org:80', 57 | 'host_type': , 58 | 'scheme_type': 59 | } 60 | 61 | Altering URLs 62 | ^^^^^^^^^^^^^ 63 | 64 | Replacing URL components with the ``URL`` class: 65 | 66 | .. code-block:: python 67 | 68 | >>> from ada_url import URL 69 | >>> urlobj = URL('https://example.org/path/../file.txt') 70 | >>> urlobj.host = 'example.com' 71 | >>> urlobj.href 72 | 'https://example.com/file.txt' 73 | 74 | Replacing URL components with the ``replace_url`` function: 75 | 76 | >>> from ada_url import replace_url 77 | >>> replace_url('https://example.org/path/../file.txt', host='example.com') 78 | 'https://example.com/file.txt' 79 | 80 | Search parameters 81 | ^^^^^^^^^^^^^^^^^ 82 | 83 | The ``URLSearchParams`` class is intended to match the one described in the 84 | `WHATWG URL spec `__. 85 | 86 | .. 
code-block:: python 87 | 88 | >>> from ada_url import URLSearchParams 89 | >>> obj = URLSearchParams('key1=value1&key2=value2') 90 | >>> list(obj.items()) 91 | [('key1', 'value1'), ('key2', 'value2')] 92 | 93 | The ``parse_search_params`` function returns a dictionary of search keys mapped to 94 | value lists: 95 | 96 | .. code-block:: python 97 | 98 | >>> from ada_url import parse_search_params 99 | >>> parse_search_params('key1=value1&key2=value2') 100 | {'key1': ['value1'], 'key2': ['value2']} 101 | 102 | Internationalized domain names 103 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 104 | 105 | The ``idna`` class can encode and decode IDNs: 106 | 107 | .. code-block:: python 108 | 109 | >>> from ada_url import idna 110 | >>> idna.encode('Bücher.example') 111 | b'xn--bcher-kva.example' 112 | >>> idna.decode(b'xn--bcher-kva.example') 113 | 'bücher.example' 114 | 115 | WHATWG URL compliance 116 | --------------------- 117 | 118 | This library is compliant with the WHATWG URL spec. This means, among other things, 119 | that it properly encodes IDNs and resolves paths: 120 | 121 | .. code-block:: python 122 | 123 | >>> from ada_url import URL 124 | >>> parsed_url = URL('https://www.GOoglé.com/./path/../path2/') 125 | >>> parsed_url.hostname 126 | 'www.xn--googl-fsa.com' 127 | >>> parsed_url.pathname 128 | '/path2/' 129 | 130 | Contrast that with the Python standard library's ``urlib.parse`` module: 131 | 132 | .. code-block:: python 133 | 134 | >>> from urllib.parse import urlparse 135 | >>> parsed_url = urlparse('https://www.GOoglé.com/./path/../path2/') 136 | >>> parsed_url.hostname 137 | 'www.googlé.com' 138 | >>> parsed_url.path 139 | '/./path/../path2/' 140 | 141 | Alternative Python bindings 142 | --------------------------- 143 | 144 | This package uses `CFFI `__ to call 145 | the ``Ada`` library's functions, which has a performance cost. 
146 | The alternative `can_ada `__ (Canadian Ada) 147 | package uses `pybind11 `__ to generate a 148 | Python extension module, which is more performant. 149 | -------------------------------------------------------------------------------- /ada_url/__init__.py: -------------------------------------------------------------------------------- 1 | from ada_url.ada_adapter import ( 2 | URL, 3 | HostType, 4 | SchemeType, 5 | URLSearchParams, 6 | check_url, 7 | idna, 8 | idna_to_ascii, 9 | idna_to_unicode, 10 | join_url, 11 | normalize_url, 12 | parse_search_params, 13 | parse_url, 14 | replace_search_params, 15 | replace_url, 16 | ) 17 | 18 | __all__ = [ 19 | 'HostType', 20 | 'SchemeType', 21 | 'URL', 22 | 'URLSearchParams', 23 | 'check_url', 24 | 'idna', 25 | 'idna_to_ascii', 26 | 'idna_to_unicode', 27 | 'join_url', 28 | 'normalize_url', 29 | 'parse_search_params', 30 | 'parse_url', 31 | 'replace_search_params', 32 | 'replace_url', 33 | ] 34 | -------------------------------------------------------------------------------- /ada_url/ada_adapter.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from typing import ( 3 | Dict, 4 | Final, 5 | Iterable, 6 | Iterator, 7 | List, 8 | Optional, 9 | Tuple, 10 | TypedDict, 11 | Union, 12 | ) 13 | 14 | from ada_url._ada_wrapper import ffi, lib 15 | 16 | URL_ATTRIBUTES = ( 17 | 'href', 18 | 'username', 19 | 'password', 20 | 'protocol', 21 | 'port', 22 | 'hostname', 23 | 'host', 24 | 'pathname', 25 | 'search', 26 | 'hash', 27 | ) 28 | PARSE_ATTRIBUTES = URL_ATTRIBUTES + ('origin', 'host_type', 'scheme_type') 29 | 30 | # These are the attributes that have corresponding ada_get_* functions 31 | GET_ATTRIBUTES = frozenset(PARSE_ATTRIBUTES) 32 | 33 | # These are the attributes that have corresponding ada_set_* functons 34 | SET_ATTRIBUTES = frozenset(URL_ATTRIBUTES) 35 | 36 | # These are the attributes that can be cleared with one of the ada_clear_* functions 37 | 
CLEAR_ATTRIBUTES = frozenset(('port', 'hash', 'search')) 38 | 39 | # These are the attributes that must be cleared by setting the empty string 40 | UNSET_ATTRIBUTES = frozenset(('username', 'password', 'pathname')) 41 | 42 | _marker = object() 43 | 44 | 45 | class HostType(IntEnum): 46 | """ 47 | Enum for URL host types: 48 | 49 | * ``DEFAULT`` hosts like ``https://example.org`` are ``0``. 50 | * ``IPV4`` hosts like ``https://192.0.2.1`` are ``1``. 51 | * ``IPV6`` hosts like ``https://[2001:db8::]`` are ``2``. 52 | 53 | .. code-block:: python 54 | 55 | >>> from ada_url import HostType 56 | >>> HostType.DEFAULT 57 | 58 | 59 | """ 60 | 61 | DEFAULT = 0 62 | IPV4 = 1 63 | IPV6 = 2 64 | 65 | 66 | class SchemeType(IntEnum): 67 | """ 68 | Enum for `URL scheme types `__. 69 | 70 | * ``HTTP`` URLs like ``http://example.org`` are ``0``. 71 | * ``NOT_SPECIAL`` URLs like ``git://example.og`` are ``1``. 72 | * ``HTTPS`` URLs like ``https://example.org`` are ``2``. 73 | * ``WS`` URLs like ``ws://example.org`` are ``3``. 74 | * ``FTP`` URLs like ``ftp://example.org`` are ``4``. 75 | * ``WSS`` URLs like ``wss://example.org`` are ``5``. 76 | * ``FILE`` URLs like ``file://example`` are ``6``. 77 | 78 | .. 
code-block:: python 79 | 80 | >>> from ada_url import SchemeType 81 | >>> SchemeType.HTTPS 82 | 83 | 84 | """ 85 | 86 | HTTP = 0 87 | NOT_SPECIAL = 1 88 | HTTPS = 2 89 | WS = 3 90 | FTP = 4 91 | WSS = 5 92 | FILE = 6 93 | 94 | 95 | class ParseAttributes(TypedDict, total=False): 96 | href: str 97 | username: str 98 | password: str 99 | protocol: str 100 | port: str 101 | hostname: str 102 | host: str 103 | pathname: str 104 | search: str 105 | hash: str 106 | origin: str 107 | host_type: HostType 108 | scheme_type: SchemeType 109 | 110 | 111 | def _get_obj(constructor, destructor, *args): 112 | obj = constructor(*args) 113 | 114 | return ffi.gc(obj, destructor) 115 | 116 | 117 | def _get_str(x): 118 | ret = ffi.string(x.data, x.length).decode() if x.length else '' 119 | return ret 120 | 121 | 122 | class URL: 123 | """ 124 | Parses a *url* (with an optional *base*) according to the 125 | WHATWG URL parsing standard. 126 | 127 | .. code-block:: python 128 | 129 | >>> from ada_url import URL 130 | >>> old_url = 'https://example.org:443/file.txt?q=1' 131 | >>> urlobj = URL(old_url) 132 | >>> urlobj.host 133 | 'example.org' 134 | >>> urlobj.host = 'example.com' 135 | >>> new_url = urlobj.href 136 | >>> new_url 137 | 'https://example.com:443/file.txt?q=1' 138 | 139 | You can read and write the following attributes: 140 | 141 | * ``href`` 142 | * ``protocol`` 143 | * ``username`` 144 | * ``password`` 145 | * ``host`` 146 | * ``hostname`` 147 | * ``port`` 148 | * ``pathname`` 149 | * ``search`` 150 | * ``hash`` 151 | 152 | You can additionally read these attributes: 153 | 154 | * ``origin``, which will be a ``str`` 155 | * ``host_type``, which will be a :class:`HostType` enum 156 | * ``scheme_type``, which will be a :class:`SchemeType` enum 157 | 158 | The class also exposes a static method that checks whether the input 159 | *url* (and optional *base*) can be parsed: 160 | 161 | .. 
code-block:: python 162 | 163 | >>> url = 'file_2.txt' 164 | >>> base = 'https://example.org:443/file_1.txt' 165 | >>> URL.can_parse(url, base) 166 | True 167 | 168 | See the `WHATWG docs `__ for 169 | more details on the URL class. 170 | 171 | """ 172 | 173 | href: str 174 | username: str 175 | password: str 176 | protocol: str 177 | port: str 178 | hostname: str 179 | host: str 180 | pathname: str 181 | search: str 182 | hash: str 183 | origin: Final[str] 184 | host_type: Final[HostType] 185 | scheme_type: Final[SchemeType] 186 | 187 | def __init__(self, url: str, base: Optional[str] = None): 188 | url_bytes = url.encode() 189 | 190 | if base is None: 191 | self.urlobj = _get_obj( 192 | lib.ada_parse, lib.ada_free, url_bytes, len(url_bytes) 193 | ) 194 | else: 195 | base_bytes = base.encode() 196 | self.urlobj = _get_obj( 197 | lib.ada_parse_with_base, 198 | lib.ada_free, 199 | url_bytes, 200 | len(url_bytes), 201 | base_bytes, 202 | len(base_bytes), 203 | ) 204 | 205 | if not lib.ada_is_valid(self.urlobj): 206 | raise ValueError('Invalid input') 207 | 208 | def __copy__(self): 209 | cls = self.__class__ 210 | ret = cls.__new__(cls) 211 | ret.__dict__.update(self.__dict__) 212 | super(URL, ret).__init__() 213 | return ret 214 | 215 | def __deepcopy__(self, memo): 216 | cls = self.__class__ 217 | ret = cls.__new__(cls) 218 | super(URL, ret).__init__() 219 | ret.urlobj = lib.ada_copy(self.urlobj) 220 | 221 | return ret 222 | 223 | def __delattr__(self, attr: str): 224 | if attr in CLEAR_ATTRIBUTES: 225 | clear_func = getattr(lib, f'ada_clear_{attr}') 226 | clear_func(self.urlobj) 227 | elif attr in UNSET_ATTRIBUTES: 228 | set_func = getattr(lib, f'ada_set_{attr}') 229 | set_func(self.urlobj, b'', 0) 230 | else: 231 | raise AttributeError(f'cannot remove {attr}') 232 | 233 | def __dir__(self) -> List[str]: 234 | return super().__dir__() + list(PARSE_ATTRIBUTES) 235 | 236 | def __getattr__(self, attr: str) -> Union[str, HostType, SchemeType]: 237 | if attr in 
GET_ATTRIBUTES: 238 | get_func = getattr(lib, f'ada_get_{attr}') 239 | data = get_func(self.urlobj) 240 | if attr == 'origin': 241 | ret = _get_str(data) 242 | lib.ada_free_owned_string(data) 243 | elif attr == 'host_type': 244 | ret = HostType(data) 245 | elif attr == 'scheme_type': 246 | ret = SchemeType(data) 247 | else: 248 | ret = _get_str(data) 249 | 250 | return ret 251 | 252 | raise AttributeError(f'no attribute named {attr}') 253 | 254 | def __setattr__(self, attr: str, value: str) -> None: 255 | if attr in SET_ATTRIBUTES: 256 | try: 257 | value_bytes = value.encode() 258 | except Exception: 259 | raise ValueError(f'Invalid value for {attr}') from None 260 | 261 | set_func = getattr(lib, f'ada_set_{attr}') 262 | ret = set_func(self.urlobj, value_bytes, len(value_bytes)) 263 | if (ret is not None) and (not ret): 264 | raise ValueError(f'Invalid value for {attr}') from None 265 | 266 | return ret 267 | 268 | return super().__setattr__(attr, value) 269 | 270 | def __str__(self): 271 | return self.href 272 | 273 | def __repr__(self): 274 | return f'' 275 | 276 | @staticmethod 277 | def can_parse(url: str, base: Optional[str] = None) -> bool: 278 | try: 279 | url_bytes = url.encode() 280 | except Exception: 281 | return False 282 | 283 | if base is None: 284 | return lib.ada_can_parse(url_bytes, len(url_bytes)) 285 | 286 | try: 287 | base_bytes = base.encode() 288 | except Exception: 289 | return False 290 | 291 | return lib.ada_can_parse_with_base( 292 | url_bytes, len(url_bytes), base_bytes, len(base_bytes) 293 | ) 294 | 295 | 296 | class URLSearchParams: 297 | """ 298 | Parses the given *params* string according to the WHATWG URL parsing standard. 299 | 300 | The attribute and methods from the standard are implemented: 301 | 302 | .. 
code-block:: python 303 | 304 | >>> from ada_url import URLSearchParams 305 | >>> obj = URLSearchParams('key1=value1&key2=value2&key2=value3') 306 | >>> obj.size 307 | 3 308 | >>> obj.append('key2', 'value4') 309 | >>> str(obj) 310 | 'key1=value1&key2=value2&key2=value3&key2=value4' 311 | >>> obj.delete('key1') 312 | >>> str(obj) 313 | 'key2=value2&key2=value3&key2=value4' 314 | >>> obj.delete('key2', 'value2') 315 | >>> str(obj) 316 | 'key2=value3&key2=value4' 317 | >>> obj.get('key2') 318 | 'value3' 319 | >>> obj.get_all('key2') 320 | ['value3', 'value4'] 321 | >>> obj.has('key2') 322 | True 323 | >>> obj.has('key2', 'value5') 324 | False 325 | >>> obj.set('key1', 'value6') 326 | >>> str(obj) 327 | 'key2=value3&key2=value4&key1=value6' 328 | >>> obj.sort() 329 | >>> str(obj) 330 | 'key1=value6&key2=value3&key2=value4' 331 | 332 | Iterators for the ``keys``, ``values``, and ``items`` are also implemented: 333 | 334 | .. code-block:: python 335 | 336 | >>> obj = URLSearchParams('key1=value1&key2=value2&key2=value3') 337 | >>> list(obj.keys()) 338 | ['key1', 'key2', 'key2'] 339 | >>> list(obj.values()) 340 | ['value1', 'value2', 'value3'] 341 | >>> list(obj.items()) 342 | [('key1', 'value1'), ('key2', 'value2'), ('key2', 'value3')] 343 | 344 | See the `WHATWG docs `__ for 345 | more details on the URLSearchParams class. 
346 | 347 | """ 348 | 349 | def __init__(self, params: str): 350 | params_bytes = params.encode() 351 | self.paramsobj = _get_obj( 352 | lib.ada_parse_search_params, 353 | lib.ada_free_search_params, 354 | params_bytes, 355 | len(params_bytes), 356 | ) 357 | 358 | @property 359 | def size(self) -> int: 360 | return lib.ada_search_params_size(self.paramsobj) 361 | 362 | def __len__(self) -> int: 363 | return self.size 364 | 365 | def append(self, key: str, value: str): 366 | key_bytes = key.encode() 367 | value_bytes = value.encode() 368 | lib.ada_search_params_append( 369 | self.paramsobj, 370 | key_bytes, 371 | len(key_bytes), 372 | value_bytes, 373 | len(value_bytes), 374 | ) 375 | 376 | def delete(self, key: str, value: Optional[str] = None): 377 | key_bytes = key.encode() 378 | if value is None: 379 | lib.ada_search_params_remove(self.paramsobj, key_bytes, len(key_bytes)) 380 | else: 381 | value_bytes = value.encode() 382 | lib.ada_search_params_remove_value( 383 | self.paramsobj, 384 | key_bytes, 385 | len(key_bytes), 386 | value_bytes, 387 | len(value_bytes), 388 | ) 389 | 390 | def get(self, key: str) -> str: 391 | key_bytes = key.encode() 392 | item = lib.ada_search_params_get(self.paramsobj, key_bytes, len(key_bytes)) 393 | return _get_str(item) 394 | 395 | def get_all(self, key: str) -> List[str]: 396 | key_bytes = key.encode() 397 | items = lib.ada_search_params_get_all(self.paramsobj, key_bytes, len(key_bytes)) 398 | count = lib.ada_strings_size(items) 399 | 400 | ret = [] 401 | for i in range(count): 402 | value = _get_str(lib.ada_strings_get(items, i)) 403 | ret.append(value) 404 | 405 | return ret 406 | 407 | def has(self, key: str, value: Optional[str] = None) -> bool: 408 | key_bytes = key.encode() 409 | if value is None: 410 | return lib.ada_search_params_has(self.paramsobj, key_bytes, len(key_bytes)) 411 | else: 412 | value_bytes = value.encode() 413 | return lib.ada_search_params_has_value( 414 | self.paramsobj, 415 | key_bytes, 416 | 
len(key_bytes), 417 | value_bytes, 418 | len(value_bytes), 419 | ) 420 | 421 | def set(self, key: str, value: str): 422 | key_bytes = key.encode() 423 | value_bytes = value.encode() 424 | lib.ada_search_params_set( 425 | self.paramsobj, 426 | key_bytes, 427 | len(key_bytes), 428 | value_bytes, 429 | len(value_bytes), 430 | ) 431 | 432 | def sort(self): 433 | lib.ada_search_params_sort(self.paramsobj) 434 | 435 | def keys(self) -> Iterator[str]: 436 | iterator = _get_obj( 437 | lib.ada_search_params_get_keys, 438 | lib.ada_free_search_params_keys_iter, 439 | self.paramsobj, 440 | ) 441 | while lib.ada_search_params_keys_iter_has_next(iterator): 442 | item = lib.ada_search_params_keys_iter_next(iterator) 443 | yield _get_str(item) 444 | 445 | def values(self) -> Iterator[str]: 446 | iterator = _get_obj( 447 | lib.ada_search_params_get_values, 448 | lib.ada_free_search_params_values_iter, 449 | self.paramsobj, 450 | ) 451 | while lib.ada_search_params_values_iter_has_next(iterator): 452 | item = lib.ada_search_params_values_iter_next(iterator) 453 | yield _get_str(item) 454 | 455 | def items(self) -> Iterator[Tuple[str, str]]: 456 | iterator = _get_obj( 457 | lib.ada_search_params_get_entries, 458 | lib.ada_free_search_params_entries_iter, 459 | self.paramsobj, 460 | ) 461 | while lib.ada_search_params_entries_iter_has_next(iterator): 462 | item = lib.ada_search_params_entries_iter_next(iterator) 463 | yield _get_str(item.key), _get_str(item.value) 464 | 465 | def __repr__(self): 466 | return f'' 467 | 468 | def __str__(self) -> str: 469 | result = _get_obj( 470 | lib.ada_search_params_to_string, lib.ada_free_owned_string, self.paramsobj 471 | ) 472 | return _get_str(result) 473 | 474 | 475 | def check_url(s: str) -> bool: 476 | """ 477 | Returns ``True`` if *s* represents a valid URL, and ``False`` otherwise. 478 | 479 | .. 
code-block:: python 480 | 481 | >>> from ada_url import check_url 482 | >>> check_url('bogus') 483 | False 484 | >>> check_url('http://a/b/c/d;p?q') 485 | True 486 | 487 | """ 488 | try: 489 | s_bytes = s.encode() 490 | except Exception: 491 | return False 492 | 493 | urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes)) 494 | return lib.ada_is_valid(urlobj) 495 | 496 | 497 | def join_url(base_url: str, s: str) -> str: 498 | """ 499 | Return the URL that results from joining *base_url* to *s*. 500 | Raises ``ValueError`` if no valid URL can be constructed. 501 | 502 | .. code-block:: python 503 | 504 | >>> from ada_url import join_url 505 | >>> base_url = 'http://a/b/c/d;p?q' 506 | >>> join_url(base_url, '../g') 507 | 'http://a/b/g' 508 | 509 | """ 510 | try: 511 | base_bytes = base_url.encode() 512 | s_bytes = s.encode() 513 | except Exception: 514 | raise ValueError('Invalid URL') from None 515 | 516 | urlobj = _get_obj( 517 | lib.ada_parse_with_base, 518 | lib.ada_free, 519 | s_bytes, 520 | len(s_bytes), 521 | base_bytes, 522 | len(base_bytes), 523 | ) 524 | if not lib.ada_is_valid(urlobj): 525 | raise ValueError('Invalid URL') from None 526 | 527 | return _get_str(lib.ada_get_href(urlobj)) 528 | 529 | 530 | def normalize_url(s: str) -> str: 531 | """ 532 | Returns a "normalized" URL with all ``'..'`` and ``'/'`` characters resolved. 533 | 534 | .. code-block:: python 535 | 536 | >>> from ada_url import normalize_url 537 | >>> normalize_url('http://a/b/c/../g') 538 | 'http://a/b/g' 539 | 540 | """ 541 | return parse_url(s, attributes=('href',))['href'] 542 | 543 | 544 | def parse_url(s: str, attributes: Iterable[str] = PARSE_ATTRIBUTES) -> ParseAttributes: 545 | """ 546 | Returns a dictionary with the parsed components of the URL represented by *s*. 547 | 548 | .. 
code-block:: python 549 | 550 | >>> from ada_url import parse_url 551 | >>> url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 552 | >>> parse_url(url) 553 | { 554 | 'href': 'https://user_1:password_1@example.org:8080/api?q=1#frag', 555 | 'username': 'user_1', 556 | 'password': 'password_1', 557 | 'protocol': 'https:', 558 | 'host': 'example.org:8080', 559 | 'port': '8080', 560 | 'hostname': 'example.org', 561 | 'pathname': '/api', 562 | 'search': '?q=1', 563 | 'hash': '#frag' 564 | 'origin': 'https://example.org:8080', 565 | 'host_type': 0 566 | 'scheme_type': 2 567 | } 568 | 569 | The names of the dictionary keys correspond to the components of the "URL class" 570 | in the WHATWG URL spec. 571 | ``host_type`` is a :class:`HostType` enum. 572 | ``scheme_type`` is a :class:`SchemeType` enum. 573 | 574 | Pass in a sequence of *attributes* to limit which keys are returned. 575 | 576 | .. code-block:: python 577 | 578 | >>> from ada_url import parse_url 579 | >>> url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 580 | >>> parse_url(url, attributes=('protocol')) 581 | {'protocol': 'https:'} 582 | 583 | Unrecognized attributes are ignored. 
584 | 585 | """ 586 | try: 587 | s_bytes = s.encode() 588 | except Exception: 589 | raise ValueError('Invalid URL') from None 590 | 591 | ret = {} 592 | urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes)) 593 | if not lib.ada_is_valid(urlobj): 594 | raise ValueError('Invalid URL') from None 595 | 596 | for attr in attributes: 597 | get_func = getattr(lib, f'ada_get_{attr}') 598 | data = get_func(urlobj) 599 | if attr == 'origin': 600 | ret[attr] = _get_str(data) 601 | lib.ada_free_owned_string(data) 602 | elif attr == 'host_type': 603 | ret[attr] = HostType(data) 604 | elif attr == 'scheme_type': 605 | ret[attr] = SchemeType(data) 606 | else: 607 | ret[attr] = _get_str(data) 608 | 609 | return ret 610 | 611 | 612 | def replace_url(s: str, **kwargs: str) -> str: 613 | """ 614 | Start with the URL represented by *s*, replace the attributes given in the *kwargs* 615 | mapping, and return a normalized URL with the result. 616 | 617 | Provide an empty string to unset an attribute. 618 | 619 | .. code-block:: python 620 | 621 | >>> from ada_url import replace_url 622 | >>> base_url = 'https://user_1:password_1@example.org/resource' 623 | >>> replace_url(base_url, username='user_2', password='', protocol='http:') 624 | 'http://user_2@example.org/resource' 625 | 626 | Unrecognized attributes are ignored. ``href`` is replaced first if it is given. 627 | ``hostname`` is replaced before ``host`` if both are given. 628 | 629 | ``ValueError`` is raised if the input URL or one of the components is not valid. 630 | """ 631 | try: 632 | s_bytes = s.encode() 633 | except Exception: 634 | raise ValueError('Invalid URL') from None 635 | 636 | urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes)) 637 | if not lib.ada_is_valid(urlobj): 638 | raise ValueError('Invalid URL') from None 639 | 640 | # We process attributes in the order given by the documentation, e.g. 641 | # href before anything else. 
642 | for attr in URL_ATTRIBUTES: 643 | value = kwargs.get(attr, _marker) 644 | if value is _marker: 645 | continue 646 | 647 | try: 648 | value_bytes = value.encode() 649 | except Exception: 650 | raise ValueError(f'Invalid value for {attr}') from None 651 | 652 | if (not value_bytes) and (attr in CLEAR_ATTRIBUTES): 653 | clear_func = getattr(lib, f'ada_clear_{attr}') 654 | clear_func(urlobj) 655 | else: 656 | set_func = getattr(lib, f'ada_set_{attr}') 657 | set_result = set_func(urlobj, value_bytes, len(value_bytes)) 658 | if (set_result is not None) and (not set_result): 659 | raise ValueError(f'Invalid value for {attr}') from None 660 | 661 | return _get_str(lib.ada_get_href(urlobj)) 662 | 663 | 664 | def parse_search_params(s: str) -> Dict[str, List[str]]: 665 | """ 666 | Returns a dictionary representing the parsed URL Parameters specified by *s*. 667 | The returned dictionary maps each key to a list of values associated with it. 668 | 669 | .. code-block:: python 670 | 671 | >>> from ada_url import parse_search_params 672 | >>> parse_search_params('key1=value1&key1=value2&key2=value3') 673 | {'key1': ['value1', 'value2'], 'key2': ['value3']} 674 | 675 | """ 676 | ret = {} 677 | for key, value in URLSearchParams(s).items(): 678 | if key not in ret: 679 | ret[key] = [value] 680 | else: 681 | ret[key].append(value) 682 | 683 | return ret 684 | 685 | 686 | def replace_search_params(s: str, *args: Tuple[str, str]) -> str: 687 | """ 688 | Returns a string representing the URL parameters specified by *s*, modified by the 689 | ``(key, value)`` pairs passed in as *args*. 690 | 691 | .. code-block:: python 692 | 693 | >>> from ada_url import replace_search_params 694 | >>> replace_search_params( 695 | ... 'key1=value1&key1=value2', 696 | ... ('key1', 'value3'), 697 | ... ('key2', 'value4') 698 | ... 
) 699 | 'key1=value3&key2=value4' 700 | """ 701 | search_params = URLSearchParams(s) 702 | for key, value in args: 703 | search_params.delete(key) 704 | 705 | for key, value in args: 706 | search_params.append(key, value) 707 | 708 | return str(search_params) 709 | 710 | 711 | class idna: 712 | """Process international domains according to the UTS #46 standard. 713 | 714 | :func:`idna.encode` implements the UTS #46 ``ToASCII`` operation. 715 | Its output is a Python ``bytes`` object. 716 | It is also available as :func:`idna_to_ascii`. 717 | 718 | .. code-block:: python 719 | 720 | >>> from ada_url import idna 721 | >>> idna.encode('meßagefactory.ca') 722 | b'xn--meagefactory-m9a.ca' 723 | 724 | :func:`idna.decode` implements the UTS #46 ``ToUnicode`` operation. 725 | Its output is a Python ``str`` object. 726 | It is also available as :func:`idna_to_unicode`. 727 | 728 | .. code-block:: python 729 | 730 | >>> from ada_url import idna 731 | >>> idna.decode('xn--meagefactory-m9a.ca') 732 | 'meßagefactory.ca' 733 | 734 | Both functions accept either ``str`` or ``bytes`` objects as input.
735 | """ 736 | 737 | @staticmethod 738 | def decode(s: Union[str, bytes]) -> str: 739 | if isinstance(s, str): 740 | s = s.encode('ascii') 741 | 742 | data = _get_obj(lib.ada_idna_to_unicode, lib.ada_free_owned_string, s, len(s)) 743 | return _get_str(data) 744 | 745 | @staticmethod 746 | def encode(s: Union[str, bytes]) -> bytes: 747 | if isinstance(s, str): 748 | s = s.encode() 749 | 750 | val = _get_obj(lib.ada_idna_to_ascii, lib.ada_free_owned_string, s, len(s)) 751 | return ffi.string(val.data, val.length) if val.length else b'' 752 | 753 | 754 | idna_to_unicode = idna.decode 755 | 756 | idna_to_ascii = idna.encode 757 | -------------------------------------------------------------------------------- /ada_url/ada_build.py: -------------------------------------------------------------------------------- 1 | from cffi import FFI 2 | from os.path import dirname, join 3 | from setuptools.extension import Extension 4 | from sys import platform 5 | 6 | file_dir = dirname(__file__) 7 | 8 | compile_args = ['/std:c++20'] if platform == 'win32' else ['-std=c++20'] 9 | 10 | ada_obj = Extension( 11 | 'ada', 12 | define_macros=[('ADA_INCLUDE_URL_PATTERN', '0')], 13 | language="c++", 14 | sources=['ada_url/ada.cpp'], 15 | include_dirs=[file_dir], 16 | extra_compile_args=compile_args, 17 | ) 18 | 19 | libraries = ['stdc++'] if platform == 'linux' else [] 20 | 21 | ffi_builder = FFI() 22 | ffi_builder.set_source( 23 | 'ada_url._ada_wrapper', 24 | '# include "ada_c.h"', 25 | libraries=libraries, 26 | include_dirs=[file_dir], 27 | extra_objects=[ada_obj], 28 | ) 29 | 30 | cdef_lines = [] 31 | with open(join(file_dir, 'ada_c.h'), 'rt') as f: 32 | for line in f: 33 | if not line.startswith('#'): 34 | cdef_lines.append(line) 35 | ffi_builder.cdef(''.join(cdef_lines)) 36 | 37 | if __name__ == '__main__': 38 | ffi_builder.compile() 39 | -------------------------------------------------------------------------------- /ada_url/ada_c.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * @file ada_c.h 3 | * @brief Includes the C definitions for Ada. This is a C file, not C++. 4 | */ 5 | #ifndef ADA_C_H 6 | #define ADA_C_H 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | // This is a reference to ada::url_components::omitted 13 | // It represents "uint32_t(-1)" 14 | #define ada_url_omitted 0xffffffff 15 | 16 | // string that is owned by the ada_url instance 17 | typedef struct { 18 | const char* data; 19 | size_t length; 20 | } ada_string; 21 | 22 | // string that must be freed by the caller 23 | typedef struct { 24 | const char* data; 25 | size_t length; 26 | } ada_owned_string; 27 | 28 | typedef struct { 29 | uint32_t protocol_end; 30 | uint32_t username_end; 31 | uint32_t host_start; 32 | uint32_t host_end; 33 | uint32_t port; 34 | uint32_t pathname_start; 35 | uint32_t search_start; 36 | uint32_t hash_start; 37 | } ada_url_components; 38 | 39 | typedef void* ada_url; 40 | 41 | // input should be a null terminated C string (ASCII or UTF-8) 42 | // you must call ada_free on the returned pointer 43 | ada_url ada_parse(const char* input, size_t length); 44 | ada_url ada_parse_with_base(const char* input, size_t input_length, 45 | const char* base, size_t base_length); 46 | 47 | // input and base should be a null terminated C strings 48 | bool ada_can_parse(const char* input, size_t length); 49 | bool ada_can_parse_with_base(const char* input, size_t input_length, 50 | const char* base, size_t base_length); 51 | 52 | void ada_free(ada_url result); 53 | void ada_free_owned_string(ada_owned_string owned); 54 | ada_url ada_copy(ada_url input); 55 | 56 | bool ada_is_valid(ada_url result); 57 | 58 | // url_aggregator getters 59 | // if ada_is_valid(result)) is false, an empty string is returned 60 | ada_owned_string ada_get_origin(ada_url result); 61 | ada_string ada_get_href(ada_url result); 62 | ada_string ada_get_username(ada_url result); 63 | ada_string 
ada_get_password(ada_url result); 64 | ada_string ada_get_port(ada_url result); 65 | ada_string ada_get_hash(ada_url result); 66 | ada_string ada_get_host(ada_url result); 67 | ada_string ada_get_hostname(ada_url result); 68 | ada_string ada_get_pathname(ada_url result); 69 | ada_string ada_get_search(ada_url result); 70 | ada_string ada_get_protocol(ada_url result); 71 | uint8_t ada_get_host_type(ada_url result); 72 | uint8_t ada_get_scheme_type(ada_url result); 73 | 74 | // url_aggregator setters 75 | // if ada_is_valid(result)) is false, the setters have no effect 76 | // input should be a null terminated C string 77 | bool ada_set_href(ada_url result, const char* input, size_t length); 78 | bool ada_set_host(ada_url result, const char* input, size_t length); 79 | bool ada_set_hostname(ada_url result, const char* input, size_t length); 80 | bool ada_set_protocol(ada_url result, const char* input, size_t length); 81 | bool ada_set_username(ada_url result, const char* input, size_t length); 82 | bool ada_set_password(ada_url result, const char* input, size_t length); 83 | bool ada_set_port(ada_url result, const char* input, size_t length); 84 | bool ada_set_pathname(ada_url result, const char* input, size_t length); 85 | void ada_set_search(ada_url result, const char* input, size_t length); 86 | void ada_set_hash(ada_url result, const char* input, size_t length); 87 | 88 | // url_aggregator clear methods 89 | void ada_clear_port(ada_url result); 90 | void ada_clear_hash(ada_url result); 91 | void ada_clear_search(ada_url result); 92 | 93 | // url_aggregator functions 94 | // if ada_is_valid(result) is false, functions below will return false 95 | bool ada_has_credentials(ada_url result); 96 | bool ada_has_empty_hostname(ada_url result); 97 | bool ada_has_hostname(ada_url result); 98 | bool ada_has_non_empty_username(ada_url result); 99 | bool ada_has_non_empty_password(ada_url result); 100 | bool ada_has_port(ada_url result); 101 | bool ada_has_password(ada_url 
result); 102 | bool ada_has_hash(ada_url result); 103 | bool ada_has_search(ada_url result); 104 | 105 | // returns a pointer to the internal url_aggregator::url_components 106 | const ada_url_components* ada_get_components(ada_url result); 107 | 108 | // idna methods 109 | ada_owned_string ada_idna_to_unicode(const char* input, size_t length); 110 | ada_owned_string ada_idna_to_ascii(const char* input, size_t length); 111 | 112 | // url search params 113 | typedef void* ada_url_search_params; 114 | 115 | // Represents an std::vector 116 | typedef void* ada_strings; 117 | typedef void* ada_url_search_params_keys_iter; 118 | typedef void* ada_url_search_params_values_iter; 119 | 120 | typedef struct { 121 | ada_string key; 122 | ada_string value; 123 | } ada_string_pair; 124 | 125 | typedef void* ada_url_search_params_entries_iter; 126 | 127 | ada_url_search_params ada_parse_search_params(const char* input, size_t length); 128 | void ada_free_search_params(ada_url_search_params result); 129 | 130 | size_t ada_search_params_size(ada_url_search_params result); 131 | void ada_search_params_sort(ada_url_search_params result); 132 | ada_owned_string ada_search_params_to_string(ada_url_search_params result); 133 | 134 | void ada_search_params_append(ada_url_search_params result, const char* key, 135 | size_t key_length, const char* value, 136 | size_t value_length); 137 | void ada_search_params_set(ada_url_search_params result, const char* key, 138 | size_t key_length, const char* value, 139 | size_t value_length); 140 | void ada_search_params_remove(ada_url_search_params result, const char* key, 141 | size_t key_length); 142 | void ada_search_params_remove_value(ada_url_search_params result, 143 | const char* key, size_t key_length, 144 | const char* value, size_t value_length); 145 | bool ada_search_params_has(ada_url_search_params result, const char* key, 146 | size_t key_length); 147 | bool ada_search_params_has_value(ada_url_search_params result, const char* key, 148 
| size_t key_length, const char* value, 149 | size_t value_length); 150 | ada_string ada_search_params_get(ada_url_search_params result, const char* key, 151 | size_t key_length); 152 | ada_strings ada_search_params_get_all(ada_url_search_params result, 153 | const char* key, size_t key_length); 154 | void ada_search_params_reset(ada_url_search_params result, const char* input, 155 | size_t length); 156 | ada_url_search_params_keys_iter ada_search_params_get_keys( 157 | ada_url_search_params result); 158 | ada_url_search_params_values_iter ada_search_params_get_values( 159 | ada_url_search_params result); 160 | ada_url_search_params_entries_iter ada_search_params_get_entries( 161 | ada_url_search_params result); 162 | 163 | void ada_free_strings(ada_strings result); 164 | size_t ada_strings_size(ada_strings result); 165 | ada_string ada_strings_get(ada_strings result, size_t index); 166 | 167 | void ada_free_search_params_keys_iter(ada_url_search_params_keys_iter result); 168 | ada_string ada_search_params_keys_iter_next( 169 | ada_url_search_params_keys_iter result); 170 | bool ada_search_params_keys_iter_has_next( 171 | ada_url_search_params_keys_iter result); 172 | 173 | void ada_free_search_params_values_iter( 174 | ada_url_search_params_values_iter result); 175 | ada_string ada_search_params_values_iter_next( 176 | ada_url_search_params_values_iter result); 177 | bool ada_search_params_values_iter_has_next( 178 | ada_url_search_params_values_iter result); 179 | 180 | void ada_free_search_params_entries_iter( 181 | ada_url_search_params_entries_iter result); 182 | ada_string_pair ada_search_params_entries_iter_next( 183 | ada_url_search_params_entries_iter result); 184 | bool ada_search_params_entries_iter_has_next( 185 | ada_url_search_params_entries_iter result); 186 | 187 | #endif // ADA_C_H 188 | -------------------------------------------------------------------------------- /ada_url/py.typed: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-url/ada-python/0097d6319b1e4d17dafe90835bb926274590b65a/ada_url/py.typed -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | from json import load 3 | from urllib.parse import urlparse 4 | from time import perf_counter 5 | 6 | from ada_url import URL 7 | 8 | URL_TEST_DATA_PATH = join(dirname(__file__), 'tests/files/urltestdata.json') 9 | 10 | with open(URL_TEST_DATA_PATH, 'rb') as f: 11 | test_data = load(f) 12 | 13 | test_cases = [] 14 | for item in test_data: 15 | if isinstance(item, str) or item.get('failure', False): 16 | continue 17 | test_cases.append(item['href']) 18 | 19 | print('Function', 'msec', 'URLs/msec', sep='\t') 20 | for func_name, func in (('stdlib urlparse', urlparse), ('ada_url URL', URL)): 21 | start_time = perf_counter() 22 | for item in test_cases: 23 | func(item) 24 | duration = perf_counter() - start_time 25 | rate = len(test_cases) / duration 26 | print(func_name, f'{duration * 1000:0.2f}', f'{rate / 1000:0.2f}', sep='\t') 27 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | 5 | # Put the project package on the path 6 | parent_dir = os.path.abspath('..') 7 | sys.path.insert(0, parent_dir) 8 | 9 | # Copy README.rst so it can be included in index.rst 10 | build_dir = '_build' 11 | os.makedirs(build_dir, exist_ok=True) 12 | 13 | readme_src = os.path.join(parent_dir, 'README.rst') 14 | readme_dst = os.path.join(build_dir, 'README.pprst') 15 | shutil.copyfile(readme_src, readme_dst) 16 | 17 | project = 'ada-url/ada-python' 18 | copyright = '2023, Ada authors' 19 | author = 'Ada authors' 20 | 21 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 22 | autodoc_member_order = 'bysource' 23 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 24 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./_build/README.pprst 2 | 3 | Building from source 4 | ==================== 5 | 6 | You will need to have Python 3 development files installed. 7 | On macOS, you will have these if you installed Python with ``brew``. 8 | On Linux, you may need to install some packages (e.g., ``python3-dev`` and ``python3-venv``). 9 | 10 | You will also need a C++ toolchain. 11 | On macOS, Xcode will provide this for you. 12 | On Linux, you may need to install some more packages (e.g. ``build-essential``). 13 | 14 | Clone the git repository to a directory for development: 15 | 16 | ..
code-block:: sh 17 | 18 | git clone https://github.com/ada-url/ada-python.git ada_url_python 19 | cd ada_url_python 20 | 21 | Create a virtual environment to use for building: 22 | 23 | .. code-block:: sh 24 | 25 | python3 -m venv env 26 | source ./env/bin/activate 27 | 28 | After that, you're ready to build the package: 29 | 30 | .. code-block:: sh 31 | 32 | python -m pip install -r requirements/development.txt 33 | python -m build --no-isolation 34 | 35 | This will create a `.whl` file in the `dist` directory. You can install it in other 36 | virtual environments on the same machine. 37 | 38 | To run tests, first build a package. Then: 39 | 40 | .. code-block:: sh 41 | 42 | python -m pip install -e . 43 | python -m unittest 44 | 45 | Leave the virtual environment with the ``deactivate`` command. 46 | 47 | API Documentation 48 | ================= 49 | 50 | .. automodule:: ada_url 51 | 52 | .. autoclass:: URL(url, base=None) 53 | .. autoclass:: HostType() 54 | .. autoclass:: SchemeType() 55 | 56 | ---- 57 | 58 | .. autofunction:: check_url(s) 59 | .. autofunction:: join_url(base_url, s) 60 | .. autofunction:: normalize_url(s) 61 | .. autofunction:: parse_url(s, [attributes]) 62 | .. autofunction:: replace_url(s, **kwargs) 63 | 64 | ---- 65 | 66 | .. autoclass:: URLSearchParams(params) 67 | .. autofunction:: parse_search_params(s) 68 | .. autofunction:: replace_search_params(s, *args) 69 | 70 | ---- 71 | 72 | .. autoclass:: idna 73 | 74 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found.
Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-url/ada-python/0097d6319b1e4d17dafe90835bb926274590b65a/docs/requirements.txt -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["cffi>=1.17.1", "setuptools", "urllib3", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "ada-url" 7 | version = "1.23.0" 8 | authors = [ 9 | {name = "Bo Bayles", email = "bo@bbayles.com"}, 10 | ] 11 | description = 'URL parser and manipulator based on the WHAT WG URL standard' 12 | readme = "README.rst" 13 | requires-python = ">=3.9" 14 | license = {text = "Apache 2.0"} 15 | classifiers = [ 16 | "License :: OSI Approved :: Apache Software License", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3 :: Only", 19 | ] 20 | dependencies = [ 21 | "cffi", 22 | ] 23 | 24 | [project.urls] 25 | Homepage = "https://www.ada-url.com/" 26 | Documentation = "https://ada-url.readthedocs.io" 27 | Repository = "https://github.com/ada-url/ada-python" 28 | 29 | [tool.setuptools.packages.find] 30 | exclude = ["tests"] 31 | 32 | 
[tool.setuptools] 33 | include-package-data = true 34 | 35 | [tool.setuptools.package-data] 36 | ada_url = ["*.c", "*.h", "*.o"] 37 | 38 | [tool.ruff] 39 | line-length = 88 40 | target-version = "py39" 41 | exclude = [ 42 | ".git", 43 | ".ruff_cache", 44 | ] 45 | 46 | [tool.ruff.format] 47 | quote-style = "single" 48 | 49 | [tool.ruff.lint] 50 | select = ["E", "F"] 51 | ignore = ["E501"] 52 | 53 | [tool.coverage.run] 54 | include = [ 55 | "ada_url/**", 56 | ] 57 | 58 | [tool.cibuildwheel] 59 | build = [ 60 | "cp39-*", 61 | "cp310-*", 62 | "cp311-*", 63 | "cp312-*", 64 | "cp313-*", 65 | "pp39-*", 66 | "pp310-*", 67 | ] 68 | manylinux-x86_64-image = "manylinux_2_28" 69 | manylinux-aarch64-image = "manylinux_2_28" 70 | 71 | [tool.cibuildwheel.linux] 72 | archs = ["x86_64", "aarch64"] 73 | 74 | [tool.cibuildwheel.macos] 75 | archs = ["x86_64", "universal2", "arm64"] 76 | environment = { MACOSX_DEPLOYMENT_TARGET="10.15" } 77 | before-build = "make clean" 78 | 79 | [tool.cibuildwheel.windows] 80 | archs = ["AMD64"] 81 | 82 | # https://github.com/pypy/pypy/issues/5027 83 | [[tool.cibuildwheel.overrides]] 84 | select = "pp3{9,10}-win_amd64" 85 | environment = { SETUPTOOLS_USE_DISTUTILS="stdlib" } 86 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | # What we want 2 | cffi==1.17.1 3 | 4 | # What we need 5 | pycparser==2.22 6 | -------------------------------------------------------------------------------- /requirements/development.txt: -------------------------------------------------------------------------------- 1 | build 2 | coverage 3 | ruff 4 | setuptools 5 | Sphinx 6 | twine 7 | urllib3 8 | wheel 9 | 10 | -r base.txt 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 
| from setuptools.command.build_ext import build_ext as _build_ext 3 | from setuptools.extension import Extension 4 | 5 | 6 | class build_ext(_build_ext): 7 | def build_extension(self, ext): 8 | for i, extra in enumerate(ext.extra_objects): 9 | if isinstance(extra, Extension): 10 | sources = sorted(extra.sources) 11 | extra_args = extra.extra_compile_args or [] 12 | macros = extra.define_macros[:] 13 | for undef in extra.undef_macros: 14 | macros.append((undef,)) 15 | objects = self.compiler.compile( 16 | sources, 17 | output_dir=self.build_temp, 18 | macros=macros, 19 | include_dirs=extra.include_dirs, 20 | debug=self.debug, 21 | extra_postargs=extra_args, 22 | depends=extra.depends, 23 | ) 24 | ext.extra_objects[i] = objects[0] 25 | return super().build_extension(ext) 26 | 27 | setup( 28 | cmdclass={'build_ext': build_ext}, 29 | cffi_modules=[ 30 | './ada_url/ada_build.py:ffi_builder', 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-url/ada-python/0097d6319b1e4d17dafe90835bb926274590b65a/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_ada_url.py: -------------------------------------------------------------------------------- 1 | from copy import copy, deepcopy 2 | from json import load 3 | from os.path import dirname, join 4 | from unittest import TestCase 5 | 6 | from ada_url import ( 7 | HostType, 8 | SchemeType, 9 | URLSearchParams as SearchParams, 10 | URL, 11 | check_url, 12 | idna, 13 | idna_to_ascii, 14 | idna_to_unicode, 15 | join_url, 16 | normalize_url, 17 | replace_search_params, 18 | parse_search_params, 19 | parse_url, 20 | replace_url, 21 | ) 22 | from ada_url.ada_adapter import GET_ATTRIBUTES 23 | 24 | URL_TEST_DATA_PATH = join(dirname(__file__), 'files/urltestdata.json') 25 | 26 | 27 | class 
ADAURLTests(TestCase): 28 | def test_class_get(self): 29 | url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 30 | urlobj = URL(url) 31 | self.assertEqual( 32 | urlobj.href, 'https://user_1:password_1@example.org:8080/api?q=1#frag' 33 | ) 34 | self.assertEqual(urlobj.username, 'user_1') 35 | self.assertEqual(urlobj.password, 'password_1') 36 | self.assertEqual(urlobj.protocol, 'https:') 37 | self.assertEqual(urlobj.port, '8080') 38 | self.assertEqual(urlobj.hostname, 'example.org') 39 | self.assertEqual(urlobj.host, 'example.org:8080') 40 | self.assertEqual(urlobj.pathname, '/api') 41 | self.assertEqual(urlobj.search, '?q=1') 42 | self.assertEqual(urlobj.hash, '#frag') 43 | self.assertEqual(urlobj.origin, 'https://example.org:8080') 44 | 45 | with self.assertRaises(AttributeError): 46 | urlobj.bogus 47 | 48 | def test_class_host_type(self): 49 | # host_type should return an IntEnum, which can be compared to a Python int 50 | for url, expected in ( 51 | ('http://localhost:3000', HostType.DEFAULT), 52 | ('http://0.0.0.0', HostType.IPV4), 53 | ('http://[2001:db8:3333:4444:5555:6666:7777:8888]', HostType.IPV6), 54 | ): 55 | with self.subTest(url=url): 56 | urlobj = URL(url) 57 | self.assertEqual(urlobj.host_type, int(expected)) 58 | self.assertEqual(urlobj.host_type, expected) 59 | 60 | def test_class_scheme_type(self): 61 | # host_type should return an IntEnum, which can be compared to a Python int 62 | for url, expected in ( 63 | ('http://localhost', SchemeType.HTTP), 64 | ('git://localhost', SchemeType.NOT_SPECIAL), 65 | ('https://localhost', SchemeType.HTTPS), 66 | ('ws://localhost', SchemeType.WS), 67 | ('ftp://localhost', SchemeType.FTP), 68 | ('wss://localhost', SchemeType.WSS), 69 | ('file://localhost', SchemeType.FILE), 70 | ): 71 | with self.subTest(url=url): 72 | urlobj = URL(url) 73 | self.assertEqual(urlobj.scheme_type, int(expected)) 74 | self.assertEqual(urlobj.scheme_type, expected) 75 | 76 | def test_copy_vs_deepcopy(self): 77 | obj 
= URL('http://example.org:8080') 78 | copied_obj = copy(obj) 79 | deepcopied_obj = deepcopy(obj) 80 | 81 | obj.port = '8081' 82 | self.assertEqual(copied_obj.port, '8081') 83 | self.assertEqual(deepcopied_obj.port, '8080') 84 | 85 | deepcopied_obj.port = '8082' 86 | self.assertEqual(copied_obj.port, '8081') 87 | self.assertEqual(deepcopied_obj.port, '8082') 88 | 89 | def test_class_set(self): 90 | url = 'https://username:password@www.google.com:8080/' 91 | urlobj = URL(url) 92 | urlobj.href = 'https://www.yagiz.co' 93 | urlobj.hash = 'new-hash' 94 | urlobj.hostname = 'new-host' 95 | urlobj.host = 'changed-host:9090' 96 | urlobj.pathname = 'new-pathname' 97 | urlobj.search = 'new-search' 98 | urlobj.protocol = 'wss' 99 | actual = urlobj.href 100 | 101 | with self.assertRaises(ValueError): 102 | urlobj.hostname = 1 103 | 104 | with self.assertRaises(ValueError): 105 | urlobj.hostname = '127.0.0.0.0.1' 106 | 107 | expected = 'wss://changed-host:9090/new-pathname?new-search#new-hash' 108 | self.assertEqual(actual, expected) 109 | 110 | def test_class_delete(self): 111 | url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 112 | urlobj = URL(url) 113 | 114 | del urlobj.port 115 | self.assertEqual( 116 | urlobj.href, 'https://user_1:password_1@example.org/api?q=1#frag' 117 | ) 118 | 119 | del urlobj.hash 120 | self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/api?q=1') 121 | 122 | del urlobj.pathname 123 | self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/?q=1') 124 | 125 | del urlobj.search 126 | self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/') 127 | 128 | with self.assertRaises(AttributeError): 129 | del urlobj.href 130 | 131 | def test_unset(self): 132 | url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 133 | for attr, expected in ( 134 | ('username', 'https://:password_1@example.org:8080/api?q=1#frag'), 135 | ('password', 
'https://user_1@example.org:8080/api?q=1#frag'), 136 | ('port', 'https://user_1:password_1@example.org/api?q=1#frag'), 137 | ('pathname', 'https://user_1:password_1@example.org:8080/?q=1#frag'), 138 | ('search', 'https://user_1:password_1@example.org:8080/api#frag'), 139 | ('hash', 'https://user_1:password_1@example.org:8080/api?q=1'), 140 | ): 141 | with self.subTest(attr=attr): 142 | urlobj = URL(url) 143 | urlobj.__delattr__(attr) 144 | self.assertEqual(urlobj.href, expected) 145 | 146 | def test_class_with_base(self): 147 | url = '../example.txt' 148 | base = 'https://example.org/path/' 149 | urlobj = URL(url, base) 150 | self.assertEqual(urlobj.href, 'https://example.org/example.txt') 151 | 152 | def test_class_invalid(self): 153 | with self.assertRaises(ValueError): 154 | URL('bogus') 155 | 156 | def test_class_can_parse(self): 157 | for url, expected in ( 158 | (1, False), 159 | (None, False), 160 | ('bogus', False), 161 | ('https://example.org', True), 162 | ): 163 | with self.subTest(url=url): 164 | actual = URL.can_parse(url) 165 | self.assertEqual(actual, expected) 166 | 167 | def test_class_can_parse_with_base(self): 168 | url = 'example.txt' 169 | for base, expected in ( 170 | ('https://example.org', True), 171 | (1, False), 172 | (None, False), 173 | ('bogus', False), 174 | ): 175 | with self.subTest(url=url): 176 | actual = URL.can_parse(url, base) 177 | self.assertEqual(actual, expected) 178 | 179 | def test_class_dir(self): 180 | urlobj = URL('https://example.org') 181 | actual = set(dir(urlobj)) 182 | self.assertTrue(actual.issuperset(GET_ATTRIBUTES)) 183 | 184 | def test_to_str(self): 185 | urlobj = URL('https://example.org/../something.txt') 186 | actual = str(urlobj) 187 | expected = 'https://example.org/something.txt' 188 | self.assertEqual(actual, expected) 189 | 190 | def test_to_repr(self): 191 | urlobj = URL('https://example.org/../something.txt') 192 | actual = repr(urlobj) 193 | expected = '' 194 | self.assertEqual(actual, expected) 195 
| 196 | def test_check_url(self): 197 | for s, expected in ( 198 | ('https:example.org', True), 199 | ('https://////example.com/// ', True), 200 | ('https://example.com/././foo', True), 201 | ('file:///C|/demo', True), 202 | ('https://127.0.0.1./', True), 203 | ('bogus', False), 204 | ('https://exa%23mple.org', False), 205 | ('foo://exa[mple.org', False), 206 | ('https://127.0.0.0.1./', False), 207 | (None, False), 208 | (1, False), 209 | ('', False), 210 | ('\n', False), 211 | ): 212 | with self.subTest(s=s): 213 | actual = check_url(s) 214 | self.assertEqual(actual, expected) 215 | 216 | def test_join_url(self): 217 | # Tests from https://www.rfc-editor.org/rfc/rfc3986.html 218 | # sections 5.4.1. and 5.4.2 219 | base_url = 'http://a/b/c/d;p?q' 220 | for s, expected in ( 221 | ('g:h', 'g:h'), 222 | ('g', 'http://a/b/c/g'), 223 | ('./g', 'http://a/b/c/g'), 224 | ('g/', 'http://a/b/c/g/'), 225 | ('/g', 'http://a/g'), 226 | ('//g', 'http://g/'), # Slightly different output, trailing / 227 | ('?y', 'http://a/b/c/d;p?y'), 228 | ('g?y', 'http://a/b/c/g?y'), 229 | ('#s', 'http://a/b/c/d;p?q#s'), 230 | ('g#s', 'http://a/b/c/g#s'), 231 | ('g?y#s', 'http://a/b/c/g?y#s'), 232 | (';x', 'http://a/b/c/;x'), 233 | ('g;x', 'http://a/b/c/g;x'), 234 | ('g;x?y#s', 'http://a/b/c/g;x?y#s'), 235 | ('', 'http://a/b/c/d;p?q'), 236 | ('.', 'http://a/b/c/'), 237 | ('./', 'http://a/b/c/'), 238 | ('..', 'http://a/b/'), 239 | ('../', 'http://a/b/'), 240 | ('../g', 'http://a/b/g'), 241 | ('../..', 'http://a/'), 242 | ('../../', 'http://a/'), 243 | ('../../g', 'http://a/g'), 244 | ('/./g', 'http://a/g'), 245 | ('/../g', 'http://a/g'), 246 | ('g.', 'http://a/b/c/g.'), 247 | ('.g', 'http://a/b/c/.g'), 248 | ('g..', 'http://a/b/c/g..'), 249 | ('..g', 'http://a/b/c/..g'), 250 | ('./../g', 'http://a/b/g'), 251 | ('./g/.', 'http://a/b/c/g/'), 252 | ('g/./h', 'http://a/b/c/g/h'), 253 | ('g/../h', 'http://a/b/c/h'), 254 | ('g;x=1/./y', 'http://a/b/c/g;x=1/y'), 255 | ('g;x=1/../y', 'http://a/b/c/y'), 
256 | ('g?y/./x', 'http://a/b/c/g?y/./x'), 257 | ('g?y/../x', 'http://a/b/c/g?y/../x'), 258 | ('g#s/./x', 'http://a/b/c/g#s/./x'), 259 | ('g#s/../x', 'http://a/b/c/g#s/../x'), 260 | ): 261 | with self.subTest(s=s): 262 | actual = join_url(base_url, s) 263 | self.assertEqual(actual, expected) 264 | 265 | def test_join_url_invalid(self): 266 | for base_url, s in ( 267 | (1, './g'), 268 | ('https://example.org', 1), 269 | ('bogus', './g'), 270 | ): 271 | with self.subTest(base_url=base_url, s=s): 272 | with self.assertRaises(ValueError): 273 | join_url(base_url, s) 274 | 275 | def test_normalize_url(self): 276 | for s, expected in ( 277 | ('https://example.org', 'https://example.org/'), 278 | ('https://example.org/../yolo.txt', 'https://example.org/yolo.txt'), 279 | ('https://example.org/dir/../yolo.txt', 'https://example.org/yolo.txt'), 280 | ( 281 | 'https://example.org/dir_1/dir_2/../../yolo.txt', 282 | 'https://example.org/yolo.txt', 283 | ), 284 | ( 285 | 'https://example.org/dir_1/dir_2/../../../yolo.txt', 286 | 'https://example.org/yolo.txt', 287 | ), 288 | ( 289 | 'https://example.org/dir_1/dir_2/../..//yolo.txt', 290 | 'https://example.org//yolo.txt', 291 | ), 292 | ): 293 | with self.subTest(s=s): 294 | actual = normalize_url(s) 295 | self.assertEqual(actual, expected) 296 | 297 | def test_normalize_url_error(self): 298 | for s in (1, 'bogus'): 299 | with self.subTest(s=s): 300 | with self.assertRaises(ValueError): 301 | normalize_url(s) 302 | 303 | def test_parse_url(self): 304 | s = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 305 | actual = parse_url(s) 306 | expected = { 307 | 'href': 'https://user_1:password_1@example.org:8080/api?q=1#frag', 308 | 'username': 'user_1', 309 | 'password': 'password_1', 310 | 'protocol': 'https:', 311 | 'host': 'example.org:8080', 312 | 'port': '8080', 313 | 'hostname': 'example.org', 314 | 'pathname': '/api', 315 | 'search': '?q=1', 316 | 'hash': '#frag', 317 | 'origin': 'https://example.org:8080', 318 
| 'host_type': HostType(0), 319 | 'scheme_type': SchemeType(2), 320 | } 321 | self.assertEqual(actual, expected) 322 | 323 | def test_parse_url_subset(self): 324 | s = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 325 | actual = parse_url(s, attributes=('username', 'password')) 326 | expected = {'username': 'user_1', 'password': 'password_1'} 327 | self.assertEqual(actual, expected) 328 | 329 | def test_parse_url_error(self): 330 | for s in (1, 'bogus'): 331 | with self.subTest(s=s): 332 | with self.assertRaises(ValueError): 333 | parse_url(s) 334 | 335 | def test_replace_url(self): 336 | s = 'https://www.example.org/yolo.txt?q=1#2' 337 | for kwargs, expected in ( 338 | ( 339 | {'username': 'user', 'password': 'pass'}, 340 | 'https://user:pass@www.example.org/yolo.txt?q=1#2', 341 | ), 342 | ({'protocol': 'http:'}, 'http://www.example.org/yolo.txt?q=1#2'), 343 | ({'protocol': 'http'}, 'http://www.example.org/yolo.txt?q=1#2'), 344 | ({'port': '80'}, 'https://www.example.org:80/yolo.txt?q=1#2'), 345 | ({'host': 'www.example.com'}, 'https://www.example.com/yolo.txt?q=1#2'), 346 | ({'hostname': 'example.com'}, 'https://example.com/yolo.txt?q=1#2'), 347 | ({'search': '?q=0'}, 'https://www.example.org/yolo.txt?q=0#2'), 348 | ({'hash': '0'}, 'https://www.example.org/yolo.txt?q=1#0'), 349 | ): 350 | with self.subTest(kwargs=kwargs): 351 | actual = replace_url(s, **kwargs) 352 | self.assertEqual(actual, expected) 353 | 354 | def test_replace_url_clear(self): 355 | s = 'https://user_1:password_1@example.org:8443/api?q=1#frag' 356 | actual = replace_url(s, port='', hash='', search='') 357 | expected = 'https://user_1:password_1@example.org/api' 358 | self.assertEqual(actual, expected) 359 | 360 | def test_replace_url_unset(self): 361 | s = 'https://user:pass@example.org' 362 | actual = replace_url(s, username='', password='') 363 | expected = 'https://example.org/' 364 | self.assertEqual(actual, expected) 365 | 366 | def test_replace_href(self): 367 | s = 
'https://username:password@www.google.com:8080/' 368 | kwargs = { 369 | 'href': 'https://www.yagiz.co', 370 | 'hash': 'new-hash', 371 | 'hostname': 'new-host', 372 | 'host': 'changed-host:9090', 373 | 'pathname': 'new-pathname', 374 | 'search': 'new-search', 375 | 'protocol': 'wss', 376 | } 377 | actual = replace_url(s, **kwargs) 378 | expected = 'wss://changed-host:9090/new-pathname?new-search#new-hash' 379 | self.assertEqual(actual, expected) 380 | 381 | def test_replace_url_error(self): 382 | for s, kwargs in ( 383 | (1, {}), 384 | ('bogus', {}), 385 | ('http://localhost/', {'password': 1}), 386 | ('http://localhost/', {'hostname': 'exa[mple.org'}), 387 | ): 388 | with self.subTest(s=s, kwargs=kwargs): 389 | with self.assertRaises(ValueError): 390 | replace_url(s, **kwargs) 391 | 392 | def test_idna_decode(self): 393 | self.assertEqual(idna.decode('xn--meagefactory-m9a.ca'), 'meßagefactory.ca') 394 | self.assertEqual( 395 | idna_to_unicode(b'xn--meagefactory-m9a.ca'), 'meßagefactory.ca' 396 | ) 397 | 398 | def test_idna_encode(self): 399 | self.assertEqual(idna.encode('meßagefactory.ca'), b'xn--meagefactory-m9a.ca') 400 | self.assertEqual( 401 | idna_to_ascii('meßagefactory.ca'.encode('utf-8')), 402 | b'xn--meagefactory-m9a.ca', 403 | ) 404 | 405 | 406 | class SearchParamsTests(TestCase): 407 | def test_append(self): 408 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 409 | search_params.append('key2', 'value4') 410 | search_params.append('key3', 'value5') 411 | actual = list(search_params.items()) 412 | expected = [ 413 | ('key1', 'value1'), 414 | ('key1', 'value2'), 415 | ('key2', 'value3'), 416 | ('key2', 'value4'), 417 | ('key3', 'value5'), 418 | ] 419 | self.assertEqual(actual, expected) 420 | 421 | def test_delete_key(self): 422 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 423 | search_params.delete('key1') 424 | search_params.delete('key3') 425 | actual = list(search_params.items()) 426 | expected = [('key2', 
'value3')] 427 | self.assertEqual(actual, expected) 428 | 429 | def test_delete_value(self): 430 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 431 | search_params.delete('key1', 'value1') 432 | search_params.delete('key1', 'value4') 433 | search_params.delete('key3', 'value5') 434 | actual = list(search_params.items()) 435 | expected = [('key1', 'value2'), ('key2', 'value3')] 436 | self.assertEqual(actual, expected) 437 | 438 | def test_get(self): 439 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 440 | self.assertEqual(search_params.get('key1'), 'value1') 441 | self.assertEqual(search_params.get('key2'), 'value3') 442 | self.assertEqual(search_params.get('key3'), '') 443 | 444 | def test_get_all(self): 445 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 446 | self.assertEqual(search_params.get_all('key1'), ['value1', 'value2']) 447 | self.assertEqual(search_params.get_all('key2'), ['value3']) 448 | 449 | def test_has_key(self): 450 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 451 | self.assertTrue(search_params.has('key1')) 452 | self.assertTrue(search_params.has('key2')) 453 | self.assertFalse(search_params.has('key3')) 454 | 455 | def test_has_value(self): 456 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 457 | self.assertTrue(search_params.has('key1', 'value1')) 458 | self.assertTrue(search_params.has('key1', 'value2')) 459 | self.assertTrue(search_params.has('key2', 'value3')) 460 | self.assertFalse(search_params.has('key1', 'value4')) 461 | self.assertFalse(search_params.has('key2', 'value5')) 462 | self.assertFalse(search_params.has('key3', 'value6')) 463 | 464 | def test_items(self): 465 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 466 | actual = list(search_params.items()) 467 | expected = [('key1', 'value1'), ('key1', 'value2'), ('key2', 'value3')] 468 | self.assertEqual(actual, expected) 469 | 470 | def 
test_size(self): 471 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 472 | self.assertEqual(search_params.size, 3) 473 | 474 | def test_keys(self): 475 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 476 | actual = list(search_params.keys()) 477 | expected = ['key1', 'key1', 'key2'] 478 | self.assertEqual(actual, expected) 479 | 480 | def test_repr(self): 481 | search_params = SearchParams('key1=value1') 482 | actual = repr(search_params) 483 | expected = '' 484 | self.assertEqual(actual, expected) 485 | 486 | def test_set(self): 487 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 488 | search_params.set('key1', 'value4') 489 | search_params.set('key3', 'value5') 490 | actual = list(search_params.items()) 491 | expected = [('key1', 'value4'), ('key2', 'value3'), ('key3', 'value5')] 492 | self.assertEqual(actual, expected) 493 | 494 | def test_sort(self): 495 | search_params = SearchParams('key2=value2&key1=value1&key3=value3') 496 | search_params.sort() 497 | actual = list(search_params.items()) 498 | expected = [('key1', 'value1'), ('key2', 'value2'), ('key3', 'value3')] 499 | self.assertEqual(actual, expected) 500 | 501 | def test_str(self): 502 | params = 'key2=value2&key1=value1&key3=value3' 503 | search_params = SearchParams(params) 504 | self.assertEqual(str(search_params), params) 505 | 506 | def test_values(self): 507 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 508 | actual = list(search_params.values()) 509 | expected = ['value1', 'value2', 'value3'] 510 | self.assertEqual(actual, expected) 511 | 512 | def test_parse_search_params(self): 513 | s = 'key1=value1&key1=value2&key2=value3' 514 | actual = parse_search_params(s) 515 | expected = {'key1': ['value1', 'value2'], 'key2': ['value3']} 516 | self.assertEqual(actual, expected) 517 | 518 | def test_replace_search_params(self): 519 | s = 'key1=value1&key1=value2&key2=value3' 520 | actual = replace_search_params(s, 
class ParseTests(TestCase):
    def test_url_suite(self):
        """Run the web-platform-tests URL suite against the URL class."""
        with open(URL_TEST_DATA_PATH, 'rb') as f:
            test_data = load(f)

        for index, item in enumerate(test_data, 1):
            # Comment entries in the suite are plain strings; skip them.
            if isinstance(item, str):
                continue

            # Skip entries whose input/base can't round-trip through UTF-8:
            # they aren't representable properly with the json module.
            try:
                (item.get('input') or '').encode('utf-8')
                (item.get('base') or '').encode('utf-8')
            except UnicodeEncodeError:
                continue

            with self.subTest(i=index):
                url_input = item['input']
                base = item.get('base', None)
                if item.get('failure', False):
                    with self.assertRaises(ValueError):
                        URL(url_input, base=base)
                else:
                    parsed = URL(url_input, base=base)
                    self.assertEqual(parsed.href, item['href'])
"""
update_ada.py

Run this script to pull in the latest version of `ada-url/ada` single
header package.
"""

from io import BytesIO
from os.path import dirname, join
from zipfile import ZipFile

from certifi import where
from urllib3 import PoolManager


RELEASE_URL = 'https://github.com/ada-url/ada/releases/latest/download/singleheader.zip'
TARGET_DIR = join(dirname(__file__), 'ada_url/')


def main():
    """Download the latest ada singleheader release and unpack its C/C++
    sources into the ada_url package directory.

    Raises:
        RuntimeError: if the download does not return HTTP 200.
    """
    http_client = PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=where())
    resp = http_client.request('GET', RELEASE_URL)
    # BUG FIX: fail loudly on a bad download instead of handing an error
    # page to ZipFile, which would raise a confusing BadZipFile error.
    if resp.status != 200:
        raise RuntimeError(f'Download failed with HTTP {resp.status}')
    with BytesIO(resp.data) as f, ZipFile(f) as z:
        for file_name in ('ada.cpp', 'ada.h', 'ada_c.h'):
            z.extract(file_name, TARGET_DIR)


if __name__ == '__main__':
    main()
# (the module docstring for update_sdist.py closes on the preceding lines)
from pathlib import Path
from tarfile import open as tar_open
from tempfile import TemporaryDirectory

# Archive members (paths relative to the unpacked sdist root directory)
# that must not ship in the source distribution.
REMOVE_FILES = frozenset(['ada_url/ada.o'])


def update_archive(file_path, removals):
    """Rewrite the gzipped tarball at *file_path*, dropping *removals*.

    *removals* is a collection of member paths relative to the single
    top-level ``ada_url-*`` directory inside the archive.
    """
    with TemporaryDirectory() as temp_dir:
        with tar_open(file_path, mode='r:gz') as tf:
            tf.extractall(temp_dir)

        dir_path = next(Path(temp_dir).glob('ada_url-*'))
        # BUG FIX: the loops below previously reused the name `file_path`,
        # clobbering the archive path before the re-pack step (the archive
        # was never rewritten; the last extracted member was overwritten
        # instead), and the `removals` parameter was ignored in favor of
        # the module-level REMOVE_FILES constant.
        kept_files = []
        for member in Path(temp_dir).glob('**/*'):
            if member.is_dir():
                continue
            if str(member.relative_to(dir_path)) in removals:
                continue
            kept_files.append(member)

        with tar_open(file_path, mode='w:gz') as tf:
            for member in kept_files:
                arcname = member.relative_to(temp_dir)
                print(arcname)
                tf.add(member, arcname=arcname)


if __name__ == '__main__':
    for archive_path in Path().glob('dist/*.tar.gz'):
        update_archive(archive_path, REMOVE_FILES)
        print(f'Updated {archive_path}')