├── .github ├── dependabot.yml └── workflows │ ├── build.yml │ ├── lint.yml │ └── unit-tests.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── ada_url ├── __init__.py ├── ada.cpp ├── ada.h ├── ada_adapter.py ├── ada_build.py ├── ada_c.h └── py.typed ├── benchmark.py ├── docs ├── Makefile ├── conf.py ├── index.rst ├── make.bat └── requirements.txt ├── pyproject.toml ├── requirements ├── base.txt └── development.txt ├── setup.py ├── tests ├── __init__.py ├── files │ └── urltestdata.json └── test_ada_url.py ├── update-wpt.sh ├── update_ada.py └── update_sdist.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/requirements" 5 | schedule: 6 | interval: "monthly" 7 | - package-ecosystem: "pip" 8 | directory: "/docs" 9 | schedule: 10 | interval: "monthly" 11 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | paths-ignore: 7 | - '**.rst' 8 | - 'docs/**' 9 | push: 10 | branches: 11 | - main 12 | paths-ignore: 13 | - '**.rst' 14 | - 'docs/**' 15 | release: 16 | types: 17 | - published 18 | 19 | env: 20 | PIP_BREAK_SYSTEM_PACKAGES: 1 21 | 22 | concurrency: 23 | group: ${{ github.workflow }}-${{ github.ref }} 24 | cancel-in-progress: true 25 | 26 | jobs: 27 | build_wheels: 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | os: ["ubuntu-latest", "macos-latest", "windows-latest"] 32 | 33 | runs-on: ${{ matrix.os }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | - run: make requirements 37 | - name: Set up QEMU # Needed to build aarch64 wheels 38 | if: runner.os == 'Linux' 39 | uses: docker/setup-qemu-action@v3 40 | with: 41 | platforms: all 
42 | - uses: pypa/cibuildwheel@v2.22.0 43 | - uses: actions/upload-artifact@v4 44 | with: 45 | name: artifact-wheel-${{ matrix.os }} 46 | path: wheelhouse/*.whl 47 | 48 | make_sdist: 49 | runs-on: "ubuntu-latest" 50 | steps: 51 | - uses: actions/checkout@v4 52 | - run: | 53 | make requirements 54 | python -m build --no-isolation --sdist 55 | - uses: actions/upload-artifact@v4 56 | with: 57 | name: artifact-sdist 58 | path: dist/*.tar.gz 59 | 60 | merge: 61 | runs-on: ubuntu-latest 62 | needs: [build_wheels, make_sdist] 63 | steps: 64 | - name: Merge Artifacts 65 | uses: actions/upload-artifact/merge@v4 66 | with: 67 | name: artifact 68 | pattern: artifact-* 69 | delete-merged: true 70 | 71 | upload_all: 72 | needs: merge 73 | runs-on: "ubuntu-latest" 74 | environment: release 75 | if: github.event_name == 'release' && github.event.action == 'published' 76 | permissions: 77 | id-token: write 78 | steps: 79 | - uses: actions/download-artifact@v4 80 | with: 81 | name: artifact 82 | path: dist 83 | - uses: pypa/gh-action-pypi-publish@release/v1 84 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | paths-ignore: 7 | - '**.rst' 8 | - 'docs/**' 9 | push: 10 | branches: 11 | - main 12 | paths-ignore: 13 | - '**.rst' 14 | - 'docs/**' 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | lint: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: "3.9" 29 | - name: Install dependencies 30 | run: | 31 | make requirements 32 | - name: Static analysis 33 | run: | 34 | make check 35 | 
-------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened, ready_for_review] 6 | paths-ignore: 7 | - '**.rst' 8 | - 'docs/**' 9 | push: 10 | branches: 11 | - main 12 | paths-ignore: 13 | - '**.rst' 14 | - 'docs/**' 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.ref }} 18 | cancel-in-progress: true 19 | 20 | 21 | env: 22 | MACOSX_DEPLOYMENT_TARGET: "10.15" 23 | 24 | jobs: 25 | build_test: 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | os: ["ubuntu-latest", "macos-latest"] 30 | 31 | runs-on: ${{ matrix.os }} 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | - name: Set up Python 3.9 36 | uses: actions/setup-python@v5 37 | with: 38 | python-version: "3.9" 39 | - name: Install dependencies 40 | run: | 41 | make requirements 42 | - name: Build packages 43 | run: | 44 | make package 45 | - name: Run tests 46 | run: | 47 | pip install -e . 48 | make coverage 49 | - name: Check docs 50 | run: | 51 | make docs 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.o 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | 141 | # MacOS stuff 142 | .DS_Store 143 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | jobs: 8 | pre_install: 9 | - make requirements 10 | - make package 11 | - python -m pip install -e . 
12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements.txt 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ada 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ada_url/*.c 2 | include ada_url/*.cpp 3 | include ada_url/*.h 4 | exclude ada_url/*.o 5 | exclude ada_url/_ada_wrapper.* 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: requirements 2 | requirements: 3 | python -m pip install -U -r requirements/development.txt ${req_args} 4 | 5 | .PHONY: check 6 | check: 7 | ruff check . 8 | 9 | .PHONY: format 10 | format: 11 | ruff format . 12 | 13 | .PHONY: coverage 14 | coverage: 15 | coverage run -m unittest 16 | coverage report --show-missing --fail-under 99 17 | 18 | .PHONY: test 19 | test: 20 | python -m unittest -v ${tests} 21 | 22 | .PHONY: docs 23 | docs: 24 | sphinx-build -W -b html docs docs/_build/html 25 | 26 | .PHONY: clean 27 | clean: 28 | rm -rf _build/ 29 | rm -rf _dist/ 30 | rm -rf ada_url.egg-info/ 31 | $(RM) ada_url/_ada_wrapper.abi3.so 32 | $(RM) ada_url/ada.o 33 | 34 | .PHONY: package 35 | package: 36 | python -m build --no-isolation 37 | twine check dist/* 38 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ada-url 2 | ======== 3 | 4 | The `urllib.parse` module in Python does not follow the legacy RFC 3986 standard nor 5 | does it follow the newer WHATWG URL specification. It is also relatively slow. 6 | 7 | This is ``ada_url``, a fast standard-compliant Python library for working with URLs based on the ``Ada`` URL 8 | parser. 9 | 10 | * `Documentation `__ 11 | * `Development `__ 12 | * `Ada `__ 13 | 14 | Installation 15 | ------------ 16 | 17 | Install from `PyPI `__: 18 | 19 | ..
code-block:: sh 20 | 21 | pip install ada_url 22 | 23 | Usage examples 24 | -------------- 25 | 26 | Parsing URLs 27 | ^^^^^^^^^^^^ 28 | 29 | The ``URL`` class is intended to match the one described in the 30 | `WHATWG URL spec `_:. 31 | 32 | .. code-block:: python 33 | 34 | >>> from ada_url import URL 35 | >>> urlobj = URL('https://example.org/path/../file.txt') 36 | >>> urlobj.href 37 | 'https://example.org/path/file.txt' 38 | 39 | The ``parse_url`` function returns a dictionary of all URL elements: 40 | 41 | .. code-block:: python 42 | 43 | >>> from ada_url import parse_url 44 | >>> parse_url('https://user:pass@example.org:80/api?q=1#2') 45 | { 46 | 'href': 'https://user:pass@example.org:80/api?q=1#2', 47 | 'username': 'user', 48 | 'password': 'pass', 49 | 'protocol': 'https:', 50 | 'port': '80', 51 | 'hostname': 'example.org', 52 | 'host': 'example.org:80', 53 | 'pathname': '/api', 54 | 'search': '?q=1', 55 | 'hash': '#2', 56 | 'origin': 'https://example.org:80', 57 | 'host_type': , 58 | 'scheme_type': 59 | } 60 | 61 | Altering URLs 62 | ^^^^^^^^^^^^^ 63 | 64 | Replacing URL components with the ``URL`` class: 65 | 66 | .. code-block:: python 67 | 68 | >>> from ada_url import URL 69 | >>> urlobj = URL('https://example.org/path/../file.txt') 70 | >>> urlobj.host = 'example.com' 71 | >>> urlobj.href 72 | 'https://example.com/file.txt' 73 | 74 | Replacing URL components with the ``replace_url`` function: 75 | 76 | >>> from ada_url import replace_url 77 | >>> replace_url('https://example.org/path/../file.txt', host='example.com') 78 | 'https://example.com/file.txt' 79 | 80 | Search parameters 81 | ^^^^^^^^^^^^^^^^^ 82 | 83 | The ``URLSearchParams`` class is intended to match the one described in the 84 | `WHATWG URL spec `__. 85 | 86 | .. 
code-block:: python 87 | 88 | >>> from ada_url import URLSearchParams 89 | >>> obj = URLSearchParams('key1=value1&key2=value2') 90 | >>> list(obj.items()) 91 | [('key1', 'value1'), ('key2', 'value2')] 92 | 93 | The ``parse_search_params`` function returns a dictionary of search keys mapped to 94 | value lists: 95 | 96 | .. code-block:: python 97 | 98 | >>> from ada_url import parse_search_params 99 | >>> parse_search_params('key1=value1&key2=value2') 100 | {'key1': ['value1'], 'key2': ['value2']} 101 | 102 | Internationalized domain names 103 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 104 | 105 | The ``idna`` class can encode and decode IDNs: 106 | 107 | .. code-block:: python 108 | 109 | >>> from ada_url import idna 110 | >>> idna.encode('Bücher.example') 111 | b'xn--bcher-kva.example' 112 | >>> idna.decode(b'xn--bcher-kva.example') 113 | 'bücher.example' 114 | 115 | WHATWG URL compliance 116 | --------------------- 117 | 118 | This library is compliant with the WHATWG URL spec. This means, among other things, 119 | that it properly encodes IDNs and resolves paths: 120 | 121 | .. code-block:: python 122 | 123 | >>> from ada_url import URL 124 | >>> parsed_url = URL('https://www.GOoglé.com/./path/../path2/') 125 | >>> parsed_url.hostname 126 | 'www.xn--googl-fsa.com' 127 | >>> parsed_url.pathname 128 | '/path2/' 129 | 130 | Contrast that with the Python standard library's ``urlib.parse`` module: 131 | 132 | .. code-block:: python 133 | 134 | >>> from urllib.parse import urlparse 135 | >>> parsed_url = urlparse('https://www.GOoglé.com/./path/../path2/') 136 | >>> parsed_url.hostname 137 | 'www.googlé.com' 138 | >>> parsed_url.path 139 | '/./path/../path2/' 140 | 141 | Alternative Python bindings 142 | --------------------------- 143 | 144 | This package uses `CFFI `__ to call 145 | the ``Ada`` library's functions, which has a performance cost. 
146 | The alternative `can_ada `__ (Canadian Ada) 147 | package uses `pybind11 `__ to generate a 148 | Python extension module, which is more performant. 149 | -------------------------------------------------------------------------------- /ada_url/__init__.py: -------------------------------------------------------------------------------- 1 | from ada_url.ada_adapter import ( 2 | URL, 3 | HostType, 4 | SchemeType, 5 | URLSearchParams, 6 | check_url, 7 | idna, 8 | idna_to_ascii, 9 | idna_to_unicode, 10 | join_url, 11 | normalize_url, 12 | parse_search_params, 13 | parse_url, 14 | replace_search_params, 15 | replace_url, 16 | ) 17 | 18 | __all__ = [ 19 | 'HostType', 20 | 'SchemeType', 21 | 'URL', 22 | 'URLSearchParams', 23 | 'check_url', 24 | 'idna', 25 | 'idna_to_ascii', 26 | 'idna_to_unicode', 27 | 'join_url', 28 | 'normalize_url', 29 | 'parse_search_params', 30 | 'parse_url', 31 | 'replace_search_params', 32 | 'replace_url', 33 | ] 34 | -------------------------------------------------------------------------------- /ada_url/ada_adapter.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from typing import ( 3 | Dict, 4 | Final, 5 | Iterable, 6 | Iterator, 7 | List, 8 | Optional, 9 | Tuple, 10 | TypedDict, 11 | Union, 12 | ) 13 | 14 | from ada_url._ada_wrapper import ffi, lib 15 | 16 | URL_ATTRIBUTES = ( 17 | 'href', 18 | 'username', 19 | 'password', 20 | 'protocol', 21 | 'port', 22 | 'hostname', 23 | 'host', 24 | 'pathname', 25 | 'search', 26 | 'hash', 27 | ) 28 | PARSE_ATTRIBUTES = URL_ATTRIBUTES + ('origin', 'host_type', 'scheme_type') 29 | 30 | # These are the attributes that have corresponding ada_get_* functions 31 | GET_ATTRIBUTES = frozenset(PARSE_ATTRIBUTES) 32 | 33 | # These are the attributes that have corresponding ada_set_* functons 34 | SET_ATTRIBUTES = frozenset(URL_ATTRIBUTES) 35 | 36 | # These are the attributes that can be cleared with one of the ada_clear_* functions 37 | 
CLEAR_ATTRIBUTES = frozenset(('port', 'hash', 'search')) 38 | 39 | # These are the attributes that must be cleared by setting the empty string 40 | UNSET_ATTRIBUTES = frozenset(('username', 'password', 'pathname')) 41 | 42 | _marker = object() 43 | 44 | 45 | class HostType(IntEnum): 46 | """ 47 | Enum for URL host types: 48 | 49 | * ``DEFAULT`` hosts like ``https://example.org`` are ``0``. 50 | * ``IPV4`` hosts like ``https://192.0.2.1`` are ``1``. 51 | * ``IPV6`` hosts like ``https://[2001:db8::]`` are ``2``. 52 | 53 | .. code-block:: python 54 | 55 | >>> from ada_url import HostType 56 | >>> HostType.DEFAULT 57 | 58 | 59 | """ 60 | 61 | DEFAULT = 0 62 | IPV4 = 1 63 | IPV6 = 2 64 | 65 | 66 | class SchemeType(IntEnum): 67 | """ 68 | Enum for `URL scheme types `__. 69 | 70 | * ``HTTP`` URLs like ``http://example.org`` are ``0``. 71 | * ``NOT_SPECIAL`` URLs like ``git://example.og`` are ``1``. 72 | * ``HTTPS`` URLs like ``https://example.org`` are ``2``. 73 | * ``WS`` URLs like ``ws://example.org`` are ``3``. 74 | * ``FTP`` URLs like ``ftp://example.org`` are ``4``. 75 | * ``WSS`` URLs like ``wss://example.org`` are ``5``. 76 | * ``FILE`` URLs like ``file://example`` are ``6``. 77 | 78 | .. 
code-block:: python 79 | 80 | >>> from ada_url import SchemeType 81 | >>> SchemeType.HTTPS 82 | 83 | 84 | """ 85 | 86 | HTTP = 0 87 | NOT_SPECIAL = 1 88 | HTTPS = 2 89 | WS = 3 90 | FTP = 4 91 | WSS = 5 92 | FILE = 6 93 | 94 | 95 | class ParseAttributes(TypedDict, total=False): 96 | href: str 97 | username: str 98 | password: str 99 | protocol: str 100 | port: str 101 | hostname: str 102 | host: str 103 | pathname: str 104 | search: str 105 | hash: str 106 | origin: str 107 | host_type: HostType 108 | scheme_type: SchemeType 109 | 110 | 111 | def _get_obj(constructor, destructor, *args): 112 | obj = constructor(*args) 113 | 114 | return ffi.gc(obj, destructor) 115 | 116 | 117 | def _get_str(x): 118 | ret = ffi.string(x.data, x.length).decode() if x.length else '' 119 | return ret 120 | 121 | 122 | class URL: 123 | """ 124 | Parses a *url* (with an optional *base*) according to the 125 | WHATWG URL parsing standard. 126 | 127 | .. code-block:: python 128 | 129 | >>> from ada_url import URL 130 | >>> old_url = 'https://example.org:443/file.txt?q=1' 131 | >>> urlobj = URL(old_url) 132 | >>> urlobj.host 133 | 'example.org' 134 | >>> urlobj.host = 'example.com' 135 | >>> new_url = urlobj.href 136 | >>> new_url 137 | 'https://example.com:443/file.txt?q=1' 138 | 139 | You can read and write the following attributes: 140 | 141 | * ``href`` 142 | * ``protocol`` 143 | * ``username`` 144 | * ``password`` 145 | * ``host`` 146 | * ``hostname`` 147 | * ``port`` 148 | * ``pathname`` 149 | * ``search`` 150 | * ``hash`` 151 | 152 | You can additionally read these attributes: 153 | 154 | * ``origin``, which will be a ``str`` 155 | * ``host_type``, which will be a :class:`HostType` enum 156 | * ``scheme_type``, which will be a :class:`SchemeType` enum 157 | 158 | The class also exposes a static method that checks whether the input 159 | *url* (and optional *base*) can be parsed: 160 | 161 | .. 
code-block:: python 162 | 163 | >>> url = 'file_2.txt' 164 | >>> base = 'https://example.org:443/file_1.txt' 165 | >>> URL.can_parse(url, base) 166 | True 167 | 168 | See the `WHATWG docs `__ for 169 | more details on the URL class. 170 | 171 | """ 172 | 173 | href: str 174 | username: str 175 | password: str 176 | protocol: str 177 | port: str 178 | hostname: str 179 | host: str 180 | pathname: str 181 | search: str 182 | hash: str 183 | origin: Final[str] 184 | host_type: Final[HostType] 185 | scheme_type: Final[SchemeType] 186 | 187 | def __init__(self, url: str, base: Optional[str] = None): 188 | url_bytes = url.encode() 189 | 190 | if base is None: 191 | self.urlobj = _get_obj( 192 | lib.ada_parse, lib.ada_free, url_bytes, len(url_bytes) 193 | ) 194 | else: 195 | base_bytes = base.encode() 196 | self.urlobj = _get_obj( 197 | lib.ada_parse_with_base, 198 | lib.ada_free, 199 | url_bytes, 200 | len(url_bytes), 201 | base_bytes, 202 | len(base_bytes), 203 | ) 204 | 205 | if not lib.ada_is_valid(self.urlobj): 206 | raise ValueError('Invalid input') 207 | 208 | def __copy__(self): 209 | cls = self.__class__ 210 | ret = cls.__new__(cls) 211 | ret.__dict__.update(self.__dict__) 212 | super(URL, ret).__init__() 213 | return ret 214 | 215 | def __deepcopy__(self, memo): 216 | cls = self.__class__ 217 | ret = cls.__new__(cls) 218 | super(URL, ret).__init__() 219 | ret.urlobj = lib.ada_copy(self.urlobj) 220 | 221 | return ret 222 | 223 | def __delattr__(self, attr: str): 224 | if attr in CLEAR_ATTRIBUTES: 225 | clear_func = getattr(lib, f'ada_clear_{attr}') 226 | clear_func(self.urlobj) 227 | elif attr in UNSET_ATTRIBUTES: 228 | set_func = getattr(lib, f'ada_set_{attr}') 229 | set_func(self.urlobj, b'', 0) 230 | else: 231 | raise AttributeError(f'cannot remove {attr}') 232 | 233 | def __dir__(self) -> List[str]: 234 | return super().__dir__() + list(PARSE_ATTRIBUTES) 235 | 236 | def __getattr__(self, attr: str) -> Union[str, HostType, SchemeType]: 237 | if attr in 
GET_ATTRIBUTES: 238 | get_func = getattr(lib, f'ada_get_{attr}') 239 | data = get_func(self.urlobj) 240 | if attr == 'origin': 241 | ret = _get_str(data) 242 | lib.ada_free_owned_string(data) 243 | elif attr == 'host_type': 244 | ret = HostType(data) 245 | elif attr == 'scheme_type': 246 | ret = SchemeType(data) 247 | else: 248 | ret = _get_str(data) 249 | 250 | return ret 251 | 252 | raise AttributeError(f'no attribute named {attr}') 253 | 254 | def __setattr__(self, attr: str, value: str) -> None: 255 | if attr in SET_ATTRIBUTES: 256 | try: 257 | value_bytes = value.encode() 258 | except Exception: 259 | raise ValueError(f'Invalid value for {attr}') from None 260 | 261 | set_func = getattr(lib, f'ada_set_{attr}') 262 | ret = set_func(self.urlobj, value_bytes, len(value_bytes)) 263 | if (ret is not None) and (not ret): 264 | raise ValueError(f'Invalid value for {attr}') from None 265 | 266 | return ret 267 | 268 | return super().__setattr__(attr, value) 269 | 270 | def __str__(self): 271 | return self.href 272 | 273 | def __repr__(self): 274 | return f'' 275 | 276 | @staticmethod 277 | def can_parse(url: str, base: Optional[str] = None) -> bool: 278 | try: 279 | url_bytes = url.encode() 280 | except Exception: 281 | return False 282 | 283 | if base is None: 284 | return lib.ada_can_parse(url_bytes, len(url_bytes)) 285 | 286 | try: 287 | base_bytes = base.encode() 288 | except Exception: 289 | return False 290 | 291 | return lib.ada_can_parse_with_base( 292 | url_bytes, len(url_bytes), base_bytes, len(base_bytes) 293 | ) 294 | 295 | 296 | class URLSearchParams: 297 | """ 298 | Parses the given *params* string according to the WHATWG URL parsing standard. 299 | 300 | The attribute and methods from the standard are implemented: 301 | 302 | .. 
code-block:: python 303 | 304 | >>> from ada_url import URLSearchParams 305 | >>> obj = URLSearchParams('key1=value1&key2=value2&key2=value3') 306 | >>> obj.size 307 | 3 308 | >>> obj.append('key2', 'value4') 309 | >>> str(obj) 310 | 'key1=value1&key2=value2&key2=value3&key2=value4' 311 | >>> obj.delete('key1') 312 | >>> str(obj) 313 | 'key2=value2&key2=value3&key2=value4' 314 | >>> obj.delete('key2', 'value2') 315 | >>> str(obj) 316 | 'key2=value3&key2=value4' 317 | >>> obj.get('key2') 318 | 'value3' 319 | >>> obj.get_all('key2') 320 | ['value3', 'value4'] 321 | >>> obj.has('key2') 322 | True 323 | >>> obj.has('key2', 'value5') 324 | False 325 | >>> obj.set('key1', 'value6') 326 | >>> str(obj) 327 | 'key2=value3&key2=value4&key1=value6' 328 | >>> obj.sort() 329 | >>> str(obj) 330 | 'key1=value6&key2=value3&key2=value4' 331 | 332 | Iterators for the ``keys``, ``values``, and ``items`` are also implemented: 333 | 334 | .. code-block:: python 335 | 336 | >>> obj = URLSearchParams('key1=value1&key2=value2&key2=value3') 337 | >>> list(obj.keys()) 338 | ['key1', 'key2', 'key2'] 339 | >>> list(obj.values()) 340 | ['value1', 'value2', 'value3'] 341 | >>> list(obj.items()) 342 | [('key1', 'value1'), ('key2', 'value2'), ('key2', 'value3')] 343 | 344 | See the `WHATWG docs `__ for 345 | more details on the URLSearchParams class. 
346 | 347 | """ 348 | 349 | def __init__(self, params: str): 350 | params_bytes = params.encode() 351 | self.paramsobj = _get_obj( 352 | lib.ada_parse_search_params, 353 | lib.ada_free_search_params, 354 | params_bytes, 355 | len(params_bytes), 356 | ) 357 | 358 | @property 359 | def size(self) -> int: 360 | return lib.ada_search_params_size(self.paramsobj) 361 | 362 | def __len__(self) -> int: 363 | return self.size 364 | 365 | def append(self, key: str, value: str): 366 | key_bytes = key.encode() 367 | value_bytes = value.encode() 368 | lib.ada_search_params_append( 369 | self.paramsobj, 370 | key_bytes, 371 | len(key_bytes), 372 | value_bytes, 373 | len(value_bytes), 374 | ) 375 | 376 | def delete(self, key: str, value: Optional[str] = None): 377 | key_bytes = key.encode() 378 | if value is None: 379 | lib.ada_search_params_remove(self.paramsobj, key_bytes, len(key_bytes)) 380 | else: 381 | value_bytes = value.encode() 382 | lib.ada_search_params_remove_value( 383 | self.paramsobj, 384 | key_bytes, 385 | len(key_bytes), 386 | value_bytes, 387 | len(value_bytes), 388 | ) 389 | 390 | def get(self, key: str) -> str: 391 | key_bytes = key.encode() 392 | item = lib.ada_search_params_get(self.paramsobj, key_bytes, len(key_bytes)) 393 | return _get_str(item) 394 | 395 | def get_all(self, key: str) -> List[str]: 396 | key_bytes = key.encode() 397 | items = lib.ada_search_params_get_all(self.paramsobj, key_bytes, len(key_bytes)) 398 | count = lib.ada_strings_size(items) 399 | 400 | ret = [] 401 | for i in range(count): 402 | value = _get_str(lib.ada_strings_get(items, i)) 403 | ret.append(value) 404 | 405 | return ret 406 | 407 | def has(self, key: str, value: Optional[str] = None) -> bool: 408 | key_bytes = key.encode() 409 | if value is None: 410 | return lib.ada_search_params_has(self.paramsobj, key_bytes, len(key_bytes)) 411 | else: 412 | value_bytes = value.encode() 413 | return lib.ada_search_params_has_value( 414 | self.paramsobj, 415 | key_bytes, 416 | 
len(key_bytes), 417 | value_bytes, 418 | len(value_bytes), 419 | ) 420 | 421 | def set(self, key: str, value: str): 422 | key_bytes = key.encode() 423 | value_bytes = value.encode() 424 | lib.ada_search_params_set( 425 | self.paramsobj, 426 | key_bytes, 427 | len(key_bytes), 428 | value_bytes, 429 | len(value_bytes), 430 | ) 431 | 432 | def sort(self): 433 | lib.ada_search_params_sort(self.paramsobj) 434 | 435 | def keys(self) -> Iterator[str]: 436 | iterator = _get_obj( 437 | lib.ada_search_params_get_keys, 438 | lib.ada_free_search_params_keys_iter, 439 | self.paramsobj, 440 | ) 441 | while lib.ada_search_params_keys_iter_has_next(iterator): 442 | item = lib.ada_search_params_keys_iter_next(iterator) 443 | yield _get_str(item) 444 | 445 | def values(self) -> Iterator[str]: 446 | iterator = _get_obj( 447 | lib.ada_search_params_get_values, 448 | lib.ada_free_search_params_values_iter, 449 | self.paramsobj, 450 | ) 451 | while lib.ada_search_params_values_iter_has_next(iterator): 452 | item = lib.ada_search_params_values_iter_next(iterator) 453 | yield _get_str(item) 454 | 455 | def items(self) -> Iterator[Tuple[str, str]]: 456 | iterator = _get_obj( 457 | lib.ada_search_params_get_entries, 458 | lib.ada_free_search_params_entries_iter, 459 | self.paramsobj, 460 | ) 461 | while lib.ada_search_params_entries_iter_has_next(iterator): 462 | item = lib.ada_search_params_entries_iter_next(iterator) 463 | yield _get_str(item.key), _get_str(item.value) 464 | 465 | def __repr__(self): 466 | return f'' 467 | 468 | def __str__(self) -> str: 469 | result = _get_obj( 470 | lib.ada_search_params_to_string, lib.ada_free_owned_string, self.paramsobj 471 | ) 472 | return _get_str(result) 473 | 474 | 475 | def check_url(s: str) -> bool: 476 | """ 477 | Returns ``True`` if *s* represents a valid URL, and ``False`` otherwise. 478 | 479 | .. 
code-block:: python 480 | 481 | >>> from ada_url import check_url 482 | >>> check_url('bogus') 483 | False 484 | >>> check_url('http://a/b/c/d;p?q') 485 | True 486 | 487 | """ 488 | try: 489 | s_bytes = s.encode() 490 | except Exception: 491 | return False 492 | 493 | urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes)) 494 | return lib.ada_is_valid(urlobj) 495 | 496 | 497 | def join_url(base_url: str, s: str) -> str: 498 | """ 499 | Return the URL that results from joining *base_url* to *s*. 500 | Raises ``ValueError`` if no valid URL can be constructed. 501 | 502 | .. code-block:: python 503 | 504 | >>> from ada_url import join_url 505 | >>> base_url = 'http://a/b/c/d;p?q' 506 | >>> join_url(base_url, '../g') 507 | 'http://a/b/g' 508 | 509 | """ 510 | try: 511 | base_bytes = base_url.encode() 512 | s_bytes = s.encode() 513 | except Exception: 514 | raise ValueError('Invalid URL') from None 515 | 516 | urlobj = _get_obj( 517 | lib.ada_parse_with_base, 518 | lib.ada_free, 519 | s_bytes, 520 | len(s_bytes), 521 | base_bytes, 522 | len(base_bytes), 523 | ) 524 | if not lib.ada_is_valid(urlobj): 525 | raise ValueError('Invalid URL') from None 526 | 527 | return _get_str(lib.ada_get_href(urlobj)) 528 | 529 | 530 | def normalize_url(s: str) -> str: 531 | """ 532 | Returns a "normalized" URL with all ``'..'`` and ``'/'`` characters resolved. 533 | 534 | .. code-block:: python 535 | 536 | >>> from ada_url import normalize_url 537 | >>> normalize_url('http://a/b/c/../g') 538 | 'http://a/b/g' 539 | 540 | """ 541 | return parse_url(s, attributes=('href',))['href'] 542 | 543 | 544 | def parse_url(s: str, attributes: Iterable[str] = PARSE_ATTRIBUTES) -> ParseAttributes: 545 | """ 546 | Returns a dictionary with the parsed components of the URL represented by *s*. 547 | 548 | .. 
code-block:: python 549 | 550 | >>> from ada_url import parse_url 551 | >>> url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 552 | >>> parse_url(url) 553 | { 554 | 'href': 'https://user_1:password_1@example.org:8080/api?q=1#frag', 555 | 'username': 'user_1', 556 | 'password': 'password_1', 557 | 'protocol': 'https:', 558 | 'host': 'example.org:8080', 559 | 'port': '8080', 560 | 'hostname': 'example.org', 561 | 'pathname': '/api', 562 | 'search': '?q=1', 563 | 'hash': '#frag' 564 | 'origin': 'https://example.org:8080', 565 | 'host_type': 0 566 | 'scheme_type': 2 567 | } 568 | 569 | The names of the dictionary keys correspond to the components of the "URL class" 570 | in the WHATWG URL spec. 571 | ``host_type`` is a :class:`HostType` enum. 572 | ``scheme_type`` is a :class:`SchemeType` enum. 573 | 574 | Pass in a sequence of *attributes* to limit which keys are returned. 575 | 576 | .. code-block:: python 577 | 578 | >>> from ada_url import parse_url 579 | >>> url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 580 | >>> parse_url(url, attributes=('protocol')) 581 | {'protocol': 'https:'} 582 | 583 | Unrecognized attributes are ignored. 
584 | 585 | """ 586 | try: 587 | s_bytes = s.encode() 588 | except Exception: 589 | raise ValueError('Invalid URL') from None 590 | 591 | ret = {} 592 | urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes)) 593 | if not lib.ada_is_valid(urlobj): 594 | raise ValueError('Invalid URL') from None 595 | 596 | for attr in attributes: 597 | get_func = getattr(lib, f'ada_get_{attr}') 598 | data = get_func(urlobj) 599 | if attr == 'origin': 600 | ret[attr] = _get_str(data) 601 | lib.ada_free_owned_string(data) 602 | elif attr == 'host_type': 603 | ret[attr] = HostType(data) 604 | elif attr == 'scheme_type': 605 | ret[attr] = SchemeType(data) 606 | else: 607 | ret[attr] = _get_str(data) 608 | 609 | return ret 610 | 611 | 612 | def replace_url(s: str, **kwargs: str) -> str: 613 | """ 614 | Start with the URL represented by *s*, replace the attributes given in the *kwargs* 615 | mapping, and return a normalized URL with the result. 616 | 617 | Provide an empty string to unset an attribute. 618 | 619 | .. code-block:: python 620 | 621 | >>> from ada_url import replace_url 622 | >>> base_url = 'https://user_1:password_1@example.org/resource' 623 | >>> replace_url(base_url, username='user_2', password='', protocol='http:') 624 | 'http://user_2@example.org/resource' 625 | 626 | Unrecognized attributes are ignored. ``href`` is replaced first if it is given. 627 | ``hostname`` is replaced before ``host`` if both are given. 628 | 629 | ``ValueError`` is raised if the input URL or one of the components is not valid. 630 | """ 631 | try: 632 | s_bytes = s.encode() 633 | except Exception: 634 | raise ValueError('Invalid URL') from None 635 | 636 | urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes)) 637 | if not lib.ada_is_valid(urlobj): 638 | raise ValueError('Invalid URL') from None 639 | 640 | # We process attributes in the order given by the documentation, e.g. 641 | # href before anything else. 
642 | for attr in URL_ATTRIBUTES: 643 | value = kwargs.get(attr, _marker) 644 | if value is _marker: 645 | continue 646 | 647 | try: 648 | value_bytes = value.encode() 649 | except Exception: 650 | raise ValueError(f'Invalid value for {attr}') from None 651 | 652 | if (not value_bytes) and (attr in CLEAR_ATTRIBUTES): 653 | clear_func = getattr(lib, f'ada_clear_{attr}') 654 | clear_func(urlobj) 655 | else: 656 | set_func = getattr(lib, f'ada_set_{attr}') 657 | set_result = set_func(urlobj, value_bytes, len(value_bytes)) 658 | if (set_result is not None) and (not set_result): 659 | raise ValueError(f'Invalid value for {attr}') from None 660 | 661 | return _get_str(lib.ada_get_href(urlobj)) 662 | 663 | 664 | def parse_search_params(s: str) -> Dict[str, List[str]]: 665 | """ 666 | Returns a dictionary representing the parsed URL Parameters specified by *s*. 667 | The returned dictionary maps each key to a list of values associated with it. 668 | 669 | .. code-block:: python 670 | 671 | >>> from ada_url import parse_search_params 672 | >>> parse_search_params('key1=value1&key1=value2&key2=value3') 673 | {'key1': ['value1', 'value2'], 'key2': ['value3']} 674 | 675 | """ 676 | ret = {} 677 | for key, value in URLSearchParams(s).items(): 678 | if key not in ret: 679 | ret[key] = [value] 680 | else: 681 | ret[key].append(value) 682 | 683 | return ret 684 | 685 | 686 | def replace_search_params(s: str, *args: Tuple[str, str]) -> str: 687 | """ 688 | Returns a string representing the URL parameters specified by *s*, modified by the 689 | ``(key, value)`` pairs passed in as *args*. 690 | 691 | .. code-block:: python 692 | 693 | >>> from ada_url import replace_search_params 694 | >>> replace_search_params( 695 | ... 'key1=value1&key1=value2', 696 | ... ('key1', 'value3'), 697 | ... ('key2', 'value4') 698 | ... 
) 699 | 'key1=value3&key2=value4' 700 | """ 701 | search_params = URLSearchParams(s) 702 | for key, value in args: 703 | search_params.delete(key) 704 | 705 | for key, value in args: 706 | search_params.append(key, value) 707 | 708 | return str(search_params) 709 | 710 | 711 | class idna: 712 | """Process international domains according to the UTS #46 standard. 713 | 714 | :func:`idna.encode` implements the UTS #46 ``ToASCII`` operation. 715 | Its output is a Python ``bytes`` object. 716 | It is also available as :func:`idna_to_ascii`. 717 | 718 | .. code-block:: python 719 | 720 | >>> from ada_url import idna 721 | >>> idna.encode('meßagefactory.ca') 722 | b'xn--meagefactory-m9a.ca' 723 | 724 | :func:`idna.decode` implements the UTS #46 ``ToUnicode`` operation. 725 | Its output is a Python ``str`` object. 726 | It is also available as :func:`idna_to_unicode`. 727 | 728 | .. code-block:: python 729 | 730 | >>> from ada_url import idna 731 | >>> idna.decode('xn--meagefactory-m9a.ca') 732 | 'meßagefactory.ca' 733 | 734 | Both functions accept either ``str`` or ``bytes`` objects as input.
735 | """ 736 | 737 | @staticmethod 738 | def decode(s: Union[str, bytes]) -> str: 739 | if isinstance(s, str): 740 | s = s.encode('ascii') 741 | 742 | data = _get_obj(lib.ada_idna_to_unicode, lib.ada_free_owned_string, s, len(s)) 743 | return _get_str(data) 744 | 745 | @staticmethod 746 | def encode(s: Union[str, bytes]) -> bytes: 747 | if isinstance(s, str): 748 | s = s.encode() 749 | 750 | val = _get_obj(lib.ada_idna_to_ascii, lib.ada_free_owned_string, s, len(s)) 751 | return ffi.string(val.data, val.length) if val.length else b'' 752 | 753 | 754 | idna_to_unicode = idna.decode 755 | 756 | idna_to_ascii = idna.encode 757 | -------------------------------------------------------------------------------- /ada_url/ada_build.py: -------------------------------------------------------------------------------- 1 | from cffi import FFI 2 | from os.path import dirname, join 3 | from setuptools.extension import Extension 4 | from sys import platform 5 | 6 | file_dir = dirname(__file__) 7 | 8 | compile_args = ['/std:c++20'] if platform == 'win32' else ['-std=c++20'] 9 | 10 | ada_obj = Extension( 11 | 'ada', 12 | define_macros=[('ADA_INCLUDE_URL_PATTERN', '0')], 13 | language="c++", 14 | sources=['ada_url/ada.cpp'], 15 | include_dirs=[file_dir], 16 | extra_compile_args=compile_args, 17 | ) 18 | 19 | libraries = ['stdc++'] if platform == 'linux' else [] 20 | 21 | ffi_builder = FFI() 22 | ffi_builder.set_source( 23 | 'ada_url._ada_wrapper', 24 | '# include "ada_c.h"', 25 | libraries=libraries, 26 | include_dirs=[file_dir], 27 | extra_objects=[ada_obj], 28 | ) 29 | 30 | cdef_lines = [] 31 | with open(join(file_dir, 'ada_c.h'), 'rt') as f: 32 | for line in f: 33 | if not line.startswith('#'): 34 | cdef_lines.append(line) 35 | ffi_builder.cdef(''.join(cdef_lines)) 36 | 37 | if __name__ == '__main__': 38 | ffi_builder.compile() 39 | -------------------------------------------------------------------------------- /ada_url/ada_c.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * @file ada_c.h 3 | * @brief Includes the C definitions for Ada. This is a C file, not C++. 4 | */ 5 | #ifndef ADA_C_H 6 | #define ADA_C_H 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | // This is a reference to ada::url_components::omitted 13 | // It represents "uint32_t(-1)" 14 | #define ada_url_omitted 0xffffffff 15 | 16 | // string that is owned by the ada_url instance 17 | typedef struct { 18 | const char* data; 19 | size_t length; 20 | } ada_string; 21 | 22 | // string that must be freed by the caller 23 | typedef struct { 24 | const char* data; 25 | size_t length; 26 | } ada_owned_string; 27 | 28 | typedef struct { 29 | uint32_t protocol_end; 30 | uint32_t username_end; 31 | uint32_t host_start; 32 | uint32_t host_end; 33 | uint32_t port; 34 | uint32_t pathname_start; 35 | uint32_t search_start; 36 | uint32_t hash_start; 37 | } ada_url_components; 38 | 39 | typedef void* ada_url; 40 | 41 | // input should be a null terminated C string (ASCII or UTF-8) 42 | // you must call ada_free on the returned pointer 43 | ada_url ada_parse(const char* input, size_t length); 44 | ada_url ada_parse_with_base(const char* input, size_t input_length, 45 | const char* base, size_t base_length); 46 | 47 | // input and base should be a null terminated C strings 48 | bool ada_can_parse(const char* input, size_t length); 49 | bool ada_can_parse_with_base(const char* input, size_t input_length, 50 | const char* base, size_t base_length); 51 | 52 | void ada_free(ada_url result); 53 | void ada_free_owned_string(ada_owned_string owned); 54 | ada_url ada_copy(ada_url input); 55 | 56 | bool ada_is_valid(ada_url result); 57 | 58 | // url_aggregator getters 59 | // if ada_is_valid(result)) is false, an empty string is returned 60 | ada_owned_string ada_get_origin(ada_url result); 61 | ada_string ada_get_href(ada_url result); 62 | ada_string ada_get_username(ada_url result); 63 | ada_string 
ada_get_password(ada_url result); 64 | ada_string ada_get_port(ada_url result); 65 | ada_string ada_get_hash(ada_url result); 66 | ada_string ada_get_host(ada_url result); 67 | ada_string ada_get_hostname(ada_url result); 68 | ada_string ada_get_pathname(ada_url result); 69 | ada_string ada_get_search(ada_url result); 70 | ada_string ada_get_protocol(ada_url result); 71 | uint8_t ada_get_host_type(ada_url result); 72 | uint8_t ada_get_scheme_type(ada_url result); 73 | 74 | // url_aggregator setters 75 | // if ada_is_valid(result)) is false, the setters have no effect 76 | // input should be a null terminated C string 77 | bool ada_set_href(ada_url result, const char* input, size_t length); 78 | bool ada_set_host(ada_url result, const char* input, size_t length); 79 | bool ada_set_hostname(ada_url result, const char* input, size_t length); 80 | bool ada_set_protocol(ada_url result, const char* input, size_t length); 81 | bool ada_set_username(ada_url result, const char* input, size_t length); 82 | bool ada_set_password(ada_url result, const char* input, size_t length); 83 | bool ada_set_port(ada_url result, const char* input, size_t length); 84 | bool ada_set_pathname(ada_url result, const char* input, size_t length); 85 | void ada_set_search(ada_url result, const char* input, size_t length); 86 | void ada_set_hash(ada_url result, const char* input, size_t length); 87 | 88 | // url_aggregator clear methods 89 | void ada_clear_port(ada_url result); 90 | void ada_clear_hash(ada_url result); 91 | void ada_clear_search(ada_url result); 92 | 93 | // url_aggregator functions 94 | // if ada_is_valid(result) is false, functions below will return false 95 | bool ada_has_credentials(ada_url result); 96 | bool ada_has_empty_hostname(ada_url result); 97 | bool ada_has_hostname(ada_url result); 98 | bool ada_has_non_empty_username(ada_url result); 99 | bool ada_has_non_empty_password(ada_url result); 100 | bool ada_has_port(ada_url result); 101 | bool ada_has_password(ada_url 
result); 102 | bool ada_has_hash(ada_url result); 103 | bool ada_has_search(ada_url result); 104 | 105 | // returns a pointer to the internal url_aggregator::url_components 106 | const ada_url_components* ada_get_components(ada_url result); 107 | 108 | // idna methods 109 | ada_owned_string ada_idna_to_unicode(const char* input, size_t length); 110 | ada_owned_string ada_idna_to_ascii(const char* input, size_t length); 111 | 112 | // url search params 113 | typedef void* ada_url_search_params; 114 | 115 | // Represents an std::vector 116 | typedef void* ada_strings; 117 | typedef void* ada_url_search_params_keys_iter; 118 | typedef void* ada_url_search_params_values_iter; 119 | 120 | typedef struct { 121 | ada_string key; 122 | ada_string value; 123 | } ada_string_pair; 124 | 125 | typedef void* ada_url_search_params_entries_iter; 126 | 127 | ada_url_search_params ada_parse_search_params(const char* input, size_t length); 128 | void ada_free_search_params(ada_url_search_params result); 129 | 130 | size_t ada_search_params_size(ada_url_search_params result); 131 | void ada_search_params_sort(ada_url_search_params result); 132 | ada_owned_string ada_search_params_to_string(ada_url_search_params result); 133 | 134 | void ada_search_params_append(ada_url_search_params result, const char* key, 135 | size_t key_length, const char* value, 136 | size_t value_length); 137 | void ada_search_params_set(ada_url_search_params result, const char* key, 138 | size_t key_length, const char* value, 139 | size_t value_length); 140 | void ada_search_params_remove(ada_url_search_params result, const char* key, 141 | size_t key_length); 142 | void ada_search_params_remove_value(ada_url_search_params result, 143 | const char* key, size_t key_length, 144 | const char* value, size_t value_length); 145 | bool ada_search_params_has(ada_url_search_params result, const char* key, 146 | size_t key_length); 147 | bool ada_search_params_has_value(ada_url_search_params result, const char* key, 148 
| size_t key_length, const char* value, 149 | size_t value_length); 150 | ada_string ada_search_params_get(ada_url_search_params result, const char* key, 151 | size_t key_length); 152 | ada_strings ada_search_params_get_all(ada_url_search_params result, 153 | const char* key, size_t key_length); 154 | void ada_search_params_reset(ada_url_search_params result, const char* input, 155 | size_t length); 156 | ada_url_search_params_keys_iter ada_search_params_get_keys( 157 | ada_url_search_params result); 158 | ada_url_search_params_values_iter ada_search_params_get_values( 159 | ada_url_search_params result); 160 | ada_url_search_params_entries_iter ada_search_params_get_entries( 161 | ada_url_search_params result); 162 | 163 | void ada_free_strings(ada_strings result); 164 | size_t ada_strings_size(ada_strings result); 165 | ada_string ada_strings_get(ada_strings result, size_t index); 166 | 167 | void ada_free_search_params_keys_iter(ada_url_search_params_keys_iter result); 168 | ada_string ada_search_params_keys_iter_next( 169 | ada_url_search_params_keys_iter result); 170 | bool ada_search_params_keys_iter_has_next( 171 | ada_url_search_params_keys_iter result); 172 | 173 | void ada_free_search_params_values_iter( 174 | ada_url_search_params_values_iter result); 175 | ada_string ada_search_params_values_iter_next( 176 | ada_url_search_params_values_iter result); 177 | bool ada_search_params_values_iter_has_next( 178 | ada_url_search_params_values_iter result); 179 | 180 | void ada_free_search_params_entries_iter( 181 | ada_url_search_params_entries_iter result); 182 | ada_string_pair ada_search_params_entries_iter_next( 183 | ada_url_search_params_entries_iter result); 184 | bool ada_search_params_entries_iter_has_next( 185 | ada_url_search_params_entries_iter result); 186 | 187 | #endif // ADA_C_H 188 | -------------------------------------------------------------------------------- /ada_url/py.typed: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-url/ada-python/0097d6319b1e4d17dafe90835bb926274590b65a/ada_url/py.typed -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | from json import load 3 | from urllib.parse import urlparse 4 | from time import perf_counter 5 | 6 | from ada_url import URL 7 | 8 | URL_TEST_DATA_PATH = join(dirname(__file__), 'tests/files/urltestdata.json') 9 | 10 | with open(URL_TEST_DATA_PATH, 'rb') as f: 11 | test_data = load(f) 12 | 13 | test_cases = [] 14 | for item in test_data: 15 | if isinstance(item, str) or item.get('failure', False): 16 | continue 17 | test_cases.append(item['href']) 18 | 19 | print('Function', 'msec', 'URLs/msec', sep='\t') 20 | for func_name, func in (('stdlib urlparse', urlparse), ('ada_url URL', URL)): 21 | start_time = perf_counter() 22 | for item in test_cases: 23 | func(item) 24 | duration = perf_counter() - start_time 25 | rate = len(test_cases) / duration 26 | print(func_name, f'{duration * 1000:0.2f}', f'{rate / 1000:0.2f}', sep='\t') 27 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. 
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | 5 | # Put the project package on the path 6 | parent_dir = os.path.abspath('..') 7 | sys.path.insert(0, parent_dir) 8 | 9 | # Copy README.rst so it can be included in index.rst 10 | build_dir = '_build' 11 | os.makedirs(build_dir, exist_ok=True) 12 | 13 | readme_src = os.path.join(parent_dir, 'README.rst') 14 | readme_dst = os.path.join(build_dir, 'README.pprst') 15 | shutil.copyfile(readme_src, readme_dst) 16 | 17 | project = 'ada-url/ada-python' 18 | copyright = '2023, Ada authors' 19 | author = 'Ada authors' 20 | 21 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 22 | autodoc_member_order = 'bysource' 23 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 24 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./_build/README.pprst 2 | 3 | Building from source 4 | ==================== 5 | 6 | You will need to have Python 3 development files installed. 7 | On macOS, you will have these if you installed Python with ``brew``. 8 | On Linux, you may need to install some packages (e.g., ``python3-dev`` and ``python3-venv``). 9 | 10 | You will also need a C++ toolchain. 11 | On macOS, Xcode will provide this for you. 12 | On Linux, you may need to install some more packages (e.g. ``build-essential``). 13 | 14 | Clone the git repository to a directory for development: 15 | 16 | ..
code-block:: sh 17 | 18 | git clone https://github.com/ada-url/ada-python.git ada_url_python 19 | cd ada_url_python 20 | 21 | Create a virtual environment to use for building: 22 | 23 | .. code-block:: sh 24 | 25 | python3 -m venv env 26 | source ./env/bin/activate 27 | 28 | After that, you're ready to build the package: 29 | 30 | .. code-block:: sh 31 | 32 | python -m pip install -r requirements/development.txt 33 | python -m build --no-isolation 34 | 35 | This will create a `.whl` file in the `dist` directory. You can install it in other 36 | virtual environments on the same machine. 37 | 38 | To run tests, first build a package. Then: 39 | 40 | .. code-block:: sh 41 | 42 | python -m pip install -e . 43 | python -m unittest 44 | 45 | Leave the virtual environment with the ``deactivate`` command. 46 | 47 | API Documentation 48 | ================= 49 | 50 | .. automodule:: ada_url 51 | 52 | .. autoclass:: URL(url, base=None) 53 | .. autoclass:: HostType() 54 | .. autoclass:: SchemeType() 55 | 56 | ---- 57 | 58 | .. autofunction:: check_url(s) 59 | .. autofunction:: join_url(base_url, s) 60 | .. autofunction:: normalize_url(s) 61 | .. autofunction:: parse_url(s, [attributes]) 62 | .. autofunction:: replace_url(s, **kwargs) 63 | 64 | ---- 65 | 66 | .. autoclass:: URLSearchParams(params) 67 | .. autofunction:: parse_search_params(s) 68 | .. autofunction:: replace_search_params(s, *args) 69 | 70 | ---- 71 | 72 | .. autoclass:: idna 73 | 74 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found.
Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-url/ada-python/0097d6319b1e4d17dafe90835bb926274590b65a/docs/requirements.txt -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["cffi>=1.17.1", "setuptools", "urllib3", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "ada-url" 7 | version = "1.23.0" 8 | authors = [ 9 | {name = "Bo Bayles", email = "bo@bbayles.com"}, 10 | ] 11 | description = 'URL parser and manipulator based on the WHAT WG URL standard' 12 | readme = "README.rst" 13 | requires-python = ">=3.9" 14 | license = {text = "Apache 2.0"} 15 | classifiers = [ 16 | "License :: OSI Approved :: Apache Software License", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3 :: Only", 19 | ] 20 | dependencies = [ 21 | "cffi", 22 | ] 23 | 24 | [project.urls] 25 | Homepage = "https://www.ada-url.com/" 26 | Documentation = "https://ada-url.readthedocs.io" 27 | Repository = "https://github.com/ada-url/ada-python" 28 | 29 | [tool.setuptools.packages.find] 30 | exclude = ["tests"] 31 | 32 | 
[tool.setuptools] 33 | include-package-data = true 34 | 35 | [tool.setuptools.package-data] 36 | ada_url = ["*.c", "*.h", "*.o"] 37 | 38 | [tool.ruff] 39 | line-length = 88 40 | target-version = "py39" 41 | exclude = [ 42 | ".git", 43 | ".ruff_cache", 44 | ] 45 | 46 | [tool.ruff.format] 47 | quote-style = "single" 48 | 49 | [tool.ruff.lint] 50 | select = ["E", "F"] 51 | ignore = ["E501"] 52 | 53 | [tool.coverage.run] 54 | include = [ 55 | "ada_url/**", 56 | ] 57 | 58 | [tool.cibuildwheel] 59 | build = [ 60 | "cp39-*", 61 | "cp310-*", 62 | "cp311-*", 63 | "cp312-*", 64 | "cp313-*", 65 | "pp39-*", 66 | "pp310-*", 67 | ] 68 | manylinux-x86_64-image = "manylinux_2_28" 69 | manylinux-aarch64-image = "manylinux_2_28" 70 | 71 | [tool.cibuildwheel.linux] 72 | archs = ["x86_64", "aarch64"] 73 | 74 | [tool.cibuildwheel.macos] 75 | archs = ["x86_64", "universal2", "arm64"] 76 | environment = { MACOSX_DEPLOYMENT_TARGET="10.15" } 77 | before-build = "make clean" 78 | 79 | [tool.cibuildwheel.windows] 80 | archs = ["AMD64"] 81 | 82 | # https://github.com/pypy/pypy/issues/5027 83 | [[tool.cibuildwheel.overrides]] 84 | select = "pp3{9,10}-win_amd64" 85 | environment = { SETUPTOOLS_USE_DISTUTILS="stdlib" } 86 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | # What we want 2 | cffi==1.17.1 3 | 4 | # What we need 5 | pycparser==2.22 6 | -------------------------------------------------------------------------------- /requirements/development.txt: -------------------------------------------------------------------------------- 1 | build 2 | coverage 3 | ruff 4 | setuptools 5 | Sphinx 6 | twine 7 | urllib3 8 | wheel 9 | 10 | -r base.txt 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 
| from setuptools.command.build_ext import build_ext as _build_ext 3 | from setuptools.extension import Extension 4 | 5 | 6 | class build_ext(_build_ext): 7 | def build_extension(self, ext): 8 | for i, extra in enumerate(ext.extra_objects): 9 | if isinstance(extra, Extension): 10 | sources = sorted(extra.sources) 11 | extra_args = extra.extra_compile_args or [] 12 | macros = extra.define_macros[:] 13 | for undef in extra.undef_macros: 14 | macros.append((undef,)) 15 | objects = self.compiler.compile( 16 | sources, 17 | output_dir=self.build_temp, 18 | macros=macros, 19 | include_dirs=extra.include_dirs, 20 | debug=self.debug, 21 | extra_postargs=extra_args, 22 | depends=extra.depends, 23 | ) 24 | ext.extra_objects[i] = objects[0] 25 | return super().build_extension(ext) 26 | 27 | setup( 28 | cmdclass={'build_ext': build_ext}, 29 | cffi_modules=[ 30 | './ada_url/ada_build.py:ffi_builder', 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-url/ada-python/0097d6319b1e4d17dafe90835bb926274590b65a/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_ada_url.py: -------------------------------------------------------------------------------- 1 | from copy import copy, deepcopy 2 | from json import load 3 | from os.path import dirname, join 4 | from unittest import TestCase 5 | 6 | from ada_url import ( 7 | HostType, 8 | SchemeType, 9 | URLSearchParams as SearchParams, 10 | URL, 11 | check_url, 12 | idna, 13 | idna_to_ascii, 14 | idna_to_unicode, 15 | join_url, 16 | normalize_url, 17 | replace_search_params, 18 | parse_search_params, 19 | parse_url, 20 | replace_url, 21 | ) 22 | from ada_url.ada_adapter import GET_ATTRIBUTES 23 | 24 | URL_TEST_DATA_PATH = join(dirname(__file__), 'files/urltestdata.json') 25 | 26 | 27 | class 
ADAURLTests(TestCase): 28 | def test_class_get(self): 29 | url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 30 | urlobj = URL(url) 31 | self.assertEqual( 32 | urlobj.href, 'https://user_1:password_1@example.org:8080/api?q=1#frag' 33 | ) 34 | self.assertEqual(urlobj.username, 'user_1') 35 | self.assertEqual(urlobj.password, 'password_1') 36 | self.assertEqual(urlobj.protocol, 'https:') 37 | self.assertEqual(urlobj.port, '8080') 38 | self.assertEqual(urlobj.hostname, 'example.org') 39 | self.assertEqual(urlobj.host, 'example.org:8080') 40 | self.assertEqual(urlobj.pathname, '/api') 41 | self.assertEqual(urlobj.search, '?q=1') 42 | self.assertEqual(urlobj.hash, '#frag') 43 | self.assertEqual(urlobj.origin, 'https://example.org:8080') 44 | 45 | with self.assertRaises(AttributeError): 46 | urlobj.bogus 47 | 48 | def test_class_host_type(self): 49 | # host_type should return an IntEnum, which can be compared to a Python int 50 | for url, expected in ( 51 | ('http://localhost:3000', HostType.DEFAULT), 52 | ('http://0.0.0.0', HostType.IPV4), 53 | ('http://[2001:db8:3333:4444:5555:6666:7777:8888]', HostType.IPV6), 54 | ): 55 | with self.subTest(url=url): 56 | urlobj = URL(url) 57 | self.assertEqual(urlobj.host_type, int(expected)) 58 | self.assertEqual(urlobj.host_type, expected) 59 | 60 | def test_class_scheme_type(self): 61 | # host_type should return an IntEnum, which can be compared to a Python int 62 | for url, expected in ( 63 | ('http://localhost', SchemeType.HTTP), 64 | ('git://localhost', SchemeType.NOT_SPECIAL), 65 | ('https://localhost', SchemeType.HTTPS), 66 | ('ws://localhost', SchemeType.WS), 67 | ('ftp://localhost', SchemeType.FTP), 68 | ('wss://localhost', SchemeType.WSS), 69 | ('file://localhost', SchemeType.FILE), 70 | ): 71 | with self.subTest(url=url): 72 | urlobj = URL(url) 73 | self.assertEqual(urlobj.scheme_type, int(expected)) 74 | self.assertEqual(urlobj.scheme_type, expected) 75 | 76 | def test_copy_vs_deepcopy(self): 77 | obj 
= URL('http://example.org:8080') 78 | copied_obj = copy(obj) 79 | deepcopied_obj = deepcopy(obj) 80 | 81 | obj.port = '8081' 82 | self.assertEqual(copied_obj.port, '8081') 83 | self.assertEqual(deepcopied_obj.port, '8080') 84 | 85 | deepcopied_obj.port = '8082' 86 | self.assertEqual(copied_obj.port, '8081') 87 | self.assertEqual(deepcopied_obj.port, '8082') 88 | 89 | def test_class_set(self): 90 | url = 'https://username:password@www.google.com:8080/' 91 | urlobj = URL(url) 92 | urlobj.href = 'https://www.yagiz.co' 93 | urlobj.hash = 'new-hash' 94 | urlobj.hostname = 'new-host' 95 | urlobj.host = 'changed-host:9090' 96 | urlobj.pathname = 'new-pathname' 97 | urlobj.search = 'new-search' 98 | urlobj.protocol = 'wss' 99 | actual = urlobj.href 100 | 101 | with self.assertRaises(ValueError): 102 | urlobj.hostname = 1 103 | 104 | with self.assertRaises(ValueError): 105 | urlobj.hostname = '127.0.0.0.0.1' 106 | 107 | expected = 'wss://changed-host:9090/new-pathname?new-search#new-hash' 108 | self.assertEqual(actual, expected) 109 | 110 | def test_class_delete(self): 111 | url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 112 | urlobj = URL(url) 113 | 114 | del urlobj.port 115 | self.assertEqual( 116 | urlobj.href, 'https://user_1:password_1@example.org/api?q=1#frag' 117 | ) 118 | 119 | del urlobj.hash 120 | self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/api?q=1') 121 | 122 | del urlobj.pathname 123 | self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/?q=1') 124 | 125 | del urlobj.search 126 | self.assertEqual(urlobj.href, 'https://user_1:password_1@example.org/') 127 | 128 | with self.assertRaises(AttributeError): 129 | del urlobj.href 130 | 131 | def test_unset(self): 132 | url = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 133 | for attr, expected in ( 134 | ('username', 'https://:password_1@example.org:8080/api?q=1#frag'), 135 | ('password', 
'https://user_1@example.org:8080/api?q=1#frag'), 136 | ('port', 'https://user_1:password_1@example.org/api?q=1#frag'), 137 | ('pathname', 'https://user_1:password_1@example.org:8080/?q=1#frag'), 138 | ('search', 'https://user_1:password_1@example.org:8080/api#frag'), 139 | ('hash', 'https://user_1:password_1@example.org:8080/api?q=1'), 140 | ): 141 | with self.subTest(attr=attr): 142 | urlobj = URL(url) 143 | urlobj.__delattr__(attr) 144 | self.assertEqual(urlobj.href, expected) 145 | 146 | def test_class_with_base(self): 147 | url = '../example.txt' 148 | base = 'https://example.org/path/' 149 | urlobj = URL(url, base) 150 | self.assertEqual(urlobj.href, 'https://example.org/example.txt') 151 | 152 | def test_class_invalid(self): 153 | with self.assertRaises(ValueError): 154 | URL('bogus') 155 | 156 | def test_class_can_parse(self): 157 | for url, expected in ( 158 | (1, False), 159 | (None, False), 160 | ('bogus', False), 161 | ('https://example.org', True), 162 | ): 163 | with self.subTest(url=url): 164 | actual = URL.can_parse(url) 165 | self.assertEqual(actual, expected) 166 | 167 | def test_class_can_parse_with_base(self): 168 | url = 'example.txt' 169 | for base, expected in ( 170 | ('https://example.org', True), 171 | (1, False), 172 | (None, False), 173 | ('bogus', False), 174 | ): 175 | with self.subTest(url=url): 176 | actual = URL.can_parse(url, base) 177 | self.assertEqual(actual, expected) 178 | 179 | def test_class_dir(self): 180 | urlobj = URL('https://example.org') 181 | actual = set(dir(urlobj)) 182 | self.assertTrue(actual.issuperset(GET_ATTRIBUTES)) 183 | 184 | def test_to_str(self): 185 | urlobj = URL('https://example.org/../something.txt') 186 | actual = str(urlobj) 187 | expected = 'https://example.org/something.txt' 188 | self.assertEqual(actual, expected) 189 | 190 | def test_to_repr(self): 191 | urlobj = URL('https://example.org/../something.txt') 192 | actual = repr(urlobj) 193 | expected = '' 194 | self.assertEqual(actual, expected) 195 
| 196 | def test_check_url(self): 197 | for s, expected in ( 198 | ('https:example.org', True), 199 | ('https://////example.com/// ', True), 200 | ('https://example.com/././foo', True), 201 | ('file:///C|/demo', True), 202 | ('https://127.0.0.1./', True), 203 | ('bogus', False), 204 | ('https://exa%23mple.org', False), 205 | ('foo://exa[mple.org', False), 206 | ('https://127.0.0.0.1./', False), 207 | (None, False), 208 | (1, False), 209 | ('', False), 210 | ('\n', False), 211 | ): 212 | with self.subTest(s=s): 213 | actual = check_url(s) 214 | self.assertEqual(actual, expected) 215 | 216 | def test_join_url(self): 217 | # Tests from https://www.rfc-editor.org/rfc/rfc3986.html 218 | # sections 5.4.1. and 5.4.2 219 | base_url = 'http://a/b/c/d;p?q' 220 | for s, expected in ( 221 | ('g:h', 'g:h'), 222 | ('g', 'http://a/b/c/g'), 223 | ('./g', 'http://a/b/c/g'), 224 | ('g/', 'http://a/b/c/g/'), 225 | ('/g', 'http://a/g'), 226 | ('//g', 'http://g/'), # Slightly different output, trailing / 227 | ('?y', 'http://a/b/c/d;p?y'), 228 | ('g?y', 'http://a/b/c/g?y'), 229 | ('#s', 'http://a/b/c/d;p?q#s'), 230 | ('g#s', 'http://a/b/c/g#s'), 231 | ('g?y#s', 'http://a/b/c/g?y#s'), 232 | (';x', 'http://a/b/c/;x'), 233 | ('g;x', 'http://a/b/c/g;x'), 234 | ('g;x?y#s', 'http://a/b/c/g;x?y#s'), 235 | ('', 'http://a/b/c/d;p?q'), 236 | ('.', 'http://a/b/c/'), 237 | ('./', 'http://a/b/c/'), 238 | ('..', 'http://a/b/'), 239 | ('../', 'http://a/b/'), 240 | ('../g', 'http://a/b/g'), 241 | ('../..', 'http://a/'), 242 | ('../../', 'http://a/'), 243 | ('../../g', 'http://a/g'), 244 | ('/./g', 'http://a/g'), 245 | ('/../g', 'http://a/g'), 246 | ('g.', 'http://a/b/c/g.'), 247 | ('.g', 'http://a/b/c/.g'), 248 | ('g..', 'http://a/b/c/g..'), 249 | ('..g', 'http://a/b/c/..g'), 250 | ('./../g', 'http://a/b/g'), 251 | ('./g/.', 'http://a/b/c/g/'), 252 | ('g/./h', 'http://a/b/c/g/h'), 253 | ('g/../h', 'http://a/b/c/h'), 254 | ('g;x=1/./y', 'http://a/b/c/g;x=1/y'), 255 | ('g;x=1/../y', 'http://a/b/c/y'), 
256 | ('g?y/./x', 'http://a/b/c/g?y/./x'), 257 | ('g?y/../x', 'http://a/b/c/g?y/../x'), 258 | ('g#s/./x', 'http://a/b/c/g#s/./x'), 259 | ('g#s/../x', 'http://a/b/c/g#s/../x'), 260 | ): 261 | with self.subTest(s=s): 262 | actual = join_url(base_url, s) 263 | self.assertEqual(actual, expected) 264 | 265 | def test_join_url_invalid(self): 266 | for base_url, s in ( 267 | (1, './g'), 268 | ('https://example.org', 1), 269 | ('bogus', './g'), 270 | ): 271 | with self.subTest(base_url=base_url, s=s): 272 | with self.assertRaises(ValueError): 273 | join_url(base_url, s) 274 | 275 | def test_normalize_url(self): 276 | for s, expected in ( 277 | ('https://example.org', 'https://example.org/'), 278 | ('https://example.org/../yolo.txt', 'https://example.org/yolo.txt'), 279 | ('https://example.org/dir/../yolo.txt', 'https://example.org/yolo.txt'), 280 | ( 281 | 'https://example.org/dir_1/dir_2/../../yolo.txt', 282 | 'https://example.org/yolo.txt', 283 | ), 284 | ( 285 | 'https://example.org/dir_1/dir_2/../../../yolo.txt', 286 | 'https://example.org/yolo.txt', 287 | ), 288 | ( 289 | 'https://example.org/dir_1/dir_2/../..//yolo.txt', 290 | 'https://example.org//yolo.txt', 291 | ), 292 | ): 293 | with self.subTest(s=s): 294 | actual = normalize_url(s) 295 | self.assertEqual(actual, expected) 296 | 297 | def test_normalize_url_error(self): 298 | for s in (1, 'bogus'): 299 | with self.subTest(s=s): 300 | with self.assertRaises(ValueError): 301 | normalize_url(s) 302 | 303 | def test_parse_url(self): 304 | s = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 305 | actual = parse_url(s) 306 | expected = { 307 | 'href': 'https://user_1:password_1@example.org:8080/api?q=1#frag', 308 | 'username': 'user_1', 309 | 'password': 'password_1', 310 | 'protocol': 'https:', 311 | 'host': 'example.org:8080', 312 | 'port': '8080', 313 | 'hostname': 'example.org', 314 | 'pathname': '/api', 315 | 'search': '?q=1', 316 | 'hash': '#frag', 317 | 'origin': 'https://example.org:8080', 318 
| 'host_type': HostType(0), 319 | 'scheme_type': SchemeType(2), 320 | } 321 | self.assertEqual(actual, expected) 322 | 323 | def test_parse_url_subset(self): 324 | s = 'https://user_1:password_1@example.org:8080/dir/../api?q=1#frag' 325 | actual = parse_url(s, attributes=('username', 'password')) 326 | expected = {'username': 'user_1', 'password': 'password_1'} 327 | self.assertEqual(actual, expected) 328 | 329 | def test_parse_url_error(self): 330 | for s in (1, 'bogus'): 331 | with self.subTest(s=s): 332 | with self.assertRaises(ValueError): 333 | parse_url(s) 334 | 335 | def test_replace_url(self): 336 | s = 'https://www.example.org/yolo.txt?q=1#2' 337 | for kwargs, expected in ( 338 | ( 339 | {'username': 'user', 'password': 'pass'}, 340 | 'https://user:pass@www.example.org/yolo.txt?q=1#2', 341 | ), 342 | ({'protocol': 'http:'}, 'http://www.example.org/yolo.txt?q=1#2'), 343 | ({'protocol': 'http'}, 'http://www.example.org/yolo.txt?q=1#2'), 344 | ({'port': '80'}, 'https://www.example.org:80/yolo.txt?q=1#2'), 345 | ({'host': 'www.example.com'}, 'https://www.example.com/yolo.txt?q=1#2'), 346 | ({'hostname': 'example.com'}, 'https://example.com/yolo.txt?q=1#2'), 347 | ({'search': '?q=0'}, 'https://www.example.org/yolo.txt?q=0#2'), 348 | ({'hash': '0'}, 'https://www.example.org/yolo.txt?q=1#0'), 349 | ): 350 | with self.subTest(kwargs=kwargs): 351 | actual = replace_url(s, **kwargs) 352 | self.assertEqual(actual, expected) 353 | 354 | def test_replace_url_clear(self): 355 | s = 'https://user_1:password_1@example.org:8443/api?q=1#frag' 356 | actual = replace_url(s, port='', hash='', search='') 357 | expected = 'https://user_1:password_1@example.org/api' 358 | self.assertEqual(actual, expected) 359 | 360 | def test_replace_url_unset(self): 361 | s = 'https://user:pass@example.org' 362 | actual = replace_url(s, username='', password='') 363 | expected = 'https://example.org/' 364 | self.assertEqual(actual, expected) 365 | 366 | def test_replace_href(self): 367 | s = 
'https://username:password@www.google.com:8080/' 368 | kwargs = { 369 | 'href': 'https://www.yagiz.co', 370 | 'hash': 'new-hash', 371 | 'hostname': 'new-host', 372 | 'host': 'changed-host:9090', 373 | 'pathname': 'new-pathname', 374 | 'search': 'new-search', 375 | 'protocol': 'wss', 376 | } 377 | actual = replace_url(s, **kwargs) 378 | expected = 'wss://changed-host:9090/new-pathname?new-search#new-hash' 379 | self.assertEqual(actual, expected) 380 | 381 | def test_replace_url_error(self): 382 | for s, kwargs in ( 383 | (1, {}), 384 | ('bogus', {}), 385 | ('http://localhost/', {'password': 1}), 386 | ('http://localhost/', {'hostname': 'exa[mple.org'}), 387 | ): 388 | with self.subTest(s=s, kwargs=kwargs): 389 | with self.assertRaises(ValueError): 390 | replace_url(s, **kwargs) 391 | 392 | def test_idna_decode(self): 393 | self.assertEqual(idna.decode('xn--meagefactory-m9a.ca'), 'meßagefactory.ca') 394 | self.assertEqual( 395 | idna_to_unicode(b'xn--meagefactory-m9a.ca'), 'meßagefactory.ca' 396 | ) 397 | 398 | def test_idna_encode(self): 399 | self.assertEqual(idna.encode('meßagefactory.ca'), b'xn--meagefactory-m9a.ca') 400 | self.assertEqual( 401 | idna_to_ascii('meßagefactory.ca'.encode('utf-8')), 402 | b'xn--meagefactory-m9a.ca', 403 | ) 404 | 405 | 406 | class SearchParamsTests(TestCase): 407 | def test_append(self): 408 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 409 | search_params.append('key2', 'value4') 410 | search_params.append('key3', 'value5') 411 | actual = list(search_params.items()) 412 | expected = [ 413 | ('key1', 'value1'), 414 | ('key1', 'value2'), 415 | ('key2', 'value3'), 416 | ('key2', 'value4'), 417 | ('key3', 'value5'), 418 | ] 419 | self.assertEqual(actual, expected) 420 | 421 | def test_delete_key(self): 422 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 423 | search_params.delete('key1') 424 | search_params.delete('key3') 425 | actual = list(search_params.items()) 426 | expected = [('key2', 
'value3')] 427 | self.assertEqual(actual, expected) 428 | 429 | def test_delete_value(self): 430 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 431 | search_params.delete('key1', 'value1') 432 | search_params.delete('key1', 'value4') 433 | search_params.delete('key3', 'value5') 434 | actual = list(search_params.items()) 435 | expected = [('key1', 'value2'), ('key2', 'value3')] 436 | self.assertEqual(actual, expected) 437 | 438 | def test_get(self): 439 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 440 | self.assertEqual(search_params.get('key1'), 'value1') 441 | self.assertEqual(search_params.get('key2'), 'value3') 442 | self.assertEqual(search_params.get('key3'), '') 443 | 444 | def test_get_all(self): 445 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 446 | self.assertEqual(search_params.get_all('key1'), ['value1', 'value2']) 447 | self.assertEqual(search_params.get_all('key2'), ['value3']) 448 | 449 | def test_has_key(self): 450 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 451 | self.assertTrue(search_params.has('key1')) 452 | self.assertTrue(search_params.has('key2')) 453 | self.assertFalse(search_params.has('key3')) 454 | 455 | def test_has_value(self): 456 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 457 | self.assertTrue(search_params.has('key1', 'value1')) 458 | self.assertTrue(search_params.has('key1', 'value2')) 459 | self.assertTrue(search_params.has('key2', 'value3')) 460 | self.assertFalse(search_params.has('key1', 'value4')) 461 | self.assertFalse(search_params.has('key2', 'value5')) 462 | self.assertFalse(search_params.has('key3', 'value6')) 463 | 464 | def test_items(self): 465 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 466 | actual = list(search_params.items()) 467 | expected = [('key1', 'value1'), ('key1', 'value2'), ('key2', 'value3')] 468 | self.assertEqual(actual, expected) 469 | 470 | def 
test_size(self): 471 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 472 | self.assertEqual(search_params.size, 3) 473 | 474 | def test_keys(self): 475 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 476 | actual = list(search_params.keys()) 477 | expected = ['key1', 'key1', 'key2'] 478 | self.assertEqual(actual, expected) 479 | 480 | def test_repr(self): 481 | search_params = SearchParams('key1=value1') 482 | actual = repr(search_params) 483 | expected = '' 484 | self.assertEqual(actual, expected) 485 | 486 | def test_set(self): 487 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 488 | search_params.set('key1', 'value4') 489 | search_params.set('key3', 'value5') 490 | actual = list(search_params.items()) 491 | expected = [('key1', 'value4'), ('key2', 'value3'), ('key3', 'value5')] 492 | self.assertEqual(actual, expected) 493 | 494 | def test_sort(self): 495 | search_params = SearchParams('key2=value2&key1=value1&key3=value3') 496 | search_params.sort() 497 | actual = list(search_params.items()) 498 | expected = [('key1', 'value1'), ('key2', 'value2'), ('key3', 'value3')] 499 | self.assertEqual(actual, expected) 500 | 501 | def test_str(self): 502 | params = 'key2=value2&key1=value1&key3=value3' 503 | search_params = SearchParams(params) 504 | self.assertEqual(str(search_params), params) 505 | 506 | def test_values(self): 507 | search_params = SearchParams('key1=value1&key1=value2&key2=value3') 508 | actual = list(search_params.values()) 509 | expected = ['value1', 'value2', 'value3'] 510 | self.assertEqual(actual, expected) 511 | 512 | def test_parse_search_params(self): 513 | s = 'key1=value1&key1=value2&key2=value3' 514 | actual = parse_search_params(s) 515 | expected = {'key1': ['value1', 'value2'], 'key2': ['value3']} 516 | self.assertEqual(actual, expected) 517 | 518 | def test_replace_search_params(self): 519 | s = 'key1=value1&key1=value2&key2=value3' 520 | actual = replace_search_params(s, 
class ParseTests(TestCase):
    def test_url_suite(self):
        """Run the web-platform-tests URL suite against the URL class."""
        with open(URL_TEST_DATA_PATH, 'rb') as f:
            test_data = load(f)

        for index, item in enumerate(test_data, 1):
            # Comment entries in the suite are plain strings; skip them.
            if isinstance(item, str):
                continue

            # Skip entries whose input/base can't round-trip through UTF-8:
            # they aren't representable properly with the json module.
            try:
                (item.get('input') or '').encode('utf-8')
                (item.get('base') or '').encode('utf-8')
            except UnicodeEncodeError:
                continue

            with self.subTest(i=index):
                url_input = item['input']
                base = item.get('base', None)
                if item.get('failure', False):
                    with self.assertRaises(ValueError):
                        URL(url_input, base=base)
                else:
                    parsed = URL(url_input, base=base)
                    self.assertEqual(parsed.href, item['href'])
"""
update_ada.py

Run this script to pull in the latest version of `ada-url/ada` single
header package.
"""

from io import BytesIO
from os.path import dirname, join
from zipfile import ZipFile

from certifi import where
from urllib3 import PoolManager


RELEASE_URL = 'https://github.com/ada-url/ada/releases/latest/download/singleheader.zip'
TARGET_DIR = join(dirname(__file__), 'ada_url/')


def main():
    """Download the latest ada singleheader release and unpack its C/C++
    sources into the ada_url package directory.

    Raises:
        RuntimeError: if the download does not return HTTP 200.
    """
    http_client = PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=where())
    resp = http_client.request('GET', RELEASE_URL)
    # BUG FIX: fail loudly on a bad download instead of handing an error
    # page to ZipFile, which would raise a confusing BadZipFile error.
    if resp.status != 200:
        raise RuntimeError(f'Download failed with HTTP {resp.status}')
    with BytesIO(resp.data) as f, ZipFile(f) as z:
        for file_name in ('ada.cpp', 'ada.h', 'ada_c.h'):
            z.extract(file_name, TARGET_DIR)


if __name__ == '__main__':
    main()
# (the module docstring for update_sdist.py closes on the preceding lines)
from pathlib import Path
from tarfile import open as tar_open
from tempfile import TemporaryDirectory

# Archive members (paths relative to the unpacked sdist root directory)
# that must not ship in the source distribution.
REMOVE_FILES = frozenset(['ada_url/ada.o'])


def update_archive(file_path, removals):
    """Rewrite the gzipped tarball at *file_path*, dropping *removals*.

    *removals* is a collection of member paths relative to the single
    top-level ``ada_url-*`` directory inside the archive.
    """
    with TemporaryDirectory() as temp_dir:
        with tar_open(file_path, mode='r:gz') as tf:
            tf.extractall(temp_dir)

        dir_path = next(Path(temp_dir).glob('ada_url-*'))
        # BUG FIX: the loops below previously reused the name `file_path`,
        # clobbering the archive path before the re-pack step (the archive
        # was never rewritten; the last extracted member was overwritten
        # instead), and the `removals` parameter was ignored in favor of
        # the module-level REMOVE_FILES constant.
        kept_files = []
        for member in Path(temp_dir).glob('**/*'):
            if member.is_dir():
                continue
            if str(member.relative_to(dir_path)) in removals:
                continue
            kept_files.append(member)

        with tar_open(file_path, mode='w:gz') as tf:
            for member in kept_files:
                arcname = member.relative_to(temp_dir)
                print(arcname)
                tf.add(member, arcname=arcname)


if __name__ == '__main__':
    for archive_path in Path().glob('dist/*.tar.gz'):
        update_archive(archive_path, REMOVE_FILES)
        print(f'Updated {archive_path}')