├── .github └── workflows │ ├── release.yml │ └── tests.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── httptools ├── __init__.py ├── _version.py └── parser │ ├── .gitignore │ ├── __init__.py │ ├── cparser.pxd │ ├── errors.py │ ├── parser.pyx │ ├── python.pxd │ ├── url_cparser.pxd │ └── url_parser.pyx ├── pytest.ini ├── setup.py └── tests ├── __init__.py └── test_parser.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - "master" 7 | - "ci" 8 | - "[0-9]+.[0-9x]+*" 9 | paths: 10 | - "httptools/_version.py" 11 | 12 | jobs: 13 | validate-release-request: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Validate release PR 17 | uses: edgedb/action-release/validate-pr@bae6b9134e872166b43d218dd79397c851c41c9a 18 | id: checkver 19 | with: 20 | require_team: Release Managers 21 | require_approval: no 22 | github_token: ${{ secrets.RELEASE_BOT_GITHUB_TOKEN }} 23 | version_file: httptools/_version.py 24 | version_line_pattern: | 25 | __version__\s*=\s*(?:['"])([[:PEP440:]])(?:['"]) 26 | 27 | - name: Stop if not approved 28 | if: steps.checkver.outputs.approved != 'true' 29 | run: | 30 | echo ::error::PR is not approved yet. 
31 | exit 1 32 | 33 | - name: Store release version for later use 34 | env: 35 | VERSION: ${{ steps.checkver.outputs.version }} 36 | run: | 37 | mkdir -p dist/ 38 | echo "${VERSION}" > dist/VERSION 39 | 40 | - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 41 | with: 42 | name: dist-version 43 | path: dist/ 44 | 45 | build-sdist: 46 | needs: validate-release-request 47 | runs-on: ubuntu-latest 48 | 49 | steps: 50 | - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 51 | with: 52 | fetch-depth: 50 53 | submodules: true 54 | 55 | - name: Set up Python 56 | uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 57 | 58 | - name: Build source distribution 59 | run: | 60 | python -m pip install -U setuptools wheel pip 61 | python setup.py sdist 62 | 63 | - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 64 | with: 65 | name: dist-sdist 66 | path: dist/*.tar.* 67 | 68 | build-wheels: 69 | needs: validate-release-request 70 | runs-on: ${{ matrix.os }} 71 | strategy: 72 | matrix: 73 | os: [ubuntu-latest, macos-latest, windows-latest] 74 | cibw_arch: ["auto64", "aarch64", "universal2"] 75 | cibw_python: 76 | - "cp38" 77 | - "cp39" 78 | - "cp310" 79 | - "cp311" 80 | - "cp312" 81 | - "cp313" 82 | exclude: 83 | - os: ubuntu-latest 84 | cibw_arch: universal2 85 | - os: macos-latest 86 | cibw_arch: aarch64 87 | - os: windows-latest 88 | cibw_arch: universal2 89 | - os: windows-latest 90 | cibw_arch: aarch64 91 | 92 | defaults: 93 | run: 94 | shell: bash 95 | 96 | env: 97 | PIP_DISABLE_PIP_VERSION_CHECK: 1 98 | 99 | steps: 100 | - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 101 | with: 102 | fetch-depth: 50 103 | submodules: true 104 | 105 | - name: Set up QEMU 106 | if: matrix.os == 'ubuntu-latest' && matrix.cibw_arch == 'aarch64' 107 | uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 108 | with: 109 | 
platforms: arm64 110 | 111 | - uses: pypa/cibuildwheel@7940a4c0e76eb2030e473a5f864f291f63ee879b # v2.21.3 112 | env: 113 | CIBW_BUILD_VERBOSITY: 1 114 | CIBW_BUILD: ${{ matrix.cibw_python }}-* 115 | CIBW_ARCHS: ${{ matrix.cibw_arch }} 116 | CIBW_TEST_EXTRAS: "test" 117 | CIBW_TEST_COMMAND: "python {project}/tests/__init__.py" 118 | CIBW_TEST_COMMAND_WINDOWS: "python {project}\\tests\\__init__.py" 119 | CIBW_TEST_SKIP: "*universal2:arm64" 120 | 121 | - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 122 | with: 123 | name: dist-${{ matrix.os }}-${{ matrix.cibw_arch }}-${{ matrix.cibw_python }} 124 | path: wheelhouse/*.whl 125 | 126 | publish: 127 | needs: [build-sdist, build-wheels] 128 | runs-on: ubuntu-latest 129 | 130 | steps: 131 | - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 132 | with: 133 | fetch-depth: 5 134 | submodules: false 135 | 136 | - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 137 | with: 138 | pattern: dist-* 139 | merge-multiple: true 140 | path: dist/ 141 | 142 | - name: Extract Release Version 143 | id: relver 144 | run: | 145 | set -e 146 | echo "version=$(cat dist/VERSION)" >> "$GITHUB_OUTPUT" 147 | rm dist/VERSION 148 | 149 | - name: Merge and tag the PR 150 | uses: edgedb/action-release/merge@bae6b9134e872166b43d218dd79397c851c41c9a 151 | with: 152 | github_token: ${{ secrets.RELEASE_BOT_GITHUB_TOKEN }} 153 | ssh_key: ${{ secrets.RELEASE_BOT_SSH_KEY }} 154 | gpg_key: ${{ secrets.RELEASE_BOT_GPG_KEY }} 155 | gpg_key_id: "5C468778062D87BF!" 
156 | tag_name: v${{ steps.relver.outputs.version }} 157 | 158 | - name: Publish Github Release 159 | uses: elprans/gh-action-create-release@5f9abb8f0677196a76ea77e64341fa8ca31dad4f 160 | env: 161 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 162 | with: 163 | tag_name: v${{ steps.relver.outputs.version }} 164 | release_name: v${{ steps.relver.outputs.version }} 165 | target: ${{ github.event.pull_request.base.ref }} 166 | body: ${{ github.event.pull_request.body }} 167 | draft: false 168 | 169 | - run: | 170 | ls -al dist/ 171 | 172 | - name: Upload to PyPI 173 | uses: pypa/gh-action-pypi-publish@f7600683efdcb7656dec5b29656edb7bc586e597 # v1.10.3 174 | with: 175 | user: __token__ 176 | password: ${{ secrets.PYPI_TOKEN }} 177 | # password: ${{ secrets.TEST_PYPI_TOKEN }} 178 | # repository_url: https://test.pypi.org/legacy/ 179 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - ci 8 | pull_request: 9 | branches: 10 | - master 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 18 | os: [windows-latest, ubuntu-latest, macos-latest] 19 | 20 | env: 21 | PIP_DISABLE_PIP_VERSION_CHECK: 1 22 | 23 | steps: 24 | - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 25 | with: 26 | fetch-depth: 50 27 | submodules: true 28 | 29 | - name: Check if release PR. 
30 | uses: edgedb/action-release/validate-pr@bae6b9134e872166b43d218dd79397c851c41c9a 31 | id: release 32 | with: 33 | github_token: ${{ secrets.RELEASE_BOT_GITHUB_TOKEN }} 34 | missing_version_ok: yes 35 | version_file: httptools/_version.py 36 | version_line_pattern: | 37 | __version__\s*=\s*(?:['"])([[:PEP440:]])(?:['"]) 38 | 39 | - name: Set up Python ${{ matrix.python-version }} 40 | uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 41 | if: steps.release.outputs.version == 0 42 | with: 43 | python-version: ${{ matrix.python-version }} 44 | 45 | - name: Test 46 | if: steps.release.outputs.version == 0 47 | run: | 48 | python -m pip install -U pip setuptools wheel 49 | python -m pip install -e .[test] 50 | python -m unittest -v tests.suite 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *._* 2 | *.pyc 3 | *.pyo 4 | *.ymlc 5 | *.ymlc~ 6 | *.scssc 7 | *.so 8 | *.pyd 9 | *~ 10 | .#* 11 | .DS_Store 12 | .project 13 | .pydevproject 14 | .settings 15 | .idea 16 | /.ropeproject 17 | \#*# 18 | /pub 19 | /test*.py 20 | /.local 21 | /perf.data* 22 | /config_local.yml 23 | /build 24 | __pycache__/ 25 | .d8_history 26 | /*.egg 27 | /*.egg-info 28 | /dist 29 | /.pytest_cache 30 | /.mypy_cache 31 | /.vscode 32 | .eggs 33 | .venv 34 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/http-parser"] 2 | path = vendor/http-parser 3 | url = https://github.com/nodejs/http-parser.git 4 | [submodule "vendor/llhttp"] 5 | path = vendor/llhttp 6 | url = https://github.com/nodejs/llhttp.git 7 | branch = release 8 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2015 MagicStack Inc. http://magic.io 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include httptools *.pxd *.pyx 2 | recursive-include vendor *.c *.h LICENSE* README* 3 | include MANIFEST.in LICENSE 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: compile release test distclean clean testinstalled 2 | 3 | 4 | PYTHON ?= python3 5 | ROOT = $(dir $(realpath $(firstword $(MAKEFILE_LIST)))) 6 | 7 | 8 | compile: 9 | $(PYTHON) setup.py build_ext --inplace 10 | 11 | 12 | release: compile test 13 | $(PYTHON) setup.py sdist upload 14 | 15 | 16 | test: compile 17 | $(PYTHON) -m unittest -v 18 | 19 | clean: 20 | find $(ROOT)/httptools/parser -name '*.c' | xargs rm -f 21 | find $(ROOT)/httptools/parser -name '*.html' | xargs rm -f 22 | 23 | distclean: clean 24 | git --git-dir="$(ROOT)/vendor/http-parser/.git" clean -dfx 25 | git --git-dir="$(ROOT)/vendor/llhttp/.git" clean -dfx 26 | 27 | 28 | testinstalled: 29 | cd /tmp && $(PYTHON) $(ROOT)/tests/__init__.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Tests](https://github.com/MagicStack/httptools/workflows/Tests/badge.svg) 2 | 3 | httptools is a Python binding for the nodejs HTTP parser. 4 | 5 | The package is available on PyPI: `pip install httptools`. 6 | 7 | 8 | # APIs 9 | 10 | httptools contains two classes `httptools.HttpRequestParser`, 11 | `httptools.HttpResponseParser` (fulfilled through 12 | [llhttp](https://github.com/nodejs/llhttp)) and a function for 13 | parsing URLs `httptools.parse_url` (through 14 | [http-parser](https://github.com/nodejs/http-parser) for now). 15 | See unittests for examples. 
16 | 17 | 18 | ```python 19 | 20 | class HttpRequestParser: 21 | 22 | def __init__(self, protocol): 23 | """HttpRequestParser 24 | 25 | protocol -- a Python object with the following methods 26 | (all optional): 27 | 28 | - on_message_begin() 29 | - on_url(url: bytes) 30 | - on_header(name: bytes, value: bytes) 31 | - on_headers_complete() 32 | - on_body(body: bytes) 33 | - on_message_complete() 34 | - on_chunk_header() 35 | - on_chunk_complete() 36 | - on_status(status: bytes) 37 | """ 38 | 39 | def get_http_version(self) -> str: 40 | """Return an HTTP protocol version.""" 41 | 42 | def should_keep_alive(self) -> bool: 43 | """Return ``True`` if keep-alive mode is preferred.""" 44 | 45 | def should_upgrade(self) -> bool: 46 | """Return ``True`` if the parsed request is a valid Upgrade request. 47 | The method exposes a flag set just before on_headers_complete. 48 | Calling this method earlier will only yield `False`. 49 | """ 50 | 51 | def feed_data(self, data: bytes): 52 | """Feed data to the parser. 53 | 54 | Will eventually trigger callbacks on the ``protocol`` 55 | object. 56 | 57 | On HTTP upgrade, this method will raise an 58 | ``HttpParserUpgrade`` exception, with its sole argument 59 | set to the offset of the non-HTTP data in ``data``. 60 | """ 61 | 62 | def get_method(self) -> bytes: 63 | """Return HTTP request method (GET, HEAD, etc)""" 64 | 65 | 66 | class HttpResponseParser: 67 | 68 | """Has all methods except ``get_method()`` that 69 | HttpRequestParser has.""" 70 | 71 | def get_status_code(self) -> int: 72 | """Return the status code of the HTTP response""" 73 | 74 | 75 | def parse_url(url: bytes): 76 | """Parse URL strings into a structured Python object. 
77 | 78 | Returns an instance of ``httptools.URL`` class with the 79 | following attributes: 80 | 81 | - schema: bytes 82 | - host: bytes 83 | - port: int 84 | - path: bytes 85 | - query: bytes 86 | - fragment: bytes 87 | - userinfo: bytes 88 | """ 89 | ``` 90 | 91 | 92 | # Development 93 | 94 | 1. Clone this repository with 95 | `git clone --recursive git@github.com:MagicStack/httptools.git` 96 | 97 | 2. Create a virtual environment with Python 3: 98 | `python3 -m venv envname` 99 | 100 | 3. Activate the environment with `source envname/bin/activate` 101 | 102 | 4. Install development requirements with `pip install -e .[test]` 103 | 104 | 5. Run `make` and `make test`. 105 | 106 | 107 | # License 108 | 109 | MIT. 110 | -------------------------------------------------------------------------------- /httptools/__init__.py: -------------------------------------------------------------------------------- 1 | from . import parser 2 | from .parser import * # NOQA 3 | 4 | from ._version import __version__ # NOQA 5 | 6 | __all__ = parser.__all__ + ('__version__',) # NOQA 7 | -------------------------------------------------------------------------------- /httptools/_version.py: -------------------------------------------------------------------------------- 1 | # This file MUST NOT contain anything but the __version__ assignment. 2 | # 3 | # When making a release, change the value of __version__ 4 | # to an appropriate value, and open a pull request against 5 | # the correct branch (master if making a new feature release). 6 | # The commit message MUST contain a properly formatted release 7 | # log, and the commit must be signed. 8 | # 9 | # The release automation will: build and test the packages for the 10 | # supported platforms, publish the packages on PyPI, merge the PR 11 | # to the target branch, create a Git tag pointing to the commit. 
12 | 13 | __version__ = '0.7.0.dev0' 14 | -------------------------------------------------------------------------------- /httptools/parser/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.html 3 | *.c 4 | -------------------------------------------------------------------------------- /httptools/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser import * # NoQA 2 | from .errors import * # NoQA 3 | from .url_parser import * # NoQA 4 | 5 | __all__ = parser.__all__ + errors.__all__ + url_parser.__all__ # NoQA 6 | -------------------------------------------------------------------------------- /httptools/parser/cparser.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport int32_t, uint8_t, uint16_t, uint64_t 2 | 3 | 4 | cdef extern from "llhttp.h": 5 | struct llhttp__internal_s: 6 | int32_t _index 7 | void *_span_pos0 8 | void *_span_cb0 9 | int32_t error 10 | const char *reason 11 | const char *error_pos 12 | void *data 13 | void *_current 14 | uint64_t content_length 15 | uint8_t type 16 | uint8_t method 17 | uint8_t http_major 18 | uint8_t http_minor 19 | uint8_t header_state 20 | uint16_t flags 21 | uint8_t upgrade 22 | uint16_t status_code 23 | uint8_t finish 24 | void *settings 25 | ctypedef llhttp__internal_s llhttp__internal_t 26 | ctypedef llhttp__internal_t llhttp_t 27 | 28 | ctypedef int (*llhttp_data_cb) (llhttp_t*, 29 | const char *at, 30 | size_t length) except -1 31 | 32 | ctypedef int (*llhttp_cb) (llhttp_t*) except -1 33 | 34 | struct llhttp_settings_s: 35 | llhttp_cb on_message_begin 36 | llhttp_data_cb on_url 37 | llhttp_data_cb on_status 38 | llhttp_data_cb on_header_field 39 | llhttp_data_cb on_header_value 40 | llhttp_cb on_headers_complete 41 | llhttp_data_cb on_body 42 | llhttp_cb on_message_complete 43 | llhttp_cb on_chunk_header 44 | llhttp_cb 
on_chunk_complete 45 | ctypedef llhttp_settings_s llhttp_settings_t 46 | 47 | enum llhttp_type: 48 | HTTP_BOTH, 49 | HTTP_REQUEST, 50 | HTTP_RESPONSE 51 | ctypedef llhttp_type llhttp_type_t 52 | 53 | enum llhttp_errno: 54 | HPE_OK, 55 | HPE_INTERNAL, 56 | HPE_STRICT, 57 | HPE_LF_EXPECTED, 58 | HPE_UNEXPECTED_CONTENT_LENGTH, 59 | HPE_CLOSED_CONNECTION, 60 | HPE_INVALID_METHOD, 61 | HPE_INVALID_URL, 62 | HPE_INVALID_CONSTANT, 63 | HPE_INVALID_VERSION, 64 | HPE_INVALID_HEADER_TOKEN, 65 | HPE_INVALID_CONTENT_LENGTH, 66 | HPE_INVALID_CHUNK_SIZE, 67 | HPE_INVALID_STATUS, 68 | HPE_INVALID_EOF_STATE, 69 | HPE_INVALID_TRANSFER_ENCODING, 70 | HPE_CB_MESSAGE_BEGIN, 71 | HPE_CB_HEADERS_COMPLETE, 72 | HPE_CB_MESSAGE_COMPLETE, 73 | HPE_CB_CHUNK_HEADER, 74 | HPE_CB_CHUNK_COMPLETE, 75 | HPE_PAUSED, 76 | HPE_PAUSED_UPGRADE, 77 | HPE_USER 78 | ctypedef llhttp_errno llhttp_errno_t 79 | 80 | enum llhttp_flags: 81 | F_CONNECTION_KEEP_ALIVE, 82 | F_CONNECTION_CLOSE, 83 | F_CONNECTION_UPGRADE, 84 | F_CHUNKED, 85 | F_UPGRADE, 86 | F_CONTENT_LENGTH, 87 | F_SKIPBODY, 88 | F_TRAILING, 89 | F_LENIENT, 90 | F_TRANSFER_ENCODING 91 | ctypedef llhttp_flags llhttp_flags_t 92 | 93 | enum llhttp_method: 94 | HTTP_DELETE, 95 | HTTP_GET, 96 | HTTP_HEAD, 97 | HTTP_POST, 98 | HTTP_PUT, 99 | HTTP_CONNECT, 100 | HTTP_OPTIONS, 101 | HTTP_TRACE, 102 | HTTP_COPY, 103 | HTTP_LOCK, 104 | HTTP_MKCOL, 105 | HTTP_MOVE, 106 | HTTP_PROPFIND, 107 | HTTP_PROPPATCH, 108 | HTTP_SEARCH, 109 | HTTP_UNLOCK, 110 | HTTP_BIND, 111 | HTTP_REBIND, 112 | HTTP_UNBIND, 113 | HTTP_ACL, 114 | HTTP_REPORT, 115 | HTTP_MKACTIVITY, 116 | HTTP_CHECKOUT, 117 | HTTP_MERGE, 118 | HTTP_MSEARCH, 119 | HTTP_NOTIFY, 120 | HTTP_SUBSCRIBE, 121 | HTTP_UNSUBSCRIBE, 122 | HTTP_PATCH, 123 | HTTP_PURGE, 124 | HTTP_MKCALENDAR, 125 | HTTP_LINK, 126 | HTTP_UNLINK, 127 | HTTP_SOURCE, 128 | HTTP_PRI, 129 | HTTP_DESCRIBE, 130 | HTTP_ANNOUNCE, 131 | HTTP_SETUP, 132 | HTTP_PLAY, 133 | HTTP_PAUSE, 134 | HTTP_TEARDOWN, 135 | HTTP_GET_PARAMETER, 136 | 
HTTP_SET_PARAMETER, 137 | HTTP_REDIRECT, 138 | HTTP_RECORD, 139 | HTTP_FLUSH 140 | ctypedef llhttp_method llhttp_method_t 141 | 142 | void llhttp_init(llhttp_t* parser, llhttp_type_t type, const llhttp_settings_t* settings) 143 | 144 | void llhttp_settings_init(llhttp_settings_t* settings) 145 | 146 | llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len) 147 | 148 | void llhttp_resume_after_upgrade(llhttp_t* parser) 149 | 150 | int llhttp_should_keep_alive(const llhttp_t* parser) 151 | 152 | const char* llhttp_get_error_pos(const llhttp_t* parser) 153 | const char* llhttp_get_error_reason(const llhttp_t* parser) 154 | const char* llhttp_method_name(llhttp_method_t method) 155 | 156 | void llhttp_set_error_reason(llhttp_t* parser, const char* reason); 157 | 158 | void llhttp_set_lenient_headers(llhttp_t* parser, bint enabled); 159 | void llhttp_set_lenient_chunked_length(llhttp_t* parser, bint enabled); 160 | void llhttp_set_lenient_keep_alive(llhttp_t* parser, bint enabled); 161 | void llhttp_set_lenient_transfer_encoding(llhttp_t* parser, bint enabled); 162 | void llhttp_set_lenient_version(llhttp_t* parser, bint enabled); 163 | void llhttp_set_lenient_data_after_close(llhttp_t* parser, bint enabled); 164 | void llhttp_set_lenient_optional_lf_after_cr(llhttp_t* parser, bint enabled); 165 | void llhttp_set_lenient_optional_cr_before_lf(llhttp_t* parser, bint enabled); 166 | void llhttp_set_lenient_optional_crlf_after_chunk(llhttp_t* parser, bint enabled); 167 | void llhttp_set_lenient_spaces_after_chunk_size(llhttp_t* parser, bint enabled); 168 | -------------------------------------------------------------------------------- /httptools/parser/errors.py: -------------------------------------------------------------------------------- 1 | __all__ = ('HttpParserError', 2 | 'HttpParserCallbackError', 3 | 'HttpParserInvalidStatusError', 4 | 'HttpParserInvalidMethodError', 5 | 'HttpParserInvalidURLError', 6 | 'HttpParserUpgrade') 7 | 8 | 9 | 
class HttpParserError(Exception): 10 | pass 11 | 12 | 13 | class HttpParserCallbackError(HttpParserError): 14 | pass 15 | 16 | 17 | class HttpParserInvalidStatusError(HttpParserError): 18 | pass 19 | 20 | 21 | class HttpParserInvalidMethodError(HttpParserError): 22 | pass 23 | 24 | 25 | class HttpParserInvalidURLError(HttpParserError): 26 | pass 27 | 28 | 29 | class HttpParserUpgrade(Exception): 30 | pass 31 | -------------------------------------------------------------------------------- /httptools/parser/parser.pyx: -------------------------------------------------------------------------------- 1 | #cython: language_level=3 2 | 3 | from __future__ import print_function 4 | from typing import Optional 5 | 6 | from cpython.mem cimport PyMem_Malloc, PyMem_Free 7 | from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \ 8 | Py_buffer, PyBytes_AsString 9 | 10 | from .python cimport PyMemoryView_Check, PyMemoryView_GET_BUFFER 11 | 12 | 13 | from .errors import (HttpParserError, 14 | HttpParserCallbackError, 15 | HttpParserInvalidStatusError, 16 | HttpParserInvalidMethodError, 17 | HttpParserInvalidURLError, 18 | HttpParserUpgrade) 19 | 20 | cimport cython 21 | from . 
cimport cparser 22 | 23 | 24 | __all__ = ('HttpRequestParser', 'HttpResponseParser') 25 | 26 | 27 | @cython.internal 28 | cdef class HttpParser: 29 | 30 | cdef: 31 | cparser.llhttp_t* _cparser 32 | cparser.llhttp_settings_t* _csettings 33 | 34 | bytes _current_header_name 35 | bytes _current_header_value 36 | 37 | _proto_on_url, _proto_on_status, _proto_on_body, \ 38 | _proto_on_header, _proto_on_headers_complete, \ 39 | _proto_on_message_complete, _proto_on_chunk_header, \ 40 | _proto_on_chunk_complete, _proto_on_message_begin 41 | 42 | object _last_error 43 | 44 | Py_buffer py_buf 45 | 46 | def __cinit__(self): 47 | self._cparser = <cparser.llhttp_t*> \ 48 | PyMem_Malloc(sizeof(cparser.llhttp_t)) 49 | if self._cparser is NULL: 50 | raise MemoryError() 51 | 52 | self._csettings = <cparser.llhttp_settings_t*> \ 53 | PyMem_Malloc(sizeof(cparser.llhttp_settings_t)) 54 | if self._csettings is NULL: 55 | raise MemoryError() 56 | 57 | def __dealloc__(self): 58 | PyMem_Free(self._cparser) 59 | PyMem_Free(self._csettings) 60 | 61 | cdef _init(self, protocol, cparser.llhttp_type_t mode): 62 | cparser.llhttp_settings_init(self._csettings) 63 | 64 | cparser.llhttp_init(self._cparser, mode, self._csettings) 65 | self._cparser.data = <void*>self 66 | 67 | self._current_header_name = None 68 | self._current_header_value = None 69 | 70 | self._proto_on_header = getattr(protocol, 'on_header', None) 71 | if self._proto_on_header is not None: 72 | self._csettings.on_header_field = cb_on_header_field 73 | self._csettings.on_header_value = cb_on_header_value 74 | self._proto_on_headers_complete = getattr( 75 | protocol, 'on_headers_complete', None) 76 | self._csettings.on_headers_complete = cb_on_headers_complete 77 | 78 | self._proto_on_body = getattr(protocol, 'on_body', None) 79 | if self._proto_on_body is not None: 80 | self._csettings.on_body = cb_on_body 81 | 82 | self._proto_on_message_begin = getattr( 83 | protocol, 'on_message_begin', None) 84 | if self._proto_on_message_begin is not None: 85 | self._csettings.on_message_begin = 
cb_on_message_begin 86 | 87 | self._proto_on_message_complete = getattr( 88 | protocol, 'on_message_complete', None) 89 | if self._proto_on_message_complete is not None: 90 | self._csettings.on_message_complete = cb_on_message_complete 91 | 92 | self._proto_on_chunk_header = getattr( 93 | protocol, 'on_chunk_header', None) 94 | self._csettings.on_chunk_header = cb_on_chunk_header 95 | 96 | self._proto_on_chunk_complete = getattr( 97 | protocol, 'on_chunk_complete', None) 98 | self._csettings.on_chunk_complete = cb_on_chunk_complete 99 | 100 | self._last_error = None 101 | 102 | cdef _maybe_call_on_header(self): 103 | if self._current_header_value is not None: 104 | current_header_name = self._current_header_name 105 | current_header_value = self._current_header_value 106 | 107 | self._current_header_name = self._current_header_value = None 108 | 109 | if self._proto_on_header is not None: 110 | self._proto_on_header(current_header_name, 111 | current_header_value) 112 | 113 | cdef _on_header_field(self, bytes field): 114 | self._maybe_call_on_header() 115 | if self._current_header_name is None: 116 | self._current_header_name = field 117 | else: 118 | self._current_header_name += field 119 | 120 | cdef _on_header_value(self, bytes val): 121 | if self._current_header_value is None: 122 | self._current_header_value = val 123 | else: 124 | # This is unlikely, as mostly HTTP headers are one-line 125 | self._current_header_value += val 126 | 127 | cdef _on_headers_complete(self): 128 | self._maybe_call_on_header() 129 | 130 | if self._proto_on_headers_complete is not None: 131 | self._proto_on_headers_complete() 132 | 133 | cdef _on_chunk_header(self): 134 | if (self._current_header_value is not None or 135 | self._current_header_name is not None): 136 | raise HttpParserError('invalid headers state') 137 | 138 | if self._proto_on_chunk_header is not None: 139 | self._proto_on_chunk_header() 140 | 141 | cdef _on_chunk_complete(self): 142 | self._maybe_call_on_header() 
143 | 144 | if self._proto_on_chunk_complete is not None: 145 | self._proto_on_chunk_complete() 146 | 147 | ### Public API ### 148 | 149 | def set_dangerous_leniencies( 150 | self, 151 | lenient_headers: Optional[bool] = None, 152 | lenient_chunked_length: Optional[bool] = None, 153 | lenient_keep_alive: Optional[bool] = None, 154 | lenient_transfer_encoding: Optional[bool] = None, 155 | lenient_version: Optional[bool] = None, 156 | lenient_data_after_close: Optional[bool] = None, 157 | lenient_optional_lf_after_cr: Optional[bool] = None, 158 | lenient_optional_cr_before_lf: Optional[bool] = None, 159 | lenient_optional_crlf_after_chunk: Optional[bool] = None, 160 | lenient_spaces_after_chunk_size: Optional[bool] = None, 161 | ): 162 | cdef cparser.llhttp_t* parser = self._cparser 163 | if lenient_headers is not None: 164 | cparser.llhttp_set_lenient_headers( 165 | parser, lenient_headers) 166 | if lenient_chunked_length is not None: 167 | cparser.llhttp_set_lenient_chunked_length( 168 | parser, lenient_chunked_length) 169 | if lenient_keep_alive is not None: 170 | cparser.llhttp_set_lenient_keep_alive( 171 | parser, lenient_keep_alive) 172 | if lenient_transfer_encoding is not None: 173 | cparser.llhttp_set_lenient_transfer_encoding( 174 | parser, lenient_transfer_encoding) 175 | if lenient_version is not None: 176 | cparser.llhttp_set_lenient_version( 177 | parser, lenient_version) 178 | if lenient_data_after_close is not None: 179 | cparser.llhttp_set_lenient_data_after_close( 180 | parser, lenient_data_after_close) 181 | if lenient_optional_lf_after_cr is not None: 182 | cparser.llhttp_set_lenient_optional_lf_after_cr( 183 | parser, lenient_optional_lf_after_cr) 184 | if lenient_optional_cr_before_lf is not None: 185 | cparser.llhttp_set_lenient_optional_cr_before_lf( 186 | parser, lenient_optional_cr_before_lf) 187 | if lenient_optional_crlf_after_chunk is not None: 188 | cparser.llhttp_set_lenient_optional_crlf_after_chunk( 189 | parser, 
lenient_optional_crlf_after_chunk) 190 | if lenient_spaces_after_chunk_size is not None: 191 | cparser.llhttp_set_lenient_spaces_after_chunk_size( 192 | parser, lenient_spaces_after_chunk_size) 193 | 194 | def get_http_version(self): 195 | cdef cparser.llhttp_t* parser = self._cparser 196 | return '{}.{}'.format(parser.http_major, parser.http_minor) 197 | 198 | def should_keep_alive(self): 199 | return bool(cparser.llhttp_should_keep_alive(self._cparser)) 200 | 201 | def should_upgrade(self): 202 | cdef cparser.llhttp_t* parser = self._cparser 203 | return bool(parser.upgrade) 204 | 205 | def feed_data(self, data): 206 | cdef: 207 | size_t data_len 208 | cparser.llhttp_errno_t err 209 | Py_buffer *buf 210 | bint owning_buf = False 211 | const char* err_pos 212 | 213 | if PyMemoryView_Check(data): 214 | buf = PyMemoryView_GET_BUFFER(data) 215 | data_len = buf.len 216 | err = cparser.llhttp_execute( 217 | self._cparser, 218 | <char*>buf.buf, 219 | data_len) 220 | 221 | else: 222 | buf = &self.py_buf 223 | PyObject_GetBuffer(data, buf, PyBUF_SIMPLE) 224 | owning_buf = True 225 | data_len = buf.len 226 | 227 | err = cparser.llhttp_execute( 228 | self._cparser, 229 | <char*>buf.buf, 230 | data_len) 231 | 232 | try: 233 | if self._cparser.upgrade == 1 and err == cparser.HPE_PAUSED_UPGRADE: 234 | err_pos = cparser.llhttp_get_error_pos(self._cparser) 235 | 236 | # Immediately free the parser from "error" state, simulating 237 | # http-parser behavior here because 1) we never had the API to 238 | # allow users manually "resume after upgrade", and 2) the use 239 | # case for resuming parsing is very rare. 240 | cparser.llhttp_resume_after_upgrade(self._cparser) 241 | 242 | # The err_pos here is specific for the input buf. So if we ever 243 | # switch to the llhttp behavior (re-raise HttpParserUpgrade for 244 | # successive calls to feed_data() until resume_after_upgrade is 245 | # called), we have to store the result and keep our own state. 
246 | raise HttpParserUpgrade(err_pos - <char*>buf.buf) 247 | finally: 248 | if owning_buf: 249 | PyBuffer_Release(buf) 250 | 251 | if err != cparser.HPE_OK: 252 | ex = parser_error_from_errno( 253 | self._cparser, 254 | self._cparser.error) 255 | if isinstance(ex, HttpParserCallbackError): 256 | if self._last_error is not None: 257 | ex.__context__ = self._last_error 258 | self._last_error = None 259 | raise ex 260 | 261 | 262 | cdef class HttpRequestParser(HttpParser): 263 | 264 | def __init__(self, protocol): 265 | self._init(protocol, cparser.HTTP_REQUEST) 266 | 267 | self._proto_on_url = getattr(protocol, 'on_url', None) 268 | if self._proto_on_url is not None: 269 | self._csettings.on_url = cb_on_url 270 | 271 | def get_method(self): 272 | cdef cparser.llhttp_t* parser = self._cparser 273 | return cparser.llhttp_method_name(<cparser.llhttp_method_t> parser.method) 274 | 275 | 276 | cdef class HttpResponseParser(HttpParser): 277 | 278 | def __init__(self, protocol): 279 | self._init(protocol, cparser.HTTP_RESPONSE) 280 | 281 | self._proto_on_status = getattr(protocol, 'on_status', None) 282 | if self._proto_on_status is not None: 283 | self._csettings.on_status = cb_on_status 284 | 285 | def get_status_code(self): 286 | cdef cparser.llhttp_t* parser = self._cparser 287 | return parser.status_code 288 | 289 | 290 | cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: 291 | cdef HttpParser pyparser = <HttpParser>parser.data 292 | try: 293 | pyparser._proto_on_message_begin() 294 | except BaseException as ex: 295 | pyparser._last_error = ex 296 | return -1 297 | else: 298 | return 0 299 | 300 | 301 | cdef int cb_on_url(cparser.llhttp_t* parser, 302 | const char *at, size_t length) except -1: 303 | cdef HttpParser pyparser = <HttpParser>parser.data 304 | try: 305 | pyparser._proto_on_url(at[:length]) 306 | except BaseException as ex: 307 | cparser.llhttp_set_error_reason(parser, "`on_url` callback error") 308 | pyparser._last_error = ex 309 | return cparser.HPE_USER 310 | else: 311 | return 0 312 | 313 | 
314 | cdef int cb_on_status(cparser.llhttp_t* parser, 315 | const char *at, size_t length) except -1: 316 | cdef HttpParser pyparser = parser.data 317 | try: 318 | pyparser._proto_on_status(at[:length]) 319 | except BaseException as ex: 320 | cparser.llhttp_set_error_reason(parser, "`on_status` callback error") 321 | pyparser._last_error = ex 322 | return cparser.HPE_USER 323 | else: 324 | return 0 325 | 326 | 327 | cdef int cb_on_header_field(cparser.llhttp_t* parser, 328 | const char *at, size_t length) except -1: 329 | cdef HttpParser pyparser = parser.data 330 | try: 331 | pyparser._on_header_field(at[:length]) 332 | except BaseException as ex: 333 | cparser.llhttp_set_error_reason(parser, "`on_header_field` callback error") 334 | pyparser._last_error = ex 335 | return cparser.HPE_USER 336 | else: 337 | return 0 338 | 339 | 340 | cdef int cb_on_header_value(cparser.llhttp_t* parser, 341 | const char *at, size_t length) except -1: 342 | cdef HttpParser pyparser = parser.data 343 | try: 344 | pyparser._on_header_value(at[:length]) 345 | except BaseException as ex: 346 | cparser.llhttp_set_error_reason(parser, "`on_header_value` callback error") 347 | pyparser._last_error = ex 348 | return cparser.HPE_USER 349 | else: 350 | return 0 351 | 352 | 353 | cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: 354 | cdef HttpParser pyparser = parser.data 355 | try: 356 | pyparser._on_headers_complete() 357 | except BaseException as ex: 358 | pyparser._last_error = ex 359 | return -1 360 | else: 361 | if pyparser._cparser.upgrade: 362 | return 1 363 | else: 364 | return 0 365 | 366 | 367 | cdef int cb_on_body(cparser.llhttp_t* parser, 368 | const char *at, size_t length) except -1: 369 | cdef HttpParser pyparser = parser.data 370 | try: 371 | pyparser._proto_on_body(at[:length]) 372 | except BaseException as ex: 373 | cparser.llhttp_set_error_reason(parser, "`on_body` callback error") 374 | pyparser._last_error = ex 375 | return cparser.HPE_USER 376 | else: 
377 | return 0 378 | 379 | 380 | cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1: 381 | cdef HttpParser pyparser = parser.data 382 | try: 383 | pyparser._proto_on_message_complete() 384 | except BaseException as ex: 385 | pyparser._last_error = ex 386 | return -1 387 | else: 388 | return 0 389 | 390 | 391 | cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1: 392 | cdef HttpParser pyparser = parser.data 393 | try: 394 | pyparser._on_chunk_header() 395 | except BaseException as ex: 396 | pyparser._last_error = ex 397 | return -1 398 | else: 399 | return 0 400 | 401 | 402 | cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: 403 | cdef HttpParser pyparser = parser.data 404 | try: 405 | pyparser._on_chunk_complete() 406 | except BaseException as ex: 407 | pyparser._last_error = ex 408 | return -1 409 | else: 410 | return 0 411 | 412 | 413 | cdef parser_error_from_errno(cparser.llhttp_t* parser, cparser.llhttp_errno_t errno): 414 | cdef bytes reason = cparser.llhttp_get_error_reason(parser) 415 | 416 | if errno in (cparser.HPE_CB_MESSAGE_BEGIN, 417 | cparser.HPE_CB_HEADERS_COMPLETE, 418 | cparser.HPE_CB_MESSAGE_COMPLETE, 419 | cparser.HPE_CB_CHUNK_HEADER, 420 | cparser.HPE_CB_CHUNK_COMPLETE, 421 | cparser.HPE_USER): 422 | cls = HttpParserCallbackError 423 | 424 | elif errno == cparser.HPE_INVALID_STATUS: 425 | cls = HttpParserInvalidStatusError 426 | 427 | elif errno == cparser.HPE_INVALID_METHOD: 428 | cls = HttpParserInvalidMethodError 429 | 430 | elif errno == cparser.HPE_INVALID_URL: 431 | cls = HttpParserInvalidURLError 432 | 433 | else: 434 | cls = HttpParserError 435 | 436 | return cls(reason.decode('latin-1')) 437 | -------------------------------------------------------------------------------- /httptools/parser/python.pxd: -------------------------------------------------------------------------------- 1 | cimport cpython 2 | 3 | 4 | cdef extern from "Python.h": 5 | cpython.Py_buffer* 
PyMemoryView_GET_BUFFER(object) 6 | bint PyMemoryView_Check(object) 7 | -------------------------------------------------------------------------------- /httptools/parser/url_cparser.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport uint16_t 2 | 3 | 4 | cdef extern from "http_parser.h": 5 | # URL Parser 6 | 7 | enum http_parser_url_fields: 8 | UF_SCHEMA = 0, 9 | UF_HOST = 1, 10 | UF_PORT = 2, 11 | UF_PATH = 3, 12 | UF_QUERY = 4, 13 | UF_FRAGMENT = 5, 14 | UF_USERINFO = 6, 15 | UF_MAX = 7 16 | 17 | struct http_parser_url_field_data: 18 | uint16_t off 19 | uint16_t len 20 | 21 | struct http_parser_url: 22 | uint16_t field_set 23 | uint16_t port 24 | http_parser_url_field_data[UF_MAX] field_data 25 | 26 | void http_parser_url_init(http_parser_url *u) 27 | 28 | int http_parser_parse_url(const char *buf, 29 | size_t buflen, 30 | int is_connect, 31 | http_parser_url *u) 32 | -------------------------------------------------------------------------------- /httptools/parser/url_parser.pyx: -------------------------------------------------------------------------------- 1 | #cython: language_level=3 2 | 3 | from __future__ import print_function 4 | from cpython.mem cimport PyMem_Malloc, PyMem_Free 5 | from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \ 6 | Py_buffer 7 | 8 | from .errors import HttpParserInvalidURLError 9 | 10 | cimport cython 11 | from . 
cimport url_cparser as uparser 12 | 13 | __all__ = ('parse_url',) 14 | 15 | @cython.freelist(250) 16 | cdef class URL: 17 | cdef readonly bytes schema 18 | cdef readonly bytes host 19 | cdef readonly object port 20 | cdef readonly bytes path 21 | cdef readonly bytes query 22 | cdef readonly bytes fragment 23 | cdef readonly bytes userinfo 24 | 25 | def __cinit__(self, bytes schema, bytes host, object port, bytes path, 26 | bytes query, bytes fragment, bytes userinfo): 27 | 28 | self.schema = schema 29 | self.host = host 30 | self.port = port 31 | self.path = path 32 | self.query = query 33 | self.fragment = fragment 34 | self.userinfo = userinfo 35 | 36 | def __repr__(self): 37 | return ('' 39 | .format(self.schema, self.host, self.port, self.path, 40 | self.query, self.fragment, self.userinfo)) 41 | 42 | 43 | def parse_url(url): 44 | cdef: 45 | Py_buffer py_buf 46 | char* buf_data 47 | uparser.http_parser_url* parsed 48 | int res 49 | bytes schema = None 50 | bytes host = None 51 | object port = None 52 | bytes path = None 53 | bytes query = None 54 | bytes fragment = None 55 | bytes userinfo = None 56 | object result = None 57 | int off 58 | int ln 59 | 60 | PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE) # taken first: a non-bytes-like url raises here, before `parsed` is allocated, so nothing leaks 61 | parsed = \ 62 | PyMem_Malloc(sizeof(uparser.http_parser_url)) 63 | if parsed is NULL: PyBuffer_Release(&py_buf); raise MemoryError() # allocation failed; release the buffer acquired above 64 | uparser.http_parser_url_init(parsed) 65 | try: 66 | buf_data = py_buf.buf 67 | res = uparser.http_parser_parse_url(buf_data, py_buf.len, 0, parsed) 68 | 69 | if res == 0: 70 | if parsed.field_set & (1 << uparser.UF_SCHEMA): 71 | off = parsed.field_data[uparser.UF_SCHEMA].off 72 | ln = parsed.field_data[uparser.UF_SCHEMA].len 73 | schema = buf_data[off:off+ln] 74 | 75 | if parsed.field_set & (1 << uparser.UF_HOST): 76 | off = parsed.field_data[uparser.UF_HOST].off 77 | ln = parsed.field_data[uparser.UF_HOST].len 78 | host = buf_data[off:off+ln] 79 | 80 | if parsed.field_set & (1 << uparser.UF_PORT): 81 | port = parsed.port 82 | 83 | if parsed.field_set & (1 << uparser.UF_PATH): 84 | off = 
parsed.field_data[uparser.UF_PATH].off 85 | ln = parsed.field_data[uparser.UF_PATH].len 86 | path = buf_data[off:off+ln] 87 | 88 | if parsed.field_set & (1 << uparser.UF_QUERY): 89 | off = parsed.field_data[uparser.UF_QUERY].off 90 | ln = parsed.field_data[uparser.UF_QUERY].len 91 | query = buf_data[off:off+ln] 92 | 93 | if parsed.field_set & (1 << uparser.UF_FRAGMENT): 94 | off = parsed.field_data[uparser.UF_FRAGMENT].off 95 | ln = parsed.field_data[uparser.UF_FRAGMENT].len 96 | fragment = buf_data[off:off+ln] 97 | 98 | if parsed.field_set & (1 << uparser.UF_USERINFO): 99 | off = parsed.field_data[uparser.UF_USERINFO].off 100 | ln = parsed.field_data[uparser.UF_USERINFO].len 101 | userinfo = buf_data[off:off+ln] 102 | 103 | return URL(schema, host, port, path, query, fragment, userinfo) 104 | else: 105 | raise HttpParserInvalidURLError("invalid url {!r}".format(url)) 106 | finally: 107 | PyBuffer_Release(&py_buf) 108 | PyMem_Free(parsed) 109 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --capture=no --assert=plain --strict-markers --tb=native --import-mode=importlib 3 | testpaths = tests 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | vi = sys.version_info 4 | if vi < (3, 8): 5 | raise RuntimeError('httptools require Python 3.8 or greater') 6 | else: 7 | import os.path 8 | import pathlib 9 | 10 | from setuptools import setup, Extension 11 | from setuptools.command.build_ext import build_ext as build_ext 12 | 13 | 14 | CFLAGS = ['-O2'] 15 | 16 | ROOT = pathlib.Path(__file__).parent 17 | 18 | CYTHON_DEPENDENCY = 'Cython>=0.29.24' 19 | 20 | 21 | class httptools_build_ext(build_ext): 22 | user_options = build_ext.user_options + [ 23 | ('cython-always', None, 24 | 
'run cythonize() even if .c files are present'), 25 | ('cython-annotate', None, 26 | 'Produce a colorized HTML version of the Cython source.'), 27 | ('cython-directives=', None, 28 | 'Cythion compiler directives'), 29 | ('use-system-llhttp', None, 30 | 'Use the system provided llhttp, instead of the bundled one'), 31 | ('use-system-http-parser', None, 32 | 'Use the system provided http-parser, instead of the bundled one'), 33 | ] 34 | 35 | boolean_options = build_ext.boolean_options + [ 36 | 'cython-always', 37 | 'cython-annotate', 38 | 'use-system-llhttp', 39 | 'use-system-http-parser', 40 | ] 41 | 42 | def initialize_options(self): 43 | # initialize_options() may be called multiple times on the 44 | # same command object, so make sure not to override previously 45 | # set options. 46 | if getattr(self, '_initialized', False): 47 | return 48 | 49 | super().initialize_options() 50 | self.use_system_llhttp = False 51 | self.use_system_http_parser = False 52 | self.cython_always = False 53 | self.cython_annotate = None 54 | self.cython_directives = None 55 | 56 | def finalize_options(self): 57 | # finalize_options() may be called multiple times on the 58 | # same command object, so make sure not to override previously 59 | # set options. 
60 | if getattr(self, '_initialized', False): 61 | return 62 | 63 | need_cythonize = self.cython_always 64 | cfiles = {} 65 | 66 | for extension in self.distribution.ext_modules: 67 | for i, sfile in enumerate(extension.sources): 68 | if sfile.endswith('.pyx'): 69 | prefix, ext = os.path.splitext(sfile) 70 | cfile = prefix + '.c' 71 | 72 | if os.path.exists(cfile) and not self.cython_always: 73 | extension.sources[i] = cfile 74 | else: 75 | if os.path.exists(cfile): 76 | cfiles[cfile] = os.path.getmtime(cfile) 77 | else: 78 | cfiles[cfile] = 0 79 | need_cythonize = True 80 | 81 | if need_cythonize: 82 | try: 83 | import Cython 84 | except ImportError: 85 | raise RuntimeError( 86 | 'please install Cython to compile httptools from source') 87 | 88 | if Cython.__version__ < '0.29': 89 | raise RuntimeError( 90 | 'httptools requires Cython version 0.29 or greater') 91 | 92 | from Cython.Build import cythonize 93 | 94 | directives = {} 95 | if self.cython_directives: 96 | for directive in self.cython_directives.split(','): 97 | k, _, v = directive.partition('=') 98 | if v.lower() == 'false': 99 | v = False 100 | if v.lower() == 'true': 101 | v = True 102 | 103 | directives[k] = v 104 | 105 | self.distribution.ext_modules[:] = cythonize( 106 | self.distribution.ext_modules, 107 | compiler_directives=directives, 108 | annotate=self.cython_annotate) 109 | 110 | super().finalize_options() 111 | 112 | self._initialized = True 113 | 114 | def build_extensions(self): 115 | mod_parser, mod_url_parser = self.distribution.ext_modules 116 | if self.use_system_llhttp: 117 | mod_parser.libraries.append('llhttp') 118 | 119 | if sys.platform == 'darwin' and \ 120 | os.path.exists('/opt/local/include'): 121 | # Support macports on Mac OS X. 
122 | mod_parser.include_dirs.append('/opt/local/include') 123 | else: 124 | mod_parser.include_dirs.append( 125 | str(ROOT / 'vendor' / 'llhttp' / 'include')) 126 | mod_parser.include_dirs.append( 127 | str(ROOT / 'vendor' / 'llhttp' / 'src')) 128 | mod_parser.sources.append('vendor/llhttp/src/api.c') 129 | mod_parser.sources.append('vendor/llhttp/src/http.c') 130 | mod_parser.sources.append('vendor/llhttp/src/llhttp.c') 131 | 132 | if self.use_system_http_parser: 133 | mod_url_parser.libraries.append('http_parser') 134 | 135 | if sys.platform == 'darwin' and \ 136 | os.path.exists('/opt/local/include'): 137 | # Support macports on Mac OS X. 138 | mod_url_parser.include_dirs.append('/opt/local/include') 139 | else: 140 | mod_url_parser.include_dirs.append( 141 | str(ROOT / 'vendor' / 'http-parser')) 142 | mod_url_parser.sources.append( 143 | 'vendor/http-parser/http_parser.c') 144 | 145 | super().build_extensions() 146 | 147 | 148 | with open(str(ROOT / 'README.md')) as f: 149 | long_description = f.read() 150 | 151 | 152 | with open(str(ROOT / 'httptools' / '_version.py')) as f: 153 | for line in f: 154 | if line.startswith('__version__ ='): 155 | _, _, version = line.partition('=') 156 | VERSION = version.strip(" \n'\"") 157 | break 158 | else: 159 | raise RuntimeError( 160 | 'unable to read the version from httptools/_version.py') 161 | 162 | 163 | setup_requires = [] 164 | 165 | if (not (ROOT / 'httptools' / 'parser' / 'parser.c').exists() or 166 | '--cython-always' in sys.argv): 167 | # No Cython output, require Cython to build. 
168 | setup_requires.append(CYTHON_DEPENDENCY) 169 | 170 | 171 | setup( 172 | name='httptools', 173 | version=VERSION, 174 | description='A collection of framework independent HTTP protocol utils.', 175 | long_description=long_description, 176 | long_description_content_type='text/markdown', 177 | url='https://github.com/MagicStack/httptools', 178 | classifiers=[ 179 | 'License :: OSI Approved :: MIT License', 180 | 'Intended Audience :: Developers', 181 | 'Programming Language :: Python :: 3', 182 | 'Operating System :: POSIX', 183 | 'Operating System :: MacOS :: MacOS X', 184 | 'Environment :: Web Environment', 185 | 'Development Status :: 5 - Production/Stable', 186 | ], 187 | platforms=['macOS', 'POSIX', 'Windows'], 188 | python_requires='>=3.8.0', 189 | zip_safe=False, 190 | author='Yury Selivanov', 191 | author_email='yury@magic.io', 192 | license='MIT', 193 | packages=['httptools', 'httptools.parser'], 194 | cmdclass={ 195 | 'build_ext': httptools_build_ext, 196 | }, 197 | ext_modules=[ 198 | Extension( 199 | "httptools.parser.parser", 200 | sources=[ 201 | "httptools/parser/parser.pyx", 202 | ], 203 | extra_compile_args=CFLAGS, 204 | ), 205 | Extension( 206 | "httptools.parser.url_parser", 207 | sources=[ 208 | "httptools/parser/url_parser.pyx", 209 | ], 210 | extra_compile_args=CFLAGS, 211 | ), 212 | ], 213 | include_package_data=True, 214 | exclude_package_data={"": ["*.c", "*.h"]}, 215 | test_suite='tests.suite', 216 | setup_requires=setup_requires, 217 | extras_require={ 218 | 'test': [ 219 | CYTHON_DEPENDENCY 220 | ] 221 | } 222 | ) 223 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | import unittest 4 | import unittest.runner 5 | 6 | 7 | def suite(): 8 | test_loader = unittest.TestLoader() 9 | test_suite = test_loader.discover( 10 | os.path.dirname(__file__), pattern='test_*.py') 
11 | return test_suite 12 | 13 | 14 | if __name__ == '__main__': 15 | runner = unittest.runner.TextTestRunner() 16 | result = runner.run(suite()) 17 | sys.exit(not result.wasSuccessful()) 18 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | import httptools 2 | 3 | import unittest 4 | from unittest import mock 5 | 6 | 7 | RESPONSE1_HEAD = b'''HTTP/1.1 200 OK 8 | Date: Mon, 23 May 2005 22:38:34 GMT 9 | Server: Apache/1.3.3.7 (Unix) (Red-Hat/Linux) 10 | Last-Modified: Wed, 08 Jan 2003 23:11:55 GMT 11 | ETag: "3f80f-1b6-3e1cb03b" 12 | Content-Type: text/html; charset=UTF-8 13 | Content-Length: 130 14 | Accept-Ranges: bytes 15 | Connection: close 16 | 17 | '''.replace(b'\n', b'\r\n') 18 | 19 | RESPONSE1_SPACES_IN_HEAD = b'''HTTP/1.1 200 OK 20 | Date: Mon, 23 May 2005 22:38:34 GMT 21 | Server: Apache/1.3.3.7 22 | (Unix) (Red-Hat/Linux) 23 | Last-Modified: Wed, 08 Jan 2003 23:11:55 GMT 24 | ETag: "3f80f-1b6-3e1cb03b" 25 | Content-Type: text/html; 26 | charset=UTF-8 27 | Content-Length: 130 28 | Accept-Ranges: bytes 29 | Connection: close 30 | 31 | '''.replace(b'\n', b'\r\n') 32 | 33 | RESPONSE1_BODY = b''' 34 | 35 | 36 | An Example Page 37 | 38 | 39 | Hello World, this is a very simple HTML document. 
40 | 41 | ''' 42 | 43 | 44 | CHUNKED_REQUEST1_1 = b'''POST /test.php?a=b+c HTTP/1.1\r 45 | User-Agent: Fooo\r 46 | Host: bar\r 47 | Transfer-Encoding: chunked\r 48 | \r 49 | 5\r\nhello\r\n6\r\n world\r\n''' 50 | 51 | CHUNKED_REQUEST1_2 = b'''0\r\nVary: *\r\nUser-Agent: spam\r\n\r\n''' 52 | 53 | CHUNKED_REQUEST1_3 = b'''POST /test.php?a=b+c HTTP/1.1\r 54 | User-Agent: Fooo\r 55 | Host: bar\r 56 | Transfer-Encoding: chunked\r 57 | \r 58 | b\r\n+\xce\xcfM\xb5MI,I\x04\x00\r\n0\r\n\r\n''' 59 | 60 | 61 | UPGRADE_REQUEST1 = b'''GET /demo HTTP/1.1\r 62 | Host: example.com\r 63 | Connection: Upgrade\r 64 | Sec-WebSocket-Key2: 12998 5 Y3 1 .P00\r 65 | Sec-WebSocket-Protocol: sample\r 66 | Upgrade: WebSocket\r 67 | Sec-WebSocket-Key1: 4 @1 46546xW%0l 1 5\r 68 | Origin: http://example.com\r 69 | \r 70 | Hot diggity dogg''' 71 | 72 | UPGRADE_RESPONSE1 = b'''HTTP/1.1 101 Switching Protocols 73 | UPGRADE: websocket 74 | SEC-WEBSOCKET-ACCEPT: rVg+XakFNFOxk3ZH0lzrZBmg0aU= 75 | TRANSFER-ENCODING: chunked 76 | CONNECTION: upgrade 77 | DATE: Sat, 07 May 2016 23:44:32 GMT 78 | SERVER: Python/3.4 aiohttp/1.0.3 79 | 80 | data'''.replace(b'\n', b'\r\n') 81 | 82 | 83 | class TestResponseParser(unittest.TestCase): 84 | 85 | def test_parser_response_1(self): 86 | m = mock.Mock() 87 | 88 | headers = {} 89 | m.on_header.side_effect = headers.__setitem__ 90 | 91 | p = httptools.HttpResponseParser(m) 92 | p.feed_data(memoryview(RESPONSE1_HEAD)) 93 | 94 | self.assertEqual(p.get_http_version(), '1.1') 95 | self.assertEqual(p.get_status_code(), 200) 96 | 97 | m.on_status.assert_called_once_with(b'OK') 98 | 99 | m.on_headers_complete.assert_called_once_with() 100 | self.assertEqual(m.on_header.call_count, 8) 101 | self.assertEqual(len(headers), 8) 102 | self.assertEqual(headers.get(b'Connection'), b'close') 103 | self.assertEqual(headers.get(b'Content-Type'), 104 | b'text/html; charset=UTF-8') 105 | 106 | self.assertFalse(m.on_body.called) 107 | p.feed_data(bytearray(RESPONSE1_BODY)) 108 | 
m.on_body.assert_called_once_with(RESPONSE1_BODY) 109 | 110 | m.on_message_complete.assert_called_once_with() 111 | 112 | self.assertFalse(m.on_url.called) 113 | self.assertFalse(m.on_chunk_header.called) 114 | self.assertFalse(m.on_chunk_complete.called) 115 | 116 | def test_parser_response_1b(self): 117 | p = httptools.HttpResponseParser(None) 118 | 119 | with self.assertRaisesRegex( 120 | httptools.HttpParserError, 121 | 'Expected HTTP/'): 122 | p.feed_data(b'12123123') 123 | 124 | def test_parser_response_leninent_headers_1(self): 125 | m = mock.Mock() 126 | 127 | headers = {} 128 | m.on_header.side_effect = headers.__setitem__ 129 | 130 | p = httptools.HttpResponseParser(m) 131 | 132 | with self.assertRaisesRegex( 133 | httptools.HttpParserError, 134 | "whitespace after header value", 135 | ): 136 | p.feed_data(memoryview(RESPONSE1_SPACES_IN_HEAD)) 137 | 138 | def test_parser_response_leninent_headers_2(self): 139 | m = mock.Mock() 140 | 141 | headers = {} 142 | m.on_header.side_effect = headers.__setitem__ 143 | 144 | p = httptools.HttpResponseParser(m) 145 | 146 | p.set_dangerous_leniencies(lenient_headers=True) 147 | p.feed_data(memoryview(RESPONSE1_SPACES_IN_HEAD)) 148 | 149 | self.assertEqual(p.get_http_version(), '1.1') 150 | self.assertEqual(p.get_status_code(), 200) 151 | 152 | m.on_status.assert_called_once_with(b'OK') 153 | 154 | m.on_headers_complete.assert_called_once_with() 155 | self.assertEqual(m.on_header.call_count, 8) 156 | self.assertEqual(len(headers), 8) 157 | self.assertEqual(headers.get(b'Connection'), b'close') 158 | self.assertEqual(headers.get(b'Content-Type'), 159 | b'text/html; charset=UTF-8') 160 | 161 | self.assertFalse(m.on_body.called) 162 | p.feed_data(bytearray(RESPONSE1_BODY)) 163 | m.on_body.assert_called_once_with(RESPONSE1_BODY) 164 | 165 | m.on_message_complete.assert_called_once_with() 166 | 167 | self.assertFalse(m.on_url.called) 168 | self.assertFalse(m.on_chunk_header.called) 169 | 
self.assertFalse(m.on_chunk_complete.called) 170 | 171 | def test_parser_response_2(self): 172 | with self.assertRaisesRegex(TypeError, 'a bytes-like object'): 173 | httptools.HttpResponseParser(None).feed_data('') 174 | 175 | def test_parser_response_3(self): 176 | callbacks = {'on_header', 'on_headers_complete', 'on_body', 177 | 'on_message_complete'} 178 | 179 | for cbname in callbacks: 180 | with self.subTest('{} callback fails correctly'.format(cbname)): 181 | with self.assertRaisesRegex(httptools.HttpParserCallbackError, 182 | 'callback error'): 183 | 184 | m = mock.Mock() 185 | getattr(m, cbname).side_effect = Exception() 186 | 187 | p = httptools.HttpResponseParser(m) 188 | p.feed_data(RESPONSE1_HEAD + RESPONSE1_BODY) 189 | 190 | def test_parser_response_4(self): 191 | p = httptools.HttpResponseParser(None) 192 | with self.assertRaises(httptools.HttpParserInvalidStatusError): 193 | p.feed_data(b'HTTP/1.1 1299 FOOSPAM\r\n') 194 | 195 | def test_parser_response_5(self): 196 | m = mock.Mock() 197 | m.on_status = None 198 | m.on_header = None 199 | m.on_body = None 200 | m.on_headers_complete = None 201 | m.on_chunk_header = None 202 | m.on_chunk_complete = None 203 | 204 | p = httptools.HttpResponseParser(m) 205 | p.feed_data(RESPONSE1_HEAD) 206 | p.feed_data(RESPONSE1_BODY) 207 | 208 | m.on_message_complete.assert_called_once_with() 209 | 210 | def test_parser_response_cb_on_status_1(self): 211 | class Error(Exception): 212 | pass 213 | 214 | m = mock.Mock() 215 | m.on_status.side_effect = Error() 216 | 217 | p = httptools.HttpResponseParser(m) 218 | try: 219 | p.feed_data(RESPONSE1_HEAD + RESPONSE1_BODY) 220 | except httptools.HttpParserCallbackError as ex: 221 | self.assertIsInstance(ex.__context__, Error) 222 | else: 223 | self.fail('HttpParserCallbackError was not raised') 224 | 225 | def test_parser_response_cb_on_body_1(self): 226 | class Error(Exception): 227 | pass 228 | 229 | m = mock.Mock() 230 | m.on_body.side_effect = Error() 231 | 232 | p = 
httptools.HttpResponseParser(m) 233 | try: 234 | p.feed_data(RESPONSE1_HEAD + RESPONSE1_BODY) 235 | except httptools.HttpParserCallbackError as ex: 236 | self.assertIsInstance(ex.__context__, Error) 237 | else: 238 | self.fail('HttpParserCallbackError was not raised') 239 | 240 | def test_parser_response_cb_on_message_complete_1(self): 241 | class Error(Exception): 242 | pass 243 | 244 | m = mock.Mock() 245 | m.on_message_complete.side_effect = Error() 246 | 247 | p = httptools.HttpResponseParser(m) 248 | try: 249 | p.feed_data(RESPONSE1_HEAD + RESPONSE1_BODY) 250 | except httptools.HttpParserCallbackError as ex: 251 | self.assertIsInstance(ex.__context__, Error) 252 | else: 253 | self.fail('HttpParserCallbackError was not raised') 254 | 255 | def test_parser_upgrade_response_1(self): 256 | m = mock.Mock() 257 | 258 | headers = {} 259 | m.on_header.side_effect = headers.__setitem__ 260 | 261 | p = httptools.HttpResponseParser(m) 262 | try: 263 | p.feed_data(UPGRADE_RESPONSE1) 264 | except httptools.HttpParserUpgrade as ex: 265 | offset = ex.args[0] 266 | else: 267 | self.fail('HttpParserUpgrade was not raised') 268 | 269 | self.assertEqual(UPGRADE_RESPONSE1[offset:], b'data') 270 | 271 | self.assertEqual(p.get_http_version(), '1.1') 272 | self.assertEqual(p.get_status_code(), 101) 273 | 274 | m.on_status.assert_called_once_with(b'Switching Protocols') 275 | 276 | m.on_headers_complete.assert_called_once_with() 277 | self.assertEqual(m.on_header.call_count, 6) 278 | self.assertEqual(len(headers), 6) 279 | 280 | m.on_message_complete.assert_called_once_with() 281 | 282 | 283 | class TestRequestParser(unittest.TestCase): 284 | 285 | def test_parser_request_chunked_1(self): 286 | m = mock.Mock() 287 | p = httptools.HttpRequestParser(m) 288 | 289 | p.feed_data(CHUNKED_REQUEST1_1) 290 | self.assertEqual(p.get_method(), b'POST') 291 | 292 | m.on_message_begin.assert_called_once_with() 293 | 294 | m.on_url.assert_called_once_with(b'/test.php?a=b+c') 295 | 
self.assertEqual(p.get_http_version(), '1.1') 296 | 297 | m.on_header.assert_called_with(b'Transfer-Encoding', b'chunked') 298 | m.on_chunk_header.assert_called_with() 299 | m.on_chunk_complete.assert_called_with() 300 | 301 | self.assertFalse(m.on_message_complete.called) 302 | m.on_message_begin.assert_called_once_with() 303 | 304 | m.reset_mock() 305 | p.feed_data(CHUNKED_REQUEST1_2) 306 | 307 | m.on_chunk_header.assert_called_with() 308 | m.on_chunk_complete.assert_called_with() 309 | m.on_header.assert_called_with(b'User-Agent', b'spam') 310 | self.assertEqual(m.on_header.call_count, 2) 311 | 312 | self.assertFalse(m.on_message_begin.called) 313 | 314 | m.on_message_complete.assert_called_once_with() 315 | 316 | def test_parser_request_chunked_2(self): 317 | m = mock.Mock() 318 | 319 | headers = {} 320 | m.on_header.side_effect = headers.__setitem__ 321 | 322 | m.on_url = None 323 | m.on_body = None 324 | m.on_headers_complete = None 325 | m.on_chunk_header = None 326 | m.on_chunk_complete = None 327 | 328 | p = httptools.HttpRequestParser(m) 329 | p.feed_data(CHUNKED_REQUEST1_1) 330 | p.feed_data(CHUNKED_REQUEST1_2) 331 | 332 | self.assertEqual( 333 | headers, 334 | {b'User-Agent': b'spam', 335 | b'Transfer-Encoding': b'chunked', 336 | b'Host': b'bar', 337 | b'Vary': b'*'}) 338 | 339 | def test_parser_request_chunked_cb_error_1(self): 340 | class Error(Exception): 341 | pass 342 | 343 | m = mock.Mock() 344 | m.on_chunk_header.side_effect = Error() 345 | 346 | p = httptools.HttpRequestParser(m) 347 | try: 348 | p.feed_data(CHUNKED_REQUEST1_1) 349 | except httptools.HttpParserCallbackError as ex: 350 | self.assertIsInstance(ex.__context__, Error) 351 | else: 352 | self.fail('HttpParserCallbackError was not raised') 353 | 354 | def test_parser_request_chunked_cb_error_2(self): 355 | class Error(Exception): 356 | pass 357 | 358 | m = mock.Mock() 359 | m.on_chunk_complete.side_effect = Error() 360 | 361 | p = httptools.HttpRequestParser(m) 362 | try: 363 | 
p.feed_data(CHUNKED_REQUEST1_1) 364 | except httptools.HttpParserCallbackError as ex: 365 | self.assertIsInstance(ex.__context__, Error) 366 | else: 367 | self.fail('HttpParserCallbackError was not raised') 368 | 369 | def test_parser_request_chunked_3(self): 370 | m = mock.Mock() 371 | p = httptools.HttpRequestParser(m) 372 | 373 | p.feed_data(CHUNKED_REQUEST1_3) 374 | 375 | self.assertEqual(p.get_method(), b'POST') 376 | 377 | m.on_url.assert_called_once_with(b'/test.php?a=b+c') 378 | self.assertEqual(p.get_http_version(), '1.1') 379 | 380 | m.on_header.assert_called_with(b'Transfer-Encoding', b'chunked') 381 | m.on_chunk_header.assert_called_with() 382 | m.on_chunk_complete.assert_called_with() 383 | 384 | self.assertTrue(m.on_message_complete.called) 385 | 386 | def test_parser_request_upgrade_1(self): 387 | m = mock.Mock() 388 | 389 | headers = {} 390 | m.on_header.side_effect = headers.__setitem__ 391 | 392 | p = httptools.HttpRequestParser(m) 393 | 394 | try: 395 | p.feed_data(UPGRADE_REQUEST1) 396 | except httptools.HttpParserUpgrade as ex: 397 | offset = ex.args[0] 398 | else: 399 | self.fail('HttpParserUpgrade was not raised') 400 | 401 | self.assertEqual(UPGRADE_REQUEST1[offset:], b'Hot diggity dogg') 402 | 403 | self.assertEqual(headers, { 404 | b'Sec-WebSocket-Key2': b'12998 5 Y3 1 .P00', 405 | b'Sec-WebSocket-Key1': b'4 @1 46546xW%0l 1 5', 406 | b'Connection': b'Upgrade', 407 | b'Origin': b'http://example.com', 408 | b'Sec-WebSocket-Protocol': b'sample', 409 | b'Host': b'example.com', 410 | b'Upgrade': b'WebSocket'}) 411 | 412 | # The parser can be used again for further parsing - this is a legacy 413 | # behavior from the time we were still using http-parser. 
414 | p.feed_data(CHUNKED_REQUEST1_1) 415 | self.assertEqual(p.get_method(), b'POST') 416 | 417 | def test_parser_request_upgrade_flag(self): 418 | 419 | class Protocol: 420 | 421 | def __init__(self): 422 | self.parser = httptools.HttpRequestParser(self) 423 | 424 | def on_url(self, url): 425 | assert self.parser.should_upgrade() is False 426 | 427 | def on_headers_complete(self): 428 | assert self.parser.should_upgrade() is True 429 | 430 | def on_message_complete(self): 431 | assert self.parser.should_upgrade() is True 432 | 433 | protocol = Protocol() 434 | try: 435 | protocol.parser.feed_data(UPGRADE_REQUEST1) 436 | except httptools.HttpParserUpgrade: 437 | # Raise as usual. 438 | pass 439 | else: 440 | self.fail('HttpParserUpgrade was not raised') 441 | 442 | def test_parser_request_error_in_on_header(self): 443 | class Error(Exception): 444 | pass 445 | m = mock.Mock() 446 | m.on_header.side_effect = Error() 447 | p = httptools.HttpRequestParser(m) 448 | 449 | try: 450 | p.feed_data(UPGRADE_REQUEST1) 451 | except httptools.HttpParserCallbackError as ex: 452 | self.assertIsInstance(ex.__context__, Error) 453 | else: 454 | self.fail('HttpParserCallbackError was not raised') 455 | 456 | def test_parser_request_error_in_on_message_begin(self): 457 | class Error(Exception): 458 | pass 459 | m = mock.Mock() 460 | m.on_message_begin.side_effect = Error() 461 | p = httptools.HttpRequestParser(m) 462 | 463 | try: 464 | p.feed_data(UPGRADE_REQUEST1) 465 | except httptools.HttpParserCallbackError as ex: 466 | self.assertIsInstance(ex.__context__, Error) 467 | else: 468 | self.fail('HttpParserCallbackError was not raised') 469 | 470 | def test_parser_request_error_in_cb_on_url(self): 471 | class Error(Exception): 472 | pass 473 | m = mock.Mock() 474 | m.on_url.side_effect = Error() 475 | p = httptools.HttpRequestParser(m) 476 | 477 | try: 478 | p.feed_data(UPGRADE_REQUEST1) 479 | except httptools.HttpParserCallbackError as ex: 480 | self.assertIsInstance(ex.__context__, 
Error) 481 | else: 482 | self.fail('HttpParserCallbackError was not raised') 483 | 484 | def test_parser_request_error_in_cb_on_headers_complete(self): 485 | class Error(Exception): 486 | pass 487 | m = mock.Mock() 488 | m.on_headers_complete.side_effect = Error() 489 | p = httptools.HttpRequestParser(m) 490 | 491 | try: 492 | p.feed_data(UPGRADE_REQUEST1) 493 | except httptools.HttpParserCallbackError as ex: 494 | self.assertIsInstance(ex.__context__, Error) 495 | else: 496 | self.fail('HttpParserCallbackError was not raised') 497 | 498 | def test_parser_request_2(self): 499 | p = httptools.HttpRequestParser(None) 500 | with self.assertRaises(httptools.HttpParserInvalidMethodError): 501 | p.feed_data(b'SPAM /test.php?a=b+c HTTP/1.1') 502 | 503 | def test_parser_request_3(self): 504 | p = httptools.HttpRequestParser(None) 505 | with self.assertRaises(httptools.HttpParserInvalidURLError): 506 | p.feed_data(b'POST HTTP/1.1') 507 | 508 | def test_parser_request_4(self): 509 | p = httptools.HttpRequestParser(None) 510 | with self.assertRaisesRegex(TypeError, 'a bytes-like object'): 511 | p.feed_data('POST HTTP/1.1') 512 | 513 | def test_parser_request_fragmented(self): 514 | m = mock.Mock() 515 | headers = {} 516 | m.on_header.side_effect = headers.__setitem__ 517 | p = httptools.HttpRequestParser(m) 518 | 519 | REQUEST = ( 520 | b'PUT / HTTP/1.1\r\nHost: localhost:1234\r\nContent-Type: text/pl', 521 | b'ain; charset=utf-8\r\nX-Empty-Header: \r\nConnection: close\r\n', 522 | b'Content-Length: 10\r\n\r\n1234567890', 523 | ) 524 | 525 | p.feed_data(REQUEST[0]) 526 | 527 | m.on_message_begin.assert_called_once_with() 528 | m.on_url.assert_called_once_with(b'/') 529 | self.assertEqual(headers, {b'Host': b'localhost:1234'}) 530 | 531 | p.feed_data(REQUEST[1]) 532 | self.assertEqual( 533 | headers, 534 | {b'Host': b'localhost:1234', 535 | b'Content-Type': b'text/plain; charset=utf-8', 536 | b'X-Empty-Header': b''}) 537 | 538 | p.feed_data(REQUEST[2]) 539 | self.assertEqual( 
540 | headers, 541 | {b'Host': b'localhost:1234', 542 | b'Content-Type': b'text/plain; charset=utf-8', 543 | b'X-Empty-Header': b'', 544 | b'Connection': b'close', 545 | b'Content-Length': b'10'}) 546 | m.on_message_complete.assert_called_once_with() 547 | 548 | def test_parser_request_fragmented_header(self): 549 | m = mock.Mock() 550 | headers = {} 551 | m.on_header.side_effect = headers.__setitem__ 552 | p = httptools.HttpRequestParser(m) 553 | 554 | REQUEST = ( 555 | b'PUT / HTTP/1.1\r\nHost: localhost:1234\r\nContent-', 556 | b'Type: text/plain; charset=utf-8\r\n\r\n', 557 | ) 558 | 559 | p.feed_data(REQUEST[0]) 560 | 561 | m.on_message_begin.assert_called_once_with() 562 | m.on_url.assert_called_once_with(b'/') 563 | self.assertEqual(headers, {b'Host': b'localhost:1234'}) 564 | 565 | p.feed_data(REQUEST[1]) 566 | self.assertEqual( 567 | headers, 568 | {b'Host': b'localhost:1234', 569 | b'Content-Type': b'text/plain; charset=utf-8'}) 570 | 571 | def test_parser_request_fragmented_value(self): 572 | m = mock.Mock() 573 | headers = {} 574 | m.on_header.side_effect = headers.__setitem__ 575 | p = httptools.HttpRequestParser(m) 576 | 577 | REQUEST = ( 578 | b'PUT / HTTP/1.1\r\nHost: localhost:1234\r\nContent-Type:', 579 | b' text/pla', 580 | b'in; chars', 581 | b'et=utf-8\r\n\r\n', 582 | ) 583 | 584 | p.feed_data(REQUEST[0]) 585 | 586 | m.on_message_begin.assert_called_once_with() 587 | m.on_url.assert_called_once_with(b'/') 588 | self.assertEqual(headers, {b'Host': b'localhost:1234'}) 589 | 590 | p.feed_data(REQUEST[1]) 591 | p.feed_data(REQUEST[2]) 592 | p.feed_data(REQUEST[3]) 593 | self.assertEqual( 594 | headers, 595 | {b'Host': b'localhost:1234', 596 | b'Content-Type': b'text/plain; charset=utf-8'}) 597 | 598 | def test_parser_request_fragmented_bytes(self): 599 | m = mock.Mock() 600 | headers = {} 601 | m.on_header.side_effect = headers.__setitem__ 602 | p = httptools.HttpRequestParser(m) 603 | 604 | REQUEST = \ 605 | b'PUT / HTTP/1.1\r\nHost: 
localhost:1234\r\nContent-' \ 606 | b'Type: text/plain; charset=utf-8\r\n\r\n' 607 | 608 | step = 1 609 | for i in range(0, len(REQUEST), step): 610 | p.feed_data(REQUEST[i:i+step]) 611 | 612 | self.assertEqual( 613 | headers, 614 | {b'Host': b'localhost:1234', 615 | b'Content-Type': b'text/plain; charset=utf-8'}) 616 | 617 | 618 | class TestUrlParser(unittest.TestCase): 619 | 620 | def parse(self, url:bytes): 621 | parsed = httptools.parse_url(url) 622 | return (parsed.schema, parsed.host, parsed.port, parsed.path, 623 | parsed.query, parsed.fragment, parsed.userinfo) 624 | 625 | def test_parser_url_1(self): 626 | self.assertEqual( 627 | self.parse(b'dsf://aaa/b/c?aa#123'), 628 | (b'dsf', b'aaa', None, b'/b/c', b'aa', b'123', None)) 629 | 630 | self.assertEqual( 631 | self.parse(b'dsf://i:n@aaa:88/b/c?aa#123'), 632 | (b'dsf', b'aaa', 88, b'/b/c', b'aa', b'123', b'i:n')) 633 | 634 | self.assertEqual( 635 | self.parse(b'////'), 636 | (None, None, None, b'////', None, None, None)) 637 | 638 | self.assertEqual( 639 | self.parse(b'////1/1?a=b&c[]=d&c[]=z'), 640 | (None, None, None, b'////1/1', b'a=b&c[]=d&c[]=z', None, None)) 641 | 642 | self.assertEqual( 643 | self.parse(b'/////?#123'), 644 | (None, None, None, b'/////', None, b'123', None)) 645 | 646 | self.assertEqual( 647 | self.parse(b'/a/b/c?b=1&'), 648 | (None, None, None, b'/a/b/c', b'b=1&', None, None)) 649 | 650 | def test_parser_url_2(self): 651 | with self.assertRaises(httptools.HttpParserInvalidURLError): 652 | self.parse(b'') 653 | 654 | def test_parser_url_3(self): 655 | with self.assertRaises(httptools.HttpParserInvalidURLError): 656 | self.parse(b' ') 657 | 658 | def test_parser_url_4(self): 659 | with self.assertRaises(httptools.HttpParserInvalidURLError): 660 | self.parse(b':///1') 661 | 662 | def test_parser_url_5(self): 663 | self.assertEqual( 664 | self.parse(b'http://[1:2::3:4]:67/'), 665 | (b'http', b'1:2::3:4', 67, b'/', None, None, None)) 666 | 667 | def test_parser_url_6(self): 668 | 
self.assertEqual( 669 | self.parse(bytearray(b'/')), 670 | (None, None, None, b'/', None, None, None)) 671 | 672 | def test_parser_url_7(self): 673 | url = httptools.parse_url(b'/') 674 | with self.assertRaisesRegex(AttributeError, 'not writable'): 675 | url.port = 0 676 | 677 | def test_parser_url_8(self): 678 | with self.assertRaises(TypeError): 679 | httptools.parse_url(None) 680 | 681 | def test_parser_url_9(self): 682 | with self.assertRaisesRegex(httptools.HttpParserInvalidURLError, 683 | r'a\\x00aa'): 684 | self.parse(b'dsf://a\x00aa') 685 | 686 | def test_parser_url_10(self): 687 | with self.assertRaisesRegex(TypeError, 'a bytes-like object'): 688 | self.parse('dsf://aaa') 689 | --------------------------------------------------------------------------------