├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── codeql-analysis.yml │ └── wheels.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── CHANGELOG.md ├── DerivedGeneralCategory.txt.sha ├── LICENSE.Apache ├── LICENSE.MIT ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── changelog.md ├── conf.py ├── decoder.rst ├── encoder.rst ├── exceptions.rst ├── index.rst └── performance.rst ├── pyjson5.pyx ├── pyproject.toml ├── requirements-dev.txt ├── requirements-readthedocs.txt ├── scripts ├── make_decoder_recursive_select.py ├── make_escape_dct.py ├── make_unicode_categories.py ├── run-minefield-test.py ├── run-tests.py ├── sha512sum.py └── transcode-to-json.py ├── setup.cfg ├── setup.py └── src ├── DESCRIPTION.inc ├── VERSION.inc ├── _constants.pyx ├── _decoder.pyx ├── _decoder_recursive_select.hpp ├── _encoder.pyx ├── _encoder_options.pyx ├── _escape_dct.hpp ├── _exceptions.pyx ├── _exceptions_decoder.pyx ├── _exceptions_encoder.pyx ├── _exports.pyx ├── _imports.pyx ├── _legacy.pyx ├── _raise_decoder.pyx ├── _raise_encoder.pyx ├── _reader_callback.pyx ├── _reader_ucs.pyx ├── _readers.pyx ├── _stack_heap_string.hpp ├── _unicode.pyx ├── _unicode_cat_of.hpp ├── _writer_callback.pyx ├── _writer_noop.pyx ├── _writer_reallocatable.pyx ├── _writers.pyx ├── dragonbox.cc ├── native.hpp └── pyjson5 ├── __init__.py ├── __init__.pyi └── py.typed /.gitattributes: -------------------------------------------------------------------------------- 1 | src/dragonbox.cc linguist-vendored 2 | src/*.hpp linguist-vendored 3 | third-party/** linguist-vendored 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | 13 | - package-ecosystem: "gitsubmodule" 14 | directory: "/" 15 | schedule: 16 | interval: "daily" 17 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | os: 11 | - ubuntu-latest 12 | - macos-latest 13 | - windows-latest 14 | python: 15 | - '3.10' 16 | - '3.13' 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | submodules: true 22 | 23 | - name: Cache pip 24 | uses: actions/cache@v4 25 | with: 26 | key: cache--${{ matrix.os }}--${{ matrix.python }}--${{ hashFiles('./requirements*.txt', './Makefile') }} 27 | restore-keys: cache--${{ matrix.os }}--${{ matrix.python }}-- 28 | path: ~/.cache/pip 29 | 30 | - name: Setup python 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python }} 34 | 35 | - name: Display Python version 36 | run: python -c 'import sys; print(sys.version)' 37 | 38 | - name: Update pip 39 | run: python -m pip install -U pip wheel setuptools 40 | 41 | - name: Install requirements 42 | run: python -m pip install -Ur requirements-dev.txt 43 | 44 | - name: Compile project 45 | run: make install 46 | 47 | - name: Run JSON5 tests suite 48 | run: python scripts/run-tests.py 49 | 50 | - name: Run "JSON is a Minefield" suite 51 | run: python scripts/run-minefield-test.py 52 | 53 | lint: 54 | runs-on: 
ubuntu-latest 55 | 56 | steps: 57 | - uses: actions/checkout@v4 58 | with: 59 | submodules: true 60 | 61 | - name: Cache pip 62 | uses: actions/cache@v4 63 | with: 64 | key: lint--${{ hashFiles('./requirements*.txt', './Makefile') }} 65 | restore-keys: lint-- 66 | path: ~/.cache/pip 67 | 68 | - name: Setup python 69 | uses: actions/setup-python@v5 70 | with: 71 | python-version: '3.13' 72 | 73 | - name: Display Python version 74 | run: python -c 'import sys; print(sys.version)' 75 | 76 | - name: Update pip 77 | run: python -m pip install -U pip wheel setuptools 78 | 79 | - name: Install requirements 80 | run: python -m pip install -Ur requirements-dev.txt 81 | 82 | - name: Compile project 83 | run: make install 84 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | schedule: 9 | - cron: '41 21 * * 3' 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ 'cpp', 'python' ] 24 | 25 | steps: 26 | - name: Checkout repository 27 | uses: actions/checkout@v4 28 | with: 29 | submodules: true 30 | 31 | - name: Cache pip 32 | uses: actions/cache@v4 33 | with: 34 | key: codeql-analysis--${{ github.event.inputs.os }}--${{ github.event.inputs.python }}--${{ hashFiles('./requirements-dev.txt') }} 35 | path: ~/.cache/pip 36 | 37 | - name: Setup python 38 | uses: actions/setup-python@v5 39 | with: 40 | python-version: ${{ github.event.inputs.python }} 41 | 42 | - name: Initialize CodeQL 43 | uses: github/codeql-action/init@v3 44 | with: 45 | languages: ${{ matrix.language }} 46 | 47 | - name: Update pip 48 | run: python -m pip install -U pip wheel setuptools 49 | 50 | - name: Install requirements 51 | run: python -m pip install -Ur requirements-dev.txt 52 | 53 | - name: Compile 54 | run: make wheel 55 | 56 | - name: Perform CodeQL Analysis 57 | uses: github/codeql-action/analyze@v3 58 | -------------------------------------------------------------------------------- /.github/workflows/wheels.yml: -------------------------------------------------------------------------------- 1 | name: Build Wheels 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | linux-define-matrix: 8 | runs-on: ubuntu-latest 9 | outputs: 10 | matrix: ${{ steps.set-matrix.outputs.matrix }} 11 | steps: 12 | - uses: actions/checkout@v4 13 | with: 14 | submodules: true 15 | 16 | - name: Setup python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: '3.13' 20 | 21 | - run: python -m pip install -U pip wheel setuptools 22 | - run: python -m pip install -U 'cibuildwheel==2.*' 23 | 24 | - id: set-matrix 25 | run: | 26 | TARGETS="$(python -m cibuildwheel --archs "x86_64 i686 aarch64 ppc64le s390x armv7l" --print-build-identifiers)" 27 | echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT 28 | shell: bash 29 | env: 30 | CIBW_BUILD_FRONTEND: build 31 | CIBW_SKIP: 'cp27-* cp36-* pp*' 32 | CIBW_DEPENDENCY_VERSIONS: pinned 33 | CIBW_PLATFORM: linux 34 | 35 | macos-define-matrix: 36 | runs-on: macos-13 37 | outputs: 38 | matrix: ${{ steps.set-matrix.outputs.matrix }} 39 | steps: 40 | - uses: actions/checkout@v4 41 | with: 42 | submodules: true 43 | 44 | - name: Setup 
python 45 | uses: actions/setup-python@v5 46 | with: 47 | python-version: '3.13' 48 | 49 | - run: python -m pip install -U pip wheel setuptools 50 | - run: python -m pip install -U 'cibuildwheel==2.*' 51 | 52 | - id: set-matrix 53 | run: | 54 | TARGETS="$(python -m cibuildwheel --archs "x86_64 arm64 universal2" --print-build-identifiers)" 55 | echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT 56 | shell: bash 57 | env: 58 | CIBW_BUILD_FRONTEND: build 59 | CIBW_SKIP: 'cp27-* cp36-* pp*' 60 | CIBW_DEPENDENCY_VERSIONS: pinned 61 | CIBW_PLATFORM: macos 62 | 63 | windows-define-matrix: 64 | runs-on: windows-2022 65 | outputs: 66 | matrix: ${{ steps.set-matrix.outputs.matrix }} 67 | steps: 68 | - uses: actions/checkout@v4 69 | with: 70 | submodules: true 71 | 72 | - name: Setup python 73 | uses: actions/setup-python@v5 74 | with: 75 | python-version: '3.13' 76 | 77 | - run: python -m pip install -U pip wheel setuptools 78 | - run: python -m pip install -U 'cibuildwheel==2.*' 79 | 80 | - id: set-matrix 81 | run: | 82 | TARGETS="$(python -m cibuildwheel --archs "AMD64 x86 ARM64" --print-build-identifiers)" 83 | echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT 84 | shell: bash 85 | env: 86 | CIBW_BUILD_FRONTEND: build 87 | CIBW_SKIP: 'cp27-* cp36-* pp*' 88 | CIBW_DEPENDENCY_VERSIONS: pinned 89 | CIBW_PLATFORM: windows 90 | 91 | linux-build: 92 | runs-on: ubuntu-latest 93 | 94 | needs: 95 | - linux-define-matrix 96 | strategy: 97 | matrix: 98 | only: ${{ fromJSON(needs.linux-define-matrix.outputs.matrix) }} 99 | 100 | steps: 101 | - uses: actions/checkout@v4 102 | with: 103 | submodules: true 104 | 105 | - name: Set up QEMU 106 | uses: docker/setup-qemu-action@v3 107 | with: 108 | platforms: all 109 | 110 | - name: Cache pip 111 | uses: actions/cache@v4 112 | with: 113 | key: linux--${{ hashFiles('./requirements-dev.txt') }} 114 | path: ~/.cache/pip 115 | 116 | - name: Setup python 117 | uses: actions/setup-python@v5 118 | with: 119 | python-version: '3.13' 120 | 121 | - run: python -m pip install -U pip wheel setuptools 122 | - run: python -m pip install -Ur requirements-dev.txt 123 | - run: python -m pip install -U 'cibuildwheel==2.*' 124 | 125 | - run: make prepare 126 | 127 | - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }} 128 | env: 129 | CIBW_BUILD_FRONTEND: build 130 | CIBW_SKIP: 'cp27-* pp*' 131 | CIBW_DEPENDENCY_VERSIONS: pinned 132 | CIBW_PLATFORM: linux 133 | CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py 134 | 135 | - uses: actions/upload-artifact@v4 136 | with: 137 | name: ${{ matrix.only }} 138 | path: ./wheelhouse 139 | retention-days: 1 140 | 141 | macos-build: 142 | runs-on: macos-13 143 | 144 | needs: 145 | - macos-define-matrix 146 | strategy: 147 | matrix: 148 | only: ${{ fromJSON(needs.macos-define-matrix.outputs.matrix) }} 149 | 150 | steps: 151 | - uses: actions/checkout@v4 152 | with: 153 | submodules: true 154 | 155 | - name: Cache pip 156 | uses: actions/cache@v4 157 | with: 158 | key: windows--${{ hashFiles('./requirements-dev.txt') }} 159 | path: ~/.cache/pip 160 | 161 | - name: Setup python 162 | uses: actions/setup-python@v5 163 | with: 164 | python-version: '3.13' 165 | 166 | - run: python -m pip install -U pip wheel setuptools 167 | - run: python -m pip install -Ur requirements-dev.txt 168 | - run: python -m pip install -U 'cibuildwheel==2.*' 169 | 170 | - run: make prepare 171 | 172 | - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }} 173 | env: 
174 | CIBW_BUILD_FRONTEND: build 175 | CIBW_SKIP: 'cp27-* pp*' 176 | CIBW_DEPENDENCY_VERSIONS: pinned 177 | CIBW_PLATFORM: macos 178 | CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py 179 | 180 | - uses: actions/upload-artifact@v4 181 | with: 182 | name: ${{ matrix.only }} 183 | path: ./wheelhouse 184 | retention-days: 1 185 | 186 | windows-build: 187 | runs-on: windows-2022 188 | 189 | needs: 190 | - windows-define-matrix 191 | strategy: 192 | matrix: 193 | only: ${{ fromJSON(needs.windows-define-matrix.outputs.matrix) }} 194 | 195 | steps: 196 | - uses: actions/checkout@v4 197 | with: 198 | submodules: true 199 | 200 | - name: Cache pip 201 | uses: actions/cache@v4 202 | with: 203 | key: windows--${{ hashFiles('./requirements-dev.txt') }} 204 | path: ~/.cache/pip 205 | 206 | - name: Setup python 207 | uses: actions/setup-python@v5 208 | with: 209 | python-version: '3.13' 210 | 211 | - run: python -m pip install -U pip wheel setuptools 212 | - run: python -m pip install -Ur requirements-dev.txt 213 | - run: python -m pip install -U 'cibuildwheel==2.*' 214 | 215 | - run: make prepare 216 | 217 | - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }} 218 | env: 219 | CIBW_BUILD_FRONTEND: build 220 | CIBW_SKIP: 'cp27-* pp*' 221 | CIBW_DEPENDENCY_VERSIONS: pinned 222 | CIBW_PLATFORM: windows 223 | CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py 224 | 225 | - uses: actions/upload-artifact@v4 226 | with: 227 | name: ${{ matrix.only }} 228 | path: ./wheelhouse 229 | retention-days: 1 230 | 231 | combine: 232 | runs-on: ubuntu-latest 233 | needs: 234 | - linux-build 235 | - macos-build 236 | - windows-build 237 | steps: 238 | - uses: actions/download-artifact@v4 239 | with: 240 | path: ./wheelhouse 241 | - run: | 242 | find -name '*.zip' -exec unzip '{}' ';' 243 | find -name '*.zip' -exec rm '{}' + 244 | find -name '*.whl' -exec mv -t. 
'{}' + 245 | find -type d -delete 246 | shell: bash 247 | working-directory: ./wheelhouse 248 | - uses: actions/upload-artifact@v4 249 | with: 250 | name: wheelhouse 251 | path: ./wheelhouse 252 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cdo] 2 | *.egg-info/ 3 | 4 | /env*/ 5 | /build/ 6 | /cython_debug/ 7 | /dist/ 8 | 9 | *.c 10 | *.cpp 11 | *.so 12 | *.o 13 | 14 | run.cgi 15 | 16 | *.swp* 17 | *.nfs* 18 | *~ 19 | *.~* 20 | ~* 21 | *.tmp 22 | *.old 23 | *.bak 24 | *.pid 25 | 26 | .* 27 | !.git* 28 | !.readthedocs.yaml 29 | 30 | *.orig 31 | 32 | /DerivedGeneralCategory.txt 33 | /citylots.json 34 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third-party/json5-tests"] 2 | path = third-party/json5-tests 3 | url = https://github.com/json5/json5-tests.git 4 | [submodule "third-party/JSONTestSuite"] 5 | path = third-party/JSONTestSuite 6 | url = https://github.com/nst/JSONTestSuite.git 7 | [submodule "third-party/fast_double_parser"] 8 | path = third-party/fast_double_parser 9 | url = https://github.com/lemire/fast_double_parser.git 10 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | 3 | version: 2 4 | 5 | build: 6 | os: ubuntu-24.04 7 | apt_packages: 8 | - graphviz 9 | tools: 10 | python: "3.13" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: requirements-readthedocs.txt 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | **1.6.9 (2025-05-12)** 4 | 5 | * Remove unused import to fix installation on Termux (by veka0, [#105](https://github.com/Kijewski/pyjson5/pull/105)) 6 | 7 | **1.6.8 (2025-01-03)** 8 | 9 | * Requires at least Python 3.7 10 | * Update dependencies 11 | * Relicense to MIT OR Apache-2.0 12 | 13 | **1.6.7 (2024-10-08)** 14 | 15 | * Update to Unicode 16.0.0 16 | * Update for Python 3.13 17 | 18 | **1.6.6 (2024-02-09)** 19 | 20 | * Fix return type of `load()` (by Q-ten, [#88](https://github.com/Kijewski/pyjson5/pull/88)) 21 | 22 | **1.6.5 (2023-12-04)** 23 | 24 | * Fix type hints for optional arguments 25 | 26 | **1.6.4 (2023-07-31)** 27 | 28 | * Update to Cython 3 29 | * Update for Python 3.12 30 | 31 | **1.6.3 (2023-06-24)** 32 | 33 | * Fix typing for `dump()` ([#61](https://github.com/Kijewski/pyjson5/issues/61)) 34 | 35 | **1.6.2 (2022-09-15)** 36 | 37 | * Update to Unicode 15.0.0 38 | 39 | **1.6.1 (2022-01-18)** 40 | 41 | * Fix [PEP 517](https://www.python.org/dev/peps/pep-0517/)-like installation using [build](https://github.com/pypa/build) (by [Tomasz Kłoczko](https://github.com/kloczek)) 42 | 43 | **1.6.0 (2021-11-17)** 44 | 45 | * Fallback to encode `vars(obj)` if `obj` is not stringifyable, e.g. 
to serialize [dataclasses](https://docs.python.org/3/library/dataclasses.html) 46 | * Update documentation to use newer [sphinx](https://www.sphinx-doc.org/) version 47 | * Use [dependabot](https://github.com/dependabot) to keep dependencies current 48 | * Update [fast_double_parser](https://github.com/lemire/fast_double_parser) 49 | 50 | **1.5.3 (2021-11-16)** 51 | 52 | * Add [PEP 484](https://www.python.org/dev/peps/pep-0484/) type hints (by [Pascal Corpet](https://github.com/pcorpet)) 53 | * Update [JSONTestSuite](https://github.com/nst/JSONTestSuite) 54 | 55 | **1.5.2 (2021-07-09)** 56 | 57 | * Add file extensions to fix compilation with current Apple SDKs 58 | * Update fast_double_parser to v0.5.0 59 | * Update to Unicode 14.0.0d18 60 | 61 | **1.5.1 (2021-05-01)** 62 | 63 | * Update up Unicode 14.0.0d9 64 | 65 | **1.5.0 (2021-03-11)** 66 | 67 | * Faster floating-point number encoding using [Junekey Jeon's Dragonbox algorithm](https://github.com/abolz/Drachennest/blob/77f4889a4cd9d7f0b9da82a379f14beabcfba13e/src/dragonbox.cc) implemented by Alexander Bolz 68 | * Removed a lot of configuration options from pyjson5.Options() 69 | 70 | **1.4.9 (2021-03-03)** 71 | 72 | * Faster floating-point number decoding using [fast_double_parser](https://github.com/lemire/fast_double_parser) by Daniel Lemire 73 | 74 | **1.4.8 (2020-12-23)** 75 | 76 | * Update up Unicode 13.0.0 77 | * Don't use non-standard ``__uint128`` 78 | * Add PyPy compatibility 79 | * Add ``decode_utf8(byte-like)`` 80 | 81 | **1.4.7 (2019-12-20)** 82 | 83 | * Allow ``\uXXXX`` sequences in identifier names 84 | * Update to Unicode 12.1.0 85 | * Optimized encoder and decoder for a little better speed 86 | * Setup basic CI environment 87 | * Parse ``\uXXXX`` in literal keys 88 | * Understand "0." 89 | * Add CI tests 90 | * Reject unescaped newlines in strings per spec 91 | * Allow overriding default quotation mark 92 | * Make Options objects pickle-able 93 | * Bump major version number 94 | 95 | **0.4.6 (2019-02-09)** 96 | 97 | * Fix PyUnicode_AsUTF8AndSize()'s signature 98 | 99 | **0.4.5 (2018-06-02)** 100 | 101 | * Don't use C++14 features, only C++11 102 | 103 | **0.4.4 (2018-05-19)** 104 | 105 | * Better documentation 106 | * Optimized encoder for a little better speed 107 | 108 | **0.4.3 (2018-05-03)** 109 | 110 | * Initial release 111 | -------------------------------------------------------------------------------- /DerivedGeneralCategory.txt.sha: -------------------------------------------------------------------------------- 1 | 993261c82681a5580aaa42c6184d61a289a1eaa48022fded929d00487066b0ed1014f35cbc0890c6db5f3cbf8ca51dd99362e088aceedf548cfb3cc8d72bb14e DerivedGeneralCategory.txt 2 | -------------------------------------------------------------------------------- /LICENSE.Apache: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE.MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person obtaining a copy 2 | of this software and associated documentation files (the "Software"), to deal 3 | in the Software without restriction, including without limitation the rights 4 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 5 | copies of the Software, and to permit persons to whom the Software is 6 | furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all 9 | copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. 18 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.md 2 | include LICENSE* 3 | include Makefile 4 | include pyjson5.cpp 5 | include pyjson5.pyx 6 | include pyproject.toml 7 | include third-party/fast_double_parser/include/fast_double_parser.h 8 | include requirements*.txt 9 | recursive-include docs ** 10 | recursive-include scripts ** 11 | recursive-include src ** 12 | recursive-include third-party/json5-tests ** 13 | recursive-include third-party/JSONTestSuite/test_parsing ** 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: sdist wheel docs 2 | 3 | .DELETE_ON_ERROR: 4 | 5 | .PHONY: all sdist wheel clean docs prepare test install 6 | 7 | export PYTHONUTF8 := 1 8 | export PYTHONIOENCODING := UTF-8 9 | 10 | INCLUDES := \ 11 | src/VERSION.inc src/DESCRIPTION.inc \ 12 | src/_decoder_recursive_select.hpp src/_unicode_cat_of.hpp \ 13 | src/_escape_dct.hpp src/_stack_heap_string.hpp src/native.hpp \ 14 | src/dragonbox.cc 15 | 16 | FILES := Makefile MANIFEST.in pyjson5.pyx README.rst pyproject.toml ${INCLUDES} 17 | 18 | DerivedGeneralCategory.txt: DerivedGeneralCategory.txt.sha 19 | curl -s -o $@ https://www.unicode.org/Public/16.0.0/ucd/extracted/DerivedGeneralCategory.txt 20 | python scripts/sha512sum.py -c $@.sha 21 | 22 | src/_unicode_cat_of.hpp: DerivedGeneralCategory.txt scripts/make_unicode_categories.py 23 | python scripts/make_unicode_categories.py $< $@ 24 | 25 | src/_decoder_recursive_select.py.hpp: scripts/make_decoder_recursive_select.py 26 | python $< $@ 27 | 28 | src/_escape_dct.hpp: scripts/make_escape_dct.py 
29 | 	python $< $@ 30 | 31 | pyjson5.cpp: pyjson5.pyx $(wildcard src/*.pyx) $(wildcard src/*.hpp) 32 | 	python -m cython -f -o $@ $< 33 | 34 | prepare: pyjson5.cpp ${FILES} 35 | 36 | sdist: prepare 37 | 	-rm -- dist/pyjson5-*.tar.gz 38 | 	python -m build --sdist 39 | 40 | wheel: prepare 41 | 	-rm -- dist/pyjson5-*.whl 42 | 	python -m build --wheel 43 | 44 | install: wheel 45 | 	pip install --force dist/pyjson5-*.whl 46 | 47 | docs: install $(wildcard docs/* docs/*/*) 48 | 	python -m sphinx -M html docs/ dist/ 49 | 50 | clean: 51 | 	[ ! -d build/ ] || rm -r -- build/ 52 | 	[ ! -d dist/ ] || rm -r -- dist/ 53 | 	[ ! -d pyjson5.egg-info/ ] || rm -r -- pyjson5.egg-info/ 54 | 	-rm -- pyjson5.*.so pyjson5.cpp 55 | 56 | test: wheel 57 | 	pip install --force dist/pyjson5-*.whl 58 | 	python scripts/run-minefield-test.py 59 | 	python scripts/run-tests.py 60 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PyJSON5 2 | ========== 3 | 4 | A JSON5 serializer and parser library for Python 3 written in 5 | `Cython `_. 6 | 7 | 8 | Serializer 9 | ---------- 10 | 11 | The serializer returns ASCII data that can safely be used in an HTML template. 12 | Apostrophes, ampersands, greater-than, and less-than signs are encoded as 13 | Unicode escape sequences. E.g. this snippet is safe for any and all input: 14 | 15 | .. code:: html 16 | 17 | "show message" 18 | 19 | Unless the input contains infinite or NaN values, the result will be valid 20 | `JSON `_ data. 21 | 22 | 23 | Parser 24 | ------ 25 | 26 | All valid `JSON5 1.0.0 `_ and 27 | `JSON `_ data can be read, 28 | unless the nesting level is absurdly high. 29 | 30 | Functions 31 | --------- 32 | 33 | You can find the full documentation online at https://pyjson5.readthedocs.io/en/latest/. 34 | Or simply call ``help(pyjson5)``. :-) 35 | 36 | The library supplies load(s) and dump(s) functions, so you can use it as a 37 | drop-in replacement for Python's built-in ``json`` module, but you *should* 38 | use the functions ``encode_*()`` and ``decode_*()`` instead. 39 | 40 | Compatibility 41 | ------------- 42 | 43 | At least CPython 3.7 or a recent PyPy3 version is needed.
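For quick orientation, a minimal usage sketch (the JSON5 document below is made up for illustration; ``decode()``/``encode()`` and the ``json``-style ``loads()``/``dumps()`` are the documented entry points):

.. code:: python

    import pyjson5

    # JSON5 input may use unquoted keys, single quotes, and trailing commas.
    obj = pyjson5.decode("{key: 'value', list: [1, 2, 3,],}")

    # Drop-in usage that mirrors the built-in ``json`` module ...
    text = pyjson5.dumps(obj)
    assert pyjson5.loads(text) == obj

    # ... and the preferred spellings.
    text = pyjson5.encode(obj)
    assert pyjson5.decode(text) == obj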
44 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.md -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | sys.path.insert(0, os.path.abspath('..')) 6 | 7 | extensions = [ 8 | 'sphinx.ext.autodoc', 9 | 'sphinx.ext.autosectionlabel', 10 | 'sphinx.ext.autosummary', 11 | 'sphinx.ext.graphviz', 12 | 'sphinx.ext.napoleon', 13 | 'sphinx.ext.intersphinx', 14 | 'sphinx.ext.inheritance_diagram', 15 | 'sphinx_autodoc_typehints', 16 | 'sphinx.ext.autosectionlabel', 17 | 'myst_parser', 18 | ] 19 | 20 | templates_path = ['_templates'] 21 | source_suffix = '.rst' 22 | master_doc = 'index' 23 | 24 | project = 'PyJSON5' 25 | copyright = '2018-2025, René Kijewski' 26 | author = 'René Kijewski' 27 | 28 | with open('../src/VERSION.inc', 'rt') as f: 29 | version = eval(f.read().strip()) 30 | release = version 31 | 32 | language = 'en' 33 | exclude_patterns = [] 34 | pygments_style = 'sphinx' 35 | todo_include_todos = False 36 | 37 | html_theme = 'furo' 38 | htmlhelp_basename = 'PyJSON5doc' 39 | 40 | display_toc = True 41 | autodoc_default_flags = ['members'] 42 | autosummary_generate = True 43 | 44 | intersphinx_mapping = { 45 | 'python': ('https://docs.python.org/3.13', None), 46 | } 47 | 48 | graphviz_output_format = 'svg' 49 | 50 | inheritance_graph_attrs = { 51 | 'size': '"8.0, 10.0"', 52 | 'fontsize': 32, 53 | 'bgcolor': 'lightgrey', 54 | } 55 | inheritance_node_attrs = { 56 | 'color': 'black', 57 | 'fillcolor': 'white', 58 | 'style': '"filled,solid"', 59 | } 60 | inheritance_edge_attrs = { 61 | 'penwidth': 1.5, 62 | 'arrowsize': 1.2, 63 | } 64 | -------------------------------------------------------------------------------- /docs/decoder.rst: -------------------------------------------------------------------------------- 1 | Parser / Decoder 2 | ================ 3 | 4 | All valid `JSON5 1.0.0 `_ and 5 | `JSON `_ data can be read, 6 | unless the nesting level is absurdly high. 7 | 8 | 9 | Quick Decoder Summary 10 | --------------------- 11 | 12 | .. autosummary:: 13 | 14 | ~pyjson5.decode 15 | ~pyjson5.decode_latin1 16 | ~pyjson5.decode_buffer 17 | ~pyjson5.decode_callback 18 | ~pyjson5.decode_io 19 | ~pyjson5.load 20 | ~pyjson5.loads 21 | ~pyjson5.Json5DecoderException 22 | ~pyjson5.Json5NestingTooDeep 23 | ~pyjson5.Json5EOF 24 | ~pyjson5.Json5IllegalCharacter 25 | ~pyjson5.Json5ExtraData 26 | ~pyjson5.Json5IllegalType 27 | 28 | 29 | Full Decoder Description 30 | ------------------------ 31 | 32 | .. autofunction:: pyjson5.decode 33 | 34 | .. autofunction:: pyjson5.decode_latin1 35 | 36 | .. autofunction:: pyjson5.decode_buffer 37 | 38 | .. autofunction:: pyjson5.decode_callback 39 | 40 | .. autofunction:: pyjson5.decode_io 41 | 42 | 43 | Decoder Compatibility Functions 44 | ------------------------------- 45 | 46 | .. autofunction:: pyjson5.load 47 | 48 | .. autofunction:: pyjson5.loads 49 | 50 | 51 | Decoder Exceptions 52 | ------------------ 53 | 54 | .. inheritance-diagram:: 55 | pyjson5.Json5DecoderException 56 | pyjson5.Json5NestingTooDeep 57 | pyjson5.Json5EOF 58 | pyjson5.Json5IllegalCharacter 59 | pyjson5.Json5ExtraData 60 | pyjson5.Json5IllegalType 61 | 62 | .. autoexception:: pyjson5.Json5DecoderException 63 | :members: 64 | :inherited-members: 65 | 66 | .. 
autoexception:: pyjson5.Json5NestingTooDeep 67 |     :members: 68 |     :inherited-members: 69 | 70 | .. autoexception:: pyjson5.Json5EOF 71 |     :members: 72 |     :inherited-members: 73 | 74 | .. autoexception:: pyjson5.Json5IllegalCharacter 75 |     :members: 76 |     :inherited-members: 77 | 78 | .. autoexception:: pyjson5.Json5ExtraData 79 |     :members: 80 |     :inherited-members: 81 | 82 | .. autoexception:: pyjson5.Json5IllegalType 83 |     :members: 84 |     :inherited-members: 85 | -------------------------------------------------------------------------------- /docs/encoder.rst: -------------------------------------------------------------------------------- 1 | Serializer / Encoder 2 | ==================== 3 | 4 | The serializer returns ASCII data that can safely be used in an HTML template. 5 | Apostrophes, ampersands, greater-than, and less-than signs are encoded as 6 | Unicode escape sequences. E.g. this snippet is safe for any and all input: 7 | 8 | .. code:: html 9 | 10 | "show message" 11 | 12 | Unless the input contains infinite or NaN values, the result will be valid 13 | `JSON `_ data. 14 | 15 | 16 | Quick Encoder Summary 17 | --------------------- 18 | 19 | .. autosummary:: 20 | 21 | ~pyjson5.encode 22 | ~pyjson5.encode_bytes 23 | ~pyjson5.encode_callback 24 | ~pyjson5.encode_io 25 | ~pyjson5.encode_noop 26 | ~pyjson5.dump 27 | ~pyjson5.dumps 28 | ~pyjson5.Options 29 | ~pyjson5.Json5EncoderException 30 | ~pyjson5.Json5UnstringifiableType 31 | 32 | 33 | Full Encoder Description 34 | ------------------------ 35 | 36 | .. autofunction:: pyjson5.encode 37 | 38 | .. autofunction:: pyjson5.encode_bytes 39 | 40 | .. autofunction:: pyjson5.encode_callback 41 | 42 | .. autofunction:: pyjson5.encode_io 43 | 44 | .. autofunction:: pyjson5.encode_noop 45 | 46 | .. autoclass:: pyjson5.Options 47 |     :members: 48 |     :inherited-members: 49 | 50 | 51 | Encoder Compatibility Functions 52 | ------------------------------- 53 | 54 | .. autofunction:: pyjson5.dump 55 | 56 | .. autofunction:: pyjson5.dumps 57 | 58 | 59 | Encoder Exceptions 60 | ------------------ 61 | 62 | .. inheritance-diagram:: 63 |     pyjson5.Json5Exception 64 |     pyjson5.Json5EncoderException 65 |     pyjson5.Json5UnstringifiableType 66 | 67 | .. autoexception:: pyjson5.Json5EncoderException 68 |     :members: 69 |     :inherited-members: 70 | 71 | .. autoexception:: pyjson5.Json5UnstringifiableType 72 |     :members: 73 |     :inherited-members: 74 | -------------------------------------------------------------------------------- /docs/exceptions.rst: -------------------------------------------------------------------------------- 1 | Exceptions 2 | ========== 3 | 4 | .. inheritance-diagram:: 5 |     pyjson5.Json5Exception 6 |     pyjson5.Json5EncoderException 7 |     pyjson5.Json5UnstringifiableType 8 |     pyjson5.Json5DecoderException 9 |     pyjson5.Json5NestingTooDeep 10 |     pyjson5.Json5EOF 11 |     pyjson5.Json5IllegalCharacter 12 |     pyjson5.Json5ExtraData 13 |     pyjson5.Json5IllegalType 14 | 15 | .. autoexception:: pyjson5.Json5Exception 16 |     :members: 17 |     :inherited-members: 18 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | PyJSON5 2 | ======= 3 | 4 | A JSON5 serializer and parser library for Python 3.7 and later. 5 | 6 | 7 | The serializer returns ASCII data that can safely be used in an HTML template. 8 | Apostrophes, ampersands, greater-than, and less-than signs are encoded as 9 | Unicode escape sequences. E.g.
this snippet is safe for any and all input: 10 | 11 | .. code:: html 12 | 13 | "show message" 14 | 15 | Unless the input contains infinite or NaN values, the result will be valid 16 | `JSON `_ data. 17 | 18 | 19 | All valid `JSON5 1.0.0 `_ and 20 | `JSON `_ data can be read, 21 | unless the nesting level is absurdly high. 22 | 23 | 24 | Installation 25 | ------------ 26 | 27 | .. code:: bash 28 | 29 |     $ pip install pyjson5 30 | 31 | 32 | Table of Contents 33 | ----------------- 34 | 35 | .. toctree:: 36 |     :maxdepth: 2 37 | 38 |     encoder.rst 39 |     decoder.rst 40 |     exceptions.rst 41 |     performance.rst 42 |     changelog.md 43 | 44 | 45 | Quick Summary 46 | ------------- 47 | 48 | .. autosummary:: 49 | 50 |     ~pyjson5.decode 51 |     ~pyjson5.decode_buffer 52 |     ~pyjson5.decode_callback 53 |     ~pyjson5.decode_io 54 |     ~pyjson5.load 55 |     ~pyjson5.loads 56 |     ~pyjson5.encode 57 |     ~pyjson5.encode_bytes 58 |     ~pyjson5.encode_callback 59 |     ~pyjson5.encode_io 60 |     ~pyjson5.encode_noop 61 |     ~pyjson5.dump 62 |     ~pyjson5.dumps 63 |     ~pyjson5.Options 64 |     ~pyjson5.Json5EncoderException 65 |     ~pyjson5.Json5DecoderException 66 | 67 | 68 | Compatibility 69 | ------------- 70 | 71 | At least CPython / PyPy 3.7 and a C++11-compatible compiler (such as GCC 5.2+) are needed. 72 | 73 | 74 | ------------------------------------------------------------------------------- 75 | 76 | :ref:`Glossary / Index ` 77 | -------------------------------------------------------------------------------- /docs/performance.rst: -------------------------------------------------------------------------------- 1 | Performance 2 | =========== 3 | 4 | This library is written in Cython for better performance than a pure-Python implementation could give you. 5 | 6 | 7 | Decoder Performance 8 | ------------------- 9 | 10 | The library has about the same speed as the shipped ``json`` module for *pure* JSON data. 11 | 12 | * Dataset: https://github.com/zemirco/sf-city-lots-json 13 | * Version: Python 3.9.1+ (default, Feb 5 2021, 13:46:56) 14 | * CPU: AMD Ryzen 7 2700 @ 3.7GHz 15 | * :func:`pyjson5.decode`: **2.08 s** ± 7.49 ms per loop *(lower is better)* 16 | * :func:`json.loads`: **2.71 s** ± 12.1 ms per loop 17 | * The decoder works correctly: ``json.loads(content) == pyjson5.loads(content)`` 18 | 19 | 20 | Encoder Performance 21 | ------------------- 22 | 23 | The encoder generates pure JSON data if there are no infinite or NaN values in the input, which are invalid in JSON. 24 | The serialized data is XML-safe, i.e. there are no chevrons ``<>``, ampersands ``&``, apostrophes ``'``, or control characters in the output. 25 | The output is always ASCII regardless of whether you call :func:`pyjson5.encode` or :func:`pyjson5.encode_bytes`.
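Before the numbers, a quick sanity sketch of these guarantees (the sample object is made up for illustration):

.. code:: python

    import pyjson5

    obj = {"msg": "<b>Hello & 'goodbye'</b>"}

    encoded = pyjson5.encode(obj)           # ``str``; encode_bytes() returns ASCII ``bytes``
    assert encoded.isascii()                # plain ASCII output
    assert not set(encoded) & set("<>&'")   # emitted as \uXXXX escapes instead
    assert pyjson5.decode(encoded) == obj   # the output is still valid JSON/JSON5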
26 | 27 | * Dataset: https://github.com/zemirco/sf-city-lots-json 28 | * Python 3.9.1+ (default, Feb 5 2021, 13:46:56) 29 | * CPU: AMD Ryzen 7 2700 @ 3.7GHz 30 | * :func:`pyjson5.encode`: **1.37** s ± 19.2 per loop *(lower is better)* 31 | * :func:`json.dumps`: **3.66** s ± 72.6 ms per loop 32 | * :func:`json.dumps` + :func:`xml.sax.saxutils.escape`: **4.01** s ± 21.3 ms per loop 33 | * The encoder works correcty: ``obj == json.loads(pyjson5.encode(obj))`` 34 | 35 | 36 | Benchmark 37 | --------- 38 | 39 | Using `Ultrajson's benchmark `_ 40 | you can tell for which kind of data PyJSON5 is fast, and for which data it is slow in comparison *(higher is better)*: 41 | 42 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 43 | | | json | pyjson5 | ujson | orjson | 44 | +===========================================================+=============+============+============+============+ 45 | | **Array with 256 doubles** | | | | | 46 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 47 | | encode | 6,425 | 81,202 | 28,966 | 83,836 | 48 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 49 | | decode | 16,759 | 34,801 | 34,794 | 80,655 | 50 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 51 | | **Array with 256 strings** | | | | | 52 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 53 | | encode | 36,969 | 73,165 | 35,574 | 113,082 | 54 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 55 | | decode | 42,730 | 38,542 | 38,386 | 60,732 | 56 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 57 | | **Array with 256 UTF-8 strings** | | | | | 58 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 59 | | encode | 3,458 | 3,134 | 4,024 | 31,677 | 60 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 61 | | decode | 2,428 | 2,498 | 2,491 | 1,750 | 62 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 63 | | **Array with 256 True values** | | | | | 64 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 65 | | encode | 130,441 | 282,703 | 131,279 | 423,371 | 66 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 67 | | decode | 220,657 | 262,690 | 264,485 | 262,283 | 68 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 69 | | **Array with 256 dict{string, int} pairs** | | | | | 70 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 71 | | encode | 11,621 | 10,014 | 18,148 | 73,905 | 72 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 73 | | decode | 17,802 | 19,406 | 19,391 | 23,478 | 74 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 75 | | **Dict with 256 arrays 
with 256 dict{string, int} pairs** | | | | | 76 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 77 | | encode | 40 | 38 | 68 | 213 | 78 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 79 | | decode | 43 | 49 | 48 | 51 | 80 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 81 | | **Medium complex object** | | | | | 82 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 83 | | encode | 8,704 | 11,922 | 15,319 | 49,677 | 84 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 85 | | decode | 12,567 | 14,042 | 13,985 | 19,481 | 86 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 87 | | **Complex object** | | | | | 88 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 89 | | encode | 672 | 909 | 731 | | 90 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 91 | | decode | 462 | 700 | 700 | | 92 | +-----------------------------------------------------------+-------------+------------+------------+------------+ 93 | 94 | * `ujson `_ == 4.0.3.dev9 95 | * `orjson `_ == 3.5.1 96 | -------------------------------------------------------------------------------- /pyjson5.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: embedsignature = True, language_level = 3, warn.undeclared = True, warn.unreachable = True, warn.maybe_uninitialized = True 3 | 4 | # SPDX-License-Identifier: MIT OR Apache-2.0 5 | # SPDX-FileCopyrightText: 2018-2025 René Kijewski 6 | 7 | include 'src/_imports.pyx' 8 | include 'src/_constants.pyx' 9 | 10 | include 'src/_exceptions.pyx' 11 | include 'src/_exceptions_decoder.pyx' 12 | include 'src/_exceptions_encoder.pyx' 13 | include 'src/_raise_decoder.pyx' 14 | include 'src/_raise_encoder.pyx' 15 | 16 | include 'src/_unicode.pyx' 17 | 18 | include 'src/_reader_ucs.pyx' 19 | include 'src/_reader_callback.pyx' 20 | include 'src/_readers.pyx' 21 | include 'src/_decoder.pyx' 22 | 23 | include 'src/_writers.pyx' 24 | include 'src/_writer_reallocatable.pyx' 25 | include 'src/_writer_callback.pyx' 26 | include 'src/_writer_noop.pyx' 27 | include 'src/_encoder_options.pyx' 28 | include 'src/_encoder.pyx' 29 | 30 | include 'src/_exports.pyx' 31 | include 'src/_legacy.pyx' 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT OR Apache-2.0 2 | # SPDX-FileCopyrightText: 2018-2025 René Kijewski 3 | 4 | [build-system] 5 | requires = [ 6 | "Cython < 4, >= 0.29", 7 | "setuptools >= 61", 8 | ] 9 | build-backend = "setuptools.build_meta" 10 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | build 2 | colorama == 0.4.* 3 | cython < 4, >= 0.29 4 | more_itertools == 10.* 5 | mypy 6 | setuptools >= 61 7 | wheel 8 | 9 | # keep synchronous to requirements-readthedocs.txt 10 | docutils == 0.21.* 11 | furo 
12 | myst-parser == 4.* 13 | sphinx == 8.* 14 | sphinx-autodoc-typehints == 3.* 15 | -------------------------------------------------------------------------------- /requirements-readthedocs.txt: -------------------------------------------------------------------------------- 1 | # keep synchronous to pyproject.toml 2 | # keep synchronous to src/VERSION.inc 3 | pyjson5 == 1.6.9 4 | 5 | # keep synchronous to requirements-dev.txt 6 | docutils == 0.21.* 7 | furo 8 | myst-parser == 4.* 9 | sphinx == 8.* 10 | sphinx-autodoc-typehints == 3.* 11 | -------------------------------------------------------------------------------- /scripts/make_decoder_recursive_select.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from logging import basicConfig, DEBUG 5 | from pathlib import Path 6 | 7 | from more_itertools import chunked 8 | 9 | 10 | def generate(out): 11 | lst = ["DRS_fail"] * 128 12 | lst[ord("n")] = "DRS_null" 13 | lst[ord("t")] = "DRS_true" 14 | lst[ord("f")] = "DRS_false" 15 | lst[ord("I")] = "DRS_inf" 16 | lst[ord("N")] = "DRS_nan" 17 | lst[ord('"')] = "DRS_string" 18 | lst[ord("'")] = "DRS_string" 19 | lst[ord("{")] = "DRS_recursive" 20 | lst[ord("[")] = "DRS_recursive" 21 | for c in "+-.0123456789": 22 | lst[ord(c)] = "DRS_number" 23 | 24 | print("#ifndef JSON5EncoderCpp_decoder_recursive_select", file=out) 25 | print("#define JSON5EncoderCpp_decoder_recursive_select", file=out) 26 | print(file=out) 27 | print("// GENERATED FILE", file=out) 28 | print("// All changes will be lost.", file=out) 29 | print(file=out) 30 | print("#include ", file=out) 31 | print(file=out) 32 | print("namespace JSON5EncoderCpp {", file=out) 33 | print("inline namespace {", file=out) 34 | print(file=out) 35 | print("enum DrsKind : std::uint8_t {", file=out) 36 | print( 37 | " DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive", 38 | file=out, 39 | ) 40 | print("};", file=out) 41 | print(file=out) 42 | print("static const DrsKind drs_lookup[128] = {", file=out) 43 | for chunk in chunked(lst, 8): 44 | print(" ", end="", file=out) 45 | for t in chunk: 46 | print(" ", t, ",", sep="", end="", file=out) 47 | print(file=out) 48 | print("};", file=out) 49 | print(file=out) 50 | print("} // anonymous inline namespace", sep="", file=out) 51 | print("} // namespace JSON5EncoderCpp", sep="", file=out) 52 | print(file=out) 53 | print("#endif", sep="", file=out) 54 | 55 | 56 | argparser = ArgumentParser(description="Generate src/_decoder_recursive_select.hpp") 57 | argparser.add_argument( 58 | "input", nargs="?", type=Path, default=Path("src/_decoder_recursive_select.hpp") 59 | ) 60 | 61 | if __name__ == "__main__": 62 | basicConfig(level=DEBUG) 63 | args = argparser.parse_args() 64 | with open(str(args.input.resolve()), "wt") as out: 65 | generate(out) 66 | -------------------------------------------------------------------------------- /scripts/make_escape_dct.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from logging import basicConfig, DEBUG 5 | from pathlib import Path 6 | 7 | 8 | def generate(f): 9 | unescaped = 0 10 | print("const EscapeDct::Items EscapeDct::items = {", file=f) 11 | for c in range(0x100): 12 | if c == ord("\\"): 13 | s = "\\\\" 14 | elif c == ord("\b"): 15 | s = "\\b" 16 | elif c == ord("\f"): 17 | s = "\\f" 18 | elif c == ord("\n"): 19 
| s = "\\n" 20 | elif c == ord("\r"): 21 | s = "\\r" 22 | elif c == ord("\t"): 23 | s = "\\t" 24 | elif c == ord('"'): 25 | s = '\\"' 26 | elif (c < 0x20) or (c >= 0x7F) or (chr(c) in "'&<>\\"): 27 | s = f"\\u{c:04x}" 28 | else: 29 | s = f"{c:c}" 30 | if c < 128: 31 | unescaped |= 1 << c 32 | 33 | t = ( 34 | [str(len(s))] 35 | + [f"'{c}'" if c != "\\" else f"'\\\\'" for c in s] 36 | + ["0"] * 6 37 | ) 38 | l = ", ".join(t[:8]) 39 | print(f" {{ {l:35s} }}, /* 0x{c:02x} {chr(c)!r} */", file=f) 40 | print("};", file=f) 41 | 42 | escaped = unescaped ^ ((1 << 128) - 1) 43 | print( 44 | f"const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x{(escaped & ((1 << 64) - 1)):016x});", 45 | file=f, 46 | ) 47 | print( 48 | f"const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x{(escaped >> 64):016x});", 49 | file=f, 50 | ) 51 | 52 | 53 | argparser = ArgumentParser(description="Generate src/_escape_dct.hpp") 54 | argparser.add_argument( 55 | "input", nargs="?", type=Path, default=Path("src/_escape_dct.hpp") 56 | ) 57 | 58 | if __name__ == "__main__": 59 | basicConfig(level=DEBUG) 60 | args = argparser.parse_args() 61 | with open(str(args.input.resolve()), "wt") as out: 62 | generate(out) 63 | -------------------------------------------------------------------------------- /scripts/make_unicode_categories.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from collections import defaultdict, OrderedDict 5 | from functools import reduce 6 | from pathlib import Path 7 | from re import match 8 | 9 | from more_itertools import chunked 10 | 11 | 12 | def main(input_file, output_file): 13 | Nothing = 0 14 | WhiteSpace = 1 15 | IdentifierStart = 2 16 | IdentifierPart = 3 17 | 18 | cat_indices = { 19 | "zs": WhiteSpace, 20 | "lc": IdentifierStart, 21 | "ll": IdentifierStart, 22 | "lm": IdentifierStart, 23 | "lo": IdentifierStart, 24 | "lt": IdentifierStart, 25 | "lu": IdentifierStart, 26 | "nl": IdentifierStart, 27 | "mc": IdentifierPart, 28 | "mn": IdentifierPart, 29 | "pc": IdentifierPart, 30 | "nd": IdentifierPart, 31 | } 32 | 33 | planes = defaultdict(lambda: [0] * 0x100) 34 | 35 | for input_line in input_file: 36 | m = match(r"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z][a-z])", input_line) 37 | if not m: 38 | continue 39 | start, end, cat = m.groups() 40 | 41 | idx = cat_indices.get(cat.lower()) 42 | if idx: 43 | end = int(end or start, 16) 44 | start = int(start, 16) 45 | for i in range(start, end + 1): 46 | planes[i // 0x100][i % 0x100] = idx 47 | 48 | # per: https://spec.json5.org/#white-space 49 | for i in (0x9, 0xA, 0xB, 0xC, 0xD, 0x20, 0xA0, 0x2028, 0x2028, 0x2029, 0xFEFF): 50 | planes[i // 0x100][i % 0x100] = WhiteSpace 51 | 52 | # per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6 53 | for i in (ord("$"), ord("_"), ord("\\")): 54 | planes[i // 0x100][i % 0x100] = IdentifierStart 55 | 56 | # per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6 57 | for i in (0x200C, 0x200D): 58 | planes[i // 0x100][i % 0x100] = IdentifierPart 59 | 60 | # 0x110000 == NO_EXTRA_DATA is spuriously used as input at the end of an item. 61 | # FIXME: this should not be needed. %s/18/17/g once the problem it fixed in the decoder. 
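    # (The tables below therefore cover 18 * 0x100 demiplanes: the 17 Unicode
    #  planes plus one extra row for this 0x110000 sentinel. Each byte of a
    #  demiplane packs four 2-bit category codes, which is why the generated
    #  C++ reads "byte = codepoint / 4, shift = 2 * (codepoint % 4)". Marking
    #  the sentinel as WhiteSpace lets it harmlessly end whatever item the
    #  decoder was reading.)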
62 | planes[0x0011_0000 // 0x100][0x0011_0000 % 0x100] = WhiteSpace 63 | 64 | print("#ifndef JSON5EncoderCpp_unicode_cat_of", file=output_file) 65 | print("#define JSON5EncoderCpp_unicode_cat_of", file=output_file) 66 | print(file=output_file) 67 | print("// GENERATED FILE", file=output_file) 68 | print("// All changes will be lost.", file=output_file) 69 | print(file=output_file) 70 | print("#include ", file=output_file) 71 | print(file=output_file) 72 | print("namespace JSON5EncoderCpp {", file=output_file) 73 | print("inline namespace {", file=output_file) 74 | print(file=output_file) 75 | print("static unsigned unicode_cat_of(std::uint32_t codepoint) {", file=output_file) 76 | 77 | demiplane_to_idx = OrderedDict() # demiplane_idx → data_idx 78 | data_to_idx = [None] * 18 * 0x100 # demiplane data → data_idx 79 | print(" // A 'demiplane' is a 1/256th of a Unicode plane.", file=output_file) 80 | print( 81 | " // This way a 'demiplane' fits nicely into a cache line.", file=output_file 82 | ) 83 | print( 84 | " alignas(64) static const std::uint8_t demiplane_data[][0x100 / 4] = {", 85 | file=output_file, 86 | ) 87 | for i in range(18 * 0x100): 88 | plane_data = "" 89 | plane = planes[i] 90 | while plane and plane[-1] == 0: 91 | plane.pop() 92 | 93 | for chunk in chunked(plane, 4 * 16): 94 | plane_data += " " 95 | for value in chunked(chunk, 4): 96 | value = reduce(lambda a, i: ((a << 2) | i), reversed(value), 0) 97 | plane_data += "0x{:02x}u, ".format(value) 98 | plane_data = plane_data[:-1] + "\n" 99 | 100 | produced_idx = demiplane_to_idx.get(plane_data) 101 | if produced_idx is None: 102 | produced_idx = len(demiplane_to_idx) 103 | print( 104 | " {{ // {} -> 0x{:02x}u".format(i, produced_idx), 105 | file=output_file, 106 | ) 107 | print(plane_data, file=output_file, end="") 108 | print(" },", file=output_file) 109 | demiplane_to_idx[plane_data] = produced_idx 110 | 111 | data_to_idx[i] = produced_idx 112 | print(" };", file=output_file) 113 | print(file=output_file) 114 | 115 | snd_lookup_lines = OrderedDict() 116 | snd_lookup_indices = OrderedDict() 117 | print( 118 | " alignas(64) static const std::uint8_t demiplane_snd_data[][64] = {", 119 | file=output_file, 120 | ) 121 | for start in range(0, 18 * 0x100, 64): 122 | snd_lookup_line: str 123 | for i in range(start, min(start + 64, 18 * 0x100)): 124 | if i % 16 == 0: 125 | if i % 64 == 0: 126 | snd_lookup_line = " " 127 | else: 128 | snd_lookup_line += "\n " 129 | snd_lookup_line += " 0x{:02x}u,".format(data_to_idx[i]) 130 | 131 | snd_lookup_idx = snd_lookup_lines.get(snd_lookup_line, None) 132 | if snd_lookup_idx is None: 133 | snd_lookup_idx = len(snd_lookup_lines) 134 | print( 135 | " {{ // {} -> 0x{:02x}u".format(start // 64, snd_lookup_idx), 136 | file=output_file, 137 | ) 138 | print(snd_lookup_line, file=output_file) 139 | print(" },", file=output_file) 140 | snd_lookup_lines[snd_lookup_line] = snd_lookup_idx 141 | snd_lookup_indices[start // 64] = snd_lookup_idx 142 | print(" };", file=output_file) 143 | print(file=output_file) 144 | 145 | print( 146 | " alignas(64) static const std::uint8_t demiplane_snd[18 * 0x100 / 64] = {{".format( 147 | 68 148 | ), 149 | end="", 150 | file=output_file, 151 | ) 152 | for i in range(18 * 0x100 // 64): 153 | if i % 16 == 0: 154 | print("\n ", end="", file=output_file) 155 | print(" 0x{:02x}u,".format(snd_lookup_indices[i]), end="", file=output_file) 156 | print(file=output_file) 157 | print(" };", file=output_file) 158 | print(file=output_file) 159 | 160 | print(" if 
(JSON5EncoderCpp_expect(codepoint < 256, true)) {", file=output_file) 161 | print( 162 | " return (demiplane_data[0][codepoint / 4] >> (2 * (codepoint % 4))) % 4;", 163 | file=output_file, 164 | ) 165 | print(" }", file=output_file) 166 | print(file=output_file) 167 | print(" if (codepoint > 0x110000) codepoint = 0x110000;", file=output_file) 168 | print(" std::uint32_t fst_row = codepoint / 0x100;", file=output_file) 169 | print(" std::uint32_t fst_col = codepoint % 0x100;", file=output_file) 170 | print(" std::uint32_t snd_row = fst_row / 64;", file=output_file) 171 | print(" std::uint32_t snd_col = fst_row % 64;", file=output_file) 172 | print(file=output_file) 173 | print( 174 | " const std::uint8_t *cell = demiplane_data[demiplane_snd_data[demiplane_snd[snd_row]][snd_col]];", 175 | file=output_file, 176 | ) 177 | print( 178 | " return (cell[fst_col / 4] >> (2 * (fst_col % 4))) % 4;", file=output_file 179 | ) 180 | print("}", file=output_file) 181 | print(file=output_file) 182 | print("}", file=output_file) 183 | print("}", file=output_file) 184 | print(file=output_file) 185 | print("#endif", file=output_file) 186 | 187 | 188 | argparser = ArgumentParser(description="Generate Unicode Category Matcher(s)") 189 | argparser.add_argument("input", nargs="?", type=Path, default=Path("/dev/stdin")) 190 | argparser.add_argument("output", nargs="?", type=Path, default=Path("/dev/stdout")) 191 | 192 | if __name__ == "__main__": 193 | args = argparser.parse_args() 194 | with open(str(args.input.resolve()), "rt") as input_file, open( 195 | str(args.output.resolve()), "wt" 196 | ) as output_file: 197 | raise SystemExit(main(input_file, output_file)) 198 | -------------------------------------------------------------------------------- /scripts/run-minefield-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from logging import basicConfig, INFO, getLogger 5 | from os import chdir, name 6 | from pathlib import Path 7 | from subprocess import Popen 8 | from sys import executable 9 | 10 | 11 | argparser = ArgumentParser(description="Run JSON5 parser tests") 12 | argparser.add_argument( 13 | "tests", 14 | nargs="?", 15 | type=Path, 16 | default=Path("third-party/JSONTestSuite/test_parsing"), 17 | ) 18 | 19 | suffix_implies_success = { 20 | "json": True, 21 | "json5": True, 22 | "txt": False, 23 | } 24 | 25 | if __name__ == "__main__": 26 | basicConfig(level=INFO) 27 | logger = getLogger(__name__) 28 | chdir(Path(__file__).absolute().parent.parent) 29 | 30 | good = bad = errors = severe = 0 31 | 32 | try: 33 | from colorama import init, Fore 34 | 35 | init() 36 | except Exception: 37 | code_severe = "SEVERE" 38 | code_good = "GOOD" 39 | code_bad = "BAD" 40 | code_ignored = "IGNORED" 41 | reset = "" 42 | else: 43 | if name != "nt": 44 | code_severe = Fore.RED + "😱" 45 | code_good = Fore.CYAN + "😄" 46 | code_bad = Fore.YELLOW + "😠" 47 | code_ignored = Fore.BLUE + "🙅" 48 | else: 49 | code_severe = Fore.RED + "SEVERE" 50 | code_good = Fore.CYAN + "GOOD" 51 | code_bad = Fore.YELLOW + "BAD" 52 | code_ignored = Fore.BLUE + "IGNORED" 53 | reset = Fore.RESET 54 | 55 | script = str(Path(__file__).absolute().parent / "transcode-to-json.py") 56 | 57 | args = argparser.parse_args() 58 | index = 0 59 | for path in sorted(args.tests.glob("?_?*.json")): 60 | category, name = path.stem.split("_", 1) 61 | if category not in "yni": 62 | continue 63 | 64 | if category in "ni": 65 | # ignore anything but tests 
that are expected to pass for now 66 | continue 67 | 68 | try: 69 | # ignore any UTF-8 errors 70 | with open(str(path.resolve()), "rt") as f: 71 | f.read() 72 | except Exception: 73 | continue 74 | 75 | index += 1 76 | try: 77 | p = Popen((executable, script, str(path))) 78 | outcome = p.wait(5) 79 | except Exception: 80 | logger.error("Error while testing: %s", path, exc_info=True) 81 | errors += 1 82 | continue 83 | 84 | if outcome not in (0, 1): 85 | code = code_severe 86 | severe += 1 87 | elif category == "y": 88 | if outcome == 0: 89 | code = code_good 90 | good += 1 91 | else: 92 | code = code_bad 93 | bad += 1 94 | else: 95 | code = code_ignored 96 | 97 | print( 98 | "#", 99 | index, 100 | " ", 101 | code, 102 | " | " "Category <", 103 | category, 104 | "> | " "Test <", 105 | name, 106 | "> | " "Actual <", 107 | "pass" if outcome == 0 else "FAIL", 108 | ">", 109 | reset, 110 | sep="", 111 | ) 112 | 113 | is_severe = severe > 0 114 | is_good = bad == 0 115 | code = code_severe if is_severe else code_good if is_good else code_bad 116 | print() 117 | print( 118 | code, 119 | " | ", 120 | good, 121 | " correct outcomes | ", 122 | bad, 123 | " wrong outcomes | ", 124 | severe, 125 | " severe errors", 126 | reset, 127 | sep="", 128 | ) 129 | raise SystemExit(2 if is_severe else 0 if is_good else 1) 130 | -------------------------------------------------------------------------------- /scripts/run-tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from logging import basicConfig, INFO, getLogger 5 | from os import chdir, name 6 | from pathlib import Path 7 | from subprocess import Popen 8 | from sys import executable 9 | 10 | 11 | argparser = ArgumentParser(description="Run JSON5 parser tests") 12 | argparser.add_argument( 13 | "tests", nargs="?", type=Path, default=Path("third-party/json5-tests") 14 | ) 15 | 16 | suffix_implies_success = { 17 | ".json": True, 18 | ".json5": True, 19 | ".txt": False, 20 | } 21 | 22 | if __name__ == "__main__": 23 | basicConfig(level=INFO) 24 | logger = getLogger(__name__) 25 | chdir(Path(__file__).absolute().parent.parent) 26 | 27 | try: 28 | from colorama import init, Fore 29 | 30 | init() 31 | except Exception: 32 | code_severe = "SEVERE" 33 | code_good = "GOOD" 34 | code_bad = "BAD" 35 | reset = "" 36 | else: 37 | if name != "nt": 38 | code_severe = Fore.RED + "😱" 39 | code_good = Fore.CYAN + "😄" 40 | code_bad = Fore.YELLOW + "😠" 41 | else: 42 | code_severe = Fore.RED + "SEVERE" 43 | code_good = Fore.CYAN + "GOOD" 44 | code_bad = Fore.YELLOW + "BAD" 45 | reset = Fore.RESET 46 | 47 | good = 0 48 | bad = 0 49 | severe = 0 50 | 51 | script = str(Path(__file__).absolute().parent / "transcode-to-json.py") 52 | 53 | args = argparser.parse_args() 54 | index = 0 55 | for path in sorted(args.tests.glob("*/*.*")): 56 | kind = path.suffix.split(".")[-1] 57 | expect_success = suffix_implies_success.get(path.suffix) 58 | if expect_success is None: 59 | continue 60 | 61 | index += 1 62 | category = path.parent.name 63 | name = path.stem 64 | try: 65 | p = Popen((executable, script, str(path))) 66 | outcome = p.wait(5) 67 | except Exception: 68 | logger.error("Error while testing: %s", path, exc_info=True) 69 | severe += 1 70 | continue 71 | 72 | is_success = outcome == 0 73 | is_failure = outcome == 1 74 | is_severe = outcome not in (0, 1) 75 | is_good = is_success if expect_success else is_failure 76 | code = code_severe if is_severe else code_good if 
is_good else code_bad 77 | print( 78 | "#", 79 | index, 80 | " ", 81 | code, 82 | " " "Category <", 83 | category, 84 | "> | " "Test <", 85 | name, 86 | "> | " "Data <", 87 | kind, 88 | "> | " "Expected <", 89 | "pass" if expect_success else "FAIL", 90 | "> | " "Actual <", 91 | "pass" if is_success else "FAIL", 92 | ">", 93 | reset, 94 | sep="", 95 | ) 96 | if is_severe: 97 | severe += 1 98 | elif is_good: 99 | good += 1 100 | else: 101 | bad += 1 102 | 103 | is_severe = severe > 0 104 | is_good = bad == 0 105 | code = code_severe if is_severe else code_good if is_good else code_bad 106 | print() 107 | print( 108 | code, 109 | " ", 110 | good, 111 | " × correct outcome | ", 112 | bad, 113 | " × wrong outcome | ", 114 | severe, 115 | " × severe errors", 116 | reset, 117 | sep="", 118 | ) 119 | raise SystemExit(2 if is_severe else 0 if is_good else 1) 120 | -------------------------------------------------------------------------------- /scripts/sha512sum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from hashlib import sha512 5 | from logging import basicConfig, DEBUG 6 | from pathlib import Path 7 | from sys import argv, exit 8 | 9 | 10 | argparser = ArgumentParser( 11 | description="sha512sum replacement if coreutils isn't installed" 12 | ) 13 | argparser.add_argument("-c", "--check", type=Path, required=True) 14 | 15 | if __name__ == "__main__": 16 | basicConfig(level=DEBUG) 17 | args = argparser.parse_args() 18 | errors = 0 19 | with open(str(args.check.resolve()), "rt") as f: 20 | for line in f: 21 | expected_hash, filename = line.rstrip("\r\n").split(" ", 1) 22 | with open(str(Path(filename).resolve()), "rb") as f: 23 | actual_hash = sha512(f.read()).hexdigest() 24 | 25 | if expected_hash == actual_hash: 26 | print(filename + ": OK") 27 | else: 28 | errors += 1 29 | print(filename + ": FAILED") 30 | 31 | if errors: 32 | print("%s: WARNING: %s computed checksum did NOT match" % (argv[0], errors)) 33 | exit(1) 34 | else: 35 | exit(0) 36 | -------------------------------------------------------------------------------- /scripts/transcode-to-json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from collections.abc import Mapping, Sequence 5 | from codecs import open as codecs_open 6 | from itertools import zip_longest 7 | from json import loads 8 | from logging import basicConfig, DEBUG, getLogger 9 | from math import isnan 10 | from pathlib import Path 11 | 12 | from pyjson5 import decode, encode 13 | 14 | 15 | def eq_with_nans(left, right): 16 | if left == right: 17 | return True 18 | elif isnan(left): 19 | return isnan(right) 20 | elif isnan(right): 21 | return False 22 | 23 | if not isinstance(left, Sequence) or not isinstance(right, Sequence): 24 | return False 25 | elif len(left) != len(right): 26 | return False 27 | 28 | left_mapping = isinstance(left, Mapping) 29 | right_mapping = isinstance(right, Mapping) 30 | if left_mapping != right_mapping: 31 | return False 32 | 33 | sentinel = object() 34 | if left_mapping: 35 | for k, left_value in left.items(): 36 | right_value = right.pop(k, sentinel) 37 | if not eq_with_nans(left_value, right_value): 38 | return False 39 | if right: 40 | # extraneous keys 41 | return False 42 | else: 43 | for l, r in zip_longest(left, right, fillvalue=sentinel): 44 | if not eq_with_nans(l, r): 45 | return False 46 | 47 | return True 
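# Rough behavioural sketch of eq_with_nans (hypothetical REPL calls, not part
# of the transcoding flow below):
#
#     >>> eq_with_nans(float("nan"), float("nan"))
#     True
#     >>> eq_with_nans(float("nan"), 1.0)
#     False
#     >>> eq_with_nans({"a": [1.0, 2.0]}, {"a": [1.0, 2.0]})
#     True
#
# NaN never compares equal to itself under ==, so a plain comparison of the
# json and pyjson5 results would spuriously differ; this helper treats two
# NaNs as equivalent and otherwise falls back to an element-wise comparison
# of mappings and sequences.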
48 | 49 | 50 | argparser = ArgumentParser(description="Run JSON5 parser tests") 51 | argparser.add_argument("input", type=Path) 52 | argparser.add_argument("output", nargs="?", type=Path) 53 | 54 | if __name__ == "__main__": 55 | basicConfig(level=DEBUG) 56 | logger = getLogger(__name__) 57 | 58 | args = argparser.parse_args() 59 | try: 60 | with codecs_open(args.input.resolve(), "r", "UTF-8") as f: 61 | data = f.read() 62 | except Exception: 63 | logger.error("Could not even read file: %s", args.input, exc_info=True) 64 | raise SystemExit(-1) 65 | 66 | try: 67 | obj = decode(data) 68 | except Exception: 69 | logger.error("Could not parse content: %s", args.input) 70 | raise SystemExit(1) 71 | 72 | try: 73 | json_obj = loads(data) 74 | except Exception: 75 | pass 76 | else: 77 | if not eq_with_nans(obj, json_obj): 78 | logger.error( 79 | "JSON and PyJSON5 did not read the same data: %s, %r != %r", 80 | args.input, 81 | obj, 82 | json_obj, 83 | ) 84 | raise SystemExit(2) 85 | 86 | try: 87 | data = encode(obj) 88 | except Exception: 89 | logger.error("Could open stringify content: %s", args.input, exc_info=True) 90 | raise SystemExit(2) 91 | 92 | if args.output is not None: 93 | try: 94 | with codecs_open(args.output.resolve(), "w", "UTF-8") as f: 95 | f.write(data) 96 | except Exception: 97 | logger.error("Could open output file: %s", args.output, exc_info=True) 98 | raise SystemExit(-1) 99 | 100 | raise SystemExit(0) 101 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | # keep synchronous to requirements-readthedocs.txt 3 | # keep synchronous to src/VERSION.inc 4 | version = 1.6.9 5 | 6 | name = pyjson5 7 | description = JSON5 serializer and parser for Python 3 written in Cython. 
8 | url = https://github.com/Kijewski/pyjson5 9 | project_urls = 10 | Changelog = https://github.com/Kijewski/pyjson5/blob/main/CHANGELOG.md 11 | Code = https://github.com/Kijewski/pyjson5 12 | Documentation = https://pyjson5.readthedocs.io/ 13 | Download = https://pypi.org/project/pyjson5/ 14 | Homepage = https://github.com/Kijewski/pyjson5 15 | Tracker = https://github.com/Kijewski/pyjson5/issues 16 | 17 | author = René Kijewski 18 | maintainer = René Kijewski 19 | author_email = pypi.org@k6i.de 20 | maintainer_email = pypi.org@k6i.de 21 | 22 | long_description = file: README.rst 23 | long_description_content_type = text/x-rst 24 | 25 | license = MIT OR Apache-2.0 26 | license_files = LICENSE.MIT, LICENSE.Apache 27 | 28 | classifiers = 29 | Development Status :: 5 - Production/Stable 30 | Intended Audience :: Developers 31 | Intended Audience :: System Administrators 32 | License :: OSI Approved :: Apache Software License 33 | License :: OSI Approved :: MIT License 34 | Operating System :: OS Independent 35 | Programming Language :: Cython 36 | Programming Language :: JavaScript 37 | Programming Language :: Python :: 3 38 | Programming Language :: Python :: 3.7 39 | Programming Language :: Python :: 3.8 40 | Programming Language :: Python :: 3.9 41 | Programming Language :: Python :: 3.10 42 | Programming Language :: Python :: 3.11 43 | Programming Language :: Python :: 3.12 44 | Programming Language :: Python :: 3.13 45 | Programming Language :: Python :: 3 :: Only 46 | Programming Language :: Python :: Implementation :: CPython 47 | Topic :: Text Processing :: General 48 | 49 | [options] 50 | zip_safe = False 51 | 52 | python_requires = ~= 3.7 53 | setup_requires = 54 | Cython 55 | setuptools 56 | 57 | include_package_data = True 58 | packages = pyjson5 59 | package_dir = 60 | = src 61 | 62 | [options.package_data] 63 | pyjson5 = __init__.pyi, py.typed 64 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from setuptools import setup, Extension 4 | 5 | extra_compile_args = [ 6 | "-std=c++11", 7 | "-O3", 8 | "-fPIC", 9 | "-g0", 10 | "-pipe", 11 | "-fomit-frame-pointer", 12 | ] 13 | 14 | setup( 15 | ext_modules=[ 16 | Extension( 17 | "pyjson5.pyjson5", 18 | sources=["pyjson5.pyx"], 19 | include_dirs=["src"], 20 | extra_compile_args=extra_compile_args, 21 | extra_link_args=extra_compile_args, 22 | language="c++", 23 | ) 24 | ], 25 | ) 26 | -------------------------------------------------------------------------------- /src/DESCRIPTION.inc: -------------------------------------------------------------------------------- 1 | """\ 2 | PyJSON5\n\ 3 | =======\n\ 4 | \n\ 5 | A `JSON5 `_ serializer and parser library for Python 3 written in Cython.\n\ 6 | \n\ 7 | The serializer returns ASCII data that can safely be used in an HTML template.\n\ 8 | Apostrophes, ampersands, greater-than, and less-then signs are encoded as\n\ 9 | unicode escaped sequences. E.g. this snippet is safe for any and all input:\n\ 10 | \n\ 11 | .. 
code:: python\n\ 12 | \n\ 13 | \"show message\"\n\ 14 | \n\ 15 | Unless the input contains infinite or NaN values, the result will be valid\n\ 16 | JSON data.\n\ 17 | \n\ 18 | All valid JSON5 1.0.0 and `JSON `_ data can be read,\n\ 19 | unless the nesting level is absurdly high.\n\ 20 | """ 21 | -------------------------------------------------------------------------------- /src/VERSION.inc: -------------------------------------------------------------------------------- 1 | "1.6.9" 2 | -------------------------------------------------------------------------------- /src/_constants.pyx: -------------------------------------------------------------------------------- 1 | cdef object CONST_POS_NAN = float('+NaN') 2 | cdef object CONST_POS_INF = float('+Infinity') 3 | cdef object CONST_NEG_NAN = float('-NaN') 4 | cdef object CONST_NEG_INF = float('-Infinity') 5 | 6 | cdef object DATETIME_CLASSES = (date, time,) # issubclass(datetime, date) == True 7 | cdef object ORD_CLASSES = (unicode, bytes, bytearray,) 8 | 9 | cdef object UCS1_COMPATIBLE_CODECS = frozenset(( 10 | # ASCII 11 | 'ascii', 646, '646', 'us-ascii', 12 | # Latin-1 13 | 'latin_1', 'latin-1', 'iso-8859-1', 'iso8859-1', 14 | 8859, '8859', 'cp819', 'latin', 'latin1', 'l1', 15 | )) 16 | 17 | cdef object TEST_DECIMAL = Decimal('47.11') 18 | cdef object TEST_FLOAT = 47.11 19 | cdef object TEST_INT = 4711 20 | -------------------------------------------------------------------------------- /src/_decoder.pyx: -------------------------------------------------------------------------------- 1 | cdef enum: 2 | NO_EXTRA_DATA = 0x0011_0000 3 | 4 | 5 | cdef boolean _skip_single_line(ReaderRef reader) except False: 6 | cdef uint32_t c0 7 | while _reader_good(reader): 8 | c0 = _reader_get(reader) 9 | if _is_line_terminator(c0): 10 | break 11 | 12 | return True 13 | 14 | 15 | cdef boolean _skip_multiline_comment(ReaderRef reader) except False: 16 | cdef uint32_t c0 17 | cdef boolean seen_asterisk = False 18 | cdef Py_ssize_t comment_start = _reader_tell(reader) 19 | 20 | while True: 21 | if expect(not _reader_good(reader), False): 22 | break 23 | 24 | c0 = _reader_get(reader) 25 | if c0 == b'*': 26 | seen_asterisk = True 27 | elif seen_asterisk: 28 | if c0 == b'/': 29 | return True 30 | seen_asterisk = False 31 | 32 | _raise_unclosed(b'comment', comment_start) 33 | return False 34 | 35 | 36 | # data found 37 | # -1: exhausted 38 | # -2: exception 39 | cdef int32_t _skip_to_data_sub(ReaderRef reader, uint32_t c0) except -2: 40 | cdef int32_t c1 = 0 # silence warning 41 | cdef boolean seen_slash 42 | 43 | seen_slash = False 44 | while True: 45 | if c0 == b'/': 46 | if seen_slash: 47 | _skip_single_line(reader) 48 | seen_slash = False 49 | else: 50 | seen_slash = True 51 | elif c0 == b'*': 52 | if expect(not seen_slash, False): 53 | _raise_stray_character('asterisk', _reader_tell(reader)) 54 | 55 | _skip_multiline_comment(reader) 56 | seen_slash = False 57 | elif not _is_ws_zs(c0): 58 | c1 = cast_to_int32(c0) 59 | break 60 | elif expect(seen_slash, False): 61 | _raise_stray_character('slash', _reader_tell(reader)) 62 | 63 | if not _reader_good(reader): 64 | c1 = -1 65 | break 66 | 67 | c0 = _reader_get(reader) 68 | 69 | if expect(seen_slash, False): 70 | _raise_stray_character('slash', _reader_tell(reader)) 71 | 72 | return c1 73 | 74 | 75 | # data found 76 | # -1 exhausted 77 | # -2 exception 78 | cdef int32_t _skip_to_data(ReaderRef reader) except -2: 79 | cdef uint32_t c0 80 | cdef int32_t c1 81 | if _reader_good(reader): 82 | c0 = 
_reader_get(reader) 83 | c1 = _skip_to_data_sub(reader, c0) 84 | else: 85 | c1 = -1 86 | return c1 87 | 88 | 89 | cdef int32_t _get_hex_character(ReaderRef reader, Py_ssize_t length) except -1: 90 | cdef Py_ssize_t start 91 | cdef uint32_t c0 92 | cdef uint32_t result 93 | cdef Py_ssize_t index 94 | 95 | start = _reader_tell(reader) 96 | result = 0 97 | for index in range(length): 98 | result <<= 4 99 | if expect(not _reader_good(reader), False): 100 | _raise_unclosed(b'escape sequence', start) 101 | 102 | c0 = _reader_get(reader) 103 | if b'0' <= c0 <= b'9': 104 | result |= c0 - b'0' 105 | elif b'a' <= c0 <= b'f': 106 | result |= c0 - b'a' + 10 107 | elif b'A' <= c0 <= b'F': 108 | result |= c0 - b'A' + 10 109 | else: 110 | _raise_expected_s('hexadecimal character', start, c0) 111 | 112 | if expect(result > 0x10ffff, False): 113 | _raise_expected_s('Unicode code point', start, result) 114 | 115 | return cast_to_int32(result) 116 | 117 | 118 | # >= 0: character to append 119 | cdef int32_t _get_escaped_unicode_maybe_surrogate(ReaderRef reader, Py_ssize_t start) except -1: 120 | cdef uint32_t c0 121 | cdef uint32_t c1 122 | 123 | c0 = cast_to_uint32(_get_hex_character(reader, 4)) 124 | if expect(unicode_is_lo_surrogate(c0), False): 125 | _raise_expected_s('high surrogate before low surrogate', start, c0) 126 | elif not unicode_is_hi_surrogate(c0): 127 | return c0 128 | 129 | _accept_string(reader, b'\\u') 130 | 131 | c1 = cast_to_uint32(_get_hex_character(reader, 4)) 132 | if expect(not unicode_is_lo_surrogate(c1), False): 133 | _raise_expected_s('low surrogate', start, c1) 134 | 135 | return unicode_join_surrogates(c0, c1) 136 | 137 | 138 | # >= 0: character to append 139 | # -1: skip 140 | # < -1: -(next character + 1) 141 | cdef int32_t _get_escape_sequence(ReaderRef reader, 142 | Py_ssize_t start) except 0x7ffffff: 143 | cdef uint32_t c0 144 | 145 | c0 = _reader_get(reader) 146 | if expect(not _reader_good(reader), False): 147 | _raise_unclosed(b'string', start) 148 | 149 | if c0 == b'b': 150 | return 0x0008 151 | elif c0 == b'f': 152 | return 0x000c 153 | elif c0 == b'n': 154 | return 0x000a 155 | elif c0 == b'r': 156 | return 0x000d 157 | elif c0 == b't': 158 | return 0x0009 159 | elif c0 == b'v': 160 | return 0x000b 161 | elif c0 == b'0': 162 | return 0x0000 163 | elif c0 == b'x': 164 | return _get_hex_character(reader, 2) 165 | elif c0 == b'u': 166 | return _get_escaped_unicode_maybe_surrogate(reader, start) 167 | elif c0 == b'U': 168 | return _get_hex_character(reader, 8) 169 | elif expect(b'1' <= c0 <= b'9', False): 170 | _raise_expected_s('escape sequence', start, c0) 171 | return -2 172 | elif _is_line_terminator(c0): 173 | if c0 != 0x000D: 174 | return -1 175 | 176 | c0 = _reader_get(reader) 177 | if c0 == 0x000A: 178 | return -1 179 | 180 | return -cast_to_int32(c0 + 1) 181 | else: 182 | return cast_to_int32(c0) 183 | 184 | 185 | cdef object _decode_string_sub(ReaderRef reader, uint32_t delim, 186 | Py_ssize_t start, uint32_t c0): 187 | cdef int32_t c1 188 | cdef StackHeapString[uint32_t] buf 189 | 190 | while True: 191 | if expect(c0 == delim, False): 192 | break 193 | 194 | if expect(not _reader_good(reader), False): 195 | _raise_unclosed(b'string', start) 196 | 197 | if expect(c0 != b'\\', True): 198 | if expect(c0 in (0xA, 0xD), False): 199 | _raise_unclosed(b'string', start) 200 | 201 | buf.push_back(c0) 202 | c0 = _reader_get(reader) 203 | continue 204 | 205 | c1 = _get_escape_sequence(reader, start) 206 | if c1 >= -1: 207 | if expect(not _reader_good(reader), False): 208 
| _raise_unclosed(b'string', start) 209 | 210 | if c1 >= 0: 211 | c0 = cast_to_uint32(c1) 212 | buf.push_back(c0) 213 | 214 | c0 = _reader_get(reader) 215 | else: 216 | c0 = cast_to_uint32(-(c1 + 1)) 217 | 218 | return PyUnicode_FromKindAndData( 219 | PyUnicode_4BYTE_KIND, buf.data(), buf.size(), 220 | ) 221 | 222 | 223 | cdef object _decode_string(ReaderRef reader, int32_t *c_in_out): 224 | cdef uint32_t delim 225 | cdef uint32_t c0 226 | cdef int32_t c1 227 | cdef Py_ssize_t start 228 | cdef object result 229 | 230 | c1 = c_in_out[0] 231 | delim = cast_to_uint32(c1) 232 | start = _reader_tell(reader) 233 | 234 | if expect(not _reader_good(reader), False): 235 | _raise_unclosed(b'string', start) 236 | 237 | c0 = _reader_get(reader) 238 | result = _decode_string_sub(reader, delim, start, c0) 239 | 240 | c_in_out[0] = NO_EXTRA_DATA 241 | return result 242 | 243 | 244 | cdef object _decode_double(StackHeapString[char] &buf, Py_ssize_t start): 245 | cdef double d0 246 | cdef const char *end_of_double 247 | 248 | d0 = 0.0 # silence warning 249 | end_of_double = parse_number(buf.data(), &d0) 250 | if end_of_double != NULL and end_of_double[0] == b'\0': 251 | return PyFloat_FromDouble(d0) 252 | 253 | _raise_unclosed('NumericLiteral', start) 254 | 255 | 256 | cdef object _decode_number_leading_zero(ReaderRef reader, StackHeapString[char] &buf, 257 | int32_t *c_in_out, Py_ssize_t start): 258 | cdef uint32_t c0 259 | cdef int32_t c1 = 0 # silence warning 260 | 261 | if not _reader_good(reader): 262 | c_in_out[0] = -1 263 | return 0 264 | 265 | c0 = _reader_get(reader) 266 | if _is_x(c0): 267 | while True: 268 | if not _reader_good(reader): 269 | c1 = -1 270 | break 271 | 272 | c0 = _reader_get(reader) 273 | if _is_hexadecimal(c0): 274 | buf.push_back( c0) 275 | elif c0 != b'_': 276 | c1 = cast_to_int32(c0) 277 | break 278 | 279 | c_in_out[0] = c1 280 | 281 | buf.push_back(b'\0') 282 | try: 283 | return PyLong_FromString(buf.data(), NULL, 16) 284 | except Exception: 285 | _raise_unclosed('NumericLiteral', start) 286 | elif c0 == b'.': 287 | buf.push_back(b'0') 288 | buf.push_back(b'.') 289 | 290 | while True: 291 | if not _reader_good(reader): 292 | c1 = -1 293 | break 294 | 295 | c0 = _reader_get(reader) 296 | if _is_in_float_representation(c0): 297 | buf.push_back( c0) 298 | elif c0 != b'_': 299 | c1 = cast_to_int32(c0) 300 | break 301 | 302 | c_in_out[0] = c1 303 | 304 | if buf.data()[buf.size() - 1] == b'.': 305 | ( buf.data())[buf.size() - 1] = b'\0' 306 | else: 307 | buf.push_back(b'\0') 308 | 309 | return _decode_double(buf, start) 310 | elif _is_e(c0): 311 | while True: 312 | if not _reader_good(reader): 313 | c1 = -1 314 | break 315 | 316 | c0 = _reader_get(reader) 317 | if _is_in_float_representation(c0): 318 | pass 319 | elif c0 == b'_': 320 | pass 321 | else: 322 | c1 = cast_to_int32(c0) 323 | break 324 | 325 | c_in_out[0] = c1 326 | return 0.0 327 | else: 328 | c1 = cast_to_int32(c0) 329 | c_in_out[0] = c1 330 | return 0 331 | 332 | 333 | cdef object _decode_number_any(ReaderRef reader, StackHeapString[char] &buf, 334 | int32_t *c_in_out, Py_ssize_t start): 335 | cdef uint32_t c0 336 | cdef int32_t c1 337 | cdef boolean is_float = False 338 | cdef boolean was_point = False 339 | cdef boolean leading_point = False 340 | 341 | c1 = c_in_out[0] 342 | c0 = cast_to_uint32(c1) 343 | 344 | if c0 == b'.': 345 | buf.push_back(b'0') 346 | is_float = True 347 | leading_point = True 348 | 349 | while True: 350 | if _is_decimal(c0): 351 | pass 352 | elif _is_in_float_representation(c0): 353 | 
is_float = True 354 | elif c0 != b'_': 355 | c1 = cast_to_int32(c0) 356 | break 357 | 358 | if c0 == b'_': 359 | pass 360 | elif c0 != b'.': 361 | if was_point: 362 | was_point = False 363 | if not _is_e(c0): 364 | buf.push_back(b'.') 365 | buf.push_back( c0) 366 | elif not was_point: 367 | was_point = True 368 | else: 369 | _raise_unclosed('NumericLiteral', start) 370 | 371 | if not _reader_good(reader): 372 | c1 = -1 373 | break 374 | 375 | c0 = _reader_get(reader) 376 | 377 | c_in_out[0] = c1 378 | 379 | if leading_point and buf.size() == 1: # single '.' 380 | _raise_unclosed('NumericLiteral', start) 381 | 382 | buf.push_back(b'\0') 383 | 384 | if not is_float: 385 | try: 386 | return PyLong_FromString(buf.data(), NULL, 10) 387 | except Exception: 388 | pass 389 | _raise_unclosed('NumericLiteral', start) 390 | else: 391 | return _decode_double(buf, start) 392 | 393 | 394 | cdef object _decode_number(ReaderRef reader, int32_t *c_in_out): 395 | cdef uint32_t c0 396 | cdef int32_t c1 397 | cdef Py_ssize_t start = _reader_tell(reader) 398 | cdef StackHeapString[char] buf 399 | 400 | c1 = c_in_out[0] 401 | c0 = cast_to_uint32(c1) 402 | 403 | if c0 == b'+': 404 | if expect(not _reader_good(reader), False): 405 | _raise_unclosed(b'number', start) 406 | 407 | c0 = _reader_get(reader) 408 | if c0 == b'I': 409 | _accept_string(reader, b'nfinity') 410 | c_in_out[0] = NO_EXTRA_DATA 411 | return CONST_POS_INF 412 | elif c0 == b'N': 413 | _accept_string(reader, b'aN') 414 | c_in_out[0] = NO_EXTRA_DATA 415 | return CONST_POS_NAN 416 | elif c0 == b'-': 417 | if expect(not _reader_good(reader), False): 418 | _raise_unclosed(b'number', start) 419 | 420 | c0 = _reader_get(reader) 421 | if c0 == b'I': 422 | _accept_string(reader, b'nfinity') 423 | c_in_out[0] = NO_EXTRA_DATA 424 | return CONST_NEG_INF 425 | elif c0 == b'N': 426 | _accept_string(reader, b'aN') 427 | c_in_out[0] = NO_EXTRA_DATA 428 | return CONST_NEG_NAN 429 | 430 | buf.push_back(b'-') 431 | 432 | if c0 == b'0': 433 | return _decode_number_leading_zero(reader, buf, c_in_out, start) 434 | else: 435 | c1 = cast_to_int32(c0) 436 | c_in_out[0] = c1 437 | return _decode_number_any(reader, buf, c_in_out, start) 438 | 439 | 440 | # 1: done 441 | # 0: data found 442 | # -1: exception (exhausted) 443 | cdef uint32_t _skip_comma(ReaderRef reader, Py_ssize_t start, 444 | uint32_t terminator, const char *what, 445 | int32_t *c_in_out) except -1: 446 | cdef int32_t c0 447 | cdef uint32_t c1 448 | cdef boolean needs_comma 449 | cdef uint32_t done 450 | 451 | c0 = c_in_out[0] 452 | c1 = cast_to_uint32(c0) 453 | 454 | needs_comma = True 455 | while True: 456 | c0 = _skip_to_data_sub(reader, c1) 457 | if c0 < 0: 458 | break 459 | 460 | c1 = cast_to_uint32(c0) 461 | if c1 == terminator: 462 | c_in_out[0] = NO_EXTRA_DATA 463 | return 1 464 | 465 | if c1 != b',': 466 | if expect(needs_comma, False): 467 | _raise_expected_sc( 468 | 'comma', terminator, _reader_tell(reader), c1, 469 | ) 470 | c_in_out[0] = c0 471 | return 0 472 | 473 | if expect(not needs_comma, False): 474 | _raise_stray_character('comma', _reader_tell(reader)) 475 | 476 | if expect(not _reader_good(reader), False): 477 | break 478 | 479 | c1 = _reader_get(reader) 480 | needs_comma = False 481 | 482 | _raise_unclosed(what, start) 483 | return -1 484 | 485 | 486 | cdef unicode _decode_identifier_name(ReaderRef reader, int32_t *c_in_out): 487 | cdef int32_t c0 488 | cdef uint32_t c1 489 | cdef Py_ssize_t start 490 | cdef StackHeapString[uint32_t] buf 491 | 492 | start = _reader_tell(reader) 493 | 
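    # The pending character in c_in_out must start an IdentifierName; the loop
    # below keeps consuming characters (resolving \u/\U escapes along the way)
    # until one no longer qualifies as an IdentifierPart.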
494 | c0 = c_in_out[0] 495 | c1 = cast_to_uint32(c0) 496 | if expect(not _is_identifier_start(c1), False): 497 | _raise_expected_s('IdentifierStart', _reader_tell(reader), c1) 498 | 499 | while True: 500 | if expect(c1 == b'\\', False): 501 | if not _reader_good(reader): 502 | _raise_unclosed('IdentifierName', start) 503 | break 504 | 505 | c1 = _reader_get(reader) 506 | if c1 == b'u': 507 | c1 = cast_to_uint32(_get_escaped_unicode_maybe_surrogate(reader, _reader_tell(reader))) 508 | elif c1 == b'U': 509 | c1 = cast_to_uint32(_get_hex_character(reader, 8)) 510 | else: 511 | _raise_expected_s('UnicodeEscapeSequence', _reader_tell(reader), c1) 512 | 513 | buf.push_back(c1) 514 | 515 | if not _reader_good(reader): 516 | c0 = -1 517 | break 518 | 519 | c1 = _reader_get(reader) 520 | if not _is_identifier_part(c1): 521 | c0 = cast_to_int32(c1) 522 | break 523 | 524 | c_in_out[0] = c0 525 | return PyUnicode_FromKindAndData( 526 | PyUnicode_4BYTE_KIND, buf.data(), buf.size(), 527 | ) 528 | 529 | 530 | cdef boolean _decode_object(ReaderRef reader, object result) except False: 531 | cdef int32_t c0 532 | cdef uint32_t c1 533 | cdef Py_ssize_t start 534 | cdef boolean done 535 | cdef object key 536 | cdef object value 537 | cdef object ex 538 | 539 | start = _reader_tell(reader) 540 | 541 | c0 = _skip_to_data(reader) 542 | if expect(c0 >= 0, True): 543 | c1 = cast_to_uint32(c0) 544 | if c1 == b'}': 545 | return True 546 | 547 | while True: 548 | if c1 in b'"\'': 549 | key = _decode_string(reader, &c0) 550 | else: 551 | key = _decode_identifier_name(reader, &c0) 552 | if expect(c0 < 0, False): 553 | break 554 | 555 | c1 = cast_to_uint32(c0) 556 | c0 = _skip_to_data_sub(reader, c1) 557 | if expect(c0 < 0, False): 558 | break 559 | 560 | c1 = cast_to_uint32(c0) 561 | if expect(c1 != b':', False): 562 | _raise_expected_s('colon', _reader_tell(reader), c1) 563 | 564 | if expect(not _reader_good(reader), False): 565 | break 566 | 567 | c0 = _skip_to_data(reader) 568 | if expect(c0 < 0, False): 569 | break 570 | 571 | try: 572 | value = _decode_recursive(reader, &c0) 573 | except _DecoderException as ex: 574 | PyDict_SetItem(result, key, (<_DecoderException> ex).result) 575 | raise 576 | 577 | if expect(c0 < 0, False): 578 | break 579 | 580 | PyDict_SetItem(result, key, value) 581 | 582 | done = _skip_comma( 583 | reader, start, b'}', b'object', &c0, 584 | ) 585 | if done: 586 | return True 587 | 588 | c1 = cast_to_uint32(c0) 589 | 590 | _raise_unclosed(b'object', start) 591 | return False 592 | 593 | 594 | cdef boolean _decode_array(ReaderRef reader, object result) except False: 595 | cdef int32_t c0 596 | cdef uint32_t c1 597 | cdef Py_ssize_t start 598 | cdef boolean done 599 | cdef object value 600 | cdef object ex 601 | 602 | start = _reader_tell(reader) 603 | 604 | c0 = _skip_to_data(reader) 605 | if expect(c0 >= 0, True): 606 | c1 = cast_to_uint32(c0) 607 | if c1 == b']': 608 | return True 609 | 610 | while True: 611 | try: 612 | value = _decode_recursive(reader, &c0) 613 | except _DecoderException as ex: 614 | PyList_Append(result, (<_DecoderException> ex).result) 615 | raise 616 | 617 | if expect(c0 < 0, False): 618 | break 619 | 620 | PyList_Append(result, value) 621 | 622 | done = _skip_comma( 623 | reader, start, b']', b'array', &c0, 624 | ) 625 | if done: 626 | return True 627 | 628 | _raise_unclosed(b'array', start) 629 | 630 | 631 | cdef boolean _accept_string(ReaderRef reader, const char *string) except False: 632 | cdef uint32_t c0 633 | cdef uint32_t c1 634 | cdef Py_ssize_t start 635 | 
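    # Match the NUL-terminated literal `string` character by character against
    # the reader; running out of input or hitting a mismatch raises via
    # _raise_unclosed()/_raise_expected_c() rather than returning.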
636 | start = _reader_tell(reader) 637 | while True: 638 | c0 = string[0] 639 | string += 1 640 | if not c0: 641 | break 642 | 643 | if expect(not _reader_good(reader), False): 644 | _raise_unclosed(b'literal', start) 645 | 646 | c1 = _reader_get(reader) 647 | if expect(c0 != c1, False): 648 | _raise_expected_c(c0, start, c1) 649 | 650 | return True 651 | 652 | 653 | cdef object _decode_null(ReaderRef reader, int32_t *c_in_out): 654 | # n 655 | _accept_string(reader, b'ull') 656 | c_in_out[0] = NO_EXTRA_DATA 657 | return None 658 | 659 | 660 | cdef object _decode_true(ReaderRef reader, int32_t *c_in_out): 661 | # t 662 | _accept_string(reader, b'rue') 663 | c_in_out[0] = NO_EXTRA_DATA 664 | return True 665 | 666 | 667 | cdef object _decode_false(ReaderRef reader, int32_t *c_in_out): 668 | # f 669 | _accept_string(reader, b'alse') 670 | c_in_out[0] = NO_EXTRA_DATA 671 | return False 672 | 673 | 674 | cdef object _decode_inf(ReaderRef reader, int32_t *c_in_out): 675 | # I 676 | _accept_string(reader, b'nfinity') 677 | c_in_out[0] = NO_EXTRA_DATA 678 | return CONST_POS_INF 679 | 680 | 681 | cdef object _decode_nan(ReaderRef reader, int32_t *c_in_out): 682 | # N 683 | _accept_string(reader, b'aN') 684 | c_in_out[0] = NO_EXTRA_DATA 685 | return CONST_POS_NAN 686 | 687 | 688 | cdef object _decode_recursive_enter(ReaderRef reader, int32_t *c_in_out): 689 | cdef boolean (*fn)(ReaderRef reader, object result) except False 690 | cdef object result 691 | cdef int32_t c0 692 | cdef uint32_t c1 693 | cdef object ex 694 | 695 | c0 = c_in_out[0] 696 | c1 = cast_to_uint32(c0) 697 | 698 | if c1 == b'{': 699 | result = {} 700 | fn = _decode_object 701 | else: 702 | result = [] 703 | fn = _decode_array 704 | 705 | _reader_enter(reader) 706 | try: 707 | fn(reader, result) 708 | except RecursionError: 709 | _raise_nesting(_reader_tell(reader), result) 710 | except _DecoderException as ex: 711 | (<_DecoderException> ex).result = result 712 | raise 713 | finally: 714 | _reader_leave(reader) 715 | 716 | c_in_out[0] = NO_EXTRA_DATA 717 | return result 718 | 719 | 720 | cdef object _decoder_unknown(ReaderRef reader, int32_t *c_in_out): 721 | cdef int32_t c0 722 | cdef uint32_t c1 723 | cdef Py_ssize_t start 724 | 725 | c0 = c_in_out[0] 726 | c1 = cast_to_uint32(c0) 727 | start = _reader_tell(reader) 728 | 729 | _raise_expected_s('JSON5Value', start, c1) 730 | 731 | 732 | cdef object _decode_recursive(ReaderRef reader, int32_t *c_in_out): 733 | cdef int32_t c0 734 | cdef uint32_t c1 735 | cdef Py_ssize_t start 736 | cdef DrsKind kind 737 | cdef object (*decoder)(ReaderRef, int32_t*) 738 | 739 | c0 = c_in_out[0] 740 | c1 = cast_to_uint32(c0) 741 | if c1 >= 128: 742 | start = _reader_tell(reader) 743 | _raise_expected_s('JSON5Value', start, c1) 744 | 745 | kind = drs_lookup[c1] 746 | if kind == DRS_fail: 747 | decoder = _decoder_unknown 748 | elif kind == DRS_null: 749 | decoder = _decode_null 750 | elif kind == DRS_true: 751 | decoder = _decode_true 752 | elif kind == DRS_false: 753 | decoder = _decode_false 754 | elif kind == DRS_inf: 755 | decoder = _decode_inf 756 | elif kind == DRS_nan: 757 | decoder = _decode_nan 758 | elif kind == DRS_string: 759 | decoder = _decode_string 760 | elif kind == DRS_number: 761 | decoder = _decode_number 762 | elif kind == DRS_recursive: 763 | decoder = _decode_recursive_enter 764 | else: 765 | unreachable() 766 | decoder = _decoder_unknown 767 | 768 | return decoder(reader, c_in_out) 769 | 770 | 771 | cdef object _decode_all_sub(ReaderRef reader, boolean some): 772 | cdef 
Py_ssize_t start 773 | cdef int32_t c0 774 | cdef uint32_t c1 775 | cdef object result 776 | cdef object ex 777 | 778 | start = _reader_tell(reader) 779 | c0 = _skip_to_data(reader) 780 | if expect(c0 < 0, False): 781 | _raise_no_data(start) 782 | 783 | result = _decode_recursive(reader, &c0) 784 | try: 785 | if c0 < 0: 786 | pass 787 | elif not some: 788 | start = _reader_tell(reader) 789 | c1 = cast_to_uint32(c0) 790 | c0 = _skip_to_data_sub(reader, c1) 791 | if expect(c0 >= 0, False): 792 | c1 = cast_to_uint32(c0) 793 | _raise_extra_data(c1, start) 794 | elif expect(not _is_ws_zs(c0), False): 795 | start = _reader_tell(reader) 796 | c1 = cast_to_uint32(c0) 797 | _raise_unframed_data(c1, start) 798 | except _DecoderException as ex: 799 | (<_DecoderException> ex).result = result 800 | raise 801 | 802 | return result 803 | 804 | 805 | cdef object _decode_all(ReaderRef reader, boolean some): 806 | cdef object ex, ex2 807 | try: 808 | return _decode_all_sub(reader, some) 809 | except _DecoderException as ex: 810 | ex2 = (<_DecoderException> ex).cls( 811 | (<_DecoderException> ex).msg, 812 | (<_DecoderException> ex).result, 813 | (<_DecoderException> ex).extra, 814 | ) 815 | raise ex2 816 | 817 | 818 | cdef object _decode_ucs1(const void *string, Py_ssize_t length, 819 | Py_ssize_t maxdepth, boolean some): 820 | cdef ReaderUCS1 reader = ReaderUCS1( 821 | ReaderUCS(length, 0, maxdepth), 822 | string, 823 | ) 824 | return _decode_all(reader, some) 825 | 826 | 827 | cdef object _decode_ucs2(const void *string, Py_ssize_t length, 828 | Py_ssize_t maxdepth, boolean some): 829 | cdef ReaderUCS2 reader = ReaderUCS2( 830 | ReaderUCS(length, 0, maxdepth), 831 | string, 832 | ) 833 | return _decode_all(reader, some) 834 | 835 | 836 | cdef object _decode_ucs4(const void *string, Py_ssize_t length, 837 | Py_ssize_t maxdepth, boolean some): 838 | cdef ReaderUCS4 reader = ReaderUCS4( 839 | ReaderUCS(length, 0, maxdepth), 840 | string, 841 | ) 842 | return _decode_all(reader, some) 843 | 844 | 845 | cdef object _decode_utf8(const void *string, Py_ssize_t length, 846 | Py_ssize_t maxdepth, boolean some): 847 | cdef ReaderUTF8 reader = ReaderUTF8( 848 | ReaderUCS(length, 0, maxdepth), 849 | string, 850 | ) 851 | return _decode_all(reader, some) 852 | 853 | 854 | cdef object _decode_unicode(object data, Py_ssize_t maxdepth, boolean some): 855 | cdef Py_ssize_t length 856 | cdef int kind 857 | cdef const char *s 858 | 859 | PyUnicode_READY(data) 860 | 861 | if CYTHON_COMPILING_IN_PYPY: 862 | length = 0 863 | s = PyUnicode_AsUTF8AndSize(data, &length) 864 | return _decode_utf8(s, length, maxdepth, some) 865 | 866 | length = PyUnicode_GET_LENGTH(data) 867 | kind = PyUnicode_KIND(data) 868 | 869 | if kind == PyUnicode_1BYTE_KIND: 870 | return _decode_ucs1(PyUnicode_1BYTE_DATA(data), length, maxdepth, some) 871 | elif kind == PyUnicode_2BYTE_KIND: 872 | return _decode_ucs2(PyUnicode_2BYTE_DATA(data), length, maxdepth, some) 873 | elif kind == PyUnicode_4BYTE_KIND: 874 | return _decode_ucs4(PyUnicode_4BYTE_DATA(data), length, maxdepth, some) 875 | else: 876 | unreachable() 877 | 878 | 879 | cdef object _decode_buffer(Py_buffer &view, int32_t wordlength, 880 | Py_ssize_t maxdepth, boolean some): 881 | cdef object (*decoder)(const void*, Py_ssize_t, Py_ssize_t, boolean) 882 | cdef Py_ssize_t length = 0 883 | 884 | if wordlength == 0: 885 | decoder = _decode_utf8 886 | length = view.len // 1 887 | elif wordlength == 1: 888 | decoder = _decode_ucs1 889 | length = view.len // 1 890 | elif wordlength == 2: 891 | decoder = 
_decode_ucs2 892 | length = view.len // 2 893 | elif wordlength == 4: 894 | decoder = _decode_ucs4 895 | length = view.len // 4 896 | else: 897 | _raise_illegal_wordlength(wordlength) 898 | unreachable() 899 | length = 0 900 | decoder = NULL 901 | 902 | return decoder(view.buf, length, maxdepth, some) 903 | 904 | 905 | cdef object _decode_callback(object cb, object args, Py_ssize_t maxdepth, 906 | boolean some): 907 | cdef ReaderCallback reader = ReaderCallback( 908 | ReaderCallbackBase(0, maxdepth), 909 | cb, 910 | args, 911 | -1, 912 | ) 913 | return _decode_all(reader, some) 914 | -------------------------------------------------------------------------------- /src/_decoder_recursive_select.hpp: -------------------------------------------------------------------------------- 1 | #ifndef JSON5EncoderCpp_decoder_recursive_select 2 | #define JSON5EncoderCpp_decoder_recursive_select 3 | 4 | // GENERATED FILE 5 | // All changes will be lost. 6 | 7 | #include 8 | 9 | namespace JSON5EncoderCpp { 10 | inline namespace { 11 | 12 | enum DrsKind : std::uint8_t { 13 | DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive 14 | }; 15 | 16 | static const DrsKind drs_lookup[128] = { 17 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 18 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 19 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 20 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 21 | DRS_fail, DRS_fail, DRS_string, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_string, 22 | DRS_fail, DRS_fail, DRS_fail, DRS_number, DRS_fail, DRS_number, DRS_number, DRS_fail, 23 | DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, 24 | DRS_number, DRS_number, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 25 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 26 | DRS_fail, DRS_inf, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_nan, DRS_fail, 27 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 28 | DRS_fail, DRS_fail, DRS_fail, DRS_recursive, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 29 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_false, DRS_fail, 30 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_null, DRS_fail, 31 | DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_true, DRS_fail, DRS_fail, DRS_fail, 32 | DRS_fail, DRS_fail, DRS_fail, DRS_recursive, DRS_fail, DRS_fail, DRS_fail, DRS_fail, 33 | }; 34 | 35 | } // anonymous inline namespace 36 | } // namespace JSON5EncoderCpp 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/_encoder.pyx: -------------------------------------------------------------------------------- 1 | cdef int _encode_unicode_impl(WriterRef writer, UCSString data, Py_ssize_t length) except -1: 2 | cdef char buf[32] 3 | cdef uint32_t c 4 | cdef uint32_t s1, s2 5 | cdef const char *escaped_string 6 | cdef Py_ssize_t escaped_length 7 | cdef size_t unescaped_length, index 8 | cdef Py_ssize_t sublength 9 | 10 | if length > 0: 11 | writer.reserve(writer, 2 + length) 12 | writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0]) 13 | while True: 14 | if UCSString is UCS1String: 15 | sublength = length 16 | else: 17 | sublength = min(length, sizeof(buf)) 18 | 19 | unescaped_length = 
ESCAPE_DCT.find_unescaped_range(data, sublength) 20 | if unescaped_length > 0: 21 | if UCSString is UCS1String: 22 | writer.append_s(writer, data, unescaped_length) 23 | else: 24 | for index in range(unescaped_length): 25 | buf[index] = data[index] 26 | writer.append_s(writer, buf, unescaped_length) 27 | 28 | data += unescaped_length 29 | length -= unescaped_length 30 | if length <= 0: 31 | break 32 | 33 | if UCSString is not UCS1String: 34 | continue 35 | 36 | c = data[0] 37 | if (UCSString is UCS1String) or (c < 0x100): 38 | escaped_length = ESCAPE_DCT.items[c][0] 39 | escaped_string = &ESCAPE_DCT.items[c][1] 40 | writer.append_s(writer, escaped_string, escaped_length) 41 | elif (UCSString is UCS2String) or (c <= 0xffff): 42 | buf[0] = b'\\'; 43 | buf[1] = b'u'; 44 | buf[2] = HEX[(c >> (4*3)) & 0xf]; 45 | buf[3] = HEX[(c >> (4*2)) & 0xf]; 46 | buf[4] = HEX[(c >> (4*1)) & 0xf]; 47 | buf[5] = HEX[(c >> (4*0)) & 0xf]; 48 | buf[6] = 0; 49 | 50 | writer.append_s(writer, buf, 6); 51 | else: 52 | # surrogate pair 53 | c -= 0x10000 54 | s1 = 0xd800 | ((c >> 10) & 0x3ff) 55 | s2 = 0xdc00 | (c & 0x3ff) 56 | 57 | buf[0x0] = b'\\'; 58 | buf[0x1] = b'u'; 59 | buf[0x2] = HEX[(s1 >> (4*3)) & 0xf]; 60 | buf[0x3] = HEX[(s1 >> (4*2)) & 0xf]; 61 | buf[0x4] = HEX[(s1 >> (4*1)) & 0xf]; 62 | buf[0x5] = HEX[(s1 >> (4*0)) & 0xf]; 63 | 64 | buf[0x6] = b'\\'; 65 | buf[0x7] = b'u'; 66 | buf[0x8] = HEX[(s2 >> (4*3)) & 0xf]; 67 | buf[0x9] = HEX[(s2 >> (4*2)) & 0xf]; 68 | buf[0xa] = HEX[(s2 >> (4*1)) & 0xf]; 69 | buf[0xb] = HEX[(s2 >> (4*0)) & 0xf]; 70 | 71 | buf[0xc] = 0; 72 | 73 | writer.append_s(writer, buf, 12); 74 | 75 | data += 1 76 | length -= 1 77 | if length <= 0: 78 | break 79 | writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0]) 80 | else: 81 | writer.append_s(writer, b'""', 2) 82 | 83 | return True 84 | 85 | 86 | cdef int _encode_unicode(WriterRef writer, object data) except -1: 87 | cdef Py_ssize_t length 88 | cdef int kind 89 | 90 | PyUnicode_READY(data) 91 | 92 | length = PyUnicode_GET_LENGTH(data) 93 | kind = PyUnicode_KIND(data) 94 | 95 | if kind == PyUnicode_1BYTE_KIND: 96 | _encode_unicode_impl(writer, PyUnicode_1BYTE_DATA(data), length) 97 | elif kind == PyUnicode_2BYTE_KIND: 98 | _encode_unicode_impl(writer, PyUnicode_2BYTE_DATA(data), length) 99 | elif kind == PyUnicode_4BYTE_KIND: 100 | _encode_unicode_impl(writer, PyUnicode_4BYTE_DATA(data), length) 101 | else: 102 | pass # impossible 103 | 104 | return True 105 | 106 | 107 | cdef int _encode_nested_key(WriterRef writer, object data) except -1: 108 | cdef const char *string 109 | cdef char c 110 | cdef Py_ssize_t index, length 111 | cdef int result 112 | 113 | cdef WriterReallocatable sub_writer = WriterReallocatable( 114 | Writer( 115 | _WriterReallocatable_reserve, 116 | _WriterReallocatable_append_c, 117 | _WriterReallocatable_append_s, 118 | writer.options, 119 | ), 120 | 0, 0, NULL, 121 | ) 122 | try: 123 | result = _encode(sub_writer.base, data) 124 | if expect(result < 0, False): 125 | return result 126 | 127 | length = sub_writer.position 128 | string = sub_writer.obj 129 | 130 | writer.reserve(writer, 2 + length) 131 | writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0]) 132 | for index in range(length): 133 | c = string[index] 134 | if c in b'\\"': 135 | writer.append_c(writer, b'\\') 136 | writer.append_c(writer, c) 137 | writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0]) 138 | finally: 139 | if sub_writer.obj is not NULL: 140 | 
ObjectFree(sub_writer.obj) 141 | 142 | return True 143 | 144 | 145 | cdef int _append_ascii(WriterRef writer, object data) except -1: 146 | cdef Py_buffer view 147 | cdef const char *buf 148 | cdef Py_ssize_t index 149 | cdef unsigned char c 150 | 151 | if PyUnicode_Check(data): 152 | PyUnicode_READY(data) 153 | if not PyUnicode_IS_ASCII(data): 154 | raise TypeError('Expected ASCII data') 155 | writer.append_s(writer, PyUnicode_1BYTE_DATA(data), PyUnicode_GET_LENGTH(data)) 156 | else: 157 | PyObject_GetBuffer(data, &view, PyBUF_CONTIG_RO) 158 | try: 159 | buf = view.buf 160 | for index in range(view.len): 161 | c = buf[index] 162 | if c & ~0x7f: 163 | raise TypeError('Expected ASCII data') 164 | 165 | writer.append_s(writer, buf, view.len) 166 | finally: 167 | PyBuffer_Release(&view) 168 | 169 | return True 170 | 171 | 172 | cdef int _encode_tojson(WriterRef writer, object data) except -1: 173 | cdef object value = getattr(data, ( writer.options).tojson, None) 174 | if value is None: 175 | return False 176 | 177 | if callable(value): 178 | Py_EnterRecursiveCall(' while encoding nested JSON5 object') 179 | try: 180 | value = value() 181 | finally: 182 | Py_LeaveRecursiveCall() 183 | 184 | _append_ascii(writer, value) 185 | return True 186 | 187 | 188 | cdef int _encode_sequence(WriterRef writer, object data) except -1: 189 | cdef boolean first 190 | cdef object iterator 191 | cdef object value 192 | cdef int result 193 | 194 | try: 195 | iterator = PyObject_GetIter(data) 196 | except TypeError: 197 | return False 198 | 199 | Py_EnterRecursiveCall(' while encoding nested JSON5 object') 200 | try: 201 | writer.append_c(writer, b'[') 202 | first = True 203 | value = None 204 | while iter_next(iterator, & value): 205 | if not first: 206 | writer.append_c(writer, b',') 207 | else: 208 | first = False 209 | 210 | result = _encode(writer, value) 211 | if expect(result < 0, False): 212 | return result 213 | writer.append_c(writer, b']') 214 | finally: 215 | Py_LeaveRecursiveCall() 216 | 217 | return True 218 | 219 | 220 | cdef int _encode_mapping(WriterRef writer, object data) except -1: 221 | cdef boolean first 222 | cdef object iterator, key, value 223 | cdef int result 224 | 225 | if not isinstance(data, ( writer.options).mappingtypes): 226 | return False 227 | 228 | iterator = PyObject_GetIter(data) 229 | 230 | Py_EnterRecursiveCall(' while encoding nested JSON5 object') 231 | try: 232 | writer.append_c(writer, b'{') 233 | first = True 234 | key = None 235 | while iter_next(iterator, & key): 236 | if not first: 237 | writer.append_c(writer, b',') 238 | else: 239 | first = False 240 | value = data[key] 241 | 242 | if PyUnicode_Check(key): 243 | _encode_unicode(writer, key) 244 | else: 245 | _encode_nested_key(writer, key) 246 | 247 | writer.append_c(writer, b':') 248 | 249 | result = _encode(writer, value) 250 | if expect(result < 0, False): 251 | return result 252 | writer.append_c(writer, b'}') 253 | finally: 254 | Py_LeaveRecursiveCall() 255 | 256 | return True 257 | 258 | 259 | cdef int _encode_none(WriterRef writer, object data) except -1: 260 | writer.append_s(writer, b'null', 4) 261 | return True 262 | 263 | 264 | cdef int _encode_bytes(WriterRef writer, object data) except -1: 265 | _encode_unicode(writer, PyUnicode_FromEncodedObject(data, 'UTF-8', 'strict')) 266 | return True 267 | 268 | 269 | cdef int _encode_datetime(WriterRef writer, object data) except -1: 270 | cdef object stringified 271 | cdef Py_ssize_t length 272 | cdef const char *string 273 | 274 | if not isinstance(data, 
DATETIME_CLASSES): 275 | return False 276 | 277 | stringified = data.isoformat() 278 | length = 0 279 | string = PyUnicode_AsUTF8AndSize(stringified, &length) 280 | 281 | writer.reserve(writer, 2 + length) 282 | writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0]) 283 | writer.append_s(writer, string, length) 284 | writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0]) 285 | 286 | return True 287 | 288 | 289 | cdef int _encode_format_string(WriterRef writer, object data, object fmt) except -1: 290 | cdef object formatted 291 | cdef const char *string 292 | cdef Py_ssize_t length = 0 # silence warning 293 | 294 | formatted = PyUnicode_Format(fmt, data) 295 | string = PyUnicode_AsUTF8AndSize(formatted, &length) 296 | writer.append_s(writer, string, length) 297 | 298 | return True 299 | 300 | 301 | cdef int _encode_float(WriterRef writer, object data) except -1: 302 | cdef double value = PyFloat_AsDouble(data) 303 | cdef int classification = fpclassify(value) 304 | cdef char buf[64] 305 | cdef char *end 306 | cdef char *string 307 | cdef Py_ssize_t length 308 | 309 | if classification == FP_NORMAL: 310 | end = Dtoa(buf, PyFloat_AsDouble(data)) 311 | length = end - buf 312 | string = buf 313 | elif classification in (FP_SUBNORMAL, FP_ZERO): 314 | string = b'0.0' 315 | length = 3 316 | elif classification == FP_NAN: 317 | string = b'NaN' 318 | length = 3 319 | else: 320 | # classification == FP_INFINITE 321 | if value > 0.0: 322 | string = b'Infinity' 323 | length = 8 324 | else: 325 | string = b'-Infinity' 326 | length = 9 327 | 328 | writer.append_s(writer, string, length) 329 | return True 330 | 331 | 332 | cdef int _encode_long(WriterRef writer, object data) except -1: 333 | if PyBool_Check(data): 334 | if data is True: 335 | writer.append_s(writer, 'true', 4) 336 | else: 337 | writer.append_s(writer, 'false', 5) 338 | else: 339 | _encode_format_string(writer, data, DEFAULT_INTFORMAT) 340 | return True 341 | 342 | 343 | cdef int _encode_decimal(WriterRef writer, object data) except -1: 344 | if not isinstance(data, Decimal): 345 | return False 346 | 347 | _encode_format_string(writer, data, DEFAULT_DECIMALFORMAT) 348 | return True 349 | 350 | 351 | cdef int _encode_unstringifiable(WriterRef writer, object data) except -1: 352 | if not data: 353 | writer.append_s(writer, b'none', 4) 354 | return True 355 | 356 | Py_EnterRecursiveCall(' while encoding JSON5 object with vars(obj) fallback') 357 | try: 358 | try: 359 | data = PyObject_GenericGetDict(data, NULL) 360 | except: 361 | pass 362 | else: 363 | if _encode_mapping(writer, data): 364 | return True 365 | finally: 366 | Py_LeaveRecursiveCall() 367 | 368 | _raise_unstringifiable(data) 369 | 370 | 371 | cdef int _encode_other(WriterRef writer, object data): 372 | cdef int result = 0 373 | 374 | while True: 375 | if ( writer.options).tojson is not None: 376 | result = ( _encode_tojson)(writer, data) 377 | if result != 0: 378 | break 379 | 380 | if obj_has_iter(data): 381 | result = ( _encode_mapping)(writer, data) 382 | if result != 0: 383 | break 384 | 385 | result = ( _encode_sequence)(writer, data) 386 | if result != 0: 387 | break 388 | 389 | result = ( _encode_decimal)(writer, data) 390 | if result != 0: 391 | break 392 | 393 | result = ( _encode_datetime)(writer, data) 394 | if result != 0: 395 | break 396 | 397 | result = ( _encode_unstringifiable)(writer, data) 398 | if result != 0: 399 | break 400 | 401 | break 402 | 403 | return result 404 | 405 | 406 | cdef int 
_encode(WriterRef writer, object data): 407 | cdef int (*encoder)(WriterRef, object) 408 | 409 | if data is None: 410 | encoder = _encode_none 411 | elif PyUnicode_Check(data): 412 | encoder = _encode_unicode 413 | elif PyLong_Check(data): 414 | encoder = _encode_long 415 | elif PyFloat_Check(data): 416 | encoder = _encode_float 417 | elif PyBytes_Check(data): 418 | encoder = _encode_bytes 419 | else: 420 | encoder = _encode_other 421 | 422 | return encoder(writer, data) 423 | 424 | 425 | cdef int _encode_callback_bytes(object data, object cb, object options) except -1: 426 | cdef WriterCallback writer = WriterCallback( 427 | Writer( 428 | _WriterNoop_reserve, 429 | _WriterCbBytes_append_c, 430 | _WriterCbBytes_append_s, 431 | options, 432 | ), 433 | cb, 434 | ) 435 | 436 | if expect(not callable(cb), False): 437 | raise TypeError(f'type(cb)=={type(cb)!r} is not callable') 438 | 439 | return _encode(writer.base, data) 440 | 441 | 442 | cdef int _encode_callback_str(object data, object cb, object options) except -1: 443 | cdef WriterCallback writer = WriterCallback( 444 | Writer( 445 | _WriterNoop_reserve, 446 | _WriterCbStr_append_c, 447 | _WriterCbStr_append_s, 448 | options, 449 | ), 450 | cb, 451 | ) 452 | 453 | if expect(not callable(cb), False): 454 | raise TypeError(f'type(cb)=={type(cb)!r} is not callable') 455 | 456 | return _encode(writer.base, data) 457 | -------------------------------------------------------------------------------- /src/_encoder_options.pyx: -------------------------------------------------------------------------------- 1 | cdef object DEFAULT_TOJSON = False 2 | cdef object DEFAULT_INTFORMAT = '%d' 3 | cdef object DEFAULT_DECIMALFORMAT = '%s' 4 | cdef object DEFAULT_MAPPINGTYPES = (Mapping,) 5 | cdef object DEFAULT_QUOTATIONMARK = '"' 6 | 7 | 8 | cdef object _options_ascii(object datum, boolean expect_ascii=True): 9 | if datum is False: 10 | return None 11 | elif PyBytes_Check(datum): 12 | datum = unicode(datum, 'UTF-8', 'strict') 13 | elif not PyUnicode_Check(datum): 14 | raise TypeError('Expected str instance or False') 15 | 16 | PyUnicode_READY(datum) 17 | if expect_ascii and not PyUnicode_IS_ASCII(datum): 18 | raise ValueError('Expected ASCII data') 19 | 20 | return datum 21 | 22 | 23 | cdef object _options_reduce_arg(object key, object value, object default): 24 | if value != default: 25 | if value is not None: 26 | return key, value 27 | else: 28 | return key, False 29 | 30 | 31 | cdef object _option_from_ascii(object name, object value, object default): 32 | if value == default: 33 | return 34 | elif value is None: 35 | return f'{name}=False' 36 | else: 37 | return f'{name}={value!r}' 38 | 39 | 40 | cdef _options_from_ascii(Options self): 41 | return ', '.join(filter(bool, ( 42 | _option_from_ascii('quotationmark', self.quotationmark, DEFAULT_QUOTATIONMARK), 43 | _option_from_ascii('tojson', self.tojson, None), 44 | ))) 45 | 46 | 47 | @final 48 | @no_gc 49 | @freelist(8) 50 | @auto_pickle(False) 51 | cdef class Options: 52 | ''' 53 | Customizations for the :func:`encoder_*(...) ` function family. 54 | 55 | Immutable. Use :meth:`Options.update(**kw) ` to create a **new** Options instance. 56 | 57 | Parameters 58 | ---------- 59 | quotationmark : str|None 60 | * **str**: One character string that is used to surround strings. 61 | * **None**: Use default: ``'"'``. 62 | tojson : str|False|None 63 | * **str:** A special method to call on objects to return a custom JSON encoded string. Must return ASCII data! 64 | * **False:** No such member exists. 
(Default.) 65 | * **None:** Use default. 66 | mappingtypes : Iterable[type]|False|None 67 | * **Iterable[type]:** Classes that should be encoded to objects. Must be iterable over their keys, and implement ``__getitem__``. 68 | * **False:** There are no objects. Any object will be encoded as list of keys as in list(obj). 69 | * **None:** Use default: ``[collections.abc.Mapping]``. 70 | ''' 71 | cdef readonly unicode quotationmark 72 | '''The creation argument ``quotationmark``. 73 | ''' 74 | cdef readonly unicode tojson 75 | '''The creation argument ``tojson``. 76 | ``None`` if ``False`` was specified. 77 | ''' 78 | cdef readonly tuple mappingtypes 79 | '''The creation argument ``mappingtypes``. 80 | ``()`` if ``False`` was specified. 81 | ''' 82 | 83 | def __reduce__(self): 84 | cdef object args = tuple(filter(bool, ( 85 | _options_reduce_arg('quotationmark', self.quotationmark, DEFAULT_QUOTATIONMARK), 86 | _options_reduce_arg('tojson', self.tojson, None), 87 | _options_reduce_arg('mappingtypes', self.mappingtypes, DEFAULT_MAPPINGTYPES), 88 | ))) 89 | return (_UnpickleOptions if args else Options), args 90 | 91 | def __repr__(self): 92 | cdef object repr_options = _options_from_ascii(self) 93 | cdef object repr_cls = ( 94 | '' 95 | if self.mappingtypes == DEFAULT_MAPPINGTYPES else 96 | repr(DEFAULT_MAPPINGTYPES) 97 | ) 98 | return (f'Options(' 99 | f'{repr_options}' 100 | f'{repr_options and repr_cls and ", "}' 101 | f'{repr_cls}' 102 | ')') 103 | 104 | def __str__(self): 105 | return self.__repr__() 106 | 107 | def __cinit__(self, *, 108 | quotationmark=None, 109 | tojson=None, posinfinity=None, neginfinity=None, nan=None, 110 | decimalformat=None, intformat=None, 111 | mappingtypes=None): 112 | cdef object cls 113 | cdef object ex 114 | 115 | if quotationmark is None: 116 | quotationmark = DEFAULT_QUOTATIONMARK 117 | if tojson is None: 118 | tojson = DEFAULT_TOJSON 119 | if mappingtypes is None: 120 | mappingtypes = DEFAULT_MAPPINGTYPES 121 | 122 | self.quotationmark = _options_ascii(quotationmark) 123 | self.tojson = _options_ascii(tojson, False) 124 | 125 | if self.quotationmark is None or PyUnicode_GET_LENGTH(self.quotationmark) != 1: 126 | raise TypeError('quotationmark must be one ASCII character.') 127 | 128 | if mappingtypes is False: 129 | self.mappingtypes = () 130 | else: 131 | self.mappingtypes = tuple(mappingtypes) 132 | for cls in self.mappingtypes: 133 | if not PyType_Check(cls): 134 | raise TypeError('mappingtypes must be a sequence of types ' 135 | 'or False') 136 | 137 | def update(self, *args, **kw): 138 | ''' 139 | Creates a new Options instance by modifying some members. 
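        A usage sketch (the keyword values below are illustrative, not defaults):

        .. code:: python

            opts = Options(quotationmark="'")
            opts.update() is opts             # no keywords: the same instance is returned
            opts.update(tojson='to_json')     # new instance; unspecified members are kept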
140 | ''' 141 | if kw: 142 | return _to_options(self, kw) 143 | else: 144 | return self 145 | 146 | 147 | cdef Options DEFAULT_OPTIONS_OBJECT = Options() 148 | 149 | 150 | def _UnpickleOptions(*args): 151 | if args: 152 | return _to_options(None, dict(args)) 153 | else: 154 | return DEFAULT_OPTIONS_OBJECT 155 | 156 | 157 | cdef object _to_options(Options arg, dict kw): 158 | if arg is None: 159 | if not kw: 160 | return DEFAULT_OPTIONS_OBJECT 161 | else: 162 | return Options(**kw) 163 | elif not kw: 164 | return arg 165 | 166 | PyDict_SetDefault(kw, 'quotationmark', ( arg).quotationmark) 167 | PyDict_SetDefault(kw, 'tojson', ( arg).tojson) 168 | PyDict_SetDefault(kw, 'mappingtypes', ( arg).mappingtypes) 169 | 170 | return Options(**kw) 171 | -------------------------------------------------------------------------------- /src/_escape_dct.hpp: -------------------------------------------------------------------------------- 1 | const EscapeDct::Items EscapeDct::items = { 2 | { 6, '\\', 'u', '0', '0', '0', '0', 0 }, /* 0x00 '\x00' */ 3 | { 6, '\\', 'u', '0', '0', '0', '1', 0 }, /* 0x01 '\x01' */ 4 | { 6, '\\', 'u', '0', '0', '0', '2', 0 }, /* 0x02 '\x02' */ 5 | { 6, '\\', 'u', '0', '0', '0', '3', 0 }, /* 0x03 '\x03' */ 6 | { 6, '\\', 'u', '0', '0', '0', '4', 0 }, /* 0x04 '\x04' */ 7 | { 6, '\\', 'u', '0', '0', '0', '5', 0 }, /* 0x05 '\x05' */ 8 | { 6, '\\', 'u', '0', '0', '0', '6', 0 }, /* 0x06 '\x06' */ 9 | { 6, '\\', 'u', '0', '0', '0', '7', 0 }, /* 0x07 '\x07' */ 10 | { 2, '\\', 'b', 0, 0, 0, 0, 0 }, /* 0x08 '\x08' */ 11 | { 2, '\\', 't', 0, 0, 0, 0, 0 }, /* 0x09 '\t' */ 12 | { 2, '\\', 'n', 0, 0, 0, 0, 0 }, /* 0x0a '\n' */ 13 | { 6, '\\', 'u', '0', '0', '0', 'b', 0 }, /* 0x0b '\x0b' */ 14 | { 2, '\\', 'f', 0, 0, 0, 0, 0 }, /* 0x0c '\x0c' */ 15 | { 2, '\\', 'r', 0, 0, 0, 0, 0 }, /* 0x0d '\r' */ 16 | { 6, '\\', 'u', '0', '0', '0', 'e', 0 }, /* 0x0e '\x0e' */ 17 | { 6, '\\', 'u', '0', '0', '0', 'f', 0 }, /* 0x0f '\x0f' */ 18 | { 6, '\\', 'u', '0', '0', '1', '0', 0 }, /* 0x10 '\x10' */ 19 | { 6, '\\', 'u', '0', '0', '1', '1', 0 }, /* 0x11 '\x11' */ 20 | { 6, '\\', 'u', '0', '0', '1', '2', 0 }, /* 0x12 '\x12' */ 21 | { 6, '\\', 'u', '0', '0', '1', '3', 0 }, /* 0x13 '\x13' */ 22 | { 6, '\\', 'u', '0', '0', '1', '4', 0 }, /* 0x14 '\x14' */ 23 | { 6, '\\', 'u', '0', '0', '1', '5', 0 }, /* 0x15 '\x15' */ 24 | { 6, '\\', 'u', '0', '0', '1', '6', 0 }, /* 0x16 '\x16' */ 25 | { 6, '\\', 'u', '0', '0', '1', '7', 0 }, /* 0x17 '\x17' */ 26 | { 6, '\\', 'u', '0', '0', '1', '8', 0 }, /* 0x18 '\x18' */ 27 | { 6, '\\', 'u', '0', '0', '1', '9', 0 }, /* 0x19 '\x19' */ 28 | { 6, '\\', 'u', '0', '0', '1', 'a', 0 }, /* 0x1a '\x1a' */ 29 | { 6, '\\', 'u', '0', '0', '1', 'b', 0 }, /* 0x1b '\x1b' */ 30 | { 6, '\\', 'u', '0', '0', '1', 'c', 0 }, /* 0x1c '\x1c' */ 31 | { 6, '\\', 'u', '0', '0', '1', 'd', 0 }, /* 0x1d '\x1d' */ 32 | { 6, '\\', 'u', '0', '0', '1', 'e', 0 }, /* 0x1e '\x1e' */ 33 | { 6, '\\', 'u', '0', '0', '1', 'f', 0 }, /* 0x1f '\x1f' */ 34 | { 1, ' ', 0, 0, 0, 0, 0, 0 }, /* 0x20 ' ' */ 35 | { 1, '!', 0, 0, 0, 0, 0, 0 }, /* 0x21 '!' 
*/ 36 | { 2, '\\', '"', 0, 0, 0, 0, 0 }, /* 0x22 '"' */ 37 | { 1, '#', 0, 0, 0, 0, 0, 0 }, /* 0x23 '#' */ 38 | { 1, '$', 0, 0, 0, 0, 0, 0 }, /* 0x24 '$' */ 39 | { 1, '%', 0, 0, 0, 0, 0, 0 }, /* 0x25 '%' */ 40 | { 6, '\\', 'u', '0', '0', '2', '6', 0 }, /* 0x26 '&' */ 41 | { 6, '\\', 'u', '0', '0', '2', '7', 0 }, /* 0x27 "'" */ 42 | { 1, '(', 0, 0, 0, 0, 0, 0 }, /* 0x28 '(' */ 43 | { 1, ')', 0, 0, 0, 0, 0, 0 }, /* 0x29 ')' */ 44 | { 1, '*', 0, 0, 0, 0, 0, 0 }, /* 0x2a '*' */ 45 | { 1, '+', 0, 0, 0, 0, 0, 0 }, /* 0x2b '+' */ 46 | { 1, ',', 0, 0, 0, 0, 0, 0 }, /* 0x2c ',' */ 47 | { 1, '-', 0, 0, 0, 0, 0, 0 }, /* 0x2d '-' */ 48 | { 1, '.', 0, 0, 0, 0, 0, 0 }, /* 0x2e '.' */ 49 | { 1, '/', 0, 0, 0, 0, 0, 0 }, /* 0x2f '/' */ 50 | { 1, '0', 0, 0, 0, 0, 0, 0 }, /* 0x30 '0' */ 51 | { 1, '1', 0, 0, 0, 0, 0, 0 }, /* 0x31 '1' */ 52 | { 1, '2', 0, 0, 0, 0, 0, 0 }, /* 0x32 '2' */ 53 | { 1, '3', 0, 0, 0, 0, 0, 0 }, /* 0x33 '3' */ 54 | { 1, '4', 0, 0, 0, 0, 0, 0 }, /* 0x34 '4' */ 55 | { 1, '5', 0, 0, 0, 0, 0, 0 }, /* 0x35 '5' */ 56 | { 1, '6', 0, 0, 0, 0, 0, 0 }, /* 0x36 '6' */ 57 | { 1, '7', 0, 0, 0, 0, 0, 0 }, /* 0x37 '7' */ 58 | { 1, '8', 0, 0, 0, 0, 0, 0 }, /* 0x38 '8' */ 59 | { 1, '9', 0, 0, 0, 0, 0, 0 }, /* 0x39 '9' */ 60 | { 1, ':', 0, 0, 0, 0, 0, 0 }, /* 0x3a ':' */ 61 | { 1, ';', 0, 0, 0, 0, 0, 0 }, /* 0x3b ';' */ 62 | { 6, '\\', 'u', '0', '0', '3', 'c', 0 }, /* 0x3c '<' */ 63 | { 1, '=', 0, 0, 0, 0, 0, 0 }, /* 0x3d '=' */ 64 | { 6, '\\', 'u', '0', '0', '3', 'e', 0 }, /* 0x3e '>' */ 65 | { 1, '?', 0, 0, 0, 0, 0, 0 }, /* 0x3f '?' */ 66 | { 1, '@', 0, 0, 0, 0, 0, 0 }, /* 0x40 '@' */ 67 | { 1, 'A', 0, 0, 0, 0, 0, 0 }, /* 0x41 'A' */ 68 | { 1, 'B', 0, 0, 0, 0, 0, 0 }, /* 0x42 'B' */ 69 | { 1, 'C', 0, 0, 0, 0, 0, 0 }, /* 0x43 'C' */ 70 | { 1, 'D', 0, 0, 0, 0, 0, 0 }, /* 0x44 'D' */ 71 | { 1, 'E', 0, 0, 0, 0, 0, 0 }, /* 0x45 'E' */ 72 | { 1, 'F', 0, 0, 0, 0, 0, 0 }, /* 0x46 'F' */ 73 | { 1, 'G', 0, 0, 0, 0, 0, 0 }, /* 0x47 'G' */ 74 | { 1, 'H', 0, 0, 0, 0, 0, 0 }, /* 0x48 'H' */ 75 | { 1, 'I', 0, 0, 0, 0, 0, 0 }, /* 0x49 'I' */ 76 | { 1, 'J', 0, 0, 0, 0, 0, 0 }, /* 0x4a 'J' */ 77 | { 1, 'K', 0, 0, 0, 0, 0, 0 }, /* 0x4b 'K' */ 78 | { 1, 'L', 0, 0, 0, 0, 0, 0 }, /* 0x4c 'L' */ 79 | { 1, 'M', 0, 0, 0, 0, 0, 0 }, /* 0x4d 'M' */ 80 | { 1, 'N', 0, 0, 0, 0, 0, 0 }, /* 0x4e 'N' */ 81 | { 1, 'O', 0, 0, 0, 0, 0, 0 }, /* 0x4f 'O' */ 82 | { 1, 'P', 0, 0, 0, 0, 0, 0 }, /* 0x50 'P' */ 83 | { 1, 'Q', 0, 0, 0, 0, 0, 0 }, /* 0x51 'Q' */ 84 | { 1, 'R', 0, 0, 0, 0, 0, 0 }, /* 0x52 'R' */ 85 | { 1, 'S', 0, 0, 0, 0, 0, 0 }, /* 0x53 'S' */ 86 | { 1, 'T', 0, 0, 0, 0, 0, 0 }, /* 0x54 'T' */ 87 | { 1, 'U', 0, 0, 0, 0, 0, 0 }, /* 0x55 'U' */ 88 | { 1, 'V', 0, 0, 0, 0, 0, 0 }, /* 0x56 'V' */ 89 | { 1, 'W', 0, 0, 0, 0, 0, 0 }, /* 0x57 'W' */ 90 | { 1, 'X', 0, 0, 0, 0, 0, 0 }, /* 0x58 'X' */ 91 | { 1, 'Y', 0, 0, 0, 0, 0, 0 }, /* 0x59 'Y' */ 92 | { 1, 'Z', 0, 0, 0, 0, 0, 0 }, /* 0x5a 'Z' */ 93 | { 1, '[', 0, 0, 0, 0, 0, 0 }, /* 0x5b '[' */ 94 | { 2, '\\', '\\', 0, 0, 0, 0, 0 }, /* 0x5c '\\' */ 95 | { 1, ']', 0, 0, 0, 0, 0, 0 }, /* 0x5d ']' */ 96 | { 1, '^', 0, 0, 0, 0, 0, 0 }, /* 0x5e '^' */ 97 | { 1, '_', 0, 0, 0, 0, 0, 0 }, /* 0x5f '_' */ 98 | { 1, '`', 0, 0, 0, 0, 0, 0 }, /* 0x60 '`' */ 99 | { 1, 'a', 0, 0, 0, 0, 0, 0 }, /* 0x61 'a' */ 100 | { 1, 'b', 0, 0, 0, 0, 0, 0 }, /* 0x62 'b' */ 101 | { 1, 'c', 0, 0, 0, 0, 0, 0 }, /* 0x63 'c' */ 102 | { 1, 'd', 0, 0, 0, 0, 0, 0 }, /* 0x64 'd' */ 103 | { 1, 'e', 0, 0, 0, 0, 0, 0 }, /* 0x65 'e' */ 104 | { 1, 'f', 0, 0, 0, 0, 0, 0 }, /* 0x66 'f' */ 105 | { 1, 'g', 0, 0, 0, 0, 0, 0 }, /* 0x67 
'g' */ 106 | { 1, 'h', 0, 0, 0, 0, 0, 0 }, /* 0x68 'h' */ 107 | { 1, 'i', 0, 0, 0, 0, 0, 0 }, /* 0x69 'i' */ 108 | { 1, 'j', 0, 0, 0, 0, 0, 0 }, /* 0x6a 'j' */ 109 | { 1, 'k', 0, 0, 0, 0, 0, 0 }, /* 0x6b 'k' */ 110 | { 1, 'l', 0, 0, 0, 0, 0, 0 }, /* 0x6c 'l' */ 111 | { 1, 'm', 0, 0, 0, 0, 0, 0 }, /* 0x6d 'm' */ 112 | { 1, 'n', 0, 0, 0, 0, 0, 0 }, /* 0x6e 'n' */ 113 | { 1, 'o', 0, 0, 0, 0, 0, 0 }, /* 0x6f 'o' */ 114 | { 1, 'p', 0, 0, 0, 0, 0, 0 }, /* 0x70 'p' */ 115 | { 1, 'q', 0, 0, 0, 0, 0, 0 }, /* 0x71 'q' */ 116 | { 1, 'r', 0, 0, 0, 0, 0, 0 }, /* 0x72 'r' */ 117 | { 1, 's', 0, 0, 0, 0, 0, 0 }, /* 0x73 's' */ 118 | { 1, 't', 0, 0, 0, 0, 0, 0 }, /* 0x74 't' */ 119 | { 1, 'u', 0, 0, 0, 0, 0, 0 }, /* 0x75 'u' */ 120 | { 1, 'v', 0, 0, 0, 0, 0, 0 }, /* 0x76 'v' */ 121 | { 1, 'w', 0, 0, 0, 0, 0, 0 }, /* 0x77 'w' */ 122 | { 1, 'x', 0, 0, 0, 0, 0, 0 }, /* 0x78 'x' */ 123 | { 1, 'y', 0, 0, 0, 0, 0, 0 }, /* 0x79 'y' */ 124 | { 1, 'z', 0, 0, 0, 0, 0, 0 }, /* 0x7a 'z' */ 125 | { 1, '{', 0, 0, 0, 0, 0, 0 }, /* 0x7b '{' */ 126 | { 1, '|', 0, 0, 0, 0, 0, 0 }, /* 0x7c '|' */ 127 | { 1, '}', 0, 0, 0, 0, 0, 0 }, /* 0x7d '}' */ 128 | { 1, '~', 0, 0, 0, 0, 0, 0 }, /* 0x7e '~' */ 129 | { 6, '\\', 'u', '0', '0', '7', 'f', 0 }, /* 0x7f '\x7f' */ 130 | { 6, '\\', 'u', '0', '0', '8', '0', 0 }, /* 0x80 '\x80' */ 131 | { 6, '\\', 'u', '0', '0', '8', '1', 0 }, /* 0x81 '\x81' */ 132 | { 6, '\\', 'u', '0', '0', '8', '2', 0 }, /* 0x82 '\x82' */ 133 | { 6, '\\', 'u', '0', '0', '8', '3', 0 }, /* 0x83 '\x83' */ 134 | { 6, '\\', 'u', '0', '0', '8', '4', 0 }, /* 0x84 '\x84' */ 135 | { 6, '\\', 'u', '0', '0', '8', '5', 0 }, /* 0x85 '\x85' */ 136 | { 6, '\\', 'u', '0', '0', '8', '6', 0 }, /* 0x86 '\x86' */ 137 | { 6, '\\', 'u', '0', '0', '8', '7', 0 }, /* 0x87 '\x87' */ 138 | { 6, '\\', 'u', '0', '0', '8', '8', 0 }, /* 0x88 '\x88' */ 139 | { 6, '\\', 'u', '0', '0', '8', '9', 0 }, /* 0x89 '\x89' */ 140 | { 6, '\\', 'u', '0', '0', '8', 'a', 0 }, /* 0x8a '\x8a' */ 141 | { 6, '\\', 'u', '0', '0', '8', 'b', 0 }, /* 0x8b '\x8b' */ 142 | { 6, '\\', 'u', '0', '0', '8', 'c', 0 }, /* 0x8c '\x8c' */ 143 | { 6, '\\', 'u', '0', '0', '8', 'd', 0 }, /* 0x8d '\x8d' */ 144 | { 6, '\\', 'u', '0', '0', '8', 'e', 0 }, /* 0x8e '\x8e' */ 145 | { 6, '\\', 'u', '0', '0', '8', 'f', 0 }, /* 0x8f '\x8f' */ 146 | { 6, '\\', 'u', '0', '0', '9', '0', 0 }, /* 0x90 '\x90' */ 147 | { 6, '\\', 'u', '0', '0', '9', '1', 0 }, /* 0x91 '\x91' */ 148 | { 6, '\\', 'u', '0', '0', '9', '2', 0 }, /* 0x92 '\x92' */ 149 | { 6, '\\', 'u', '0', '0', '9', '3', 0 }, /* 0x93 '\x93' */ 150 | { 6, '\\', 'u', '0', '0', '9', '4', 0 }, /* 0x94 '\x94' */ 151 | { 6, '\\', 'u', '0', '0', '9', '5', 0 }, /* 0x95 '\x95' */ 152 | { 6, '\\', 'u', '0', '0', '9', '6', 0 }, /* 0x96 '\x96' */ 153 | { 6, '\\', 'u', '0', '0', '9', '7', 0 }, /* 0x97 '\x97' */ 154 | { 6, '\\', 'u', '0', '0', '9', '8', 0 }, /* 0x98 '\x98' */ 155 | { 6, '\\', 'u', '0', '0', '9', '9', 0 }, /* 0x99 '\x99' */ 156 | { 6, '\\', 'u', '0', '0', '9', 'a', 0 }, /* 0x9a '\x9a' */ 157 | { 6, '\\', 'u', '0', '0', '9', 'b', 0 }, /* 0x9b '\x9b' */ 158 | { 6, '\\', 'u', '0', '0', '9', 'c', 0 }, /* 0x9c '\x9c' */ 159 | { 6, '\\', 'u', '0', '0', '9', 'd', 0 }, /* 0x9d '\x9d' */ 160 | { 6, '\\', 'u', '0', '0', '9', 'e', 0 }, /* 0x9e '\x9e' */ 161 | { 6, '\\', 'u', '0', '0', '9', 'f', 0 }, /* 0x9f '\x9f' */ 162 | { 6, '\\', 'u', '0', '0', 'a', '0', 0 }, /* 0xa0 '\xa0' */ 163 | { 6, '\\', 'u', '0', '0', 'a', '1', 0 }, /* 0xa1 '¡' */ 164 | { 6, '\\', 'u', '0', '0', 'a', '2', 0 }, /* 0xa2 '¢' */ 165 | { 6, '\\', 'u', '0', '0', 'a', 
'3', 0 }, /* 0xa3 '£' */ 166 | { 6, '\\', 'u', '0', '0', 'a', '4', 0 }, /* 0xa4 '¤' */ 167 | { 6, '\\', 'u', '0', '0', 'a', '5', 0 }, /* 0xa5 '¥' */ 168 | { 6, '\\', 'u', '0', '0', 'a', '6', 0 }, /* 0xa6 '¦' */ 169 | { 6, '\\', 'u', '0', '0', 'a', '7', 0 }, /* 0xa7 '§' */ 170 | { 6, '\\', 'u', '0', '0', 'a', '8', 0 }, /* 0xa8 '¨' */ 171 | { 6, '\\', 'u', '0', '0', 'a', '9', 0 }, /* 0xa9 '©' */ 172 | { 6, '\\', 'u', '0', '0', 'a', 'a', 0 }, /* 0xaa 'ª' */ 173 | { 6, '\\', 'u', '0', '0', 'a', 'b', 0 }, /* 0xab '«' */ 174 | { 6, '\\', 'u', '0', '0', 'a', 'c', 0 }, /* 0xac '¬' */ 175 | { 6, '\\', 'u', '0', '0', 'a', 'd', 0 }, /* 0xad '\xad' */ 176 | { 6, '\\', 'u', '0', '0', 'a', 'e', 0 }, /* 0xae '®' */ 177 | { 6, '\\', 'u', '0', '0', 'a', 'f', 0 }, /* 0xaf '¯' */ 178 | { 6, '\\', 'u', '0', '0', 'b', '0', 0 }, /* 0xb0 '°' */ 179 | { 6, '\\', 'u', '0', '0', 'b', '1', 0 }, /* 0xb1 '±' */ 180 | { 6, '\\', 'u', '0', '0', 'b', '2', 0 }, /* 0xb2 '²' */ 181 | { 6, '\\', 'u', '0', '0', 'b', '3', 0 }, /* 0xb3 '³' */ 182 | { 6, '\\', 'u', '0', '0', 'b', '4', 0 }, /* 0xb4 '´' */ 183 | { 6, '\\', 'u', '0', '0', 'b', '5', 0 }, /* 0xb5 'µ' */ 184 | { 6, '\\', 'u', '0', '0', 'b', '6', 0 }, /* 0xb6 '¶' */ 185 | { 6, '\\', 'u', '0', '0', 'b', '7', 0 }, /* 0xb7 '·' */ 186 | { 6, '\\', 'u', '0', '0', 'b', '8', 0 }, /* 0xb8 '¸' */ 187 | { 6, '\\', 'u', '0', '0', 'b', '9', 0 }, /* 0xb9 '¹' */ 188 | { 6, '\\', 'u', '0', '0', 'b', 'a', 0 }, /* 0xba 'º' */ 189 | { 6, '\\', 'u', '0', '0', 'b', 'b', 0 }, /* 0xbb '»' */ 190 | { 6, '\\', 'u', '0', '0', 'b', 'c', 0 }, /* 0xbc '¼' */ 191 | { 6, '\\', 'u', '0', '0', 'b', 'd', 0 }, /* 0xbd '½' */ 192 | { 6, '\\', 'u', '0', '0', 'b', 'e', 0 }, /* 0xbe '¾' */ 193 | { 6, '\\', 'u', '0', '0', 'b', 'f', 0 }, /* 0xbf '¿' */ 194 | { 6, '\\', 'u', '0', '0', 'c', '0', 0 }, /* 0xc0 'À' */ 195 | { 6, '\\', 'u', '0', '0', 'c', '1', 0 }, /* 0xc1 'Á' */ 196 | { 6, '\\', 'u', '0', '0', 'c', '2', 0 }, /* 0xc2 'Â' */ 197 | { 6, '\\', 'u', '0', '0', 'c', '3', 0 }, /* 0xc3 'Ã' */ 198 | { 6, '\\', 'u', '0', '0', 'c', '4', 0 }, /* 0xc4 'Ä' */ 199 | { 6, '\\', 'u', '0', '0', 'c', '5', 0 }, /* 0xc5 'Å' */ 200 | { 6, '\\', 'u', '0', '0', 'c', '6', 0 }, /* 0xc6 'Æ' */ 201 | { 6, '\\', 'u', '0', '0', 'c', '7', 0 }, /* 0xc7 'Ç' */ 202 | { 6, '\\', 'u', '0', '0', 'c', '8', 0 }, /* 0xc8 'È' */ 203 | { 6, '\\', 'u', '0', '0', 'c', '9', 0 }, /* 0xc9 'É' */ 204 | { 6, '\\', 'u', '0', '0', 'c', 'a', 0 }, /* 0xca 'Ê' */ 205 | { 6, '\\', 'u', '0', '0', 'c', 'b', 0 }, /* 0xcb 'Ë' */ 206 | { 6, '\\', 'u', '0', '0', 'c', 'c', 0 }, /* 0xcc 'Ì' */ 207 | { 6, '\\', 'u', '0', '0', 'c', 'd', 0 }, /* 0xcd 'Í' */ 208 | { 6, '\\', 'u', '0', '0', 'c', 'e', 0 }, /* 0xce 'Î' */ 209 | { 6, '\\', 'u', '0', '0', 'c', 'f', 0 }, /* 0xcf 'Ï' */ 210 | { 6, '\\', 'u', '0', '0', 'd', '0', 0 }, /* 0xd0 'Ð' */ 211 | { 6, '\\', 'u', '0', '0', 'd', '1', 0 }, /* 0xd1 'Ñ' */ 212 | { 6, '\\', 'u', '0', '0', 'd', '2', 0 }, /* 0xd2 'Ò' */ 213 | { 6, '\\', 'u', '0', '0', 'd', '3', 0 }, /* 0xd3 'Ó' */ 214 | { 6, '\\', 'u', '0', '0', 'd', '4', 0 }, /* 0xd4 'Ô' */ 215 | { 6, '\\', 'u', '0', '0', 'd', '5', 0 }, /* 0xd5 'Õ' */ 216 | { 6, '\\', 'u', '0', '0', 'd', '6', 0 }, /* 0xd6 'Ö' */ 217 | { 6, '\\', 'u', '0', '0', 'd', '7', 0 }, /* 0xd7 '×' */ 218 | { 6, '\\', 'u', '0', '0', 'd', '8', 0 }, /* 0xd8 'Ø' */ 219 | { 6, '\\', 'u', '0', '0', 'd', '9', 0 }, /* 0xd9 'Ù' */ 220 | { 6, '\\', 'u', '0', '0', 'd', 'a', 0 }, /* 0xda 'Ú' */ 221 | { 6, '\\', 'u', '0', '0', 'd', 'b', 0 }, /* 0xdb 'Û' */ 222 | { 6, '\\', 'u', '0', '0', 'd', 'c', 0 }, /* 0xdc 
'Ü' */ 223 | { 6, '\\', 'u', '0', '0', 'd', 'd', 0 }, /* 0xdd 'Ý' */ 224 | { 6, '\\', 'u', '0', '0', 'd', 'e', 0 }, /* 0xde 'Þ' */ 225 | { 6, '\\', 'u', '0', '0', 'd', 'f', 0 }, /* 0xdf 'ß' */ 226 | { 6, '\\', 'u', '0', '0', 'e', '0', 0 }, /* 0xe0 'à' */ 227 | { 6, '\\', 'u', '0', '0', 'e', '1', 0 }, /* 0xe1 'á' */ 228 | { 6, '\\', 'u', '0', '0', 'e', '2', 0 }, /* 0xe2 'â' */ 229 | { 6, '\\', 'u', '0', '0', 'e', '3', 0 }, /* 0xe3 'ã' */ 230 | { 6, '\\', 'u', '0', '0', 'e', '4', 0 }, /* 0xe4 'ä' */ 231 | { 6, '\\', 'u', '0', '0', 'e', '5', 0 }, /* 0xe5 'å' */ 232 | { 6, '\\', 'u', '0', '0', 'e', '6', 0 }, /* 0xe6 'æ' */ 233 | { 6, '\\', 'u', '0', '0', 'e', '7', 0 }, /* 0xe7 'ç' */ 234 | { 6, '\\', 'u', '0', '0', 'e', '8', 0 }, /* 0xe8 'è' */ 235 | { 6, '\\', 'u', '0', '0', 'e', '9', 0 }, /* 0xe9 'é' */ 236 | { 6, '\\', 'u', '0', '0', 'e', 'a', 0 }, /* 0xea 'ê' */ 237 | { 6, '\\', 'u', '0', '0', 'e', 'b', 0 }, /* 0xeb 'ë' */ 238 | { 6, '\\', 'u', '0', '0', 'e', 'c', 0 }, /* 0xec 'ì' */ 239 | { 6, '\\', 'u', '0', '0', 'e', 'd', 0 }, /* 0xed 'í' */ 240 | { 6, '\\', 'u', '0', '0', 'e', 'e', 0 }, /* 0xee 'î' */ 241 | { 6, '\\', 'u', '0', '0', 'e', 'f', 0 }, /* 0xef 'ï' */ 242 | { 6, '\\', 'u', '0', '0', 'f', '0', 0 }, /* 0xf0 'ð' */ 243 | { 6, '\\', 'u', '0', '0', 'f', '1', 0 }, /* 0xf1 'ñ' */ 244 | { 6, '\\', 'u', '0', '0', 'f', '2', 0 }, /* 0xf2 'ò' */ 245 | { 6, '\\', 'u', '0', '0', 'f', '3', 0 }, /* 0xf3 'ó' */ 246 | { 6, '\\', 'u', '0', '0', 'f', '4', 0 }, /* 0xf4 'ô' */ 247 | { 6, '\\', 'u', '0', '0', 'f', '5', 0 }, /* 0xf5 'õ' */ 248 | { 6, '\\', 'u', '0', '0', 'f', '6', 0 }, /* 0xf6 'ö' */ 249 | { 6, '\\', 'u', '0', '0', 'f', '7', 0 }, /* 0xf7 '÷' */ 250 | { 6, '\\', 'u', '0', '0', 'f', '8', 0 }, /* 0xf8 'ø' */ 251 | { 6, '\\', 'u', '0', '0', 'f', '9', 0 }, /* 0xf9 'ù' */ 252 | { 6, '\\', 'u', '0', '0', 'f', 'a', 0 }, /* 0xfa 'ú' */ 253 | { 6, '\\', 'u', '0', '0', 'f', 'b', 0 }, /* 0xfb 'û' */ 254 | { 6, '\\', 'u', '0', '0', 'f', 'c', 0 }, /* 0xfc 'ü' */ 255 | { 6, '\\', 'u', '0', '0', 'f', 'd', 0 }, /* 0xfd 'ý' */ 256 | { 6, '\\', 'u', '0', '0', 'f', 'e', 0 }, /* 0xfe 'þ' */ 257 | { 6, '\\', 'u', '0', '0', 'f', 'f', 0 }, /* 0xff 'ÿ' */ 258 | }; 259 | const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x500000c4ffffffff); 260 | const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x8000000010000000); 261 | -------------------------------------------------------------------------------- /src/_exceptions.pyx: -------------------------------------------------------------------------------- 1 | @auto_pickle(False) 2 | cdef class Json5Exception(Exception): 3 | ''' 4 | Base class of any exception thrown by PyJSON5. 5 | ''' 6 | def __init__(self, message=None, *args): 7 | super().__init__(message, *args) 8 | 9 | @property 10 | def message(self): 11 | '''Human readable error description''' 12 | return self.args[0] 13 | -------------------------------------------------------------------------------- /src/_exceptions_decoder.pyx: -------------------------------------------------------------------------------- 1 | @auto_pickle(False) 2 | cdef class Json5DecoderException(Json5Exception): 3 | ''' 4 | Base class of any exception thrown by the parser. 
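    A catching sketch (assumes the package is imported as ``pyjson5``):

    .. code:: python

        try:
            pyjson5.decode('[1, 2,')     # unclosed array
        except pyjson5.Json5DecoderException as exc:
            print(exc.message, exc.result)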
5 | ''' 6 | def __init__(self, message=None, result=None, *args): 7 | super().__init__(message, result, *args) 8 | 9 | @property 10 | def result(self): 11 | '''Deserialized data up until now.''' 12 | return self.args[1] 13 | 14 | 15 | @final 16 | @auto_pickle(False) 17 | cdef class Json5NestingTooDeep(Json5DecoderException): 18 | ''' 19 | The maximum nesting level on the input data was exceeded. 20 | ''' 21 | 22 | 23 | @final 24 | @auto_pickle(False) 25 | cdef class Json5EOF(Json5DecoderException): 26 | ''' 27 | The input ended prematurely. 28 | ''' 29 | 30 | 31 | @final 32 | @auto_pickle(False) 33 | cdef class Json5IllegalCharacter(Json5DecoderException): 34 | ''' 35 | An unexpected character was encountered. 36 | ''' 37 | def __init__(self, message=None, result=None, character=None, *args): 38 | super().__init__(message, result, character, *args) 39 | 40 | @property 41 | def character(self): 42 | ''' 43 | Illegal character. 44 | ''' 45 | return self.args[2] 46 | 47 | 48 | @final 49 | @auto_pickle(False) 50 | cdef class Json5ExtraData(Json5DecoderException): 51 | ''' 52 | The input contained extranous data. 53 | ''' 54 | def __init__(self, message=None, result=None, character=None, *args): 55 | super().__init__(message, result, character, *args) 56 | 57 | @property 58 | def character(self): 59 | ''' 60 | Extranous character. 61 | ''' 62 | return self.args[2] 63 | 64 | 65 | @final 66 | @auto_pickle(False) 67 | cdef class Json5IllegalType(Json5DecoderException): 68 | ''' 69 | The user supplied callback function returned illegal data. 70 | ''' 71 | def __init__(self, message=None, result=None, value=None, *args): 72 | super().__init__(message, result, value, *args) 73 | 74 | @property 75 | def value(self): 76 | ''' 77 | Value that caused the problem. 78 | ''' 79 | return self.args[2] 80 | 81 | 82 | @final 83 | @auto_pickle(False) 84 | cdef class _DecoderException(Exception): 85 | cdef object cls 86 | cdef object msg 87 | cdef object extra 88 | cdef object result 89 | 90 | def __cinit__(self, cls, msg, extra, result): 91 | self.cls = cls 92 | self.msg = msg 93 | self.extra = extra 94 | self.result = result 95 | -------------------------------------------------------------------------------- /src/_exceptions_encoder.pyx: -------------------------------------------------------------------------------- 1 | @auto_pickle(False) 2 | cdef class Json5EncoderException(Json5Exception): 3 | ''' 4 | Base class of any exception thrown by the serializer. 5 | ''' 6 | 7 | 8 | @auto_pickle(False) 9 | cdef class Json5UnstringifiableType(Json5EncoderException): 10 | ''' 11 | The encoder was not able to stringify the input, or it was told not to by the supplied ``Options``. 12 | ''' 13 | def __init__(self, message=None, unstringifiable=None): 14 | super().__init__(message, unstringifiable) 15 | 16 | @property 17 | def unstringifiable(self): 18 | ''' 19 | The value that caused the problem. 20 | ''' 21 | return self.args[1] 22 | -------------------------------------------------------------------------------- /src/_exports.pyx: -------------------------------------------------------------------------------- 1 | global DEFAULT_MAX_NESTING_LEVEL, __all__, __author__, __doc__, __license__, __version__ 2 | 3 | DEFAULT_MAX_NESTING_LEVEL = 32 4 | ''' 5 | Maximum nesting level of data to decode if no ``maxdepth`` argument is specified. 6 | ''' 7 | 8 | __version__ = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, VERSION, VERSION_LENGTH) 9 | ''' 10 | Current library version. 
11 | ''' 12 | 13 | 14 | def decode(object data, object maxdepth=None, object some=False): 15 | ''' 16 | Decodes JSON5 serialized data from an :class:`str` object. 17 | 18 | .. code:: python 19 | 20 | decode('["Hello", "world!"]') == ['Hello', 'world!'] 21 | 22 | Parameters 23 | ---------- 24 | data : str 25 | JSON5 serialized data 26 | maxdepth : Optional[int] 27 | Maximum nesting level before are the parsing is aborted. 28 | 29 | * If ``None`` is supplied, then the value of the global variable \ 30 | ``DEFAULT_MAX_NESTING_LEVEL`` is used instead. 31 | * If the value is ``0``, then only literals are accepted, e.g. ``false``, \ 32 | ``47.11``, or ``"string"``. 33 | * If the value is negative, then the any nesting level is allowed until \ 34 | Python's recursion limit is hit. 35 | some : bool 36 | Allow trailing junk. 37 | 38 | Raises 39 | ------ 40 | Json5DecoderException 41 | An exception occured while decoding. 42 | TypeError 43 | An argument had a wrong type. 44 | 45 | Returns 46 | ------- 47 | object 48 | Deserialized data. 49 | ''' 50 | if maxdepth is None: 51 | maxdepth = DEFAULT_MAX_NESTING_LEVEL 52 | 53 | if isinstance(data, unicode): 54 | return _decode_unicode(data, maxdepth, bool(some)) 55 | else: 56 | raise TypeError(f'type(data) == {type(data)!r} not supported') 57 | 58 | 59 | def decode_latin1(object data, object maxdepth=None, object some=False): 60 | ''' 61 | Decodes JSON5 serialized data from a :class:`bytes` object. 62 | 63 | .. code:: python 64 | 65 | decode_latin1(b'["Hello", "world!"]') == ['Hello', 'world!'] 66 | 67 | Parameters 68 | ---------- 69 | data : bytes 70 | JSON5 serialized data, encoded as Latin-1 or ASCII. 71 | maxdepth : Optional[int] 72 | see :func:`decode(…) ` 73 | some : bool 74 | see :func:`decode(…) ` 75 | 76 | Raises 77 | ------ 78 | Json5DecoderException 79 | An exception occured while decoding. 80 | TypeError 81 | An argument had a wrong type. 82 | 83 | Returns 84 | ------- 85 | object 86 | see :func:`decode(…) ` 87 | ''' 88 | return decode_buffer(data, maxdepth, bool(some), 1) 89 | 90 | 91 | def decode_utf8(object data, object maxdepth=None, object some=False): 92 | ''' 93 | Decodes JSON5 serialized data from a :class:`bytes` object. 94 | 95 | .. code:: python 96 | 97 | decode_utf8(b'["H\\xe2\\x82\\xacllo", "w\\xc3\\xb6rld!"]') == ['H€llo', 'wörld!'] 98 | 99 | Parameters 100 | ---------- 101 | data : bytes 102 | JSON5 serialized data, encoded as UTF-8 or ASCII. 103 | maxdepth : Optional[int] 104 | see :func:`decode(…) ` 105 | some : bool 106 | see :func:`decode(…) ` 107 | 108 | Raises 109 | ------ 110 | Json5DecoderException 111 | An exception occured while decoding. 112 | TypeError 113 | An argument had a wrong type. 114 | 115 | Returns 116 | ------- 117 | object 118 | see :func:`decode(…) ` 119 | ''' 120 | return decode_buffer(data, maxdepth, bool(some), 0) 121 | 122 | 123 | def decode_buffer(object obj, object maxdepth=None, object some=False, 124 | object wordlength=None): 125 | ''' 126 | Decodes JSON5 serialized data from an object that supports the buffer protocol, 127 | e.g. :class:`bytearray`. 128 | 129 | .. code:: python 130 | 131 | obj = memoryview(b'["Hello", "world!"]') 132 | 133 | decode_buffer(obj) == ['Hello', 'world!'] 134 | 135 | Parameters 136 | ---------- 137 | data : object 138 | JSON5 serialized data. 139 | The argument must support Python's buffer protocol, i.e. 140 | :class:`memoryview(…) ` must work. The buffer must be contigious. 
141 | maxdepth : Optional[int] 142 | see :func:`decode(…) ` 143 | some : bool 144 | see :func:`decode(…) ` 145 | wordlength : Optional[int] 146 | Must be 0, 1, 2, 4 to denote UTF-8, UCS1, USC2 or USC4 data, resp. 147 | Surrogates are not supported. Decode the data to an :class:`str` if need be. 148 | If ``None`` is supplied, then the buffer's ``itemsize`` is used. 149 | 150 | Raises 151 | ------ 152 | Json5DecoderException 153 | An exception occured while decoding. 154 | TypeError 155 | An argument had a wrong type. 156 | ValueError 157 | The value of ``wordlength`` was invalid. 158 | 159 | Returns 160 | ------- 161 | object 162 | see :func:`decode(…) ` 163 | ''' 164 | cdef Py_buffer view 165 | 166 | if maxdepth is None: 167 | maxdepth = DEFAULT_MAX_NESTING_LEVEL 168 | 169 | PyObject_GetBuffer(obj, &view, PyBUF_CONTIG_RO) 170 | try: 171 | if wordlength is None: 172 | wordlength = view.itemsize 173 | return _decode_buffer(view, wordlength, maxdepth, bool(some)) 174 | finally: 175 | PyBuffer_Release(&view) 176 | 177 | 178 | def decode_callback(object cb, object maxdepth=None, object some=False, 179 | object args=None): 180 | ''' 181 | Decodes JSON5 serialized data by invoking a callback. 182 | 183 | .. code:: python 184 | 185 | cb = iter('["Hello","world!"]').__next__ 186 | 187 | decode_callback(cb) == ['Hello', 'world!'] 188 | 189 | Parameters 190 | ---------- 191 | cb : Callable[Any, Union[str|bytes|bytearray|int|None]] 192 | A function to get values from. 193 | The functions is called like ``cb(*args)``, and it returns: 194 | 195 | * **str, bytes, bytearray:** \ 196 | ``len(…) == 0`` denotes exhausted input. \ 197 | ``len(…) == 1`` is the next character. 198 | * **int:** \ 199 | ``< 0`` denotes exhausted input. \ 200 | ``>= 0`` is the ordinal value of the next character. 201 | * **None:** \ 202 | input exhausted 203 | maxdepth : Optional[int] 204 | see :func:`decode(…) ` 205 | some : bool 206 | see :func:`decode(…) ` 207 | args : Optional[Iterable[Any]] 208 | Arguments to call ``cb`` with. 209 | 210 | Raises 211 | ------ 212 | Json5DecoderException 213 | An exception occured while decoding. 214 | TypeError 215 | An argument had a wrong type. 216 | 217 | Returns 218 | ------- 219 | object 220 | see :func:`decode(…) ` 221 | ''' 222 | if not callable(cb): 223 | raise TypeError(f'type(cb)=={type(cb)!r} is not callable') 224 | 225 | if maxdepth is None: 226 | maxdepth = DEFAULT_MAX_NESTING_LEVEL 227 | 228 | if args: 229 | args = tuple(args) 230 | else: 231 | args = () 232 | 233 | return _decode_callback(cb, args, maxdepth, bool(some)) 234 | 235 | 236 | def decode_io(object fp, object maxdepth=None, object some=True): 237 | ''' 238 | Decodes JSON5 serialized data from a file-like object. 239 | 240 | .. code:: python 241 | 242 | fp = io.StringIO(""" 243 | ['Hello', /* TODO look into specs whom to greet */] 244 | 'Wolrd' // FIXME: look for typos 245 | """) 246 | 247 | decode_io(fp) == ['Hello'] 248 | decode_io(fp) == 'Wolrd' 249 | 250 | fp.seek(0) 251 | 252 | decode_io(fp, some=False) 253 | # raises Json5ExtraData('Extra data U+0027 near 56', ['Hello'], "'") 254 | 255 | Parameters 256 | ---------- 257 | fp : IOBase 258 | A file-like object to parse from. 259 | maxdepth : Optional[int] = None 260 | see :func:`decode(…) ` 261 | some : bool 262 | see :func:`decode(…) ` 263 | 264 | Raises 265 | ------ 266 | Json5DecoderException 267 | An exception occured while decoding. 268 | TypeError 269 | An argument had a wrong type. 
270 | 271 | Returns 272 | ------- 273 | object 274 | see :func:`decode(…) ` 275 | ''' 276 | if not isinstance(fp, IOBase): 277 | raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible') 278 | elif not fp.readable(): 279 | raise TypeError(f'fp is not readable') 280 | elif fp.closed: 281 | raise TypeError(f'fp is closed') 282 | 283 | if maxdepth is None: 284 | maxdepth = DEFAULT_MAX_NESTING_LEVEL 285 | 286 | return _decode_callback(fp.read, (1,), maxdepth, bool(some)) 287 | 288 | 289 | def encode(object data, *, options=None, **options_kw): 290 | ''' 291 | Serializes a Python object as a JSON5 compatible string. 292 | 293 | .. code:: python 294 | 295 | encode(['Hello', 'world!']) == '["Hello","world!"]' 296 | 297 | Parameters 298 | ---------- 299 | data : object 300 | Python object to serialize. 301 | options : Optional[Options] 302 | Extra options for the encoder. 303 | If ``options`` **and** ``options_kw`` are specified, then ``options.update(**options_kw)`` is used. 304 | options_kw 305 | See Option's arguments. 306 | 307 | Raises 308 | ------ 309 | Json5EncoderException 310 | An exception occured while encoding. 311 | TypeError 312 | An argument had a wrong type. 313 | 314 | Returns 315 | ------- 316 | str 317 | Unless ``float('inf')`` or ``float('nan')`` is encountered, the result 318 | will be valid JSON data (as of RFC8259). 319 | 320 | The result is always ASCII. All characters outside of the ASCII range 321 | are escaped. 322 | 323 | The result safe to use in an HTML template, e.g. 324 | ``show message``. 325 | Apostrophes ``"'"`` are encoded as ``"\\u0027"``, less-than, 326 | greater-than, and ampersand likewise. 327 | ''' 328 | cdef void *temp 329 | cdef object result 330 | cdef Py_ssize_t start = ( 331 | &( NULL).data[0] 332 | ) 333 | cdef Py_ssize_t length 334 | cdef object opts = _to_options(options, options_kw) 335 | cdef WriterReallocatable writer = WriterReallocatable( 336 | Writer( 337 | _WriterReallocatable_reserve, 338 | _WriterReallocatable_append_c, 339 | _WriterReallocatable_append_s, 340 | opts, 341 | ), 342 | start, 0, NULL, 343 | ) 344 | 345 | try: 346 | if expect(_encode(writer.base, data) < 0, False): 347 | exception_thrown() 348 | 349 | length = writer.position - start 350 | if length <= 0: 351 | # impossible 352 | return u'' 353 | 354 | temp = ObjectRealloc(writer.obj, writer.position + 1) 355 | if temp is not NULL: 356 | writer.obj = temp 357 | ( writer.obj)[writer.position] = 0 358 | 359 | result = ObjectInit( writer.obj, unicode) 360 | writer.obj = NULL 361 | 362 | ( result).length = length 363 | reset_hash( result) 364 | reset_wstr( result) 365 | ( result).state.interned = SSTATE_NOT_INTERNED 366 | ( result).state.kind = PyUnicode_1BYTE_KIND 367 | ( result).state.compact = True 368 | set_ready( result) 369 | ( result).state.ascii = True 370 | 371 | return result 372 | finally: 373 | if writer.obj is not NULL: 374 | ObjectFree(writer.obj) 375 | 376 | 377 | def encode_bytes(object data, *, options=None, **options_kw): 378 | ''' 379 | Serializes a Python object to a JSON5 compatible bytes string. 380 | 381 | .. code:: python 382 | 383 | encode_bytes(['Hello', 'world!']) == b'["Hello","world!"]' 384 | 385 | Parameters 386 | ---------- 387 | data : object 388 | see :func:`encode(…) ` 389 | options : Optional[Options] 390 | see :func:`encode(…) ` 391 | options_kw 392 | see :func:`encode(…) ` 393 | 394 | Raises 395 | ------ 396 | Json5EncoderException 397 | An exception occured while encoding. 398 | TypeError 399 | An argument had a wrong type. 
400 | 401 | Returns 402 | ------- 403 | bytes 404 | see :func:`encode(…) ` 405 | ''' 406 | cdef void *temp 407 | cdef object result 408 | cdef Py_ssize_t start = ( 409 | &( NULL).ob_sval[0] 410 | ) 411 | cdef Py_ssize_t length 412 | cdef object opts = _to_options(options, options_kw) 413 | cdef WriterReallocatable writer = WriterReallocatable( 414 | Writer( 415 | _WriterReallocatable_reserve, 416 | _WriterReallocatable_append_c, 417 | _WriterReallocatable_append_s, 418 | opts, 419 | ), 420 | start, 0, NULL, 421 | ) 422 | 423 | try: 424 | if expect(_encode(writer.base, data) < 0, False): 425 | exception_thrown() 426 | 427 | length = writer.position - start 428 | if length <= 0: 429 | # impossible 430 | return b'' 431 | 432 | temp = ObjectRealloc(writer.obj, writer.position + 1) 433 | if temp is not NULL: 434 | writer.obj = temp 435 | ( writer.obj)[writer.position] = 0 436 | 437 | result = ObjectInitVar( 438 | ( writer.obj), bytes, length, 439 | ) 440 | writer.obj = NULL 441 | 442 | reset_hash( result) 443 | 444 | return result 445 | finally: 446 | if writer.obj is not NULL: 447 | ObjectFree(writer.obj) 448 | 449 | 450 | def encode_callback(object data, object cb, object supply_bytes=False, *, 451 | options=None, **options_kw): 452 | ''' 453 | Serializes a Python object into a callback function. 454 | 455 | The callback function ``cb`` gets called with single characters and strings 456 | until the input ``data`` is fully serialized. 457 | 458 | .. code:: python 459 | 460 | encode_callback(['Hello', 'world!'], print) 461 | #prints: 462 | # [ 463 | # " 464 | # Hello 465 | # " 466 | # , 467 | # " 468 | # world! 469 | # " 470 | " ] 471 | 472 | Parameters 473 | ---------- 474 | data : object 475 | see :func:`encode(…) ` 476 | cb : Callable[[Union[bytes|str]], None] 477 | A callback function. 478 | Depending on the truthyness of ``supply_bytes`` either :class:`bytes` or 479 | :class:`str` is supplied. 480 | supply_bytes : bool 481 | Call ``cb(…)`` with a :class:`bytes` argument if true, 482 | otherwise :class:`str`. 483 | options : Optional[Options] 484 | see :func:`encode(…) ` 485 | options_kw 486 | see :func:`encode(…) ` 487 | 488 | Raises 489 | ------ 490 | Json5EncoderException 491 | An exception occured while encoding. 492 | TypeError 493 | An argument had a wrong type. 494 | 495 | Returns 496 | ------- 497 | Callable[[Union[bytes|str]], None] 498 | The supplied argument ``cb``. 499 | ''' 500 | cdef int (*encoder)(object obj, object cb, object options) except -1 501 | cdef Options opts = _to_options(options, options_kw) 502 | 503 | if supply_bytes: 504 | encoder = _encode_callback_bytes 505 | else: 506 | encoder = _encode_callback_str 507 | 508 | encoder(data, cb, options=opts) 509 | 510 | return cb 511 | 512 | 513 | def encode_io(object data, object fp, object supply_bytes=True, *, 514 | options=None, **options_kw): 515 | ''' 516 | Serializes a Python object into a file-object. 517 | 518 | The return value of :meth:`fp.write(…) ` is not checked. 519 | If ``fp`` is unbuffered, then the result will be garbage! 520 | 521 | Parameters 522 | ---------- 523 | data : object 524 | see :func:`encode(…) ` 525 | fp : IOBase 526 | A file-like object to serialize into. 527 | supply_bytes : bool 528 | Call :meth:`fp.write(…) ` with a :class:`bytes` argument if true, 529 | otherwise :class:`str`. 530 | options : Optional[Options] 531 | see :func:`encode(…) ` 532 | options_kw 533 | see :func:`encode(…) ` 534 | 535 | Raises 536 | ------ 537 | Json5EncoderException 538 | An exception occured while encoding. 
539 | TypeError 540 | An argument had a wrong type. 541 | 542 | Returns 543 | ------- 544 | IOBase 545 | The supplied argument ``fp``. 546 | ''' 547 | cdef int (*encoder)(object obj, object cb, object options) except -1 548 | cdef object opts = _to_options(options, options_kw) 549 | 550 | if not isinstance(fp, IOBase): 551 | raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible') 552 | elif not fp.writable(): 553 | raise TypeError(f'fp is not writable') 554 | elif fp.closed: 555 | raise TypeError(f'fp is closed') 556 | 557 | if supply_bytes: 558 | encoder = _encode_callback_bytes 559 | else: 560 | encoder = _encode_callback_str 561 | 562 | encoder(data, fp.write, options=opts) 563 | 564 | return fp 565 | 566 | 567 | def encode_noop(object data, *, options=None, **options_kw): 568 | ''' 569 | Test if the input is serializable. 570 | 571 | Most likely you want to serialize ``data`` directly, and catch exceptions 572 | instead of using this function! 573 | 574 | .. code:: python 575 | 576 | encode_noop({47: 11}) == True 577 | encode_noop({47: object()}) == False 578 | 579 | Parameters 580 | ---------- 581 | data : object 582 | see :func:`encode(…) ` 583 | options : Optional[Options] 584 | see :func:`encode(…) ` 585 | options_kw 586 | see :func:`encode(…) ` 587 | 588 | Returns 589 | ------- 590 | bool 591 | ``True`` iff ``data`` is serializable. 592 | ''' 593 | cdef object opts = _to_options(options, options_kw) 594 | cdef Writer writer = Writer( 595 | _WriterNoop_reserve, 596 | _WriterNoop_append_c, 597 | _WriterNoop_append_s, 598 | opts, 599 | ) 600 | 601 | if expect(_encode(writer, data) < 0, False): 602 | exception_thrown() 603 | 604 | return True 605 | 606 | 607 | __all__ = ( 608 | # DECODE 609 | 'decode', 'decode_utf8', 'decode_latin1', 'decode_buffer', 'decode_callback', 'decode_io', 610 | # ENCODE 611 | 'encode', 'encode_bytes', 'encode_callback', 'encode_io', 'encode_noop', 'Options', 612 | # LEGACY 613 | 'loads', 'load', 'dumps', 'dump', 614 | # EXCEPTIONS 615 | 'Json5Exception', 616 | 'Json5EncoderException', 'Json5UnstringifiableType', 617 | 'Json5DecoderException', 'Json5NestingTooDeep', 'Json5EOF', 'Json5IllegalCharacter', 'Json5ExtraData', 'Json5IllegalType', 618 | ) 619 | 620 | __doc__ = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, LONGDESCRIPTION, LONGDESCRIPTION_LENGTH) 621 | 622 | __license__ = 'MIT OR Apache-2.0' 623 | 624 | __author__ = '2018-2025 René Kijewski ' 625 | -------------------------------------------------------------------------------- /src/_imports.pyx: -------------------------------------------------------------------------------- 1 | from cython import final, no_gc, auto_pickle, freelist 2 | from cpython cimport dict, int, list, long, tuple, type 3 | from cpython.bool cimport PyBool_Check 4 | from cpython.buffer cimport PyObject_GetBuffer, PyBUF_CONTIG_RO, PyBuffer_Release 5 | from cpython.bytes cimport ( 6 | PyBytes_AsStringAndSize, PyBytes_FromStringAndSize, PyBytes_Check, 7 | ) 8 | from cpython.dict cimport PyDict_SetItem 9 | from cpython.float cimport PyFloat_Check, PyFloat_AsDouble, PyFloat_FromDouble 10 | from cpython.list cimport PyList_Append 11 | from cpython.long cimport PyLong_FromString, PyLong_Check 12 | from cpython.object cimport PyObject, PyObject_GetIter 13 | from cpython.type cimport PyType_Check 14 | from cpython.unicode cimport PyUnicode_Check, PyUnicode_FromEncodedObject, PyUnicode_Format 15 | from libcpp cimport bool as boolean 16 | 17 | 18 | cdef extern from '' namespace 'std' nogil: 19 | ctypedef unsigned long size_t 
20 | 21 | 22 | cdef extern from '' namespace 'std' nogil: 23 | ctypedef unsigned char uint8_t 24 | ctypedef unsigned short uint16_t 25 | ctypedef unsigned long uint32_t 26 | ctypedef unsigned long long uint64_t 27 | 28 | ctypedef signed char int8_t 29 | ctypedef signed short int16_t 30 | ctypedef signed long int32_t 31 | ctypedef signed long long int64_t 32 | 33 | 34 | cdef extern from '' namespace 'std' nogil: 35 | int snprintf(char *buffer, size_t buf_size, const char *format, ...) 36 | size_t strlen(const char *s) 37 | 38 | 39 | cdef extern from '' namespace 'std' nogil: 40 | void memcpy(void *dest, const void *std, size_t count) 41 | void memset(void *dest, char value, size_t count) 42 | size_t strlen(const char *s) 43 | 44 | 45 | cdef extern from '' nogil: 46 | enum: 47 | FP_INFINITE, FP_NAN, FP_NORMAL, FP_SUBNORMAL, FP_ZERO 48 | 49 | cdef extern from '' namespace 'std' nogil: 50 | int fpclassify(...) 51 | 52 | 53 | cdef extern from '' namespace 'std' nogil: 54 | void swap[T](T&, T&) 55 | 56 | 57 | cdef extern from 'Python.h': 58 | ctypedef signed char Py_UCS1 59 | ctypedef signed short Py_UCS2 60 | ctypedef signed long Py_UCS4 61 | 62 | 63 | cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp' nogil: 64 | int32_t cast_to_int32(...) 65 | uint32_t cast_to_uint32(...) 66 | 67 | ctypedef boolean AlwaysTrue 68 | boolean obj_has_iter(object obj) 69 | 70 | ctypedef char EscapeDctItem[8] 71 | cppclass EscapeDct: 72 | EscapeDctItem items[0x100] 73 | boolean is_escaped(uint32_t c) 74 | Py_ssize_t find_unescaped_range(const Py_UCS1 *start, Py_ssize_t length) 75 | Py_ssize_t find_unescaped_range(const Py_UCS2 *start, Py_ssize_t length) 76 | Py_ssize_t find_unescaped_range(const Py_UCS4 *start, Py_ssize_t length) 77 | EscapeDct ESCAPE_DCT 78 | 79 | enum: 80 | VERSION_LENGTH 81 | const char VERSION[] 82 | 83 | enum: 84 | LONGDESCRIPTION_LENGTH 85 | const char LONGDESCRIPTION[] 86 | 87 | const char HEX[] 88 | 89 | boolean unicode_is_lo_surrogate(uint32_t ch) 90 | boolean unicode_is_hi_surrogate(uint32_t ch) 91 | uint32_t unicode_join_surrogates(uint32_t hi, uint32_t lo) 92 | 93 | void reset_hash[T](T *obj) 94 | void reset_wstr[T](T *obj) 95 | void set_ready[T](T *obj) 96 | AlwaysTrue exception_thrown() except True 97 | void unreachable() 98 | 99 | 100 | cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp': 101 | int iter_next(object iterator, PyObject **value) except -1 102 | 103 | 104 | cdef extern from 'src/native.hpp' nogil: 105 | boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected) 106 | 107 | 108 | cdef extern from 'src/_unicode_cat_of.hpp' namespace 'JSON5EncoderCpp' nogil: 109 | unsigned unicode_cat_of(uint32_t codepoint) 110 | 111 | 112 | cdef extern from 'src/_stack_heap_string.hpp' namespace 'JSON5EncoderCpp' nogil: 113 | cdef cppclass StackHeapString [T]: 114 | const T *data() 115 | Py_ssize_t size() 116 | boolean push_back(T codepoint) except False 117 | 118 | 119 | cdef extern from 'src/_decoder_recursive_select.hpp' namespace 'JSON5EncoderCpp' nogil: 120 | cdef enum DrsKind: 121 | DRS_fail, 122 | DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, 123 | DRS_string, DRS_number, DRS_recursive 124 | 125 | DrsKind drs_lookup[128] 126 | 127 | 128 | cdef extern from 'third-party/fast_double_parser/include/fast_double_parser.h' namespace 'fast_double_parser' nogil: 129 | const char *parse_number(const char *p, double *outDouble) 130 | 131 | 132 | cdef extern from 'src/dragonbox.cc' namespace 'dragonbox' nogil: 133 | char *Dtoa(char* buffer, double value) 
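# Usage note (illustrative; mirrors the call site in _encoder.pyx): Dtoa() writes
# a decimal text representation of ``value`` into ``buffer`` and returns a pointer
# just past the last character written, so the caller computes the length as a
# pointer difference:
#
#     cdef char buf[64]
#     end = Dtoa(buf, value)
#     length = end - buf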
134 | 135 | 136 | cdef extern from 'Python.h': 137 | enum: 138 | PyUnicode_WCHAR_KIND 139 | PyUnicode_1BYTE_KIND 140 | PyUnicode_2BYTE_KIND 141 | PyUnicode_4BYTE_KIND 142 | 143 | int PyUnicode_READY(object o) except -1 144 | Py_ssize_t PyUnicode_GET_LENGTH(object o) nogil 145 | int PyUnicode_KIND(object o) nogil 146 | boolean PyUnicode_IS_ASCII(object) nogil 147 | Py_UCS1 *PyUnicode_1BYTE_DATA(object o) nogil 148 | Py_UCS2 *PyUnicode_2BYTE_DATA(object o) nogil 149 | Py_UCS4 *PyUnicode_4BYTE_DATA(object o) nogil 150 | 151 | boolean Py_EnterRecursiveCall(const char *where) except True 152 | void Py_LeaveRecursiveCall() 153 | 154 | bint Py_UNICODE_ISALPHA(Py_UCS4 ch) nogil 155 | bint Py_UNICODE_ISDIGIT(Py_UCS4 ch) nogil 156 | 157 | object PyUnicode_FromKindAndData(int kind, const void *buf, Py_ssize_t size) 158 | const char *PyUnicode_AsUTF8AndSize(object o, Py_ssize_t *size) except NULL 159 | 160 | object PyDict_SetDefault(object p, object key, object value) 161 | 162 | object CallFunction 'PyObject_CallFunction'(PyObject *cb, const char *format, ...) 163 | object CallObject 'PyObject_CallObject'(PyObject *cb, PyObject *args) 164 | 165 | ctypedef signed long Py_hash 166 | ctypedef signed short wchar_t 167 | 168 | enum: 169 | SSTATE_NOT_INTERNED 170 | SSTATE_INTERNED_MORTAL 171 | SSTATE_INTERNED_IMMORTAL 172 | 173 | ctypedef struct __ascii_object_state: 174 | uint8_t interned 175 | uint8_t kind 176 | boolean compact 177 | boolean ascii 178 | boolean ready 179 | 180 | ctypedef struct PyASCIIObject: 181 | Py_ssize_t length 182 | Py_hash hash 183 | wchar_t *wstr 184 | __ascii_object_state state 185 | 186 | ctypedef struct PyVarObject: 187 | pass 188 | 189 | ctypedef struct PyBytesObject: 190 | PyVarObject ob_base 191 | Py_hash ob_shash 192 | char ob_sval[1] 193 | 194 | AlwaysTrue ErrNoMemory 'PyErr_NoMemory'() except True 195 | void *ObjectRealloc 'PyObject_Realloc'(void *p, size_t n) 196 | void ObjectFree 'PyObject_Free'(void *p) 197 | object ObjectInit 'PyObject_INIT'(PyObject *obj, type cls) 198 | PyVarObject *ObjectInitVar 'PyObject_InitVar'(PyVarObject *obj, type cls, Py_ssize_t size) 199 | object PyObject_GenericGetDict(object o, void *context) 200 | 201 | object PyLong_FromString(const char *str, char **pend, int base) 202 | 203 | 204 | ctypedef struct AsciiObject: 205 | PyASCIIObject base 206 | char data[1] 207 | 208 | 209 | cdef extern from * nogil: 210 | enum: 211 | CYTHON_COMPILING_IN_PYPY 212 | 213 | 214 | cdef type datetime, date, time, Decimal, Mapping, IOBase 215 | cdef object saferepr 216 | 217 | from collections.abc import Mapping 218 | from datetime import datetime, date, time 219 | from decimal import Decimal 220 | from io import IOBase 221 | from pprint import saferepr 222 | -------------------------------------------------------------------------------- /src/_legacy.pyx: -------------------------------------------------------------------------------- 1 | def loads(s, *, encoding='UTF-8', **kw): 2 | ''' 3 | Decodes JSON5 serialized data from a string. 4 | 5 | Use :func:`decode(…) ` instead! 6 | 7 | .. code:: python 8 | 9 | loads(s) == decode(s) 10 | 11 | Parameters 12 | ---------- 13 | s : object 14 | Unless the argument is an ``str``, it gets decoded according to the 15 | parameter ``encoding``. 16 | encoding : str 17 | Codec to use if ``s`` is not an ``str``. 18 | kw 19 | Silently ignored. 
20 | 21 | Returns 22 | ------- 23 | object 24 | see :func:`decode(…) ` 25 | ''' 26 | if not isinstance(s, unicode): 27 | s = PyUnicode_FromEncodedObject(s, encoding, 'strict') 28 | return decode(s) 29 | 30 | 31 | def load(fp, **kw): 32 | ''' 33 | Decodes JSON5 serialized data from a file-like object. 34 | 35 | Use :func:`decode_io(…) ` instead! 36 | 37 | .. code:: python 38 | 39 | load(fp) == decode_io(fp, None, False) 40 | 41 | Parameters 42 | ---------- 43 | fp : IOBase 44 | A file-like object to parse from. 45 | kw 46 | Silently ignored. 47 | 48 | Returns 49 | ------- 50 | object 51 | see :func:`decode_io(…) ` 52 | ''' 53 | return decode_io(fp, None, False) 54 | 55 | 56 | def dumps(obj, **kw): 57 | ''' 58 | Serializes a Python object to a JSON5 compatible string. 59 | 60 | Use :func:`encode(…) ` instead! 61 | 62 | .. code:: python 63 | 64 | dumps(obj) == encode(obj) 65 | 66 | Parameters 67 | ---------- 68 | obj : object 69 | Python object to serialize. 70 | kw 71 | Silently ignored. 72 | 73 | Returns 74 | ------- 75 | str 76 | see :func:`encode(…) ` 77 | ''' 78 | return encode(obj) 79 | 80 | 81 | def dump(object obj, object fp, **kw): 82 | ''' 83 | Serializes a Python object to a JSON5 compatible string. 84 | 85 | Use :func:`encode_io(…) ` instead! 86 | 87 | .. code:: python 88 | 89 | dump(obj, fp) == encode_io(obj, fp) 90 | 91 | Parameters 92 | ---------- 93 | obj : object 94 | Python object to serialize. 95 | fp : IOBase 96 | A file-like object to serialize into. 97 | kw 98 | Silently ignored. 99 | ''' 100 | encode_io(obj, fp) 101 | -------------------------------------------------------------------------------- /src/_raise_decoder.pyx: -------------------------------------------------------------------------------- 1 | cdef AlwaysTrue _raise_decoder(cls, msg, extra=None, result=None) except True: 2 | raise _DecoderException(cls, msg, extra, result) 3 | 4 | 5 | cdef AlwaysTrue _raise_unclosed(const char *what, Py_ssize_t start) except True: 6 | return _raise_decoder( 7 | Json5EOF, 8 | f'Unclosed {what} starting near {start}', 9 | ) 10 | 11 | 12 | cdef AlwaysTrue _raise_no_data(Py_ssize_t where) except True: 13 | return _raise_decoder( 14 | Json5EOF, 15 | f'No JSON data found near {where}', 16 | ) 17 | 18 | 19 | cdef AlwaysTrue _raise_stray_character(const char *what, Py_ssize_t where) except True: 20 | return _raise_decoder( 21 | Json5IllegalCharacter, 22 | f'Stray {what} near {where}', 23 | what, 24 | ) 25 | 26 | 27 | cdef AlwaysTrue _raise_expected_sc(const char *char_a, uint32_t char_b, Py_ssize_t near, uint32_t found) except True: 28 | return _raise_decoder( 29 | Json5IllegalCharacter, 30 | f'Expected {char_a} or U+{char_b:04x} near {near}, found U+{found:04x}', 31 | f'{found:c}', 32 | ) 33 | 34 | 35 | cdef AlwaysTrue _raise_expected_s(const char *char_a, Py_ssize_t near, uint32_t found) except True: 36 | return _raise_decoder( 37 | Json5IllegalCharacter, 38 | f'Expected {char_a} near {near}, found U+{found:04x}', 39 | f'{found:c}', 40 | ) 41 | 42 | 43 | cdef AlwaysTrue _raise_expected_c(uint32_t char_a, Py_ssize_t near, uint32_t found) except True: 44 | return _raise_decoder( 45 | Json5IllegalCharacter, 46 | f'Expected U+{char_a:04x} near {near}, found U+{found:04x}', 47 | f'{found:c}', 48 | ) 49 | 50 | 51 | cdef AlwaysTrue _raise_extra_data(uint32_t found, Py_ssize_t where) except True: 52 | return _raise_decoder( 53 | Json5ExtraData, 54 | f'Extra data U+{found:04X} near {where}', 55 | f'{found:c}', 56 | ) 57 | 58 | 59 | cdef AlwaysTrue _raise_unframed_data(uint32_t found, 
Py_ssize_t where) except True: 60 | return _raise_decoder( 61 | Json5ExtraData, 62 | f'Lost unframed data near {where}', 63 | f'{found:c}', 64 | ) 65 | 66 | 67 | cdef AlwaysTrue _raise_nesting(Py_ssize_t where, object result=None) except True: 68 | return _raise_decoder( 69 | Json5NestingTooDeep, 70 | f'Maximum nesting level exceeded near {where}', 71 | None, 72 | result, 73 | ) 74 | 75 | 76 | cdef AlwaysTrue _raise_not_ord(object value, Py_ssize_t where) except True: 77 | return _raise_decoder( 78 | Json5IllegalType, 79 | f'type(value)=={type(value)!r} not in (int, str, bytes) near {where} or the value is not valid.', 80 | value, 81 | ) 82 | -------------------------------------------------------------------------------- /src/_raise_encoder.pyx: -------------------------------------------------------------------------------- 1 | cdef AlwaysTrue _raise_unstringifiable(object data) except True: 2 | raise Json5UnstringifiableType(f'Unstringifiable type(data)={type(data)!r}', data) 3 | 4 | 5 | cdef AlwaysTrue _raise_illegal_wordlength(int32_t wordlength) except True: 6 | raise ValueError(f'wordlength must be 1, 2 or 4, not {wordlength!r}') 7 | -------------------------------------------------------------------------------- /src/_reader_callback.pyx: -------------------------------------------------------------------------------- 1 | cdef struct ReaderCallbackBase: 2 | Py_ssize_t position 3 | Py_ssize_t maxdepth 4 | 5 | 6 | cdef struct ReaderCallback: 7 | ReaderCallbackBase base 8 | PyObject *callback 9 | PyObject *args 10 | int32_t lookahead 11 | 12 | ctypedef ReaderCallback &ReaderCallbackRef 13 | 14 | 15 | cdef inline uint32_t _reader_Callback_get(ReaderCallbackRef self): 16 | cdef int32_t c = self.lookahead 17 | 18 | self.lookahead = -1 19 | self.base.position += 1 20 | 21 | return cast_to_uint32(c) 22 | 23 | 24 | cdef int32_t _reader_Callback_good(ReaderCallbackRef self) except -1: 25 | cdef Py_ssize_t c = -1 26 | 27 | if self.lookahead >= 0: 28 | return True 29 | 30 | cdef object value = CallObject(self.callback, self.args) 31 | if (value is None) or (value is False): 32 | return False 33 | 34 | if isinstance(value, int): 35 | c = value 36 | elif isinstance(value, ORD_CLASSES): 37 | if not value: 38 | return False 39 | c = ord(value) 40 | else: 41 | _raise_not_ord(value, self.base.position) 42 | 43 | if c < 0: 44 | return False 45 | elif c > 0x10ffff: 46 | _raise_not_ord(value, self.base.position) 47 | 48 | self.lookahead = c 49 | 50 | return True 51 | -------------------------------------------------------------------------------- /src/_reader_ucs.pyx: -------------------------------------------------------------------------------- 1 | cdef struct ReaderUCS: 2 | Py_ssize_t remaining 3 | Py_ssize_t position 4 | Py_ssize_t maxdepth 5 | 6 | 7 | cdef struct ReaderUCS1: 8 | ReaderUCS base 9 | const Py_UCS1 *string 10 | 11 | 12 | cdef struct ReaderUCS2: 13 | ReaderUCS base 14 | const Py_UCS2 *string 15 | 16 | 17 | cdef struct ReaderUCS4: 18 | ReaderUCS base 19 | const Py_UCS4 *string 20 | 21 | 22 | cdef struct ReaderUTF8: 23 | ReaderUCS base 24 | const Py_UCS1 *string 25 | 26 | 27 | ctypedef ReaderUCS1 &ReaderUCS1Ref 28 | ctypedef ReaderUCS2 &ReaderUCS2Ref 29 | ctypedef ReaderUCS4 &ReaderUCS4Ref 30 | ctypedef ReaderUTF8 &ReaderUTF8Ref 31 | 32 | ctypedef Py_UCS1 *UCS1String 33 | ctypedef Py_UCS2 *UCS2String 34 | ctypedef Py_UCS4 *UCS4String 35 | 36 | ctypedef fused ReaderUCSRef: 37 | ReaderUCS1Ref 38 | ReaderUCS2Ref 39 | ReaderUCS4Ref 40 | ReaderUTF8Ref 41 | 42 | ctypedef fused UCSString: 43 | 
UCS1String 44 | UCS2String 45 | UCS4String 46 | 47 | 48 | cdef inline int32_t _reader_ucs_good(ReaderUCSRef self): 49 | return self.base.remaining > 0 50 | 51 | 52 | cdef inline uint32_t _reader_ucs_get(ReaderUCSRef self): 53 | cdef int32_t c = self.string[0] 54 | 55 | self.string += 1 56 | self.base.remaining -= 1 57 | self.base.position += 1 58 | 59 | return cast_to_uint32(c) 60 | 61 | 62 | cdef inline uint32_t _reader_utf8_get(ReaderUCSRef self): 63 | cdef uint32_t c0 = _reader_ucs_get(self) 64 | cdef unsigned int n 65 | 66 | if (c0 & 0b1_0000000) == 0b0_0000000: # ASCII 67 | return c0 68 | elif (c0 & 0b11_000000) == 0b10_000000: # broken continuation 69 | return c0 70 | elif (c0 & 0b111_00000) == 0b110_00000: # 2 bytes 71 | c0 = (c0 & 0b000_11111) 72 | n = 1 73 | elif (c0 & 0b1111_0000) == 0b1110_0000: # 3 bytes 74 | c0 = (c0 & 0b0000_1111) 75 | n = 2 76 | elif (c0 & 0b11111_000) == 0b11110_000: # 4 bytes 77 | c0 = (c0 & 0b00000_111) 78 | n = 3 79 | else: # 5+ bytes, invalid 80 | return c0 81 | 82 | for n in range(n, 0, -1): 83 | if not _reader_ucs_good(self): 84 | return c0 85 | 86 | c0 = (c0 << 6) | (_reader_ucs_get(self) & 0b00_111111) 87 | 88 | return c0 89 | -------------------------------------------------------------------------------- /src/_readers.pyx: -------------------------------------------------------------------------------- 1 | ctypedef fused ReaderRef: 2 | ReaderUCSRef 3 | ReaderCallbackRef 4 | 5 | 6 | cdef boolean _reader_enter(ReaderRef self) except False: 7 | if self.base.maxdepth == 0: 8 | _raise_nesting(_reader_tell(self)) 9 | 10 | Py_EnterRecursiveCall(' while decoding nested JSON5 object') 11 | 12 | self.base.maxdepth -= 1 13 | 14 | return True 15 | 16 | 17 | cdef void _reader_leave(ReaderRef self): 18 | Py_LeaveRecursiveCall() 19 | self.base.maxdepth += 1 20 | 21 | 22 | cdef inline Py_ssize_t _reader_tell(ReaderRef self): 23 | return self.base.position 24 | 25 | 26 | cdef inline uint32_t _reader_get(ReaderRef self): 27 | cdef uint32_t c0 28 | if ReaderRef is ReaderUTF8Ref: 29 | c0 = _reader_utf8_get(self) 30 | elif ReaderRef in ReaderUCSRef: 31 | c0 = _reader_ucs_get(self) 32 | elif ReaderRef is ReaderCallbackRef: 33 | c0 = _reader_Callback_get(self) 34 | return c0 35 | 36 | 37 | cdef int32_t _reader_good(ReaderRef self) except -1: 38 | if ReaderRef in ReaderUCSRef: 39 | return _reader_ucs_good(self) 40 | elif ReaderRef is ReaderCallbackRef: 41 | return _reader_Callback_good(self) 42 | -------------------------------------------------------------------------------- /src/_stack_heap_string.hpp: -------------------------------------------------------------------------------- 1 | #ifndef JSON5EncoderCpp_StackHeapString 2 | #define JSON5EncoderCpp_StackHeapString 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | namespace JSON5EncoderCpp { 10 | inline namespace { 11 | 12 | static constexpr Py_ssize_t StackHeapStringStackSize = 64; 13 | static constexpr Py_ssize_t StackHeapStringHeapSize = 256; 14 | static constexpr Py_ssize_t StackHeapStringHeapFactor = 4; 15 | 16 | 17 | template 18 | class StackHeapString { 19 | StackHeapString(const StackHeapString&) = delete; 20 | StackHeapString(StackHeapString&&) = delete; 21 | StackHeapString &operator =(const StackHeapString&) = delete; 22 | StackHeapString &operator =(StackHeapString&&) = delete; 23 | 24 | public: 25 | StackHeapString() = default; 26 | 27 | ~StackHeapString() { 28 | if (m_heap != nullptr) { 29 | PyMem_RawFree(m_heap); 30 | } 31 | } 32 | 33 | const T *data() const& { 34 | if 
(JSON5EncoderCpp_expect(m_heap == nullptr, true)) { 35 | return m_stack; 36 | } else { 37 | return m_heap; 38 | } 39 | } 40 | 41 | Py_ssize_t size() const& { 42 | return m_size; 43 | } 44 | 45 | bool push_back(T c) { 46 | if (JSON5EncoderCpp_expect(m_left == 0, false)) { 47 | if (m_heap == nullptr) { 48 | void *new_ptr = PyMem_RawMalloc(sizeof(T) * StackHeapStringHeapSize); 49 | if (new_ptr == nullptr) { 50 | PyErr_NoMemory(); 51 | return false; 52 | } 53 | 54 | m_heap = reinterpret_cast(new_ptr); 55 | m_left = StackHeapStringHeapSize - StackHeapStringStackSize; 56 | std::memcpy(m_heap, m_stack, sizeof(T) * StackHeapStringStackSize); 57 | } else { 58 | void *new_ptr = PyMem_RawRealloc(m_heap, sizeof(T) * (m_size * StackHeapStringHeapFactor)); 59 | if (new_ptr == nullptr) { 60 | PyErr_NoMemory(); 61 | return false; 62 | } 63 | 64 | m_heap = reinterpret_cast(new_ptr); 65 | m_left = m_size * (StackHeapStringHeapFactor - 1); 66 | } 67 | } 68 | 69 | if (JSON5EncoderCpp_expect(m_heap == nullptr, true)) { 70 | m_stack[m_size] = c; 71 | } else { 72 | m_heap[m_size] = c; 73 | } 74 | 75 | ++m_size; 76 | --m_left; 77 | return true; 78 | } 79 | 80 | private: 81 | Py_ssize_t m_size = 0; 82 | Py_ssize_t m_left = StackHeapStringStackSize; 83 | T *m_heap = nullptr; 84 | T m_stack[StackHeapStringStackSize]; 85 | }; 86 | 87 | } 88 | } 89 | 90 | #endif // ifndef JSON5EncoderCpp_StackHeapString 91 | -------------------------------------------------------------------------------- /src/_unicode.pyx: -------------------------------------------------------------------------------- 1 | cdef boolean _is_line_terminator(uint32_t c) nogil: 2 | # https://www.ecma-international.org/ecma-262/5.1/#sec-7.3 3 | return c in ( 4 | 0x000A, # Line Feed 5 | 0x000D, # Carriage Return 6 | 0x2028, # Line separator 7 | 0x2029, # Paragraph separator 8 | ) 9 | 10 | cdef boolean _is_ws_zs(uint32_t c) nogil: 11 | # https://spec.json5.org/#white-space 12 | return unicode_cat_of(c) == 1 13 | 14 | cdef boolean _is_identifier_start(uint32_t c) nogil: 15 | # https://www.ecma-international.org/ecma-262/5.1/#sec-7.6 16 | return unicode_cat_of(c) == 2 17 | 18 | cdef boolean _is_identifier_part(uint32_t c) nogil: 19 | # https://www.ecma-international.org/ecma-262/5.1/#sec-7.6 20 | return unicode_cat_of(c) >= 2 21 | 22 | cdef inline boolean _is_x(uint32_t c) nogil: 23 | return (c | 0x20) == b'x' 24 | 25 | cdef inline boolean _is_e(uint32_t c) nogil: 26 | return (c | 0x20) == b'e' 27 | 28 | cdef inline boolean _is_decimal(uint32_t c) nogil: 29 | return b'0' <= c <= b'9' 30 | 31 | cdef inline boolean _is_hex(uint32_t c) nogil: 32 | return b'a' <= (c | 0x20) <= b'f' 33 | 34 | cdef inline boolean _is_hexadecimal(uint32_t c) nogil: 35 | return _is_decimal(c) or _is_hex(c) 36 | 37 | cdef boolean _is_in_float_representation(uint32_t c) nogil: 38 | if _is_decimal(c): 39 | return True 40 | if _is_e(c): 41 | return True 42 | elif c in b'.+-': 43 | return True 44 | else: 45 | return False 46 | -------------------------------------------------------------------------------- /src/_writer_callback.pyx: -------------------------------------------------------------------------------- 1 | cdef struct WriterCallback: 2 | Writer base 3 | PyObject *callback 4 | 5 | 6 | cdef boolean _WriterCbBytes_append_c(Writer &writer_, char datum) except False: 7 | cdef WriterCallback *writer = &writer_ 8 | 9 | CallFunction(writer.callback, b'c', datum) 10 | 11 | return True 12 | 13 | 14 | cdef boolean _WriterCbBytes_append_s(Writer &writer_, const char *s, Py_ssize_t length) 
except False: 15 | cdef WriterCallback *writer = &writer_ 16 | 17 | if expect(length <= 0, False): 18 | return True 19 | 20 | CallFunction(writer.callback, b'y#', s, length) 21 | 22 | return True 23 | 24 | 25 | cdef boolean _WriterCbStr_append_c(Writer &writer_, char datum) except False: 26 | cdef WriterCallback *writer = &writer_ 27 | 28 | CallFunction(writer.callback, b'C', datum) 29 | 30 | return True 31 | 32 | 33 | cdef boolean _WriterCbStr_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False: 34 | cdef WriterCallback *writer = &writer_ 35 | 36 | if expect(length <= 0, False): 37 | return True 38 | 39 | CallFunction(writer.callback, b'U#', s, length) 40 | 41 | return True 42 | -------------------------------------------------------------------------------- /src/_writer_noop.pyx: -------------------------------------------------------------------------------- 1 | cdef struct WriterNoop: 2 | Writer base 3 | 4 | 5 | cdef boolean _WriterNoop_reserve(WriterRef writer_, size_t amount) except False: 6 | return True 7 | 8 | 9 | cdef boolean _WriterNoop_append_c(Writer &writer_, char datum) except False: 10 | return True 11 | 12 | 13 | cdef boolean _WriterNoop_append_s(Writer &writer_, const char *s, 14 | Py_ssize_t length) except False: 15 | return True 16 | -------------------------------------------------------------------------------- /src/_writer_reallocatable.pyx: -------------------------------------------------------------------------------- 1 | cdef struct WriterReallocatable: 2 | Writer base 3 | size_t position 4 | size_t length 5 | void *obj 6 | 7 | 8 | cdef boolean _WriterReallocatable_reserve(WriterRef writer_, size_t amount) except False: 9 | cdef size_t current_size 10 | cdef size_t needed_size 11 | cdef size_t new_size 12 | cdef void *temp 13 | cdef WriterReallocatable *writer = &writer_ 14 | 15 | if expect(amount <= 0, False): 16 | return True 17 | 18 | needed_size = writer.position + amount 19 | current_size = writer.length 20 | if expect(needed_size < current_size, True): 21 | return True 22 | 23 | new_size = current_size 24 | while new_size <= needed_size: 25 | new_size = (new_size + 32) + (new_size // 4) 26 | if expect(new_size < current_size, False): 27 | ErrNoMemory() 28 | 29 | temp = ObjectRealloc(writer.obj, new_size + 1) 30 | if temp is NULL: 31 | ErrNoMemory() 32 | 33 | writer.obj = temp 34 | writer.length = new_size 35 | 36 | return True 37 | 38 | 39 | cdef boolean _WriterReallocatable_append_c(Writer &writer_, char datum) except False: 40 | cdef WriterReallocatable *writer = &writer_ 41 | 42 | _WriterReallocatable_reserve(writer.base, 1) 43 | ( writer.obj)[writer.position] = datum 44 | writer.position += 1 45 | 46 | return True 47 | 48 | 49 | cdef boolean _WriterReallocatable_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False: 50 | cdef WriterReallocatable *writer = &writer_ 51 | 52 | if expect(length <= 0, False): 53 | return True 54 | 55 | _WriterReallocatable_reserve(writer.base, length) 56 | memcpy(&( writer.obj)[writer.position], s, length) 57 | writer.position += length 58 | 59 | return True 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/_writers.pyx: -------------------------------------------------------------------------------- 1 | cdef struct Writer: 2 | boolean (*reserve)(Writer &writer, size_t amount) except False 3 | boolean (*append_c)(Writer &writer, char datum) except False 4 | boolean (*append_s)(Writer &writer, const char *s, Py_ssize_t length) except 
False 5 | PyObject *options 6 | 7 | 8 | ctypedef Writer &WriterRef 9 | -------------------------------------------------------------------------------- /src/native.hpp: -------------------------------------------------------------------------------- 1 | #ifndef JSON5EncoderCpp_native 2 | #define JSON5EncoderCpp_native 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace JSON5EncoderCpp { 10 | 11 | template 12 | constexpr std::uint32_t cast_to_uint32( 13 | const From &unsigned_from, 14 | typename std::enable_if< 15 | !std::is_signed::value 16 | >::type* = nullptr 17 | ) { 18 | return static_cast(unsigned_from); 19 | } 20 | 21 | template 22 | constexpr std::uint32_t cast_to_uint32( 23 | const From &from, 24 | typename std::enable_if< 25 | std::is_signed::value 26 | >::type* = nullptr 27 | ) { 28 | return cast_to_uint32(static_cast::type>(from)); 29 | } 30 | 31 | template 32 | constexpr std::int32_t cast_to_int32(const From &from) { 33 | return static_cast(cast_to_uint32(from)); 34 | } 35 | 36 | struct AlwaysTrue { 37 | inline AlwaysTrue() = default; 38 | inline ~AlwaysTrue() = default; 39 | 40 | inline AlwaysTrue(const AlwaysTrue&) = default; 41 | inline AlwaysTrue(AlwaysTrue&&) = default; 42 | inline AlwaysTrue &operator =(const AlwaysTrue&) = default; 43 | inline AlwaysTrue &operator =(AlwaysTrue&&) = default; 44 | 45 | template 46 | inline AlwaysTrue(T&&) : AlwaysTrue() {} 47 | 48 | template 49 | inline bool operator ==(T&&) const { return true; } 50 | 51 | inline operator bool () const { return true; } 52 | }; 53 | 54 | static inline bool obj_has_iter(const PyObject *obj) { 55 | const auto *cls = Py_TYPE(obj); 56 | return cls->tp_iter != nullptr; 57 | } 58 | 59 | constexpr char HEX[] = "0123456789abcdef"; 60 | 61 | struct EscapeDct { 62 | using Item = std::array; // length, upto 6 characters, terminator (actually not needed) 63 | static constexpr std::size_t length = 0x100; 64 | using Items = Item[length]; 65 | 66 | static const Items items; 67 | static const std::uint64_t is_escaped_lo; 68 | static const std::uint64_t is_escaped_hi; 69 | 70 | static inline bool is_escaped(std::uint32_t c) { 71 | if (c < 0x40) { 72 | return (is_escaped_lo & (static_cast(1) << c)) != 0; 73 | } else if (c < 0x80) { 74 | return (is_escaped_hi & (static_cast(1) << (c - 0x40))) != 0; 75 | } else { 76 | return true; 77 | } 78 | } 79 | 80 | template 81 | static inline std::size_t find_unescaped_range(const S *start, Py_ssize_t length) { 82 | Py_ssize_t index = 0; 83 | while ((index < length) && !is_escaped(start[index])) { 84 | ++index; 85 | } 86 | return index; 87 | } 88 | }; 89 | 90 | static inline bool unicode_is_lo_surrogate(std::uint32_t ch) { 91 | return 0xDC00u <= ch && ch <= 0xDFFFu; 92 | } 93 | 94 | static inline bool unicode_is_hi_surrogate(std::uint32_t ch) { 95 | return 0xD800u <= ch && ch <= 0xDBFFu; 96 | } 97 | 98 | static inline std::uint32_t unicode_join_surrogates(std::uint32_t hi, std::uint32_t lo) { 99 | return (((hi & 0x03FFu) << 10) | (lo & 0x03FFu)) + 0x10000u; 100 | } 101 | 102 | 103 | template 104 | struct VoidT_ { 105 | using Value = void*; 106 | }; 107 | 108 | 109 | template 110 | struct has_ob_shash { 111 | template static std::uint8_t test(typename VoidT_().ob_shash, true))>::Value); 112 | template static std::uint64_t test(...); 113 | enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; 114 | }; 115 | 116 | template 117 | struct has_hash { 118 | template static std::uint8_t test(typename VoidT_().hash, true))>::Value); 119 | template static std::uint64_t 
test(...); 120 | enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; 121 | }; 122 | 123 | template::value, bool hash = has_hash::value> 124 | struct ResetHash_; 125 | 126 | template 127 | struct ResetHash_ { 128 | static inline void reset(T *obj) { 129 | obj->ob_shash = -1; // CPython: str 130 | } 131 | }; 132 | 133 | template 134 | struct ResetHash_ { 135 | static inline void reset(T *obj) { 136 | obj->hash = -1; // CPython: bytes 137 | } 138 | }; 139 | 140 | template 141 | struct ResetHash_ { 142 | static inline void reset(T *obj) { 143 | (void) 0; // PyPy 144 | } 145 | }; 146 | 147 | template 148 | static inline void reset_hash(T *obj) { 149 | ResetHash_::reset(obj); 150 | } 151 | 152 | 153 | template 154 | struct has_wstr { 155 | template static std::uint8_t test(typename VoidT_().wstr, true))>::Value); 156 | template static std::uint64_t test(...); 157 | enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; 158 | }; 159 | 160 | template::value> 161 | struct ResetWstr_; 162 | 163 | template 164 | struct ResetWstr_ { 165 | static inline void reset(T *obj) { 166 | obj->wstr = nullptr; // CPython >= 3.12: absent 167 | } 168 | }; 169 | 170 | template 171 | struct ResetWstr_ { 172 | static inline void reset(T *) { 173 | (void) 0; 174 | } 175 | }; 176 | 177 | template 178 | static inline void reset_wstr(T *obj) { 179 | ResetWstr_::reset(obj); 180 | } 181 | 182 | template 183 | struct has_ready { 184 | template static std::uint8_t test(typename VoidT_().state.ready, true))>::Value); 185 | template static std::uint64_t test(...); 186 | enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; 187 | }; 188 | 189 | template::value> 190 | struct SetReady_; 191 | 192 | template 193 | struct SetReady_ { 194 | static inline void set(T *obj) { 195 | obj->state.ready = true; // CPython >= 3.12: absent 196 | } 197 | }; 198 | 199 | template 200 | struct SetReady_ { 201 | static inline void set(T *) { 202 | (void) 0; 203 | } 204 | }; 205 | 206 | template 207 | static inline void set_ready(T *obj) { 208 | SetReady_::set(obj); 209 | } 210 | 211 | 212 | static int iter_next(PyObject *iterator, PyObject **value) { 213 | Py_XDECREF(*value); 214 | PyObject *v = PyIter_Next(iterator); 215 | *value = v; 216 | if (v) { 217 | return true; 218 | } else if (!PyErr_Occurred()) { 219 | return 0; 220 | } else { 221 | return -1; 222 | } 223 | } 224 | 225 | static inline AlwaysTrue exception_thrown() { 226 | return true; 227 | } 228 | 229 | // https://stackoverflow.com/a/65258501/416224 230 | #ifdef __GNUC__ // GCC 4.8+, Clang, Intel and other compilers compatible with GCC (-std=c++0x or above) 231 | [[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); } 232 | #elif defined(_MSC_VER) // MSVC 233 | [[noreturn]] __forceinline void unreachable() { __assume(false); } 234 | #else // ??? 
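// Fallback for compilers without an unreachable-hint builtin: unreachable() is a
// plain no-op here and, unlike the GCC/MSVC branches above, not marked [[noreturn]].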
235 | inline void unreachable() {} 236 | #endif 237 | 238 | #include "./_escape_dct.hpp" 239 | 240 | const EscapeDct ESCAPE_DCT; 241 | 242 | const char VERSION[] = 243 | # include "./VERSION.inc" 244 | ; 245 | static constexpr std::size_t VERSION_LENGTH = sizeof(VERSION) - 1; 246 | 247 | const char LONGDESCRIPTION[] = 248 | # include "./DESCRIPTION.inc" 249 | ; 250 | static constexpr std::size_t LONGDESCRIPTION_LENGTH = sizeof(LONGDESCRIPTION) - 1; 251 | 252 | #ifdef __GNUC__ 253 | # define JSON5EncoderCpp_expect(cond, likely) __builtin_expect(!!(cond), !!(likely)) 254 | #else 255 | # define JSON5EncoderCpp_expect(cond, likely) !!(cond) 256 | #endif 257 | 258 | } 259 | 260 | #endif 261 | -------------------------------------------------------------------------------- /src/pyjson5/__init__.py: -------------------------------------------------------------------------------- 1 | from .pyjson5 import * 2 | from .pyjson5 import __all__, __author__, __doc__, __license__, __version__ 3 | -------------------------------------------------------------------------------- /src/pyjson5/__init__.pyi: -------------------------------------------------------------------------------- 1 | from typing import ( 2 | Any, 3 | Callable, 4 | final, 5 | Final, 6 | Iterable, 7 | Literal, 8 | Optional, 9 | overload, 10 | Protocol, 11 | Tuple, 12 | TypeVar, 13 | Union, 14 | ) 15 | 16 | _Data = TypeVar("_Data") 17 | 18 | class _SupportsRead(Protocol): 19 | def read(self, size: int = ...) -> str: ... 20 | 21 | class _SupportsWrite(Protocol[_Data]): 22 | def write(self, s: _Data) -> Any: ... 23 | 24 | _CallbackStr = TypeVar("_CallbackStr", bound=Callable[[str], None]) 25 | 26 | _CallbackBytes = TypeVar("_CallbackBytes", bound=Callable[[bytes], None]) 27 | 28 | _SupportsWriteBytes = TypeVar("_SupportsWriteBytes", bound=_SupportsWrite[bytes]) 29 | 30 | _SupportsWriteStr = TypeVar("_SupportsWriteStr", bound=_SupportsWrite[str]) 31 | 32 | ############################################################################### 33 | ### _exports.pyx 34 | ############################################################################### 35 | 36 | @final 37 | class Options: 38 | """Customizations for the ``encoder_*(...)`` function family.""" 39 | 40 | quotationmark: Final[str] = ... 41 | tojson: Final[Optional[str]] = ... 42 | mappingtypes: Final[Tuple[type, ...]] = ... 43 | 44 | def __init__( 45 | self, 46 | *, 47 | quotationmark: Optional[str] = ..., 48 | tojson: Optional[str] = ..., 49 | mappingtypes: Optional[Tuple[type, ...]] = ..., 50 | ) -> None: ... 51 | def update( 52 | self, 53 | *, 54 | quotationmark: Optional[str] = ..., 55 | tojson: Optional[str] = ..., 56 | mappingtypes: Optional[Tuple[type, ...]] = ..., 57 | ) -> Options: 58 | """Creates a new Options instance by modifying some members.""" 59 | 60 | def decode(data: str, maxdepth: Optional[int] = ..., some: bool = ...) 
-> Any: 61 | """Decodes JSON5 serialized data from an ``str`` object.""" 62 | 63 | def decode_latin1( 64 | data: bytes, 65 | maxdepth: Optional[int] = ..., 66 | some: bool = ..., 67 | ) -> Any: 68 | """Decodes JSON5 serialized data from a ``bytes`` object.""" 69 | 70 | def decode_utf8( 71 | data: bytes, 72 | maxdepth: Optional[int] = ..., 73 | some: bool = ..., 74 | ) -> Any: 75 | """Decodes JSON5 serialized data from a ``bytes`` object.""" 76 | 77 | def decode_buffer( 78 | data: bytes, 79 | maxdepth: Optional[int] = ..., 80 | some: bool = ..., 81 | wordlength: Optional[int] = ..., 82 | ) -> Any: 83 | """Decodes JSON5 serialized data from an object that supports the buffer protocol, e.g. bytearray.""" 84 | 85 | def decode_callback( 86 | cb: Callable[..., Union[str, bytes, bytearray, int, None]], 87 | maxdepth: Optional[int] = ..., 88 | some: bool = ..., 89 | args: Optional[Iterable[Any]] = ..., 90 | ) -> Any: 91 | """Decodes JSON5 serialized data by invoking a callback.""" 92 | 93 | def decode_io( 94 | fp: _SupportsRead, 95 | maxdepth: Optional[int] = ..., 96 | some: bool = ..., 97 | ) -> Any: 98 | """Decodes JSON5 serialized data from a file-like object.""" 99 | 100 | def encode( 101 | data: Any, 102 | *, 103 | options: Optional[Options] = ..., 104 | quotationmark: Optional[str] = ..., 105 | tojson: Optional[str] = ..., 106 | mappingtypes: Optional[Tuple[type, ...]] = ..., 107 | ) -> str: 108 | """Serializes a Python object to a JSON5 compatible string.""" 109 | ... 110 | 111 | def encode_bytes( 112 | data: Any, 113 | *, 114 | options: Optional[Options] = ..., 115 | quotationmark: Optional[str] = ..., 116 | tojson: Optional[str] = ..., 117 | mappingtypes: Optional[Tuple[type, ...]] = ..., 118 | ) -> bytes: 119 | """Serializes a Python object to a JSON5 compatible bytes string.""" 120 | 121 | @overload 122 | def encode_callback( 123 | data: Any, 124 | cb: _CallbackStr, 125 | supply_bytes: Literal[False] = ..., 126 | *, 127 | options: Optional[Options] = ..., 128 | quotationmark: Optional[str] = ..., 129 | tojson: Optional[str] = ..., 130 | mappingtypes: Optional[Tuple[type, ...]] = ..., 131 | ) -> _CallbackStr: 132 | """Serializes a Python object into a callback function.""" 133 | 134 | @overload 135 | def encode_callback( 136 | data: Any, 137 | cb: _CallbackBytes, 138 | supply_bytes: Literal[True], 139 | *, 140 | options: Optional[Options] = ..., 141 | quotationmark: Optional[str] = ..., 142 | tojson: Optional[str] = ..., 143 | mappingtypes: Optional[Tuple[type, ...]] = ..., 144 | ) -> _CallbackBytes: ... 145 | @overload 146 | def encode_io( 147 | data: Any, 148 | fp: _SupportsWriteBytes, 149 | supply_bytes: Literal[True] = ..., 150 | *, 151 | options: Optional[Options] = ..., 152 | quotationmark: Optional[str] = ..., 153 | tojson: Optional[str] = ..., 154 | mappingtypes: Optional[Tuple[type, ...]] = ..., 155 | ) -> _SupportsWriteBytes: 156 | """Serializes a Python object into a file-object.""" 157 | 158 | @overload 159 | def encode_io( 160 | data: Any, 161 | fp: _SupportsWriteStr, 162 | supply_bytes: Literal[False], 163 | *, 164 | options: Optional[Options] = ..., 165 | quotationmark: Optional[str] = ..., 166 | tojson: Optional[str] = ..., 167 | mappingtypes: Optional[Tuple[type, ...]] = ..., 168 | ) -> _SupportsWriteStr: ... 
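# In encode_callback() and encode_io() above, ``supply_bytes`` selects the chunk
# type handed to the sink: True supplies ``bytes`` chunks, False supplies ``str``
# chunks. The return annotations show that the supplied callback/file object is
# passed back to the caller.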
169 | def encode_noop( 170 | data: Any, 171 | *, 172 | options: Optional[Options] = ..., 173 | quotationmark: Optional[str] = ..., 174 | tojson: Optional[str] = ..., 175 | mappingtypes: Optional[Tuple[type, ...]] = ..., 176 | ) -> bool: 177 | """Test if the input is serializable.""" 178 | 179 | ############################################################################### 180 | ### _legacy.pyx 181 | ############################################################################### 182 | 183 | def loads(s: str, *, encoding: str = ...) -> Any: 184 | """Decodes JSON5 serialized data from a string.""" 185 | 186 | def load(fp: _SupportsRead) -> Any: 187 | """Decodes JSON5 serialized data from a file-like object.""" 188 | 189 | def dumps(obj: Any) -> str: 190 | """Serializes a Python object to a JSON5 compatible string.""" 191 | 192 | def dump(obj: Any, fp: _SupportsWrite[str]) -> None: 193 | """Serializes a Python object to a JSON5 compatible string.""" 194 | 195 | ############################################################################### 196 | ### _exceptions.pyx 197 | ############################################################################### 198 | 199 | class Json5Exception(Exception): 200 | """Base class of any exception thrown by PyJSON5.""" 201 | 202 | def __init__(self, message: Optional[str] = ..., *args: Any) -> None: ... 203 | @property 204 | def message(self) -> Optional[str]: ... 205 | 206 | ############################################################################### 207 | ### _exceptions_encoder.pyx 208 | ############################################################################### 209 | 210 | class Json5EncoderException(Json5Exception): 211 | """Base class of any exception thrown by the serializer.""" 212 | 213 | @final 214 | class Json5UnstringifiableType(Json5EncoderException): 215 | """The encoder was not able to stringify the input, or it was told not to by the supplied ``Options``.""" 216 | 217 | def __init__( 218 | self, 219 | message: Optional[str] = ..., 220 | unstringifiable: Any = ..., 221 | ) -> None: ... 222 | @property 223 | def unstringifiable(self) -> Any: 224 | """The value that caused the problem.""" 225 | 226 | ############################################################################### 227 | ### _exceptions_decoder.pyx 228 | ############################################################################### 229 | 230 | class Json5DecoderException(Json5Exception): 231 | """Base class of any exception thrown by the parser.""" 232 | 233 | def __init__( 234 | self, 235 | message: Optional[str] = ..., 236 | result: Any = ..., 237 | *args: Any, 238 | ) -> None: ... 239 | @property 240 | def result(self) -> Any: 241 | """Deserialized data up until now.""" 242 | 243 | @final 244 | class Json5NestingTooDeep(Json5DecoderException): 245 | """The maximum nesting level on the input data was exceeded.""" 246 | 247 | @final 248 | class Json5EOF(Json5DecoderException): 249 | """The input ended prematurely.""" 250 | 251 | @final 252 | class Json5IllegalCharacter(Json5DecoderException): 253 | """An unexpected character was encountered.""" 254 | 255 | def __init__( 256 | self, 257 | message: Optional[str] = ..., 258 | result: Any = ..., 259 | character: Optional[str] = ..., 260 | *args: Any, 261 | ) -> None: ... 
262 |     @property
263 |     def character(self) -> Optional[str]:
264 |         """Illegal character."""
265 | 
266 | @final
267 | class Json5ExtraData(Json5DecoderException):
268 |     """The input contained extraneous data."""
269 | 
270 |     def __init__(
271 |         self,
272 |         message: Optional[str] = ...,
273 |         result: Any = ...,
274 |         character: Optional[str] = ...,
275 |         *args: Any,
276 |     ) -> None: ...
277 |     @property
278 |     def character(self) -> Optional[str]:
279 |         """Extraneous character."""
280 | 
281 | @final
282 | class Json5IllegalType(Json5DecoderException):
283 |     """The user-supplied callback function returned illegal data."""
284 | 
285 |     def __init__(
286 |         self,
287 |         message: Optional[str] = ...,
288 |         result: Any = ...,
289 |         value: Optional[Any] = ...,
290 |         *args: Any,
291 |     ) -> None: ...
292 |     @property
293 |     def value(self) -> Optional[Any]:
294 |         """Value that caused the problem."""
295 | 
--------------------------------------------------------------------------------
/src/pyjson5/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kijewski/pyjson5/574b56f322e497f041ed16ba2f715f2cc612f0a8/src/pyjson5/py.typed
--------------------------------------------------------------------------------
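A minimal usage sketch of the API declared in src/pyjson5/__init__.pyi above, assuming the compiled pyjson5 extension is importable; the JSON5 literals, variable names, and printed output are illustrative only:

    import io
    import pyjson5

    # decode() parses JSON5 from str; decode_utf8()/decode_latin1()/decode_buffer()
    # accept bytes-like input, and decode_io() reads from a file-like object.
    data = pyjson5.decode('{unquoted: "key", list: [1, 2, 3,], /* comment */}')

    # encode() returns str, encode_bytes() returns bytes; the keyword arguments
    # mirror the Options members (quotationmark, tojson, mappingtypes).
    text = pyjson5.encode(data, quotationmark="'")
    raw = pyjson5.encode_bytes(data)

    # encode_io() streams into a file-like object; supply_bytes=False expects a
    # str sink such as io.StringIO.
    buffer = io.StringIO()
    pyjson5.encode_io(data, buffer, supply_bytes=False)

    # Parse errors derive from Json5DecoderException (here: premature end of input).
    try:
        pyjson5.decode('{"unterminated": ')
    except pyjson5.Json5DecoderException as exc:
        print(type(exc).__name__, exc.message)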