├── .gitattributes
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── ci.yml
    │   ├── codeql-analysis.yml
    │   └── wheels.yml
├── .gitignore
├── .gitmodules
├── .readthedocs.yaml
├── CHANGELOG.md
├── DerivedGeneralCategory.txt.sha
├── LICENSE.Apache
├── LICENSE.MIT
├── MANIFEST.in
├── Makefile
├── README.rst
├── docs
    ├── changelog.md
    ├── conf.py
    ├── decoder.rst
    ├── encoder.rst
    ├── exceptions.rst
    ├── index.rst
    └── performance.rst
├── pyjson5.pyx
├── pyproject.toml
├── requirements-dev.txt
├── requirements-readthedocs.txt
├── scripts
    ├── make_decoder_recursive_select.py
    ├── make_escape_dct.py
    ├── make_unicode_categories.py
    ├── run-minefield-test.py
    ├── run-tests.py
    ├── sha512sum.py
    └── transcode-to-json.py
├── setup.cfg
├── setup.py
└── src
    ├── DESCRIPTION.inc
    ├── VERSION.inc
    ├── _constants.pyx
    ├── _decoder.pyx
    ├── _decoder_recursive_select.hpp
    ├── _encoder.pyx
    ├── _encoder_options.pyx
    ├── _escape_dct.hpp
    ├── _exceptions.pyx
    ├── _exceptions_decoder.pyx
    ├── _exceptions_encoder.pyx
    ├── _exports.pyx
    ├── _imports.pyx
    ├── _legacy.pyx
    ├── _raise_decoder.pyx
    ├── _raise_encoder.pyx
    ├── _reader_callback.pyx
    ├── _reader_ucs.pyx
    ├── _readers.pyx
    ├── _stack_heap_string.hpp
    ├── _unicode.pyx
    ├── _unicode_cat_of.hpp
    ├── _writer_callback.pyx
    ├── _writer_noop.pyx
    ├── _writer_reallocatable.pyx
    ├── _writers.pyx
    ├── dragonbox.cc
    ├── native.hpp
    └── pyjson5
        ├── __init__.py
        ├── __init__.pyi
        └── py.typed


/.gitattributes:
--------------------------------------------------------------------------------
1 | src/dragonbox.cc linguist-vendored
2 | src/*.hpp linguist-vendored
3 | third-party/** linguist-vendored
4 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: "pip"
 4 |     directory: "/"
 5 |     schedule:
 6 |       interval: "daily"
 7 | 
 8 |   - package-ecosystem: "github-actions"
 9 |     directory: "/"
10 |     schedule:
11 |       interval: "daily"
12 | 
13 |   - package-ecosystem: "gitsubmodule"
14 |     directory: "/"
15 |     schedule:
16 |       interval: "daily"
17 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ${{ matrix.os }}
 8 |     strategy:
 9 |       matrix:
10 |         os:
11 |           - ubuntu-latest
12 |           - macos-latest
13 |           - windows-latest
14 |         python:
15 |           - '3.10'
16 |           - '3.13'
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v4
20 |         with:
21 |           submodules: true
22 | 
23 |       - name: Cache pip
24 |         uses: actions/cache@v4
25 |         with:
26 |           key: cache--${{ matrix.os }}--${{ matrix.python }}--${{ hashFiles('./requirements*.txt', './Makefile') }}
27 |           restore-keys: cache--${{ matrix.os }}--${{ matrix.python }}--
28 |           path: ~/.cache/pip
29 | 
30 |       - name: Setup python
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python }}
34 | 
35 |       - name: Display Python version
36 |         run: python -c 'import sys; print(sys.version)'
37 | 
38 |       - name: Update pip
39 |         run: python -m pip install -U pip wheel setuptools
40 | 
41 |       - name: Install requirements
42 |         run: python -m pip install -Ur requirements-dev.txt
43 | 
44 |       - name: Compile project
45 |         run: make install
46 | 
47 |       - name: Run JSON5 tests suite
48 |         run: python scripts/run-tests.py
49 | 
50 |       - name: Run "JSON is a Minefield" suite
51 |         run: python scripts/run-minefield-test.py
52 | 
53 |   lint:
54 |     runs-on: ubuntu-latest
55 | 
56 |     steps:
57 |       - uses: actions/checkout@v4
58 |         with:
59 |           submodules: true
60 | 
61 |       - name: Cache pip
62 |         uses: actions/cache@v4
63 |         with:
64 |           key: lint--${{ hashFiles('./requirements*.txt', './Makefile') }}
65 |           restore-keys: lint--
66 |           path: ~/.cache/pip
67 | 
68 |       - name: Setup python
69 |         uses: actions/setup-python@v5
70 |         with:
71 |           python-version: '3.13'
72 | 
73 |       - name: Display Python version
74 |         run: python -c 'import sys; print(sys.version)'
75 | 
76 |       - name: Update pip
77 |         run: python -m pip install -U pip wheel setuptools
78 | 
79 |       - name: Install requirements
80 |         run: python -m pip install -Ur requirements-dev.txt
81 | 
82 |       - name: Compile project
83 |         run: make install
84 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
 1 | name: "CodeQL"
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 |   schedule:
 9 |     - cron: '41 21 * * 3'
10 | 
11 | jobs:
12 |   analyze:
13 |     name: Analyze
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       actions: read
17 |       contents: read
18 |       security-events: write
19 | 
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         language: [ 'cpp', 'python' ]
24 | 
25 |     steps:
26 |     - name: Checkout repository
27 |       uses: actions/checkout@v4
28 |       with:
29 |         submodules: true
30 | 
31 |     - name: Cache pip
32 |       uses: actions/cache@v4
33 |       with:
34 |         key: codeql-analysis--${{ github.event.inputs.os }}--${{ github.event.inputs.python }}--${{ hashFiles('./requirements-dev.txt') }}
35 |         path: ~/.cache/pip
36 | 
37 |     - name: Setup python
38 |       uses: actions/setup-python@v5
39 |       with:
40 |         python-version: ${{ github.event.inputs.python }}
41 | 
42 |     - name: Initialize CodeQL
43 |       uses: github/codeql-action/init@v3
44 |       with:
45 |         languages: ${{ matrix.language }}
46 | 
47 |     - name: Update pip
48 |       run: python -m pip install -U pip wheel setuptools
49 | 
50 |     - name: Install requirements
51 |       run: python -m pip install -Ur requirements-dev.txt
52 | 
53 |     - name: Compile
54 |       run: make wheel
55 | 
56 |     - name: Perform CodeQL Analysis
57 |       uses: github/codeql-action/analyze@v3
58 | 


--------------------------------------------------------------------------------
/.github/workflows/wheels.yml:
--------------------------------------------------------------------------------
  1 | name: Build Wheels
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 | 
  6 | jobs:
  7 |   linux-define-matrix:
  8 |     runs-on: ubuntu-latest
  9 |     outputs:
 10 |       matrix: ${{ steps.set-matrix.outputs.matrix }}
 11 |     steps:
 12 |       - uses: actions/checkout@v4
 13 |         with:
 14 |           submodules: true
 15 | 
 16 |       - name: Setup python
 17 |         uses: actions/setup-python@v5
 18 |         with:
 19 |           python-version: '3.13'
 20 |   
 21 |       - run: python -m pip install -U pip wheel setuptools
 22 |       - run: python -m pip install -U 'cibuildwheel==2.*'
 23 | 
 24 |       - id: set-matrix
 25 |         run: |
 26 |           TARGETS="$(python -m cibuildwheel --archs "x86_64 i686 aarch64 ppc64le s390x armv7l" --print-build-identifiers)"
 27 |           echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT
 28 |         shell: bash
 29 |         env:
 30 |           CIBW_BUILD_FRONTEND: build
 31 |           CIBW_SKIP: 'cp27-* cp36-* pp*'
 32 |           CIBW_DEPENDENCY_VERSIONS: pinned
 33 |           CIBW_PLATFORM: linux
 34 | 
 35 |   macos-define-matrix:
 36 |     runs-on: macos-13
 37 |     outputs:
 38 |       matrix: ${{ steps.set-matrix.outputs.matrix }}
 39 |     steps:
 40 |       - uses: actions/checkout@v4
 41 |         with:
 42 |           submodules: true
 43 | 
 44 |       - name: Setup python
 45 |         uses: actions/setup-python@v5
 46 |         with:
 47 |           python-version: '3.13'
 48 |   
 49 |       - run: python -m pip install -U pip wheel setuptools
 50 |       - run: python -m pip install -U 'cibuildwheel==2.*'
 51 | 
 52 |       - id: set-matrix
 53 |         run: |
 54 |           TARGETS="$(python -m cibuildwheel --archs "x86_64 arm64 universal2" --print-build-identifiers)"
 55 |           echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT
 56 |         shell: bash
 57 |         env:
 58 |           CIBW_BUILD_FRONTEND: build
 59 |           CIBW_SKIP: 'cp27-* cp36-* pp*'
 60 |           CIBW_DEPENDENCY_VERSIONS: pinned
 61 |           CIBW_PLATFORM: macos
 62 | 
 63 |   windows-define-matrix:
 64 |     runs-on: windows-2022
 65 |     outputs:
 66 |       matrix: ${{ steps.set-matrix.outputs.matrix }}
 67 |     steps:
 68 |       - uses: actions/checkout@v4
 69 |         with:
 70 |           submodules: true
 71 | 
 72 |       - name: Setup python
 73 |         uses: actions/setup-python@v5
 74 |         with:
 75 |           python-version: '3.13'
 76 |   
 77 |       - run: python -m pip install -U pip wheel setuptools
 78 |       - run: python -m pip install -U 'cibuildwheel==2.*'
 79 | 
 80 |       - id: set-matrix
 81 |         run: |
 82 |           TARGETS="$(python -m cibuildwheel --archs "AMD64 x86 ARM64" --print-build-identifiers)"
 83 |           echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT
 84 |         shell: bash
 85 |         env:
 86 |           CIBW_BUILD_FRONTEND: build
 87 |           CIBW_SKIP: 'cp27-* cp36-* pp*'
 88 |           CIBW_DEPENDENCY_VERSIONS: pinned
 89 |           CIBW_PLATFORM: windows
 90 | 
 91 |   linux-build:
 92 |     runs-on: ubuntu-latest
 93 | 
 94 |     needs:
 95 |       - linux-define-matrix
 96 |     strategy:
 97 |       matrix:
 98 |         only: ${{ fromJSON(needs.linux-define-matrix.outputs.matrix) }}
 99 | 
100 |     steps:
101 |       - uses: actions/checkout@v4
102 |         with:
103 |           submodules: true
104 | 
105 |       - name: Set up QEMU
106 |         uses: docker/setup-qemu-action@v3
107 |         with:
108 |           platforms: all
109 |   
110 |       - name: Cache pip
111 |         uses: actions/cache@v4
112 |         with:
113 |           key: linux--${{ hashFiles('./requirements-dev.txt') }}
114 |           path: ~/.cache/pip
115 |   
116 |       - name: Setup python
117 |         uses: actions/setup-python@v5
118 |         with:
119 |           python-version: '3.13'
120 |   
121 |       - run: python -m pip install -U pip wheel setuptools
122 |       - run: python -m pip install -Ur requirements-dev.txt
123 |       - run: python -m pip install -U 'cibuildwheel==2.*'
124 | 
125 |       - run: make prepare
126 | 
127 |       - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }}
128 |         env:
129 |           CIBW_BUILD_FRONTEND: build
130 |           CIBW_SKIP: 'cp27-* pp*'
131 |           CIBW_DEPENDENCY_VERSIONS: pinned
132 |           CIBW_PLATFORM: linux
133 |           CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py
134 | 
135 |       - uses: actions/upload-artifact@v4
136 |         with:
137 |           name: ${{ matrix.only }}
138 |           path: ./wheelhouse
139 |           retention-days: 1
140 | 
141 |   macos-build:
142 |     runs-on: macos-13
143 | 
144 |     needs:
145 |       - macos-define-matrix
146 |     strategy:
147 |       matrix:
148 |         only: ${{ fromJSON(needs.macos-define-matrix.outputs.matrix) }}
149 | 
150 |     steps:
151 |       - uses: actions/checkout@v4
152 |         with:
153 |           submodules: true
154 |   
155 |       - name: Cache pip
156 |         uses: actions/cache@v4
157 |         with:
158 |           key: windows--${{ hashFiles('./requirements-dev.txt') }}
159 |           path: ~/.cache/pip
160 |   
161 |       - name: Setup python
162 |         uses: actions/setup-python@v5
163 |         with:
164 |           python-version: '3.13'
165 |   
166 |       - run: python -m pip install -U pip wheel setuptools
167 |       - run: python -m pip install -Ur requirements-dev.txt
168 |       - run: python -m pip install -U 'cibuildwheel==2.*'
169 | 
170 |       - run: make prepare
171 | 
172 |       - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }}
173 |         env:
174 |           CIBW_BUILD_FRONTEND: build
175 |           CIBW_SKIP: 'cp27-* pp*'
176 |           CIBW_DEPENDENCY_VERSIONS: pinned
177 |           CIBW_PLATFORM: macos
178 |           CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py
179 | 
180 |       - uses: actions/upload-artifact@v4
181 |         with:
182 |           name: ${{ matrix.only }}
183 |           path: ./wheelhouse
184 |           retention-days: 1
185 | 
186 |   windows-build:
187 |     runs-on: windows-2022
188 | 
189 |     needs:
190 |       - windows-define-matrix
191 |     strategy:
192 |       matrix:
193 |         only: ${{ fromJSON(needs.windows-define-matrix.outputs.matrix) }}
194 | 
195 |     steps:
196 |       - uses: actions/checkout@v4
197 |         with:
198 |           submodules: true
199 |   
200 |       - name: Cache pip
201 |         uses: actions/cache@v4
202 |         with:
203 |           key: windows--${{ hashFiles('./requirements-dev.txt') }}
204 |           path: ~/.cache/pip
205 |   
206 |       - name: Setup python
207 |         uses: actions/setup-python@v5
208 |         with:
209 |           python-version: '3.13'
210 |   
211 |       - run: python -m pip install -U pip wheel setuptools
212 |       - run: python -m pip install -Ur requirements-dev.txt
213 |       - run: python -m pip install -U 'cibuildwheel==2.*'
214 | 
215 |       - run: make prepare
216 | 
217 |       - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }}
218 |         env:
219 |           CIBW_BUILD_FRONTEND: build
220 |           CIBW_SKIP: 'cp27-* pp*'
221 |           CIBW_DEPENDENCY_VERSIONS: pinned
222 |           CIBW_PLATFORM: windows
223 |           CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py
224 | 
225 |       - uses: actions/upload-artifact@v4
226 |         with:
227 |           name: ${{ matrix.only }}
228 |           path: ./wheelhouse
229 |           retention-days: 1
230 | 
231 |   combine:
232 |     runs-on: ubuntu-latest
233 |     needs:
234 |       - linux-build
235 |       - macos-build
236 |       - windows-build
237 |     steps:
238 |       - uses: actions/download-artifact@v4
239 |         with:
240 |           path: ./wheelhouse
241 |       - run: |
242 |           find -name '*.zip' -exec unzip '{}' ';'
243 |           find -name '*.zip' -exec rm '{}' +
244 |           find -name '*.whl' -exec mv -t. '{}' +
245 |           find -type d -delete
246 |         shell: bash
247 |         working-directory: ./wheelhouse
248 |       - uses: actions/upload-artifact@v4
249 |         with:
250 |           name: wheelhouse
251 |           path: ./wheelhouse
252 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cdo]
 2 | *.egg-info/
 3 | 
 4 | /env*/
 5 | /build/
 6 | /cython_debug/
 7 | /dist/
 8 | 
 9 | *.c
10 | *.cpp
11 | *.so
12 | *.o
13 | 
14 | run.cgi
15 | 
16 | *.swp*
17 | *.nfs*
18 | *~
19 | *.~*
20 | ~*
21 | *.tmp
22 | *.old
23 | *.bak
24 | *.pid
25 | 
26 | .*
27 | !.git*
28 | !.readthedocs.yaml
29 | 
30 | *.orig
31 | 
32 | /DerivedGeneralCategory.txt
33 | /citylots.json
34 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "third-party/json5-tests"]
 2 | 	path = third-party/json5-tests
 3 | 	url = https://github.com/json5/json5-tests.git
 4 | [submodule "third-party/JSONTestSuite"]
 5 | 	path = third-party/JSONTestSuite
 6 | 	url = https://github.com/nst/JSONTestSuite.git
 7 | [submodule "third-party/fast_double_parser"]
 8 | 	path = third-party/fast_double_parser
 9 | 	url = https://github.com/lemire/fast_double_parser.git
10 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 2 | 
 3 | version: 2
 4 | 
 5 | build:
 6 |   os: ubuntu-24.04
 7 |   apt_packages:
 8 |     - graphviz
 9 |   tools:
10 |     python: "3.13"
11 | 
12 | sphinx:
13 |   configuration: docs/conf.py
14 | 
15 | python:
16 |   install:
17 |     - requirements: requirements-readthedocs.txt
18 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | **1.6.9 (2025-05-12)**
  4 | 
  5 | * Remove unused import to fix installation on Termux (by veka0, [#105](https://github.com/Kijewski/pyjson5/pull/105))
  6 | 
  7 | **1.6.8 (2025-01-03)**
  8 | 
  9 | * Requires at least Python 3.7
 10 | * Update dependencies
 11 | * Relicense to MIT OR Apache-2.0
 12 | 
 13 | **1.6.7 (2024-10-08)**
 14 | 
 15 | * Update to Unicode 16.0.0
 16 | * Update for Python 3.13
 17 | 
 18 | **1.6.6 (2024-02-09)**
 19 | 
 20 | * Fix return type of `load()` (by Q-ten, [#88](https://github.com/Kijewski/pyjson5/pull/88))
 21 | 
 22 | **1.6.5 (2023-12-04)**
 23 | 
 24 | * Fix type hints for optional arguments
 25 | 
 26 | **1.6.4 (2023-07-31)**
 27 | 
 28 | * Update to Cython 3
 29 | * Update for Python 3.12
 30 | 
 31 | **1.6.3 (2023-06-24)**
 32 | 
 33 | * Fix typing for `dump()` ([#61](https://github.com/Kijewski/pyjson5/issues/61))
 34 | 
 35 | **1.6.2 (2022-09-15)**
 36 | 
 37 | * Update to Unicode 15.0.0
 38 | 
 39 | **1.6.1 (2022-01-18)**
 40 | 
 41 | * Fix [PEP 517](https://www.python.org/dev/peps/pep-0517/)-like installation using [build](https://github.com/pypa/build) (by [Tomasz Kłoczko](https://github.com/kloczek))
 42 | 
 43 | **1.6.0 (2021-11-17)**
 44 | 
 45 | * Fallback to encode `vars(obj)` if `obj` is not stringifyable, e.g. to serialize [dataclasses](https://docs.python.org/3/library/dataclasses.html)
 46 | * Update documentation to use newer [sphinx](https://www.sphinx-doc.org/) version
 47 | * Use [dependabot](https://github.com/dependabot) to keep dependencies current
 48 | * Update [fast_double_parser](https://github.com/lemire/fast_double_parser)
 49 | 
 50 | **1.5.3 (2021-11-16)**
 51 | 
 52 | * Add [PEP 484](https://www.python.org/dev/peps/pep-0484/) type hints (by [Pascal Corpet](https://github.com/pcorpet))
 53 | * Update [JSONTestSuite](https://github.com/nst/JSONTestSuite)
 54 | 
 55 | **1.5.2 (2021-07-09)**
 56 | 
 57 | * Add file extensions to fix compilation with current Apple SDKs
 58 | * Update fast_double_parser to v0.5.0
 59 | * Update to Unicode 14.0.0d18
 60 | 
 61 | **1.5.1 (2021-05-01)**
 62 | 
 63 | * Update up Unicode 14.0.0d9
 64 | 
 65 | **1.5.0 (2021-03-11)**
 66 | 
 67 | * Faster floating-point number encoding using [Junekey Jeon's Dragonbox algorithm](https://github.com/abolz/Drachennest/blob/77f4889a4cd9d7f0b9da82a379f14beabcfba13e/src/dragonbox.cc) implemented by Alexander Bolz
 68 | * Removed a lot of configuration options from pyjson5.Options()
 69 | 
 70 | **1.4.9 (2021-03-03)**
 71 | 
 72 | * Faster floating-point number decoding using [fast_double_parser](https://github.com/lemire/fast_double_parser) by Daniel Lemire
 73 | 
 74 | **1.4.8 (2020-12-23)**
 75 | 
 76 | * Update up Unicode 13.0.0
 77 | * Don't use non-standard ``__uint128``
 78 | * Add PyPy compatibility
 79 | * Add ``decode_utf8(byte-like)``
 80 | 
 81 | **1.4.7 (2019-12-20)**
 82 | 
 83 | * Allow ``\uXXXX`` sequences in identifier names
 84 | * Update to Unicode 12.1.0
 85 | * Optimized encoder and decoder for a little better speed
 86 | * Setup basic CI environment
 87 | * Parse ``\uXXXX`` in literal keys
 88 | * Understand "0."
 89 | * Add CI tests
 90 | * Reject unescaped newlines in strings per spec
 91 | * Allow overriding default quotation mark
 92 | * Make Options objects pickle-able
 93 | * Bump major version number
 94 | 
 95 | **0.4.6 (2019-02-09)**
 96 | 
 97 | * Fix PyUnicode_AsUTF8AndSize()'s signature
 98 | 
 99 | **0.4.5 (2018-06-02)**
100 | 
101 | * Don't use C++14 features, only C++11
102 | 
103 | **0.4.4 (2018-05-19)**
104 | 
105 | * Better documentation
106 | * Optimized encoder for a little better speed
107 | 
108 | **0.4.3 (2018-05-03)**
109 | 
110 | * Initial release
111 | 


--------------------------------------------------------------------------------
/DerivedGeneralCategory.txt.sha:
--------------------------------------------------------------------------------
1 | 993261c82681a5580aaa42c6184d61a289a1eaa48022fded929d00487066b0ed1014f35cbc0890c6db5f3cbf8ca51dd99362e088aceedf548cfb3cc8d72bb14e  DerivedGeneralCategory.txt
2 | 


--------------------------------------------------------------------------------
/LICENSE.Apache:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/LICENSE.MIT:
--------------------------------------------------------------------------------
 1 | Permission is hereby granted, free of charge, to any person obtaining a copy
 2 | of this software and associated documentation files (the "Software"), to deal
 3 | in the Software without restriction, including without limitation the rights
 4 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 5 | copies of the Software, and to permit persons to whom the Software is
 6 | furnished to do so, subject to the following conditions:
 7 | 
 8 | The above copyright notice and this permission notice shall be included in all
 9 | copies or substantial portions of the Software.
10 | 
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | SOFTWARE.
18 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include CHANGELOG.md
 2 | include LICENSE*
 3 | include Makefile
 4 | include pyjson5.cpp
 5 | include pyjson5.pyx
 6 | include pyproject.toml
 7 | include third-party/fast_double_parser/include/fast_double_parser.h
 8 | include requirements*.txt
 9 | recursive-include docs **
10 | recursive-include scripts **
11 | recursive-include src **
12 | recursive-include third-party/json5-tests **
13 | recursive-include third-party/JSONTestSuite/test_parsing **
14 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: sdist wheel docs
 2 | 
 3 | .DELETE_ON_ERROR:
 4 | 
 5 | .PHONY: all sdist wheel clean docs prepare test install
 6 | 
 7 | export PYTHONUTF8 := 1
 8 | export PYTHONIOENCODING := UTF-8
 9 | 
10 | INCLUDES := \
11 |     src/VERSION.inc src/DESCRIPTION.inc \
12 |     src/_decoder_recursive_select.hpp src/_unicode_cat_of.hpp \
13 |     src/_escape_dct.hpp src/_stack_heap_string.hpp src/native.hpp \
14 |     src/dragonbox.cc
15 | 
16 | FILES := Makefile MANIFEST.in pyjson5.pyx README.rst pyproject.toml ${INCLUDES}
17 | 
18 | DerivedGeneralCategory.txt: DerivedGeneralCategory.txt.sha
19 | 	curl -s -o $@ https://www.unicode.org/Public/16.0.0/ucd/extracted/DerivedGeneralCategory.txt
20 | 	python scripts/sha512sum.py -c $@.sha
21 | 
22 | src/_unicode_cat_of.hpp: DerivedGeneralCategory.txt scripts/make_unicode_categories.py
23 | 	python scripts/make_unicode_categories.py $< $@
24 | 
25 | src/_decoder_recursive_select.py.hpp: scripts/make_decoder_recursive_select.py
26 | 	python $< $@
27 | 
28 | src/_escape_dct.hpp: scripts/make_escape_dct.py
29 | 	python $< $@
30 | 
31 | pyjson5.cpp: pyjson5.pyx $(wildcard src/*.pyx) $(wildcard src/*.hpp)
32 | 	python -m cython -f -o $@ $<
33 | 
34 | prepare: pyjson5.cpp ${FILES}
35 | 
36 | sdist: prepare
37 | 	-rm -- dist/pyjson5-*.tar.gz
38 | 	python -m build --sdist
39 | 
40 | wheel: prepare
41 | 	-rm -- dist/pyjson5-*.whl
42 | 	python -m build --wheel
43 | 
44 | install: wheel
45 | 	pip install --force dist/pyjson5-*.whl
46 | 
47 | docs: install $(wildcard docs/* docs/*/*)
48 | 	python -m sphinx -M html docs/ dist/
49 | 
50 | clean:
51 | 	[ ! -d build/ ] || rm -r -- build/
52 | 	[ ! -d dist/ ] || rm -r -- dist/
53 | 	[ ! -d pyjson5.egg-info/ ] || rm -r -- pyjson5.egg-info/
54 | 	-rm -- pyjson5.*.so python5.cpp
55 | 
56 | test: wheel
57 | 	pip install --force dist/pyjson5-*.whl
58 | 	python scripts/run-minefield-test.py
59 | 	python scripts/run-tests.py
60 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | PyJSON5
 2 | ==========
 3 | 
 4 | A JSON5 serializer and parser library for Python 3 written in
 5 | `Cython <http://cython.org/>`_.
 6 | 
 7 | 
 8 | Serializer
 9 | ----------
10 | 
11 | The serializer returns ASCII data that can safely be used in an HTML template.
12 | Apostrophes, ampersands, greater-than, and less-then signs are encoded as
13 | unicode escaped sequences. E.g. this snippet is safe for any and all input:
14 | 
15 | .. code:: html
16 | 
17 |     "<a onclick='alert(" + encode(data) + ")'>show message</a>"
18 | 
19 | Unless the input contains infinite or NaN values, the result will be valid
20 | `JSON <https://tools.ietf.org/html/rfc8259>`_ data.
21 | 
22 | 
23 | Parser
24 | ------
25 | 
26 | All valid `JSON5 1.0.0 <https://spec.json5.org/>`_ and
27 | `JSON <https://tools.ietf.org/html/rfc8259>`_ data can be read,
28 | unless the nesting level is absurdly high.
29 | 
30 | Functions
31 | ---------
32 | 
33 | You can find the full documentation online at https://pyjson5.readthedocs.io/en/latest/.
34 | Or simply call ``help(pyjson5)``. :-)
35 | 
36 | The library supplies load(s) and dump(s) functions, so you can use it as a
37 | drop-in replacement for Python's builtin ``json`` module, but you *should*
38 | use the functions ``encode_*()`` and ``decode_*()`` instead.
39 | 
40 | Compatibility
41 | -------------
42 | 
43 | At least CPython 3.7 or a recent Pypy3 version is needed.
44 | 


--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | ../CHANGELOG.md


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | 
 5 | sys.path.insert(0, os.path.abspath('..'))
 6 | 
 7 | extensions = [
 8 |     'sphinx.ext.autodoc',
 9 |     'sphinx.ext.autosectionlabel',
10 |     'sphinx.ext.autosummary',
11 |     'sphinx.ext.graphviz',
12 |     'sphinx.ext.napoleon',
13 |     'sphinx.ext.intersphinx',
14 |     'sphinx.ext.inheritance_diagram',
15 |     'sphinx_autodoc_typehints',
16 |     'sphinx.ext.autosectionlabel',
17 |     'myst_parser',
18 | ]
19 | 
20 | templates_path = ['_templates']
21 | source_suffix = '.rst'
22 | master_doc = 'index'
23 | 
24 | project = 'PyJSON5'
25 | copyright = '2018-2025, René Kijewski'
26 | author = 'René Kijewski'
27 | 
28 | with open('../src/VERSION.inc', 'rt') as f:
29 |     version = eval(f.read().strip())
30 |     release = version
31 | 
32 | language = 'en'
33 | exclude_patterns = []
34 | pygments_style = 'sphinx'
35 | todo_include_todos = False
36 | 
37 | html_theme = 'furo'
38 | htmlhelp_basename = 'PyJSON5doc'
39 | 
40 | display_toc = True
41 | autodoc_default_flags = ['members']
42 | autosummary_generate = True
43 | 
44 | intersphinx_mapping = {
45 |     'python': ('https://docs.python.org/3.13', None),
46 | }
47 | 
48 | graphviz_output_format = 'svg'
49 | 
50 | inheritance_graph_attrs = {
51 |     'size': '"8.0, 10.0"',
52 |     'fontsize': 32,
53 |     'bgcolor': 'lightgrey',
54 | }
55 | inheritance_node_attrs = {
56 |     'color': 'black',
57 |     'fillcolor': 'white',
58 |     'style': '"filled,solid"',
59 | }
60 | inheritance_edge_attrs = {
61 |     'penwidth': 1.5,
62 |     'arrowsize': 1.2,
63 | }
64 | 


--------------------------------------------------------------------------------
/docs/decoder.rst:
--------------------------------------------------------------------------------
 1 | Parser / Decoder
 2 | ================
 3 | 
 4 | All valid `JSON5 1.0.0 <https://spec.json5.org/>`_ and
 5 | `JSON <https://tools.ietf.org/html/rfc8259>`_ data can be read,
 6 | unless the nesting level is absurdly high.
 7 | 
 8 | 
 9 | Quick Decoder Summary
10 | ---------------------
11 | 
12 | .. autosummary::
13 | 
14 |     ~pyjson5.decode
15 |     ~pyjson5.decode_latin1
16 |     ~pyjson5.decode_buffer
17 |     ~pyjson5.decode_callback
18 |     ~pyjson5.decode_io
19 |     ~pyjson5.load
20 |     ~pyjson5.loads
21 |     ~pyjson5.Json5DecoderException
22 |     ~pyjson5.Json5NestingTooDeep
23 |     ~pyjson5.Json5EOF
24 |     ~pyjson5.Json5IllegalCharacter
25 |     ~pyjson5.Json5ExtraData
26 |     ~pyjson5.Json5IllegalType
27 | 
28 | 
29 | Full Decoder Description
30 | ------------------------
31 | 
32 | .. autofunction:: pyjson5.decode
33 | 
34 | .. autofunction:: pyjson5.decode_latin1
35 | 
36 | .. autofunction:: pyjson5.decode_buffer
37 | 
38 | .. autofunction:: pyjson5.decode_callback
39 | 
40 | .. autofunction:: pyjson5.decode_io
41 | 
42 | 
43 | Decoder Compatibility Functions
44 | -------------------------------
45 | 
46 | .. autofunction:: pyjson5.load
47 | 
48 | .. autofunction:: pyjson5.loads
49 | 
50 | 
51 | Decoder Exceptions
52 | ------------------
53 | 
54 | .. inheritance-diagram::
55 |     pyjson5.Json5DecoderException
56 |     pyjson5.Json5NestingTooDeep
57 |     pyjson5.Json5EOF
58 |     pyjson5.Json5IllegalCharacter
59 |     pyjson5.Json5ExtraData
60 |     pyjson5.Json5IllegalType
61 | 
62 | .. autoexception:: pyjson5.Json5DecoderException
63 |     :members:
64 |     :inherited-members:
65 | 
66 | .. autoexception:: pyjson5.Json5NestingTooDeep
67 |     :members:
68 |     :inherited-members:
69 | 
70 | .. autoexception:: pyjson5.Json5EOF
71 |     :members:
72 |     :inherited-members:
73 | 
74 | .. autoexception:: pyjson5.Json5IllegalCharacter
75 |     :members:
76 |     :inherited-members:
77 | 
78 | .. autoexception:: pyjson5.Json5ExtraData
79 |     :members:
80 |     :inherited-members:
81 | 
82 | .. autoexception:: pyjson5.Json5IllegalType
83 |     :members:
84 |     :inherited-members:
85 | 


--------------------------------------------------------------------------------
/docs/encoder.rst:
--------------------------------------------------------------------------------
 1 | Serializer / Encoder
 2 | ====================
 3 | 
 4 | The serializer returns ASCII data that can safely be used in an HTML template.
 5 | Apostrophes, ampersands, greater-than, and less-then signs are encoded as
 6 | unicode escaped sequences. E.g. this snippet is safe for any and all input:
 7 | 
 8 | .. code:: html
 9 | 
10 |     "<a onclick='alert(" + encode(data) + ")'>show message</a>"
11 | 
12 | Unless the input contains infinite or NaN values, the result will be valid
13 | `JSON <https://tools.ietf.org/html/rfc8259>`_ data.
14 | 
15 | 
16 | Quick Encoder Summary
17 | ---------------------
18 | 
19 | .. autosummary::
20 | 
21 |     ~pyjson5.encode
22 |     ~pyjson5.encode_bytes
23 |     ~pyjson5.encode_callback
24 |     ~pyjson5.encode_io
25 |     ~pyjson5.encode_noop
26 |     ~pyjson5.dump
27 |     ~pyjson5.dumps
28 |     ~pyjson5.Options
29 |     ~pyjson5.Json5EncoderException
30 |     ~pyjson5.Json5UnstringifiableType
31 | 
32 | 
33 | Full Encoder Description
34 | ------------------------
35 | 
36 | .. autofunction:: pyjson5.encode
37 | 
38 | .. autofunction:: pyjson5.encode_bytes
39 | 
40 | .. autofunction:: pyjson5.encode_callback
41 | 
42 | .. autofunction:: pyjson5.encode_io
43 | 
44 | .. autofunction:: pyjson5.encode_noop
45 | 
46 | .. autoclass:: pyjson5.Options
47 |     :members:
48 |     :inherited-members:
49 | 
50 | 
51 | Encoder Compatibility Functions
52 | -------------------------------
53 | 
54 | .. autofunction:: pyjson5.dump
55 | 
56 | .. autofunction:: pyjson5.dumps
57 | 
58 | 
59 | Encoder Exceptions
60 | ------------------
61 | 
62 | .. inheritance-diagram::
63 |     pyjson5.Json5Exception
64 |     pyjson5.Json5EncoderException
65 |     pyjson5.Json5UnstringifiableType
66 | 
67 | .. autoexception:: pyjson5.Json5EncoderException
68 |     :members:
69 |     :inherited-members:
70 | 
71 | .. autoexception:: pyjson5.Json5UnstringifiableType
72 |     :members:
73 |     :inherited-members:
74 | 


--------------------------------------------------------------------------------
/docs/exceptions.rst:
--------------------------------------------------------------------------------
 1 | Exceptions
 2 | ==========
 3 | 
 4 | .. inheritance-diagram::
 5 |     pyjson5.Json5Exception
 6 |     pyjson5.Json5EncoderException
 7 |     pyjson5.Json5UnstringifiableType
 8 |     pyjson5.Json5DecoderException
 9 |     pyjson5.Json5NestingTooDeep
10 |     pyjson5.Json5EOF
11 |     pyjson5.Json5IllegalCharacter
12 |     pyjson5.Json5ExtraData
13 |     pyjson5.Json5IllegalType
14 | 
15 | .. autoexception:: pyjson5.Json5Exception
16 |     :members:
17 |     :inherited-members:
18 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | PyJSON5
 2 | =======
 3 | 
 4 | A JSON5 serializer and parser library for Python 3.7 and later.
 5 | 
 6 | 
 7 | The serializer returns ASCII data that can safely be used in an HTML template.
 8 | Apostrophes, ampersands, greater-than, and less-then signs are encoded as
 9 | unicode escaped sequences. E.g. this snippet is safe for any and all input:
10 | 
11 | .. code:: html
12 | 
13 |     "<a onclick='alert(" + encode(data) + ")'>show message</a>"
14 | 
15 | Unless the input contains infinite or NaN values, the result will be valid
16 | `JSON <https://tools.ietf.org/html/rfc8259>`_ data.
17 | 
18 | 
19 | All valid `JSON5 1.0.0 <https://spec.json5.org/>`_ and
20 | `JSON <https://tools.ietf.org/html/rfc8259>`_ data can be read,
21 | unless the nesting level is absurdly high.
22 | 
23 | 
24 | Installation
25 | ------------
26 | 
27 | .. code:: bash
28 | 
29 |     $ pip install pyjson5
30 | 
31 | 
32 | Table of Contents
33 | -----------------
34 | 
35 | .. toctree::
36 |     :maxdepth: 2
37 | 
38 |     encoder.rst
39 |     decoder.rst
40 |     exceptions.rst
41 |     performance.rst
42 |     changelog.md
43 | 
44 | 
45 | Quick Summary
46 | -------------
47 | 
48 | .. autosummary::
49 | 
50 |     ~pyjson5.decode
51 |     ~pyjson5.decode_buffer
52 |     ~pyjson5.decode_callback
53 |     ~pyjson5.decode_io
54 |     ~pyjson5.load
55 |     ~pyjson5.loads
56 |     ~pyjson5.encode
57 |     ~pyjson5.encode_bytes
58 |     ~pyjson5.encode_callback
59 |     ~pyjson5.encode_io
60 |     ~pyjson5.encode_noop
61 |     ~pyjson5.dump
62 |     ~pyjson5.dumps
63 |     ~pyjson5.Options
64 |     ~pyjson5.Json5EncoderException
65 |     ~pyjson5.Json5DecoderException
66 | 
67 | 
68 | Compatibility
69 | -------------
70 | 
71 | At least CPython / PyPy 3.7, and a C++11 compatible compiler (such as GCC 5.2+) is needed.
72 | 
73 | 
74 | -------------------------------------------------------------------------------
75 | 
76 | :ref:`Glossary / Index <genindex>`
77 | 


--------------------------------------------------------------------------------
/docs/performance.rst:
--------------------------------------------------------------------------------
 1 | Performance
 2 | ===========
 3 | 
 4 | This library is written in Cython for a better performance than a pure-Python implementation could give you.
 5 | 
 6 | 
 7 | Decoder Performance
 8 | -------------------
 9 | 
10 | The library has about the same speed as the shipped ``json`` module for *pure* JSON data.
11 | 
12 | * Dataset: https://github.com/zemirco/sf-city-lots-json
13 | * Version: Python 3.9.1+ (default, Feb  5 2021, 13:46:56)
14 | * CPU: AMD Ryzen 7 2700 @ 3.7GHz
15 | * :func:`pyjson5.decode`: **2.08 s** ± 7.49 ms per loop *(lower is better)*
16 | * :func:`json.loads`: **2.71 s** ± 12.1 ms per loop
17 | * The decoder works correcty: ``json.loads(content) == pyjson5.loads(content)``
18 | 
19 | 
20 | Encoder Performance
21 | -------------------
22 | 
23 | The encoder generates pure JSON data if there are no infinite or NaN values in the input, which are invalid in JSON.
24 | The serialized data is XML-safe, i.e. there are no cheverons ``<>``, ampersands ``&``, apostrophes ``'`` or control characters in the output.
25 | The output is always ASCII regardless if you call :func:`pyjson5.encode` or :func:`pyjson5.encode_bytes`.
26 | 
27 | * Dataset: https://github.com/zemirco/sf-city-lots-json
28 | * Python 3.9.1+ (default, Feb  5 2021, 13:46:56) 
29 | * CPU: AMD Ryzen 7 2700 @ 3.7GHz
30 | * :func:`pyjson5.encode`: **1.37** s ± 19.2 per loop *(lower is better)*
31 | * :func:`json.dumps`: **3.66** s ± 72.6 ms per loop
32 | * :func:`json.dumps` + :func:`xml.sax.saxutils.escape`: **4.01** s ± 21.3 ms per loop
33 | * The encoder works correcty: ``obj == json.loads(pyjson5.encode(obj))``
34 | 
35 | 
36 | Benchmark
37 | ---------
38 | 
39 | Using `Ultrajson's benchmark <https://github.com/ultrajson/ultrajson/blob/197a7fd4d8bbf0a8355852017c8b25aab26b6777/tests/benchmark.py>`_
40 | you can tell for which kind of data PyJSON5 is fast, and for which data it is slow in comparison *(higher is better)*:
41 | 
42 | +-----------------------------------------------------------+-------------+------------+------------+------------+
43 | |                                                           |  json       | pyjson5    | ujson      | orjson     |
44 | +===========================================================+=============+============+============+============+
45 | | **Array with 256 doubles**                                |             |            |            |            |
46 | +-----------------------------------------------------------+-------------+------------+------------+------------+
47 | | encode                                                    |       6,425 |     81,202 |     28,966 |     83,836 |
48 | +-----------------------------------------------------------+-------------+------------+------------+------------+
49 | | decode                                                    |      16,759 |     34,801 |     34,794 |     80,655 |
50 | +-----------------------------------------------------------+-------------+------------+------------+------------+
51 | | **Array with 256 strings**                                |             |            |            |            |
52 | +-----------------------------------------------------------+-------------+------------+------------+------------+
53 | | encode                                                    |      36,969 |     73,165 |     35,574 |    113,082 |
54 | +-----------------------------------------------------------+-------------+------------+------------+------------+
55 | | decode                                                    |      42,730 |     38,542 |     38,386 |     60,732 |
56 | +-----------------------------------------------------------+-------------+------------+------------+------------+
57 | | **Array with 256 UTF-8 strings**                          |             |            |            |            |
58 | +-----------------------------------------------------------+-------------+------------+------------+------------+
59 | | encode                                                    |       3,458 |      3,134 |      4,024 |     31,677 |
60 | +-----------------------------------------------------------+-------------+------------+------------+------------+
61 | | decode                                                    |       2,428 |      2,498 |      2,491 |      1,750 |
62 | +-----------------------------------------------------------+-------------+------------+------------+------------+
63 | | **Array with 256 True values**                            |             |            |            |            |
64 | +-----------------------------------------------------------+-------------+------------+------------+------------+
65 | | encode                                                    |     130,441 |    282,703 |    131,279 |    423,371 |
66 | +-----------------------------------------------------------+-------------+------------+------------+------------+
67 | | decode                                                    |     220,657 |    262,690 |    264,485 |    262,283 |
68 | +-----------------------------------------------------------+-------------+------------+------------+------------+
69 | | **Array with 256 dict{string, int} pairs**                |             |            |            |            |
70 | +-----------------------------------------------------------+-------------+------------+------------+------------+
71 | | encode                                                    |      11,621 |     10,014 |     18,148 |     73,905 |
72 | +-----------------------------------------------------------+-------------+------------+------------+------------+
73 | | decode                                                    |      17,802 |     19,406 |     19,391 |     23,478 |
74 | +-----------------------------------------------------------+-------------+------------+------------+------------+
75 | | **Dict with 256 arrays with 256 dict{string, int} pairs** |             |            |            |            |
76 | +-----------------------------------------------------------+-------------+------------+------------+------------+
77 | | encode                                                    |          40 |         38 |         68 |        213 |
78 | +-----------------------------------------------------------+-------------+------------+------------+------------+
79 | | decode                                                    |          43 |         49 |         48 |         51 |
80 | +-----------------------------------------------------------+-------------+------------+------------+------------+
81 | | **Medium complex object**                                 |             |            |            |            |
82 | +-----------------------------------------------------------+-------------+------------+------------+------------+
83 | | encode                                                    |       8,704 |     11,922 |     15,319 |     49,677 |
84 | +-----------------------------------------------------------+-------------+------------+------------+------------+
85 | | decode                                                    |      12,567 |     14,042 |     13,985 |     19,481 |
86 | +-----------------------------------------------------------+-------------+------------+------------+------------+
87 | | **Complex object**                                        |             |            |            |            |
88 | +-----------------------------------------------------------+-------------+------------+------------+------------+
89 | | encode                                                    |         672 |        909 |        731 |            |
90 | +-----------------------------------------------------------+-------------+------------+------------+------------+
91 | | decode                                                    |         462 |        700 |        700 |            |
92 | +-----------------------------------------------------------+-------------+------------+------------+------------+
93 | 
94 | * `ujson <https://github.com/ultrajson/ultrajson>`_ == 4.0.3.dev9
95 | * `orjson <https://github.com/ijl/orjson>`_ == 3.5.1
96 | 


--------------------------------------------------------------------------------
/pyjson5.pyx:
--------------------------------------------------------------------------------
 1 | # distutils: language = c++
 2 | # cython: embedsignature = True, language_level = 3, warn.undeclared = True, warn.unreachable = True, warn.maybe_uninitialized = True
 3 | 
 4 | # SPDX-License-Identifier: MIT OR Apache-2.0
 5 | # SPDX-FileCopyrightText: 2018-2025 René Kijewski <pypi.org@k6i.de>
 6 | 
 7 | include 'src/_imports.pyx'
 8 | include 'src/_constants.pyx'
 9 | 
10 | include 'src/_exceptions.pyx'
11 | include 'src/_exceptions_decoder.pyx'
12 | include 'src/_exceptions_encoder.pyx'
13 | include 'src/_raise_decoder.pyx'
14 | include 'src/_raise_encoder.pyx'
15 | 
16 | include 'src/_unicode.pyx'
17 | 
18 | include 'src/_reader_ucs.pyx'
19 | include 'src/_reader_callback.pyx'
20 | include 'src/_readers.pyx'
21 | include 'src/_decoder.pyx'
22 | 
23 | include 'src/_writers.pyx'
24 | include 'src/_writer_reallocatable.pyx'
25 | include 'src/_writer_callback.pyx'
26 | include 'src/_writer_noop.pyx'
27 | include 'src/_encoder_options.pyx'
28 | include 'src/_encoder.pyx'
29 | 
30 | include 'src/_exports.pyx'
31 | include 'src/_legacy.pyx'
32 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: MIT OR Apache-2.0
 2 | # SPDX-FileCopyrightText: 2018-2025 René Kijewski <pypi.org@k6i.de>
 3 | 
 4 | [build-system]
 5 | requires = [
 6 |     "Cython < 4, >= 0.29",
 7 |     "setuptools >= 61",
 8 | ]
 9 | build-backend = "setuptools.build_meta"
10 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | build
 2 | colorama == 0.4.*
 3 | cython < 4, >= 0.29
 4 | more_itertools == 10.*
 5 | mypy
 6 | setuptools >= 61
 7 | wheel
 8 | 
 9 | # keep synchronous to requirements-readthedocs.txt
10 | docutils == 0.21.*
11 | furo
12 | myst-parser == 4.*
13 | sphinx == 8.*
14 | sphinx-autodoc-typehints == 3.*
15 | 


--------------------------------------------------------------------------------
/requirements-readthedocs.txt:
--------------------------------------------------------------------------------
 1 | # keep synchronous to pyproject.toml
 2 | # keep synchronous to src/VERSION.inc
 3 | pyjson5 == 1.6.9
 4 | 
 5 | # keep synchronous to requirements-dev.txt
 6 | docutils == 0.21.*
 7 | furo
 8 | myst-parser == 4.*
 9 | sphinx == 8.*
10 | sphinx-autodoc-typehints == 3.*
11 | 


--------------------------------------------------------------------------------
/scripts/make_decoder_recursive_select.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from argparse import ArgumentParser
 4 | from logging import basicConfig, DEBUG
 5 | from pathlib import Path
 6 | 
 7 | from more_itertools import chunked
 8 | 
 9 | 
10 | def generate(out):
11 |     lst = ["DRS_fail"] * 128
12 |     lst[ord("n")] = "DRS_null"
13 |     lst[ord("t")] = "DRS_true"
14 |     lst[ord("f")] = "DRS_false"
15 |     lst[ord("I")] = "DRS_inf"
16 |     lst[ord("N")] = "DRS_nan"
17 |     lst[ord('"')] = "DRS_string"
18 |     lst[ord("'")] = "DRS_string"
19 |     lst[ord("{")] = "DRS_recursive"
20 |     lst[ord("[")] = "DRS_recursive"
21 |     for c in "+-.0123456789":
22 |         lst[ord(c)] = "DRS_number"
23 | 
24 |     print("#ifndef JSON5EncoderCpp_decoder_recursive_select", file=out)
25 |     print("#define JSON5EncoderCpp_decoder_recursive_select", file=out)
26 |     print(file=out)
27 |     print("// GENERATED FILE", file=out)
28 |     print("// All changes will be lost.", file=out)
29 |     print(file=out)
30 |     print("#include <cstdint>", file=out)
31 |     print(file=out)
32 |     print("namespace JSON5EncoderCpp {", file=out)
33 |     print("inline namespace {", file=out)
34 |     print(file=out)
35 |     print("enum DrsKind : std::uint8_t {", file=out)
36 |     print(
37 |         "    DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive",
38 |         file=out,
39 |     )
40 |     print("};", file=out)
41 |     print(file=out)
42 |     print("static const DrsKind drs_lookup[128] = {", file=out)
43 |     for chunk in chunked(lst, 8):
44 |         print("   ", end="", file=out)
45 |         for t in chunk:
46 |             print(" ", t, ",", sep="", end="", file=out)
47 |         print(file=out)
48 |     print("};", file=out)
49 |     print(file=out)
50 |     print("}  // anonymous inline namespace", sep="", file=out)
51 |     print("}  // namespace JSON5EncoderCpp", sep="", file=out)
52 |     print(file=out)
53 |     print("#endif", sep="", file=out)
54 | 
55 | 
56 | argparser = ArgumentParser(description="Generate src/_decoder_recursive_select.hpp")
57 | argparser.add_argument(
58 |     "input", nargs="?", type=Path, default=Path("src/_decoder_recursive_select.hpp")
59 | )
60 | 
61 | if __name__ == "__main__":
62 |     basicConfig(level=DEBUG)
63 |     args = argparser.parse_args()
64 |     with open(str(args.input.resolve()), "wt") as out:
65 |         generate(out)
66 | 


--------------------------------------------------------------------------------
/scripts/make_escape_dct.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from argparse import ArgumentParser
 4 | from logging import basicConfig, DEBUG
 5 | from pathlib import Path
 6 | 
 7 | 
 8 | def generate(f):
 9 |     unescaped = 0
10 |     print("const EscapeDct::Items EscapeDct::items = {", file=f)
11 |     for c in range(0x100):
12 |         if c == ord("\\"):
13 |             s = "\\\\"
14 |         elif c == ord("\b"):
15 |             s = "\\b"
16 |         elif c == ord("\f"):
17 |             s = "\\f"
18 |         elif c == ord("\n"):
19 |             s = "\\n"
20 |         elif c == ord("\r"):
21 |             s = "\\r"
22 |         elif c == ord("\t"):
23 |             s = "\\t"
24 |         elif c == ord('"'):
25 |             s = '\\"'
26 |         elif (c < 0x20) or (c >= 0x7F) or (chr(c) in "'&<>\\"):
27 |             s = f"\\u{c:04x}"
28 |         else:
29 |             s = f"{c:c}"
30 |             if c < 128:
31 |                 unescaped |= 1 << c
32 | 
33 |         t = (
34 |             [str(len(s))]
35 |             + [f"'{c}'" if c != "\\" else f"'\\\\'" for c in s]
36 |             + ["0"] * 6
37 |         )
38 |         l = ", ".join(t[:8])
39 |         print(f"   {{ {l:35s} }},  /* 0x{c:02x} {chr(c)!r} */", file=f)
40 |     print("};", file=f)
41 | 
42 |     escaped = unescaped ^ ((1 << 128) - 1)
43 |     print(
44 |         f"const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x{(escaped & ((1 << 64) - 1)):016x});",
45 |         file=f,
46 |     )
47 |     print(
48 |         f"const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x{(escaped >> 64):016x});",
49 |         file=f,
50 |     )
51 | 
52 | 
53 | argparser = ArgumentParser(description="Generate src/_escape_dct.hpp")
54 | argparser.add_argument(
55 |     "input", nargs="?", type=Path, default=Path("src/_escape_dct.hpp")
56 | )
57 | 
58 | if __name__ == "__main__":
59 |     basicConfig(level=DEBUG)
60 |     args = argparser.parse_args()
61 |     with open(str(args.input.resolve()), "wt") as out:
62 |         generate(out)
63 | 


--------------------------------------------------------------------------------
/scripts/make_unicode_categories.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from argparse import ArgumentParser
  4 | from collections import defaultdict, OrderedDict
  5 | from functools import reduce
  6 | from pathlib import Path
  7 | from re import match
  8 | 
  9 | from more_itertools import chunked
 10 | 
 11 | 
 12 | def main(input_file, output_file):
 13 |     Nothing = 0
 14 |     WhiteSpace = 1
 15 |     IdentifierStart = 2
 16 |     IdentifierPart = 3
 17 | 
 18 |     cat_indices = {
 19 |         "zs": WhiteSpace,
 20 |         "lc": IdentifierStart,
 21 |         "ll": IdentifierStart,
 22 |         "lm": IdentifierStart,
 23 |         "lo": IdentifierStart,
 24 |         "lt": IdentifierStart,
 25 |         "lu": IdentifierStart,
 26 |         "nl": IdentifierStart,
 27 |         "mc": IdentifierPart,
 28 |         "mn": IdentifierPart,
 29 |         "pc": IdentifierPart,
 30 |         "nd": IdentifierPart,
 31 |     }
 32 | 
 33 |     planes = defaultdict(lambda: [0] * 0x100)
 34 | 
 35 |     for input_line in input_file:
 36 |         m = match(r"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z][a-z])", input_line)
 37 |         if not m:
 38 |             continue
 39 |         start, end, cat = m.groups()
 40 | 
 41 |         idx = cat_indices.get(cat.lower())
 42 |         if idx:
 43 |             end = int(end or start, 16)
 44 |             start = int(start, 16)
 45 |             for i in range(start, end + 1):
 46 |                 planes[i // 0x100][i % 0x100] = idx
 47 | 
 48 |     # per: https://spec.json5.org/#white-space
 49 |     for i in (0x9, 0xA, 0xB, 0xC, 0xD, 0x20, 0xA0, 0x2028, 0x2028, 0x2029, 0xFEFF):
 50 |         planes[i // 0x100][i % 0x100] = WhiteSpace
 51 | 
 52 |     # per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
 53 |     for i in (ord("$"), ord("_"), ord("\\")):
 54 |         planes[i // 0x100][i % 0x100] = IdentifierStart
 55 | 
 56 |     # per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
 57 |     for i in (0x200C, 0x200D):
 58 |         planes[i // 0x100][i % 0x100] = IdentifierPart
 59 | 
 60 |     # 0x110000 == NO_EXTRA_DATA is spuriously used as input at the end of an item.
 61 |     # FIXME: this should not be needed. %s/18/17/g once the problem it fixed in the decoder.
 62 |     planes[0x0011_0000 // 0x100][0x0011_0000 % 0x100] = WhiteSpace
 63 | 
 64 |     print("#ifndef JSON5EncoderCpp_unicode_cat_of", file=output_file)
 65 |     print("#define JSON5EncoderCpp_unicode_cat_of", file=output_file)
 66 |     print(file=output_file)
 67 |     print("// GENERATED FILE", file=output_file)
 68 |     print("// All changes will be lost.", file=output_file)
 69 |     print(file=output_file)
 70 |     print("#include <cstdint>", file=output_file)
 71 |     print(file=output_file)
 72 |     print("namespace JSON5EncoderCpp {", file=output_file)
 73 |     print("inline namespace {", file=output_file)
 74 |     print(file=output_file)
 75 |     print("static unsigned unicode_cat_of(std::uint32_t codepoint) {", file=output_file)
 76 | 
 77 |     demiplane_to_idx = OrderedDict()  # demiplane_idx → data_idx
 78 |     data_to_idx = [None] * 18 * 0x100  # demiplane data → data_idx
 79 |     print("    // A 'demiplane' is a 1/256th of a Unicode plane.", file=output_file)
 80 |     print(
 81 |         "    // This way a 'demiplane' fits nicely into a cache line.", file=output_file
 82 |     )
 83 |     print(
 84 |         "    alignas(64) static const std::uint8_t demiplane_data[][0x100 / 4] = {",
 85 |         file=output_file,
 86 |     )
 87 |     for i in range(18 * 0x100):
 88 |         plane_data = ""
 89 |         plane = planes[i]
 90 |         while plane and plane[-1] == 0:
 91 |             plane.pop()
 92 | 
 93 |         for chunk in chunked(plane, 4 * 16):
 94 |             plane_data += "            "
 95 |             for value in chunked(chunk, 4):
 96 |                 value = reduce(lambda a, i: ((a << 2) | i), reversed(value), 0)
 97 |                 plane_data += "0x{:02x}u, ".format(value)
 98 |             plane_data = plane_data[:-1] + "\n"
 99 | 
100 |         produced_idx = demiplane_to_idx.get(plane_data)
101 |         if produced_idx is None:
102 |             produced_idx = len(demiplane_to_idx)
103 |             print(
104 |                 "        {{   // {} -> 0x{:02x}u".format(i, produced_idx),
105 |                 file=output_file,
106 |             )
107 |             print(plane_data, file=output_file, end="")
108 |             print("        },", file=output_file)
109 |             demiplane_to_idx[plane_data] = produced_idx
110 | 
111 |         data_to_idx[i] = produced_idx
112 |     print("    };", file=output_file)
113 |     print(file=output_file)
114 | 
115 |     snd_lookup_lines = OrderedDict()
116 |     snd_lookup_indices = OrderedDict()
117 |     print(
118 |         "    alignas(64) static const std::uint8_t demiplane_snd_data[][64] = {",
119 |         file=output_file,
120 |     )
121 |     for start in range(0, 18 * 0x100, 64):
122 |         snd_lookup_line: str
123 |         for i in range(start, min(start + 64, 18 * 0x100)):
124 |             if i % 16 == 0:
125 |                 if i % 64 == 0:
126 |                     snd_lookup_line = "           "
127 |                 else:
128 |                     snd_lookup_line += "\n           "
129 |             snd_lookup_line += " 0x{:02x}u,".format(data_to_idx[i])
130 | 
131 |         snd_lookup_idx = snd_lookup_lines.get(snd_lookup_line, None)
132 |         if snd_lookup_idx is None:
133 |             snd_lookup_idx = len(snd_lookup_lines)
134 |             print(
135 |                 "        {{   // {} -> 0x{:02x}u".format(start // 64, snd_lookup_idx),
136 |                 file=output_file,
137 |             )
138 |             print(snd_lookup_line, file=output_file)
139 |             print("        },", file=output_file)
140 |             snd_lookup_lines[snd_lookup_line] = snd_lookup_idx
141 |         snd_lookup_indices[start // 64] = snd_lookup_idx
142 |     print("    };", file=output_file)
143 |     print(file=output_file)
144 | 
145 |     print(
146 |         "    alignas(64) static const std::uint8_t demiplane_snd[18 * 0x100 / 64] = {{".format(
147 |             68
148 |         ),
149 |         end="",
150 |         file=output_file,
151 |     )
152 |     for i in range(18 * 0x100 // 64):
153 |         if i % 16 == 0:
154 |             print("\n       ", end="", file=output_file)
155 |         print(" 0x{:02x}u,".format(snd_lookup_indices[i]), end="", file=output_file)
156 |     print(file=output_file)
157 |     print("    };", file=output_file)
158 |     print(file=output_file)
159 | 
160 |     print("    if (JSON5EncoderCpp_expect(codepoint < 256, true)) {", file=output_file)
161 |     print(
162 |         "        return (demiplane_data[0][codepoint / 4] >> (2 * (codepoint % 4))) % 4;",
163 |         file=output_file,
164 |     )
165 |     print("    }", file=output_file)
166 |     print(file=output_file)
167 |     print("    if (codepoint > 0x110000) codepoint = 0x110000;", file=output_file)
168 |     print("    std::uint32_t fst_row = codepoint / 0x100;", file=output_file)
169 |     print("    std::uint32_t fst_col = codepoint % 0x100;", file=output_file)
170 |     print("    std::uint32_t snd_row = fst_row / 64;", file=output_file)
171 |     print("    std::uint32_t snd_col = fst_row % 64;", file=output_file)
172 |     print(file=output_file)
173 |     print(
174 |         "    const std::uint8_t *cell = demiplane_data[demiplane_snd_data[demiplane_snd[snd_row]][snd_col]];",
175 |         file=output_file,
176 |     )
177 |     print(
178 |         "    return (cell[fst_col / 4] >> (2 * (fst_col % 4))) % 4;", file=output_file
179 |     )
180 |     print("}", file=output_file)
181 |     print(file=output_file)
182 |     print("}", file=output_file)
183 |     print("}", file=output_file)
184 |     print(file=output_file)
185 |     print("#endif", file=output_file)
186 | 
187 | 
188 | argparser = ArgumentParser(description="Generate Unicode Category Matcher(s)")
189 | argparser.add_argument("input", nargs="?", type=Path, default=Path("/dev/stdin"))
190 | argparser.add_argument("output", nargs="?", type=Path, default=Path("/dev/stdout"))
191 | 
192 | if __name__ == "__main__":
193 |     args = argparser.parse_args()
194 |     with open(str(args.input.resolve()), "rt") as input_file, open(
195 |         str(args.output.resolve()), "wt"
196 |     ) as output_file:
197 |         raise SystemExit(main(input_file, output_file))
198 | 


--------------------------------------------------------------------------------
/scripts/run-minefield-test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from argparse import ArgumentParser
  4 | from logging import basicConfig, INFO, getLogger
  5 | from os import chdir, name
  6 | from pathlib import Path
  7 | from subprocess import Popen
  8 | from sys import executable
  9 | 
 10 | 
 11 | argparser = ArgumentParser(description="Run JSON5 parser tests")
 12 | argparser.add_argument(
 13 |     "tests",
 14 |     nargs="?",
 15 |     type=Path,
 16 |     default=Path("third-party/JSONTestSuite/test_parsing"),
 17 | )
 18 | 
 19 | suffix_implies_success = {
 20 |     "json": True,
 21 |     "json5": True,
 22 |     "txt": False,
 23 | }
 24 | 
 25 | if __name__ == "__main__":
 26 |     basicConfig(level=INFO)
 27 |     logger = getLogger(__name__)
 28 |     chdir(Path(__file__).absolute().parent.parent)
 29 | 
 30 |     good = bad = errors = severe = 0
 31 | 
 32 |     try:
 33 |         from colorama import init, Fore
 34 | 
 35 |         init()
 36 |     except Exception:
 37 |         code_severe = "SEVERE"
 38 |         code_good = "GOOD"
 39 |         code_bad = "BAD"
 40 |         code_ignored = "IGNORED"
 41 |         reset = ""
 42 |     else:
 43 |         if name != "nt":
 44 |             code_severe = Fore.RED + "😱"
 45 |             code_good = Fore.CYAN + "😄"
 46 |             code_bad = Fore.YELLOW + "😠"
 47 |             code_ignored = Fore.BLUE + "🙅"
 48 |         else:
 49 |             code_severe = Fore.RED + "SEVERE"
 50 |             code_good = Fore.CYAN + "GOOD"
 51 |             code_bad = Fore.YELLOW + "BAD"
 52 |             code_ignored = Fore.BLUE + "IGNORED"
 53 |         reset = Fore.RESET
 54 | 
 55 |     script = str(Path(__file__).absolute().parent / "transcode-to-json.py")
 56 | 
 57 |     args = argparser.parse_args()
 58 |     index = 0
 59 |     for path in sorted(args.tests.glob("?_?*.json")):
 60 |         category, name = path.stem.split("_", 1)
 61 |         if category not in "yni":
 62 |             continue
 63 | 
 64 |         if category in "ni":
 65 |             # ignore anything but tests that are expected to pass for now
 66 |             continue
 67 | 
 68 |         try:
 69 |             # ignore any UTF-8 errors
 70 |             with open(str(path.resolve()), "rt") as f:
 71 |                 f.read()
 72 |         except Exception:
 73 |             continue
 74 | 
 75 |         index += 1
 76 |         try:
 77 |             p = Popen((executable, script, str(path)))
 78 |             outcome = p.wait(5)
 79 |         except Exception:
 80 |             logger.error("Error while testing: %s", path, exc_info=True)
 81 |             errors += 1
 82 |             continue
 83 | 
 84 |         if outcome not in (0, 1):
 85 |             code = code_severe
 86 |             severe += 1
 87 |         elif category == "y":
 88 |             if outcome == 0:
 89 |                 code = code_good
 90 |                 good += 1
 91 |             else:
 92 |                 code = code_bad
 93 |                 bad += 1
 94 |         else:
 95 |             code = code_ignored
 96 | 
 97 |         print(
 98 |             "#",
 99 |             index,
100 |             " ",
101 |             code,
102 |             " | " "Category <",
103 |             category,
104 |             "> | " "Test <",
105 |             name,
106 |             "> | " "Actual <",
107 |             "pass" if outcome == 0 else "FAIL",
108 |             ">",
109 |             reset,
110 |             sep="",
111 |         )
112 | 
113 |     is_severe = severe > 0
114 |     is_good = bad == 0
115 |     code = code_severe if is_severe else code_good if is_good else code_bad
116 |     print()
117 |     print(
118 |         code,
119 |         " | ",
120 |         good,
121 |         " correct outcomes | ",
122 |         bad,
123 |         " wrong outcomes | ",
124 |         severe,
125 |         " severe errors",
126 |         reset,
127 |         sep="",
128 |     )
129 |     raise SystemExit(2 if is_severe else 0 if is_good else 1)
130 | 


--------------------------------------------------------------------------------
/scripts/run-tests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from argparse import ArgumentParser
  4 | from logging import basicConfig, INFO, getLogger
  5 | from os import chdir, name
  6 | from pathlib import Path
  7 | from subprocess import Popen
  8 | from sys import executable
  9 | 
 10 | 
 11 | argparser = ArgumentParser(description="Run JSON5 parser tests")
 12 | argparser.add_argument(
 13 |     "tests", nargs="?", type=Path, default=Path("third-party/json5-tests")
 14 | )
 15 | 
 16 | suffix_implies_success = {
 17 |     ".json": True,
 18 |     ".json5": True,
 19 |     ".txt": False,
 20 | }
 21 | 
 22 | if __name__ == "__main__":
 23 |     basicConfig(level=INFO)
 24 |     logger = getLogger(__name__)
 25 |     chdir(Path(__file__).absolute().parent.parent)
 26 | 
 27 |     try:
 28 |         from colorama import init, Fore
 29 | 
 30 |         init()
 31 |     except Exception:
 32 |         code_severe = "SEVERE"
 33 |         code_good = "GOOD"
 34 |         code_bad = "BAD"
 35 |         reset = ""
 36 |     else:
 37 |         if name != "nt":
 38 |             code_severe = Fore.RED + "😱"
 39 |             code_good = Fore.CYAN + "😄"
 40 |             code_bad = Fore.YELLOW + "😠"
 41 |         else:
 42 |             code_severe = Fore.RED + "SEVERE"
 43 |             code_good = Fore.CYAN + "GOOD"
 44 |             code_bad = Fore.YELLOW + "BAD"
 45 |         reset = Fore.RESET
 46 | 
 47 |     good = 0
 48 |     bad = 0
 49 |     severe = 0
 50 | 
 51 |     script = str(Path(__file__).absolute().parent / "transcode-to-json.py")
 52 | 
 53 |     args = argparser.parse_args()
 54 |     index = 0
 55 |     for path in sorted(args.tests.glob("*/*.*")):
 56 |         kind = path.suffix.split(".")[-1]
 57 |         expect_success = suffix_implies_success.get(path.suffix)
 58 |         if expect_success is None:
 59 |             continue
 60 | 
 61 |         index += 1
 62 |         category = path.parent.name
 63 |         name = path.stem
 64 |         try:
 65 |             p = Popen((executable, script, str(path)))
 66 |             outcome = p.wait(5)
 67 |         except Exception:
 68 |             logger.error("Error while testing: %s", path, exc_info=True)
 69 |             severe += 1
 70 |             continue
 71 | 
 72 |         is_success = outcome == 0
 73 |         is_failure = outcome == 1
 74 |         is_severe = outcome not in (0, 1)
 75 |         is_good = is_success if expect_success else is_failure
 76 |         code = code_severe if is_severe else code_good if is_good else code_bad
 77 |         print(
 78 |             "#",
 79 |             index,
 80 |             " ",
 81 |             code,
 82 |             " " "Category <",
 83 |             category,
 84 |             "> | " "Test <",
 85 |             name,
 86 |             "> | " "Data <",
 87 |             kind,
 88 |             "> | " "Expected <",
 89 |             "pass" if expect_success else "FAIL",
 90 |             "> | " "Actual <",
 91 |             "pass" if is_success else "FAIL",
 92 |             ">",
 93 |             reset,
 94 |             sep="",
 95 |         )
 96 |         if is_severe:
 97 |             severe += 1
 98 |         elif is_good:
 99 |             good += 1
100 |         else:
101 |             bad += 1
102 | 
103 |     is_severe = severe > 0
104 |     is_good = bad == 0
105 |     code = code_severe if is_severe else code_good if is_good else code_bad
106 |     print()
107 |     print(
108 |         code,
109 |         " ",
110 |         good,
111 |         " × correct outcome | ",
112 |         bad,
113 |         " × wrong outcome | ",
114 |         severe,
115 |         " × severe errors",
116 |         reset,
117 |         sep="",
118 |     )
119 |     raise SystemExit(2 if is_severe else 0 if is_good else 1)
120 | 


--------------------------------------------------------------------------------
/scripts/sha512sum.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from argparse import ArgumentParser
 4 | from hashlib import sha512
 5 | from logging import basicConfig, DEBUG
 6 | from pathlib import Path
 7 | from sys import argv, exit
 8 | 
 9 | 
10 | argparser = ArgumentParser(
11 |     description="sha512sum replacement if coreutils isn't installed"
12 | )
13 | argparser.add_argument("-c", "--check", type=Path, required=True)
14 | 
15 | if __name__ == "__main__":
16 |     basicConfig(level=DEBUG)
17 |     args = argparser.parse_args()
18 |     errors = 0
19 |     with open(str(args.check.resolve()), "rt") as f:
20 |         for line in f:
21 |             expected_hash, filename = line.rstrip("\r\n").split("  ", 1)
22 |             with open(str(Path(filename).resolve()), "rb") as f:
23 |                 actual_hash = sha512(f.read()).hexdigest()
24 | 
25 |             if expected_hash == actual_hash:
26 |                 print(filename + ": OK")
27 |             else:
28 |                 errors += 1
29 |                 print(filename + ": FAILED")
30 | 
31 |     if errors:
32 |         print("%s: WARNING: %s computed checksum did NOT match" % (argv[0], errors))
33 |         exit(1)
34 |     else:
35 |         exit(0)
36 | 


--------------------------------------------------------------------------------
/scripts/transcode-to-json.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from argparse import ArgumentParser
  4 | from collections.abc import Mapping, Sequence
  5 | from codecs import open as codecs_open
  6 | from itertools import zip_longest
  7 | from json import loads
  8 | from logging import basicConfig, DEBUG, getLogger
  9 | from math import isnan
 10 | from pathlib import Path
 11 | 
 12 | from pyjson5 import decode, encode
 13 | 
 14 | 
 15 | def eq_with_nans(left, right):
 16 |     if left == right:
 17 |         return True
 18 |     elif isnan(left):
 19 |         return isnan(right)
 20 |     elif isnan(right):
 21 |         return False
 22 | 
 23 |     if not isinstance(left, Sequence) or not isinstance(right, Sequence):
 24 |         return False
 25 |     elif len(left) != len(right):
 26 |         return False
 27 | 
 28 |     left_mapping = isinstance(left, Mapping)
 29 |     right_mapping = isinstance(right, Mapping)
 30 |     if left_mapping != right_mapping:
 31 |         return False
 32 | 
 33 |     sentinel = object()
 34 |     if left_mapping:
 35 |         for k, left_value in left.items():
 36 |             right_value = right.pop(k, sentinel)
 37 |             if not eq_with_nans(left_value, right_value):
 38 |                 return False
 39 |         if right:
 40 |             # extraneous keys
 41 |             return False
 42 |     else:
 43 |         for l, r in zip_longest(left, right, fillvalue=sentinel):
 44 |             if not eq_with_nans(l, r):
 45 |                 return False
 46 | 
 47 |     return True
 48 | 
 49 | 
 50 | argparser = ArgumentParser(description="Run JSON5 parser tests")
 51 | argparser.add_argument("input", type=Path)
 52 | argparser.add_argument("output", nargs="?", type=Path)
 53 | 
 54 | if __name__ == "__main__":
 55 |     basicConfig(level=DEBUG)
 56 |     logger = getLogger(__name__)
 57 | 
 58 |     args = argparser.parse_args()
 59 |     try:
 60 |         with codecs_open(args.input.resolve(), "r", "UTF-8") as f:
 61 |             data = f.read()
 62 |     except Exception:
 63 |         logger.error("Could not even read file: %s", args.input, exc_info=True)
 64 |         raise SystemExit(-1)
 65 | 
 66 |     try:
 67 |         obj = decode(data)
 68 |     except Exception:
 69 |         logger.error("Could not parse content: %s", args.input)
 70 |         raise SystemExit(1)
 71 | 
 72 |     try:
 73 |         json_obj = loads(data)
 74 |     except Exception:
 75 |         pass
 76 |     else:
 77 |         if not eq_with_nans(obj, json_obj):
 78 |             logger.error(
 79 |                 "JSON and PyJSON5 did not read the same data: %s, %r != %r",
 80 |                 args.input,
 81 |                 obj,
 82 |                 json_obj,
 83 |             )
 84 |             raise SystemExit(2)
 85 | 
 86 |     try:
 87 |         data = encode(obj)
 88 |     except Exception:
 89 |         logger.error("Could open stringify content: %s", args.input, exc_info=True)
 90 |         raise SystemExit(2)
 91 | 
 92 |     if args.output is not None:
 93 |         try:
 94 |             with codecs_open(args.output.resolve(), "w", "UTF-8") as f:
 95 |                 f.write(data)
 96 |         except Exception:
 97 |             logger.error("Could open output file: %s", args.output, exc_info=True)
 98 |             raise SystemExit(-1)
 99 | 
100 |     raise SystemExit(0)
101 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | # keep synchronous to requirements-readthedocs.txt
 3 | # keep synchronous to src/VERSION.inc
 4 | version = 1.6.9
 5 | 
 6 | name = pyjson5
 7 | description = JSON5 serializer and parser for Python 3 written in Cython.
 8 | url = https://github.com/Kijewski/pyjson5
 9 | project_urls =
10 |     Changelog = https://github.com/Kijewski/pyjson5/blob/main/CHANGELOG.md
11 |     Code = https://github.com/Kijewski/pyjson5
12 |     Documentation = https://pyjson5.readthedocs.io/
13 |     Download = https://pypi.org/project/pyjson5/
14 |     Homepage = https://github.com/Kijewski/pyjson5
15 |     Tracker = https://github.com/Kijewski/pyjson5/issues
16 | 
17 | author = René Kijewski
18 | maintainer = René Kijewski
19 | author_email = pypi.org@k6i.de
20 | maintainer_email = pypi.org@k6i.de
21 | 
22 | long_description = file: README.rst
23 | long_description_content_type = text/x-rst
24 | 
25 | license = MIT OR Apache-2.0
26 | license_files = LICENSE.MIT, LICENSE.Apache
27 | 
28 | classifiers =
29 |     Development Status :: 5 - Production/Stable
30 |     Intended Audience :: Developers
31 |     Intended Audience :: System Administrators
32 |     License :: OSI Approved :: Apache Software License
33 |     License :: OSI Approved :: MIT License
34 |     Operating System :: OS Independent
35 |     Programming Language :: Cython
36 |     Programming Language :: JavaScript
37 |     Programming Language :: Python :: 3
38 |     Programming Language :: Python :: 3.7
39 |     Programming Language :: Python :: 3.8
40 |     Programming Language :: Python :: 3.9
41 |     Programming Language :: Python :: 3.10
42 |     Programming Language :: Python :: 3.11
43 |     Programming Language :: Python :: 3.12
44 |     Programming Language :: Python :: 3.13
45 |     Programming Language :: Python :: 3 :: Only
46 |     Programming Language :: Python :: Implementation :: CPython
47 |     Topic :: Text Processing :: General
48 | 
49 | [options]
50 | zip_safe = False
51 | 
52 | python_requires = ~= 3.7
53 | setup_requires =
54 |     Cython
55 |     setuptools
56 | 
57 | include_package_data = True
58 | packages = pyjson5
59 | package_dir =
60 |     = src
61 | 
62 | [options.package_data]
63 | pyjson5 = __init__.pyi, py.typed
64 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from setuptools import setup, Extension
 4 | 
 5 | extra_compile_args = [
 6 |     "-std=c++11",
 7 |     "-O3",
 8 |     "-fPIC",
 9 |     "-g0",
10 |     "-pipe",
11 |     "-fomit-frame-pointer",
12 | ]
13 | 
14 | setup(
15 |     ext_modules=[
16 |         Extension(
17 |             "pyjson5.pyjson5",
18 |             sources=["pyjson5.pyx"],
19 |             include_dirs=["src"],
20 |             extra_compile_args=extra_compile_args,
21 |             extra_link_args=extra_compile_args,
22 |             language="c++",
23 |         )
24 |     ],
25 | )
26 | 


--------------------------------------------------------------------------------
/src/DESCRIPTION.inc:
--------------------------------------------------------------------------------
 1 | """\
 2 | PyJSON5\n\
 3 | =======\n\
 4 | \n\
 5 | A `JSON5 <https://spec.json5.org/>`_ serializer and parser library for Python 3 written in Cython.\n\
 6 | \n\
 7 | The serializer returns ASCII data that can safely be used in an HTML template.\n\
 8 | Apostrophes, ampersands, greater-than, and less-then signs are encoded as\n\
 9 | unicode escaped sequences. E.g. this snippet is safe for any and all input:\n\
10 | \n\
11 | .. code:: python\n\
12 | \n\
13 |     \"<a onclick='alert(\" + encode(data) + \")'>show message</a>\"\n\
14 | \n\
15 | Unless the input contains infinite or NaN values, the result will be valid\n\
16 | JSON data.\n\
17 | \n\
18 | All valid JSON5 1.0.0 and `JSON <https://tools.ietf.org/html/rfc8259>`_ data can be read,\n\
19 | unless the nesting level is absurdly high.\n\
20 | """
21 | 


--------------------------------------------------------------------------------
/src/VERSION.inc:
--------------------------------------------------------------------------------
1 | "1.6.9"
2 | 


--------------------------------------------------------------------------------
/src/_constants.pyx:
--------------------------------------------------------------------------------
 1 | cdef object CONST_POS_NAN = float('+NaN')
 2 | cdef object CONST_POS_INF = float('+Infinity')
 3 | cdef object CONST_NEG_NAN = float('-NaN')
 4 | cdef object CONST_NEG_INF = float('-Infinity')
 5 | 
 6 | cdef object DATETIME_CLASSES = (date, time,)  # issubclass(datetime, date) == True
 7 | cdef object ORD_CLASSES = (unicode, bytes, bytearray,)
 8 | 
 9 | cdef object UCS1_COMPATIBLE_CODECS = frozenset((
10 |     # ASCII
11 |     'ascii', 646, '646', 'us-ascii',
12 |     # Latin-1
13 |     'latin_1', 'latin-1', 'iso-8859-1', 'iso8859-1',
14 |     8859, '8859', 'cp819', 'latin', 'latin1', 'l1',
15 | ))
16 | 
17 | cdef object TEST_DECIMAL = Decimal('47.11')
18 | cdef object TEST_FLOAT = 47.11
19 | cdef object TEST_INT = 4711
20 | 


--------------------------------------------------------------------------------
/src/_decoder.pyx:
--------------------------------------------------------------------------------
  1 | cdef enum:
  2 |     NO_EXTRA_DATA = 0x0011_0000
  3 | 
  4 | 
  5 | cdef boolean _skip_single_line(ReaderRef reader) except False:
  6 |     cdef uint32_t c0
  7 |     while _reader_good(reader):
  8 |         c0 = _reader_get(reader)
  9 |         if _is_line_terminator(c0):
 10 |             break
 11 | 
 12 |     return True
 13 | 
 14 | 
 15 | cdef boolean _skip_multiline_comment(ReaderRef reader) except False:
 16 |     cdef uint32_t c0
 17 |     cdef boolean seen_asterisk = False
 18 |     cdef Py_ssize_t comment_start = _reader_tell(reader)
 19 | 
 20 |     while True:
 21 |         if expect(not _reader_good(reader), False):
 22 |             break
 23 | 
 24 |         c0 = _reader_get(reader)
 25 |         if c0 == b'*':
 26 |             seen_asterisk = True
 27 |         elif seen_asterisk:
 28 |             if c0 == b'/':
 29 |                 return True
 30 |             seen_asterisk = False
 31 | 
 32 |     _raise_unclosed(b'comment', comment_start)
 33 |     return False
 34 | 
 35 | 
 36 | #     data found
 37 | # -1: exhausted
 38 | # -2: exception
 39 | cdef int32_t _skip_to_data_sub(ReaderRef reader, uint32_t c0) except -2:
 40 |     cdef int32_t c1 = 0  # silence warning
 41 |     cdef boolean seen_slash
 42 | 
 43 |     seen_slash = False
 44 |     while True:
 45 |         if c0 == b'/':
 46 |             if seen_slash:
 47 |                 _skip_single_line(reader)
 48 |                 seen_slash = False
 49 |             else:
 50 |                 seen_slash = True
 51 |         elif c0 == b'*':
 52 |             if expect(not seen_slash, False):
 53 |                 _raise_stray_character('asterisk', _reader_tell(reader))
 54 | 
 55 |             _skip_multiline_comment(reader)
 56 |             seen_slash = False
 57 |         elif not _is_ws_zs(c0):
 58 |             c1 = cast_to_int32(c0)
 59 |             break
 60 |         elif expect(seen_slash, False):
 61 |             _raise_stray_character('slash', _reader_tell(reader))
 62 | 
 63 |         if not _reader_good(reader):
 64 |             c1 = -1
 65 |             break
 66 | 
 67 |         c0 = _reader_get(reader)
 68 | 
 69 |     if expect(seen_slash, False):
 70 |         _raise_stray_character('slash', _reader_tell(reader))
 71 | 
 72 |     return c1
 73 | 
 74 | 
 75 | #    data found
 76 | # -1 exhausted
 77 | # -2 exception
 78 | cdef int32_t _skip_to_data(ReaderRef reader) except -2:
 79 |     cdef uint32_t c0
 80 |     cdef int32_t c1
 81 |     if _reader_good(reader):
 82 |         c0 = _reader_get(reader)
 83 |         c1 = _skip_to_data_sub(reader, c0)
 84 |     else:
 85 |         c1 = -1
 86 |     return c1
 87 | 
 88 | 
 89 | cdef int32_t _get_hex_character(ReaderRef reader, Py_ssize_t length) except -1:
 90 |     cdef Py_ssize_t start
 91 |     cdef uint32_t c0
 92 |     cdef uint32_t result
 93 |     cdef Py_ssize_t index
 94 | 
 95 |     start = _reader_tell(reader)
 96 |     result = 0
 97 |     for index in range(length):
 98 |         result <<= 4
 99 |         if expect(not _reader_good(reader), False):
100 |             _raise_unclosed(b'escape sequence', start)
101 | 
102 |         c0 = _reader_get(reader)
103 |         if b'0' <= c0 <= b'9':
104 |             result |= c0 - <uint32_t> b'0'
105 |         elif b'a' <= c0 <= b'f':
106 |             result |= c0 - <uint32_t> b'a' + 10
107 |         elif b'A' <= c0 <= b'F':
108 |             result |= c0 - <uint32_t> b'A' + 10
109 |         else:
110 |             _raise_expected_s('hexadecimal character', start, c0)
111 | 
112 |     if expect(result > 0x10ffff, False):
113 |         _raise_expected_s('Unicode code point', start, result)
114 | 
115 |     return cast_to_int32(result)
116 | 
117 | 
118 | # >=  0: character to append
119 | cdef int32_t _get_escaped_unicode_maybe_surrogate(ReaderRef reader, Py_ssize_t start) except -1:
120 |     cdef uint32_t c0
121 |     cdef uint32_t c1
122 | 
123 |     c0 = cast_to_uint32(_get_hex_character(reader, 4))
124 |     if expect(unicode_is_lo_surrogate(c0), False):
125 |         _raise_expected_s('high surrogate before low surrogate', start, c0)
126 |     elif not unicode_is_hi_surrogate(c0):
127 |         return c0
128 | 
129 |     _accept_string(reader, b'\\u')
130 | 
131 |     c1 = cast_to_uint32(_get_hex_character(reader, 4))
132 |     if expect(not unicode_is_lo_surrogate(c1), False):
133 |         _raise_expected_s('low surrogate', start, c1)
134 | 
135 |     return unicode_join_surrogates(c0, c1)
136 | 
137 | 
138 | # >=  0: character to append
139 | #    -1: skip
140 | # <  -1: -(next character + 1)
141 | cdef int32_t _get_escape_sequence(ReaderRef reader,
142 |                                   Py_ssize_t start) except 0x7ffffff:
143 |     cdef uint32_t c0
144 | 
145 |     c0 = _reader_get(reader)
146 |     if expect(not _reader_good(reader), False):
147 |         _raise_unclosed(b'string', start)
148 | 
149 |     if c0 == b'b':
150 |         return 0x0008
151 |     elif c0 == b'f':
152 |         return 0x000c
153 |     elif c0 == b'n':
154 |         return 0x000a
155 |     elif c0 == b'r':
156 |         return 0x000d
157 |     elif c0 == b't':
158 |         return 0x0009
159 |     elif c0 == b'v':
160 |         return 0x000b
161 |     elif c0 == b'0':
162 |         return 0x0000
163 |     elif c0 == b'x':
164 |         return _get_hex_character(reader, 2)
165 |     elif c0 == b'u':
166 |         return _get_escaped_unicode_maybe_surrogate(reader, start)
167 |     elif c0 == b'U':
168 |         return _get_hex_character(reader, 8)
169 |     elif expect(b'1' <= c0 <= b'9', False):
170 |         _raise_expected_s('escape sequence', start, c0)
171 |         return -2
172 |     elif _is_line_terminator(c0):
173 |         if c0 != 0x000D:
174 |             return -1
175 | 
176 |         c0 = _reader_get(reader)
177 |         if c0 == 0x000A:
178 |             return -1
179 | 
180 |         return -cast_to_int32(c0 + 1)
181 |     else:
182 |         return cast_to_int32(c0)
183 | 
184 | 
185 | cdef object _decode_string_sub(ReaderRef reader, uint32_t delim,
186 |                                Py_ssize_t start, uint32_t c0):
187 |     cdef int32_t c1
188 |     cdef StackHeapString[uint32_t] buf
189 | 
190 |     while True:
191 |         if expect(c0 == delim, False):
192 |             break
193 | 
194 |         if expect(not _reader_good(reader), False):
195 |             _raise_unclosed(b'string', start)
196 | 
197 |         if expect(c0 != b'\\', True):
198 |             if expect(c0 in (0xA, 0xD), False):
199 |                 _raise_unclosed(b'string', start)
200 | 
201 |             buf.push_back(c0)
202 |             c0 = _reader_get(reader)
203 |             continue
204 | 
205 |         c1 = _get_escape_sequence(reader, start)
206 |         if c1 >= -1:
207 |             if expect(not _reader_good(reader), False):
208 |                 _raise_unclosed(b'string', start)
209 | 
210 |             if c1 >= 0:
211 |                 c0 = cast_to_uint32(c1)
212 |                 buf.push_back(c0)
213 | 
214 |             c0 = _reader_get(reader)
215 |         else:
216 |             c0 = cast_to_uint32(-(c1 + 1))
217 | 
218 |     return PyUnicode_FromKindAndData(
219 |         PyUnicode_4BYTE_KIND, buf.data(), buf.size(),
220 |     )
221 | 
222 | 
223 | cdef object _decode_string(ReaderRef reader, int32_t *c_in_out):
224 |     cdef uint32_t delim
225 |     cdef uint32_t c0
226 |     cdef int32_t c1
227 |     cdef Py_ssize_t start
228 |     cdef object result
229 | 
230 |     c1 = c_in_out[0]
231 |     delim = cast_to_uint32(c1)
232 |     start = _reader_tell(reader)
233 | 
234 |     if expect(not _reader_good(reader), False):
235 |         _raise_unclosed(b'string', start)
236 | 
237 |     c0 = _reader_get(reader)
238 |     result = _decode_string_sub(reader, delim, start, c0)
239 | 
240 |     c_in_out[0] = NO_EXTRA_DATA
241 |     return result
242 | 
243 | 
244 | cdef object _decode_double(StackHeapString[char] &buf, Py_ssize_t start):
245 |     cdef double d0
246 |     cdef const char *end_of_double
247 | 
248 |     d0 = 0.0  # silence warning
249 |     end_of_double = parse_number(buf.data(), &d0)
250 |     if end_of_double != NULL and end_of_double[0] == b'\0':
251 |         return PyFloat_FromDouble(d0)
252 | 
253 |     _raise_unclosed('NumericLiteral', start)
254 | 
255 | 
256 | cdef object _decode_number_leading_zero(ReaderRef reader, StackHeapString[char] &buf,
257 |                                         int32_t *c_in_out, Py_ssize_t start):
258 |     cdef uint32_t c0
259 |     cdef int32_t c1 = 0  # silence warning
260 | 
261 |     if not _reader_good(reader):
262 |         c_in_out[0] = -1
263 |         return 0
264 | 
265 |     c0 = _reader_get(reader)
266 |     if _is_x(c0):
267 |         while True:
268 |             if not _reader_good(reader):
269 |                 c1 = -1
270 |                 break
271 | 
272 |             c0 = _reader_get(reader)
273 |             if _is_hexadecimal(c0):
274 |                 buf.push_back(<char> <unsigned char> c0)
275 |             elif c0 != b'_':
276 |                 c1 = cast_to_int32(c0)
277 |                 break
278 | 
279 |         c_in_out[0] = c1
280 | 
281 |         buf.push_back(b'\0')
282 |         try:
283 |             return PyLong_FromString(buf.data(), NULL, 16)
284 |         except Exception:
285 |             _raise_unclosed('NumericLiteral', start)
286 |     elif c0 == b'.':
287 |         buf.push_back(b'0')
288 |         buf.push_back(b'.')
289 | 
290 |         while True:
291 |             if not _reader_good(reader):
292 |                 c1 = -1
293 |                 break
294 | 
295 |             c0 = _reader_get(reader)
296 |             if _is_in_float_representation(c0):
297 |                 buf.push_back(<char> <unsigned char> c0)
298 |             elif c0 != b'_':
299 |                 c1 = cast_to_int32(c0)
300 |                 break
301 | 
302 |         c_in_out[0] = c1
303 | 
304 |         if buf.data()[buf.size() - 1] == b'.':
305 |             (<char*> buf.data())[buf.size() - 1] = b'\0'
306 |         else:
307 |             buf.push_back(b'\0')
308 | 
309 |         return _decode_double(buf, start)
310 |     elif _is_e(c0):
311 |         while True:
312 |             if not _reader_good(reader):
313 |                 c1 = -1
314 |                 break
315 | 
316 |             c0 = _reader_get(reader)
317 |             if _is_in_float_representation(c0):
318 |                 pass
319 |             elif c0 == b'_':
320 |                 pass
321 |             else:
322 |                 c1 = cast_to_int32(c0)
323 |                 break
324 | 
325 |         c_in_out[0] = c1
326 |         return 0.0
327 |     else:
328 |         c1 = cast_to_int32(c0)
329 |         c_in_out[0] = c1
330 |         return 0
331 | 
332 | 
333 | cdef object _decode_number_any(ReaderRef reader, StackHeapString[char] &buf,
334 |                                int32_t *c_in_out, Py_ssize_t start):
335 |     cdef uint32_t c0
336 |     cdef int32_t c1
337 |     cdef boolean is_float = False
338 |     cdef boolean was_point = False
339 |     cdef boolean leading_point = False
340 | 
341 |     c1 = c_in_out[0]
342 |     c0 = cast_to_uint32(c1)
343 | 
344 |     if c0 == b'.':
345 |         buf.push_back(b'0')
346 |         is_float = True
347 |         leading_point = True
348 | 
349 |     while True:
350 |         if _is_decimal(c0):
351 |             pass
352 |         elif _is_in_float_representation(c0):
353 |             is_float = True
354 |         elif c0 != b'_':
355 |             c1 = cast_to_int32(c0)
356 |             break
357 | 
358 |         if c0 == b'_':
359 |             pass
360 |         elif c0 != b'.':
361 |             if was_point:
362 |                 was_point = False
363 |                 if not _is_e(c0):
364 |                     buf.push_back(b'.')
365 |             buf.push_back(<char> <unsigned char> c0)
366 |         elif not was_point:
367 |             was_point = True
368 |         else:
369 |             _raise_unclosed('NumericLiteral', start)
370 | 
371 |         if not _reader_good(reader):
372 |             c1 = -1
373 |             break
374 | 
375 |         c0 = _reader_get(reader)
376 | 
377 |     c_in_out[0] = c1
378 | 
379 |     if leading_point and buf.size() == 1:  # single '.'
380 |         _raise_unclosed('NumericLiteral', start)
381 | 
382 |     buf.push_back(b'\0')
383 | 
384 |     if not is_float:
385 |         try:
386 |             return PyLong_FromString(buf.data(), NULL, 10)
387 |         except Exception:
388 |             pass
389 |         _raise_unclosed('NumericLiteral', start)
390 |     else:
391 |         return _decode_double(buf, start)
392 | 
393 | 
394 | cdef object _decode_number(ReaderRef reader, int32_t *c_in_out):
395 |     cdef uint32_t c0
396 |     cdef int32_t c1
397 |     cdef Py_ssize_t start = _reader_tell(reader)
398 |     cdef StackHeapString[char] buf
399 | 
400 |     c1 = c_in_out[0]
401 |     c0 = cast_to_uint32(c1)
402 | 
403 |     if c0 == b'+':
404 |         if expect(not _reader_good(reader), False):
405 |             _raise_unclosed(b'number', start)
406 | 
407 |         c0 = _reader_get(reader)
408 |         if c0 == b'I':
409 |             _accept_string(reader, b'nfinity')
410 |             c_in_out[0] = NO_EXTRA_DATA
411 |             return CONST_POS_INF
412 |         elif c0 == b'N':
413 |             _accept_string(reader, b'aN')
414 |             c_in_out[0] = NO_EXTRA_DATA
415 |             return CONST_POS_NAN
416 |     elif c0 == b'-':
417 |         if expect(not _reader_good(reader), False):
418 |             _raise_unclosed(b'number', start)
419 | 
420 |         c0 = _reader_get(reader)
421 |         if c0 == b'I':
422 |             _accept_string(reader, b'nfinity')
423 |             c_in_out[0] = NO_EXTRA_DATA
424 |             return CONST_NEG_INF
425 |         elif c0 == b'N':
426 |             _accept_string(reader, b'aN')
427 |             c_in_out[0] = NO_EXTRA_DATA
428 |             return CONST_NEG_NAN
429 | 
430 |         buf.push_back(b'-')
431 | 
432 |     if c0 == b'0':
433 |         return _decode_number_leading_zero(reader, buf, c_in_out, start)
434 |     else:
435 |         c1 = cast_to_int32(c0)
436 |         c_in_out[0] = c1
437 |         return _decode_number_any(reader, buf, c_in_out, start)
438 | 
439 | 
440 | #  1: done
441 | #  0: data found
442 | # -1: exception (exhausted)
443 | cdef uint32_t _skip_comma(ReaderRef reader, Py_ssize_t start,
444 |                           uint32_t terminator, const char *what,
445 |                           int32_t *c_in_out) except -1:
446 |     cdef int32_t c0
447 |     cdef uint32_t c1
448 |     cdef boolean needs_comma
449 |     cdef uint32_t done
450 | 
451 |     c0 = c_in_out[0]
452 |     c1 = cast_to_uint32(c0)
453 | 
454 |     needs_comma = True
455 |     while True:
456 |         c0 = _skip_to_data_sub(reader, c1)
457 |         if c0 < 0:
458 |             break
459 | 
460 |         c1 = cast_to_uint32(c0)
461 |         if c1 == terminator:
462 |             c_in_out[0] = NO_EXTRA_DATA
463 |             return 1
464 | 
465 |         if c1 != b',':
466 |             if expect(needs_comma, False):
467 |                 _raise_expected_sc(
468 |                     'comma', terminator, _reader_tell(reader), c1,
469 |                 )
470 |             c_in_out[0] = c0
471 |             return 0
472 | 
473 |         if expect(not needs_comma, False):
474 |             _raise_stray_character('comma', _reader_tell(reader))
475 | 
476 |         if expect(not _reader_good(reader), False):
477 |             break
478 | 
479 |         c1 = _reader_get(reader)
480 |         needs_comma = False
481 | 
482 |     _raise_unclosed(what, start)
483 |     return -1
484 | 
485 | 
486 | cdef unicode _decode_identifier_name(ReaderRef reader, int32_t *c_in_out):
487 |     cdef int32_t c0
488 |     cdef uint32_t c1
489 |     cdef Py_ssize_t start
490 |     cdef StackHeapString[uint32_t] buf
491 | 
492 |     start = _reader_tell(reader)
493 | 
494 |     c0 = c_in_out[0]
495 |     c1 = cast_to_uint32(c0)
496 |     if expect(not _is_identifier_start(c1), False):
497 |         _raise_expected_s('IdentifierStart', _reader_tell(reader), c1)
498 | 
499 |     while True:
500 |         if expect(c1 == b'\\', False):
501 |             if not _reader_good(reader):
502 |                 _raise_unclosed('IdentifierName', start)
503 |                 break
504 | 
505 |             c1 = _reader_get(reader)
506 |             if c1 == b'u':
507 |                 c1 = cast_to_uint32(_get_escaped_unicode_maybe_surrogate(reader, _reader_tell(reader)))
508 |             elif c1 == b'U':
509 |                 c1 = cast_to_uint32(_get_hex_character(reader, 8))
510 |             else:
511 |                 _raise_expected_s('UnicodeEscapeSequence', _reader_tell(reader), c1)
512 | 
513 |         buf.push_back(c1)
514 | 
515 |         if not _reader_good(reader):
516 |             c0 = -1
517 |             break
518 | 
519 |         c1 = _reader_get(reader)
520 |         if not _is_identifier_part(c1):
521 |             c0 = cast_to_int32(c1)
522 |             break
523 | 
524 |     c_in_out[0] = c0
525 |     return PyUnicode_FromKindAndData(
526 |         PyUnicode_4BYTE_KIND, buf.data(), buf.size(),
527 |     )
528 | 
529 | 
530 | cdef boolean _decode_object(ReaderRef reader, object result) except False:
531 |     cdef int32_t c0
532 |     cdef uint32_t c1
533 |     cdef Py_ssize_t start
534 |     cdef boolean done
535 |     cdef object key
536 |     cdef object value
537 |     cdef object ex
538 | 
539 |     start = _reader_tell(reader)
540 | 
541 |     c0 = _skip_to_data(reader)
542 |     if expect(c0 >= 0, True):
543 |         c1 = cast_to_uint32(c0)
544 |         if c1 == b'}':
545 |             return True
546 | 
547 |         while True:
548 |             if c1 in b'"\'':
549 |                 key = _decode_string(reader, &c0)
550 |             else:
551 |                 key = _decode_identifier_name(reader, &c0)
552 |             if expect(c0 < 0, False):
553 |                 break
554 | 
555 |             c1 = cast_to_uint32(c0)
556 |             c0 = _skip_to_data_sub(reader, c1)
557 |             if expect(c0 < 0, False):
558 |                 break
559 | 
560 |             c1 = cast_to_uint32(c0)
561 |             if expect(c1 != b':', False):
562 |                 _raise_expected_s('colon', _reader_tell(reader), c1)
563 | 
564 |             if expect(not _reader_good(reader), False):
565 |                 break
566 | 
567 |             c0 = _skip_to_data(reader)
568 |             if expect(c0 < 0, False):
569 |                 break
570 | 
571 |             try:
572 |                 value = _decode_recursive(reader, &c0)
573 |             except _DecoderException as ex:
574 |                 PyDict_SetItem(result, key, (<_DecoderException> ex).result)
575 |                 raise
576 | 
577 |             if expect(c0 < 0, False):
578 |                 break
579 | 
580 |             PyDict_SetItem(result, key, value)
581 | 
582 |             done = _skip_comma(
583 |                 reader, start, <unsigned char>b'}', b'object', &c0,
584 |             )
585 |             if done:
586 |                 return True
587 | 
588 |             c1 = cast_to_uint32(c0)
589 | 
590 |     _raise_unclosed(b'object', start)
591 |     return False
592 | 
593 | 
594 | cdef boolean _decode_array(ReaderRef reader, object result) except False:
595 |     cdef int32_t c0
596 |     cdef uint32_t c1
597 |     cdef Py_ssize_t start
598 |     cdef boolean done
599 |     cdef object value
600 |     cdef object ex
601 | 
602 |     start = _reader_tell(reader)
603 | 
604 |     c0 = _skip_to_data(reader)
605 |     if expect(c0 >= 0, True):
606 |         c1 = cast_to_uint32(c0)
607 |         if c1 == b']':
608 |             return True
609 | 
610 |         while True:
611 |             try:
612 |                 value = _decode_recursive(reader, &c0)
613 |             except _DecoderException as ex:
614 |                 PyList_Append(result, (<_DecoderException> ex).result)
615 |                 raise
616 | 
617 |             if expect(c0 < 0, False):
618 |                 break
619 | 
620 |             PyList_Append(result, value)
621 | 
622 |             done = _skip_comma(
623 |                 reader, start, <unsigned char>b']', b'array', &c0,
624 |             )
625 |             if done:
626 |                 return True
627 | 
628 |     _raise_unclosed(b'array', start)
629 | 
630 | 
631 | cdef boolean _accept_string(ReaderRef reader, const char *string) except False:
632 |     cdef uint32_t c0
633 |     cdef uint32_t c1
634 |     cdef Py_ssize_t start
635 | 
636 |     start = _reader_tell(reader)
637 |     while True:
638 |         c0 = string[0]
639 |         string += 1
640 |         if not c0:
641 |             break
642 | 
643 |         if expect(not _reader_good(reader), False):
644 |             _raise_unclosed(b'literal', start)
645 | 
646 |         c1 = _reader_get(reader)
647 |         if expect(c0 != c1, False):
648 |             _raise_expected_c(c0, start, c1)
649 | 
650 |     return True
651 | 
652 | 
653 | cdef object _decode_null(ReaderRef reader, int32_t *c_in_out):
654 |     #                       n
655 |     _accept_string(reader, b'ull')
656 |     c_in_out[0] = NO_EXTRA_DATA
657 |     return None
658 | 
659 | 
660 | cdef object _decode_true(ReaderRef reader, int32_t *c_in_out):
661 |     #                       t
662 |     _accept_string(reader, b'rue')
663 |     c_in_out[0] = NO_EXTRA_DATA
664 |     return True
665 | 
666 | 
667 | cdef object _decode_false(ReaderRef reader, int32_t *c_in_out):
668 |     #                      f
669 |     _accept_string(reader, b'alse')
670 |     c_in_out[0] = NO_EXTRA_DATA
671 |     return False
672 | 
673 | 
674 | cdef object _decode_inf(ReaderRef reader, int32_t *c_in_out):
675 |     #                       I
676 |     _accept_string(reader, b'nfinity')
677 |     c_in_out[0] = NO_EXTRA_DATA
678 |     return CONST_POS_INF
679 | 
680 | 
681 | cdef object _decode_nan(ReaderRef reader, int32_t *c_in_out):
682 |     #                       N
683 |     _accept_string(reader, b'aN')
684 |     c_in_out[0] = NO_EXTRA_DATA
685 |     return CONST_POS_NAN
686 | 
687 | 
688 | cdef object _decode_recursive_enter(ReaderRef reader, int32_t *c_in_out):
689 |     cdef boolean (*fn)(ReaderRef reader, object result) except False
690 |     cdef object result
691 |     cdef int32_t c0
692 |     cdef uint32_t c1
693 |     cdef object ex
694 | 
695 |     c0 = c_in_out[0]
696 |     c1 = cast_to_uint32(c0)
697 | 
698 |     if c1 == b'{':
699 |         result = {}
700 |         fn = _decode_object
701 |     else:
702 |         result = []
703 |         fn = _decode_array
704 | 
705 |     _reader_enter(reader)
706 |     try:
707 |         fn(reader, result)
708 |     except RecursionError:
709 |         _raise_nesting(_reader_tell(reader), result)
710 |     except _DecoderException as ex:
711 |         (<_DecoderException> ex).result = result
712 |         raise
713 |     finally:
714 |         _reader_leave(reader)
715 | 
716 |     c_in_out[0] = NO_EXTRA_DATA
717 |     return result
718 | 
719 | 
720 | cdef object _decoder_unknown(ReaderRef reader, int32_t *c_in_out):
721 |     cdef int32_t c0
722 |     cdef uint32_t c1
723 |     cdef Py_ssize_t start
724 | 
725 |     c0 = c_in_out[0]
726 |     c1 = cast_to_uint32(c0)
727 |     start = _reader_tell(reader)
728 | 
729 |     _raise_expected_s('JSON5Value', start, c1)
730 | 
731 | 
732 | cdef object _decode_recursive(ReaderRef reader, int32_t *c_in_out):
733 |     cdef int32_t c0
734 |     cdef uint32_t c1
735 |     cdef Py_ssize_t start
736 |     cdef DrsKind kind
737 |     cdef object (*decoder)(ReaderRef, int32_t*)
738 | 
739 |     c0 = c_in_out[0]
740 |     c1 = cast_to_uint32(c0)
741 |     if c1 >= 128:
742 |         start = _reader_tell(reader)
743 |         _raise_expected_s('JSON5Value', start, c1)
744 | 
745 |     kind = drs_lookup[c1]
746 |     if kind == DRS_fail:
747 |         decoder = _decoder_unknown
748 |     elif kind == DRS_null:
749 |         decoder = _decode_null
750 |     elif kind == DRS_true:
751 |         decoder = _decode_true
752 |     elif kind == DRS_false:
753 |         decoder = _decode_false
754 |     elif kind == DRS_inf:
755 |         decoder = _decode_inf
756 |     elif kind == DRS_nan:
757 |         decoder = _decode_nan
758 |     elif kind == DRS_string:
759 |         decoder = _decode_string
760 |     elif kind == DRS_number:
761 |         decoder = _decode_number
762 |     elif kind == DRS_recursive:
763 |         decoder = _decode_recursive_enter
764 |     else:
765 |         unreachable()
766 |         decoder = _decoder_unknown
767 | 
768 |     return decoder(reader, c_in_out)
769 | 
770 | 
771 | cdef object _decode_all_sub(ReaderRef reader, boolean some):
772 |     cdef Py_ssize_t start
773 |     cdef int32_t c0
774 |     cdef uint32_t c1
775 |     cdef object result
776 |     cdef object ex
777 | 
778 |     start = _reader_tell(reader)
779 |     c0 = _skip_to_data(reader)
780 |     if expect(c0 < 0, False):
781 |         _raise_no_data(start)
782 | 
783 |     result = _decode_recursive(reader, &c0)
784 |     try:
785 |         if c0 < 0:
786 |             pass
787 |         elif not some:
788 |             start = _reader_tell(reader)
789 |             c1 = cast_to_uint32(c0)
790 |             c0 = _skip_to_data_sub(reader, c1)
791 |             if expect(c0 >= 0, False):
792 |                 c1 = cast_to_uint32(c0)
793 |                 _raise_extra_data(c1, start)
794 |         elif expect(not _is_ws_zs(c0), False):
795 |             start = _reader_tell(reader)
796 |             c1 = cast_to_uint32(c0)
797 |             _raise_unframed_data(c1, start)
798 |     except _DecoderException as ex:
799 |         (<_DecoderException> ex).result = result
800 |         raise
801 | 
802 |     return result
803 | 
804 | 
805 | cdef object _decode_all(ReaderRef reader, boolean some):
806 |     cdef object ex, ex2
807 |     try:
808 |         return _decode_all_sub(reader, some)
809 |     except _DecoderException as ex:
810 |         ex2 = (<_DecoderException> ex).cls(
811 |             (<_DecoderException> ex).msg,
812 |             (<_DecoderException> ex).result,
813 |             (<_DecoderException> ex).extra,
814 |         )
815 |     raise ex2
816 | 
817 | 
818 | cdef object _decode_ucs1(const void *string, Py_ssize_t length,
819 |                          Py_ssize_t maxdepth, boolean some):
820 |     cdef ReaderUCS1 reader = ReaderUCS1(
821 |         ReaderUCS(length, 0, maxdepth),
822 |         <const Py_UCS1*> string,
823 |     )
824 |     return _decode_all(reader, some)
825 | 
826 | 
827 | cdef object _decode_ucs2(const void *string, Py_ssize_t length,
828 |                          Py_ssize_t maxdepth, boolean some):
829 |     cdef ReaderUCS2 reader = ReaderUCS2(
830 |         ReaderUCS(length, 0, maxdepth),
831 |         <const Py_UCS2*> string,
832 |     )
833 |     return _decode_all(reader, some)
834 | 
835 | 
836 | cdef object _decode_ucs4(const void *string, Py_ssize_t length,
837 |                          Py_ssize_t maxdepth, boolean some):
838 |     cdef ReaderUCS4 reader = ReaderUCS4(
839 |         ReaderUCS(length, 0, maxdepth),
840 |         <const Py_UCS4*> string,
841 |     )
842 |     return _decode_all(reader, some)
843 | 
844 | 
845 | cdef object _decode_utf8(const void *string, Py_ssize_t length,
846 |                          Py_ssize_t maxdepth, boolean some):
847 |     cdef ReaderUTF8 reader = ReaderUTF8(
848 |         ReaderUCS(length, 0, maxdepth),
849 |         <const Py_UCS1*> string,
850 |     )
851 |     return _decode_all(reader, some)
852 | 
853 | 
854 | cdef object _decode_unicode(object data, Py_ssize_t maxdepth, boolean some):
855 |     cdef Py_ssize_t length
856 |     cdef int kind
857 |     cdef const char *s
858 | 
859 |     PyUnicode_READY(data)
860 | 
861 |     if CYTHON_COMPILING_IN_PYPY:
862 |         length = 0
863 |         s = PyUnicode_AsUTF8AndSize(data, &length)
864 |         return _decode_utf8(s, length, maxdepth, some)
865 | 
866 |     length = PyUnicode_GET_LENGTH(data)
867 |     kind = PyUnicode_KIND(data)
868 | 
869 |     if kind == PyUnicode_1BYTE_KIND:
870 |         return _decode_ucs1(PyUnicode_1BYTE_DATA(data), length, maxdepth, some)
871 |     elif kind == PyUnicode_2BYTE_KIND:
872 |         return _decode_ucs2(PyUnicode_2BYTE_DATA(data), length, maxdepth, some)
873 |     elif kind == PyUnicode_4BYTE_KIND:
874 |         return _decode_ucs4(PyUnicode_4BYTE_DATA(data), length, maxdepth, some)
875 |     else:
876 |         unreachable()
877 | 
878 | 
879 | cdef object _decode_buffer(Py_buffer &view, int32_t wordlength,
880 |                            Py_ssize_t maxdepth, boolean some):
881 |     cdef object (*decoder)(const void*, Py_ssize_t, Py_ssize_t, boolean)
882 |     cdef Py_ssize_t length = 0
883 | 
884 |     if wordlength == 0:
885 |         decoder = _decode_utf8
886 |         length = view.len // 1
887 |     elif wordlength == 1:
888 |         decoder = _decode_ucs1
889 |         length = view.len // 1
890 |     elif wordlength == 2:
891 |         decoder = _decode_ucs2
892 |         length = view.len // 2
893 |     elif wordlength == 4:
894 |         decoder = _decode_ucs4
895 |         length = view.len // 4
896 |     else:
897 |         _raise_illegal_wordlength(wordlength)
898 |         unreachable()
899 |         length = 0
900 |         decoder = NULL
901 | 
902 |     return decoder(view.buf, length, maxdepth, some)
903 | 
904 | 
905 | cdef object _decode_callback(object cb, object args, Py_ssize_t maxdepth,
906 |                              boolean some):
907 |     cdef ReaderCallback reader = ReaderCallback(
908 |         ReaderCallbackBase(0, maxdepth),
909 |         <PyObject*> cb,
910 |         <PyObject*> args,
911 |         -1,
912 |     )
913 |     return _decode_all(reader, some)
914 | 


--------------------------------------------------------------------------------
/src/_decoder_recursive_select.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef JSON5EncoderCpp_decoder_recursive_select
 2 | #define JSON5EncoderCpp_decoder_recursive_select
 3 | 
 4 | // GENERATED FILE
 5 | // All changes will be lost.
 6 | 
 7 | #include <cstdint>
 8 | 
 9 | namespace JSON5EncoderCpp {
10 | inline namespace {
11 | 
12 | enum DrsKind : std::uint8_t {
13 |     DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive
14 | };
15 | 
16 | static const DrsKind drs_lookup[128] = {
17 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
18 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
19 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
20 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
21 |     DRS_fail, DRS_fail, DRS_string, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_string,
22 |     DRS_fail, DRS_fail, DRS_fail, DRS_number, DRS_fail, DRS_number, DRS_number, DRS_fail,
23 |     DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number,
24 |     DRS_number, DRS_number, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
25 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
26 |     DRS_fail, DRS_inf, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_nan, DRS_fail,
27 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
28 |     DRS_fail, DRS_fail, DRS_fail, DRS_recursive, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
29 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_false, DRS_fail,
30 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_null, DRS_fail,
31 |     DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_true, DRS_fail, DRS_fail, DRS_fail,
32 |     DRS_fail, DRS_fail, DRS_fail, DRS_recursive, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
33 | };
34 | 
35 | }  // anonymous inline namespace
36 | }  // namespace JSON5EncoderCpp
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/src/_encoder.pyx:
--------------------------------------------------------------------------------
  1 | cdef int _encode_unicode_impl(WriterRef writer, UCSString data, Py_ssize_t length) except -1:
  2 |     cdef char buf[32]
  3 |     cdef uint32_t c
  4 |     cdef uint32_t s1, s2
  5 |     cdef const char *escaped_string
  6 |     cdef Py_ssize_t escaped_length
  7 |     cdef size_t unescaped_length, index
  8 |     cdef Py_ssize_t sublength
  9 | 
 10 |     if length > 0:
 11 |         writer.reserve(writer, 2 + length)
 12 |         writer.append_c(writer, <char> PyUnicode_1BYTE_DATA((<Options> writer.options).quotationmark)[0])
 13 |         while True:
 14 |             if UCSString is UCS1String:
 15 |                 sublength = length
 16 |             else:
 17 |                 sublength = min(length, <Py_ssize_t> sizeof(buf))
 18 | 
 19 |             unescaped_length = ESCAPE_DCT.find_unescaped_range(data, sublength)
 20 |             if unescaped_length > 0:
 21 |                 if UCSString is UCS1String:
 22 |                     writer.append_s(writer, <const char*> data, unescaped_length)
 23 |                 else:
 24 |                     for index in range(unescaped_length):
 25 |                         buf[index] = <const char> data[index]
 26 |                     writer.append_s(writer, buf, unescaped_length)
 27 | 
 28 |                 data += unescaped_length
 29 |                 length -= unescaped_length
 30 |                 if length <= 0:
 31 |                     break
 32 | 
 33 |                 if UCSString is not UCS1String:
 34 |                     continue
 35 | 
 36 |             c = data[0]
 37 |             if (UCSString is UCS1String) or (c < 0x100):
 38 |                 escaped_length = ESCAPE_DCT.items[c][0]
 39 |                 escaped_string = &ESCAPE_DCT.items[c][1]
 40 |                 writer.append_s(writer, escaped_string, escaped_length)
 41 |             elif (UCSString is UCS2String) or (c <= 0xffff):
 42 |                 buf[0] = b'\\';
 43 |                 buf[1] = b'u';
 44 |                 buf[2] = HEX[(c >> (4*3)) & 0xf];
 45 |                 buf[3] = HEX[(c >> (4*2)) & 0xf];
 46 |                 buf[4] = HEX[(c >> (4*1)) & 0xf];
 47 |                 buf[5] = HEX[(c >> (4*0)) & 0xf];
 48 |                 buf[6] = 0;
 49 | 
 50 |                 writer.append_s(writer, buf, 6);
 51 |             else:
 52 |                 # surrogate pair
 53 |                 c -= 0x10000
 54 |                 s1 = 0xd800 | ((c >> 10) & 0x3ff)
 55 |                 s2 = 0xdc00 | (c & 0x3ff)
 56 | 
 57 |                 buf[0x0] = b'\\';
 58 |                 buf[0x1] = b'u';
 59 |                 buf[0x2] = HEX[(s1 >> (4*3)) & 0xf];
 60 |                 buf[0x3] = HEX[(s1 >> (4*2)) & 0xf];
 61 |                 buf[0x4] = HEX[(s1 >> (4*1)) & 0xf];
 62 |                 buf[0x5] = HEX[(s1 >> (4*0)) & 0xf];
 63 | 
 64 |                 buf[0x6] = b'\\';
 65 |                 buf[0x7] = b'u';
 66 |                 buf[0x8] = HEX[(s2 >> (4*3)) & 0xf];
 67 |                 buf[0x9] = HEX[(s2 >> (4*2)) & 0xf];
 68 |                 buf[0xa] = HEX[(s2 >> (4*1)) & 0xf];
 69 |                 buf[0xb] = HEX[(s2 >> (4*0)) & 0xf];
 70 | 
 71 |                 buf[0xc] = 0;
 72 | 
 73 |                 writer.append_s(writer, buf, 12);
 74 | 
 75 |             data += 1
 76 |             length -= 1
 77 |             if length <= 0:
 78 |                 break
 79 |         writer.append_c(writer, <char> PyUnicode_1BYTE_DATA((<Options> writer.options).quotationmark)[0])
 80 |     else:
 81 |         writer.append_s(writer, b'""', 2)
 82 | 
 83 |     return True
 84 | 
 85 | 
 86 | cdef int _encode_unicode(WriterRef writer, object data) except -1:
 87 |     cdef Py_ssize_t length
 88 |     cdef int kind
 89 | 
 90 |     PyUnicode_READY(data)
 91 | 
 92 |     length = PyUnicode_GET_LENGTH(data)
 93 |     kind = PyUnicode_KIND(data)
 94 | 
 95 |     if kind == PyUnicode_1BYTE_KIND:
 96 |         _encode_unicode_impl(writer, PyUnicode_1BYTE_DATA(data), length)
 97 |     elif kind == PyUnicode_2BYTE_KIND:
 98 |         _encode_unicode_impl(writer, PyUnicode_2BYTE_DATA(data), length)
 99 |     elif kind == PyUnicode_4BYTE_KIND:
100 |         _encode_unicode_impl(writer, PyUnicode_4BYTE_DATA(data), length)
101 |     else:
102 |         pass  # impossible
103 | 
104 |     return True
105 | 
106 | 
107 | cdef int _encode_nested_key(WriterRef writer, object data) except -1:
108 |     cdef const char *string
109 |     cdef char c
110 |     cdef Py_ssize_t index, length
111 |     cdef int result
112 | 
113 |     cdef WriterReallocatable sub_writer = WriterReallocatable(
114 |         Writer(
115 |             _WriterReallocatable_reserve,
116 |             _WriterReallocatable_append_c,
117 |             _WriterReallocatable_append_s,
118 |             writer.options,
119 |         ),
120 |         0, 0, NULL,
121 |     )
122 |     try:
123 |         result = _encode(sub_writer.base, data)
124 |         if expect(result < 0, False):
125 |             return result
126 | 
127 |         length = sub_writer.position
128 |         string = <char*> sub_writer.obj
129 | 
130 |         writer.reserve(writer, 2 + length)
131 |         writer.append_c(writer, <char> PyUnicode_1BYTE_DATA((<Options> writer.options).quotationmark)[0])
132 |         for index in range(length):
133 |             c = string[index]
134 |             if c in b'\\"':
135 |                 writer.append_c(writer, b'\\')
136 |             writer.append_c(writer, c)
137 |         writer.append_c(writer, <char> PyUnicode_1BYTE_DATA((<Options> writer.options).quotationmark)[0])
138 |     finally:
139 |         if sub_writer.obj is not NULL:
140 |             ObjectFree(sub_writer.obj)
141 | 
142 |     return True
143 | 
144 | 
145 | cdef int _append_ascii(WriterRef writer, object data) except -1:
146 |     cdef Py_buffer view
147 |     cdef const char *buf
148 |     cdef Py_ssize_t index
149 |     cdef unsigned char c
150 | 
151 |     if PyUnicode_Check(data):
152 |         PyUnicode_READY(data)
153 |         if not PyUnicode_IS_ASCII(data):
154 |             raise TypeError('Expected ASCII data')
155 |         writer.append_s(writer, <const char*> PyUnicode_1BYTE_DATA(data), PyUnicode_GET_LENGTH(data))
156 |     else:
157 |         PyObject_GetBuffer(data, &view, PyBUF_CONTIG_RO)
158 |         try:
159 |             buf = <const char*> view.buf
160 |             for index in range(view.len):
161 |                 c = <unsigned char> buf[index]
162 |                 if c & ~0x7f:
163 |                     raise TypeError('Expected ASCII data')
164 | 
165 |             writer.append_s(writer, buf, view.len)
166 |         finally:
167 |             PyBuffer_Release(&view)
168 | 
169 |     return True
170 | 
171 | 
172 | cdef int _encode_tojson(WriterRef writer, object data) except -1:
173 |     cdef object value = getattr(data, (<Options> writer.options).tojson, None)
174 |     if value is None:
175 |         return False
176 | 
177 |     if callable(value):
178 |         Py_EnterRecursiveCall(' while encoding nested JSON5 object')
179 |         try:
180 |             value = value()
181 |         finally:
182 |             Py_LeaveRecursiveCall()
183 | 
184 |     _append_ascii(writer, value)
185 |     return True
186 | 
187 | 
188 | cdef int _encode_sequence(WriterRef writer, object data) except -1:
189 |     cdef boolean first
190 |     cdef object iterator
191 |     cdef object value
192 |     cdef int result
193 | 
194 |     try:
195 |         iterator = PyObject_GetIter(data)
196 |     except TypeError:
197 |         return False
198 | 
199 |     Py_EnterRecursiveCall(' while encoding nested JSON5 object')
200 |     try:
201 |         writer.append_c(writer, <char> b'[')
202 |         first = True
203 |         value = None
204 |         while iter_next(iterator, &<PyObject*&> value):
205 |             if not first:
206 |                 writer.append_c(writer, <char> b',')
207 |             else:
208 |                 first = False
209 | 
210 |             result = _encode(writer, value)
211 |             if expect(result < 0, False):
212 |                 return result
213 |         writer.append_c(writer, <char> b']')
214 |     finally:
215 |         Py_LeaveRecursiveCall()
216 | 
217 |     return True
218 | 
219 | 
220 | cdef int _encode_mapping(WriterRef writer, object data) except -1:
221 |     cdef boolean first
222 |     cdef object iterator, key, value
223 |     cdef int result
224 | 
225 |     if not isinstance(data, (<Options> writer.options).mappingtypes):
226 |         return False
227 | 
228 |     iterator = PyObject_GetIter(data)
229 | 
230 |     Py_EnterRecursiveCall(' while encoding nested JSON5 object')
231 |     try:
232 |         writer.append_c(writer, <char> b'{')
233 |         first = True
234 |         key = None
235 |         while iter_next(iterator, &<PyObject*&> key):
236 |             if not first:
237 |                 writer.append_c(writer, <char> b',')
238 |             else:
239 |                 first = False
240 |             value = data[key]
241 | 
242 |             if PyUnicode_Check(key):
243 |                 _encode_unicode(writer, key)
244 |             else:
245 |                 _encode_nested_key(writer, key)
246 | 
247 |             writer.append_c(writer, <char> b':')
248 | 
249 |             result = _encode(writer, value)
250 |             if expect(result < 0, False):
251 |                 return result
252 |         writer.append_c(writer, <char> b'}')
253 |     finally:
254 |         Py_LeaveRecursiveCall()
255 | 
256 |     return True
257 | 
258 | 
259 | cdef int _encode_none(WriterRef writer, object data) except -1:
260 |     writer.append_s(writer, b'null', 4)
261 |     return True
262 | 
263 | 
264 | cdef int _encode_bytes(WriterRef writer, object data) except -1:
265 |     _encode_unicode(writer, PyUnicode_FromEncodedObject(data, 'UTF-8', 'strict'))
266 |     return True
267 | 
268 | 
269 | cdef int _encode_datetime(WriterRef writer, object data) except -1:
270 |     cdef object stringified
271 |     cdef Py_ssize_t length
272 |     cdef const char *string
273 | 
274 |     if not isinstance(data, DATETIME_CLASSES):
275 |         return False
276 | 
277 |     stringified = data.isoformat()
278 |     length = 0
279 |     string = PyUnicode_AsUTF8AndSize(stringified, &length)
280 | 
281 |     writer.reserve(writer, 2 + length)
282 |     writer.append_c(writer, <char> PyUnicode_1BYTE_DATA((<Options> writer.options).quotationmark)[0])
283 |     writer.append_s(writer, string, length)
284 |     writer.append_c(writer, <char> PyUnicode_1BYTE_DATA((<Options> writer.options).quotationmark)[0])
285 | 
286 |     return True
287 | 
288 | 
289 | cdef int _encode_format_string(WriterRef writer, object data, object fmt) except -1:
290 |     cdef object formatted
291 |     cdef const char *string
292 |     cdef Py_ssize_t length = 0  # silence warning
293 | 
294 |     formatted = PyUnicode_Format(fmt, data)
295 |     string = PyUnicode_AsUTF8AndSize(formatted, &length)
296 |     writer.append_s(writer, string, length)
297 | 
298 |     return True
299 | 
300 | 
301 | cdef int _encode_float(WriterRef writer, object data) except -1:
302 |     cdef double value = PyFloat_AsDouble(data)
303 |     cdef int classification = fpclassify(value)
304 |     cdef char buf[64]
305 |     cdef char *end
306 |     cdef char *string
307 |     cdef Py_ssize_t length
308 | 
309 |     if classification == FP_NORMAL:
310 |         end = Dtoa(buf, PyFloat_AsDouble(data))
311 |         length = end - buf
312 |         string = buf
313 |     elif classification in (FP_SUBNORMAL, FP_ZERO):
314 |         string = b'0.0'
315 |         length = 3
316 |     elif classification == FP_NAN:
317 |         string = b'NaN'
318 |         length = 3
319 |     else:
320 |         # classification == FP_INFINITE
321 |         if value > 0.0:
322 |             string = b'Infinity'
323 |             length = 8
324 |         else:
325 |             string = b'-Infinity'
326 |             length = 9
327 | 
328 |     writer.append_s(writer, string, length)
329 |     return True
330 | 
331 | 
332 | cdef int _encode_long(WriterRef writer, object data) except -1:
333 |     if PyBool_Check(data):
334 |         if data is True:
335 |             writer.append_s(writer, 'true', 4)
336 |         else:
337 |             writer.append_s(writer, 'false', 5)
338 |     else:
339 |         _encode_format_string(writer, data, DEFAULT_INTFORMAT)
340 |     return True
341 | 
342 | 
343 | cdef int _encode_decimal(WriterRef writer, object data) except -1:
344 |     if not isinstance(data, Decimal):
345 |         return False
346 | 
347 |     _encode_format_string(writer, data, DEFAULT_DECIMALFORMAT)
348 |     return True
349 | 
350 | 
351 | cdef int _encode_unstringifiable(WriterRef writer, object data) except -1:
352 |     if not data:
353 |         writer.append_s(writer, b'none', 4)
354 |         return True
355 | 
356 |     Py_EnterRecursiveCall(' while encoding JSON5 object with vars(obj) fallback')
357 |     try:
358 |         try:
359 |             data = PyObject_GenericGetDict(data, NULL)
360 |         except:
361 |             pass
362 |         else:
363 |             if _encode_mapping(writer, data):
364 |                 return True
365 |     finally:
366 |         Py_LeaveRecursiveCall()
367 | 
368 |     _raise_unstringifiable(data)
369 | 
370 | 
371 | cdef int _encode_other(WriterRef writer, object data):
372 |     cdef int result = 0
373 | 
374 |     while True:
375 |         if (<Options> writer.options).tojson is not None:
376 |             result = (<int(*)(WriterRef, object)> _encode_tojson)(writer, data)
377 |             if result != 0:
378 |                 break
379 | 
380 |         if obj_has_iter(data):
381 |             result = (<int(*)(WriterRef, object)> _encode_mapping)(writer, data)
382 |             if result != 0:
383 |                 break
384 | 
385 |             result = (<int(*)(WriterRef, object)> _encode_sequence)(writer, data)
386 |             if result != 0:
387 |                 break
388 | 
389 |         result = (<int(*)(WriterRef, object)> _encode_decimal)(writer, data)
390 |         if result != 0:
391 |             break
392 | 
393 |         result = (<int(*)(WriterRef, object)> _encode_datetime)(writer, data)
394 |         if result != 0:
395 |             break
396 | 
397 |         result = (<int(*)(WriterRef, object)> _encode_unstringifiable)(writer, data)
398 |         if result != 0:
399 |             break
400 | 
401 |         break
402 | 
403 |     return result
404 | 
405 | 
406 | cdef int _encode(WriterRef writer, object data):
407 |     cdef int (*encoder)(WriterRef, object)
408 | 
409 |     if data is None:
410 |         encoder = <int(*)(WriterRef, object)> _encode_none
411 |     elif PyUnicode_Check(data):
412 |         encoder = <int(*)(WriterRef, object)> _encode_unicode
413 |     elif PyLong_Check(data):
414 |         encoder = <int(*)(WriterRef, object)> _encode_long
415 |     elif PyFloat_Check(data):
416 |         encoder = <int(*)(WriterRef, object)> _encode_float
417 |     elif PyBytes_Check(data):
418 |         encoder = <int(*)(WriterRef, object)> _encode_bytes
419 |     else:
420 |         encoder = <int(*)(WriterRef, object)> _encode_other
421 | 
422 |     return encoder(writer, data)
423 | 
424 | 
425 | cdef int _encode_callback_bytes(object data, object cb, object options) except -1:
426 |     cdef WriterCallback writer = WriterCallback(
427 |         Writer(
428 |             _WriterNoop_reserve,
429 |             _WriterCbBytes_append_c,
430 |             _WriterCbBytes_append_s,
431 |             <PyObject*> options,
432 |         ),
433 |         <PyObject*> cb,
434 |     )
435 | 
436 |     if expect(not callable(cb), False):
437 |         raise TypeError(f'type(cb)=={type(cb)!r} is not callable')
438 | 
439 |     return _encode(writer.base, data)
440 | 
441 | 
442 | cdef int _encode_callback_str(object data, object cb, object options) except -1:
443 |     cdef WriterCallback writer = WriterCallback(
444 |         Writer(
445 |             _WriterNoop_reserve,
446 |             _WriterCbStr_append_c,
447 |             _WriterCbStr_append_s,
448 |             <PyObject*> options,
449 |         ),
450 |         <PyObject*> cb,
451 |     )
452 | 
453 |     if expect(not callable(cb), False):
454 |         raise TypeError(f'type(cb)=={type(cb)!r} is not callable')
455 | 
456 |     return _encode(writer.base, data)
457 | 


--------------------------------------------------------------------------------
/src/_encoder_options.pyx:
--------------------------------------------------------------------------------
  1 | cdef object DEFAULT_TOJSON = False
  2 | cdef object DEFAULT_INTFORMAT = '%d'
  3 | cdef object DEFAULT_DECIMALFORMAT = '%s'
  4 | cdef object DEFAULT_MAPPINGTYPES = (Mapping,)
  5 | cdef object DEFAULT_QUOTATIONMARK = '"'
  6 | 
  7 | 
  8 | cdef object _options_ascii(object datum, boolean expect_ascii=True):
  9 |     if datum is False:
 10 |         return None
 11 |     elif PyBytes_Check(datum):
 12 |         datum = unicode(datum, 'UTF-8', 'strict')
 13 |     elif not PyUnicode_Check(datum):
 14 |         raise TypeError('Expected str instance or False')
 15 | 
 16 |     PyUnicode_READY(datum)
 17 |     if expect_ascii and not PyUnicode_IS_ASCII(datum):
 18 |         raise ValueError('Expected ASCII data')
 19 | 
 20 |     return datum
 21 | 
 22 | 
 23 | cdef object _options_reduce_arg(object key, object value, object default):
 24 |     if value != default:
 25 |         if value is not None:
 26 |             return key, value
 27 |         else:
 28 |             return key, False
 29 | 
 30 | 
 31 | cdef object _option_from_ascii(object name, object value, object default):
 32 |     if value == default:
 33 |         return
 34 |     elif value is None:
 35 |         return f'{name}=False'
 36 |     else:
 37 |         return f'{name}={value!r}'
 38 | 
 39 | 
 40 | cdef _options_from_ascii(Options self):
 41 |     return ', '.join(filter(bool, (
 42 |         _option_from_ascii('quotationmark', self.quotationmark, DEFAULT_QUOTATIONMARK),
 43 |         _option_from_ascii('tojson', self.tojson, None),
 44 |     )))
 45 | 
 46 | 
 47 | @final
 48 | @no_gc
 49 | @freelist(8)
 50 | @auto_pickle(False)
 51 | cdef class Options:
 52 |     '''
 53 |     Customizations for the :func:`encoder_*(...) <pyjson5.encoder>` function family.
 54 | 
 55 |     Immutable. Use :meth:`Options.update(**kw) <pyjson5.Options.update>` to create a **new** Options instance.
 56 | 
 57 |     Parameters
 58 |     ----------
 59 |     quotationmark : str|None
 60 |         * **str**: One character string that is used to surround strings.
 61 |         * **None**: Use default: ``'"'``.
 62 |     tojson : str|False|None
 63 |         * **str:** A special method to call on objects to return a custom JSON encoded string. Must return ASCII data!
 64 |         * **False:** No such member exists. (Default.)
 65 |         * **None:** Use default.
 66 |     mappingtypes : Iterable[type]|False|None
 67 |         * **Iterable[type]:** Classes that should be encoded to objects. Must be iterable over their keys, and implement ``__getitem__``.
 68 |         * **False:** There are no objects. Any object will be encoded as list of keys as in list(obj).
 69 |         * **None:** Use default: ``[collections.abc.Mapping]``.
 70 |     '''
 71 |     cdef readonly unicode quotationmark
 72 |     '''The creation argument ``quotationmark``.
 73 |     '''
 74 |     cdef readonly unicode tojson
 75 |     '''The creation argument ``tojson``.
 76 |     ``None`` if ``False`` was specified.
 77 |     '''
 78 |     cdef readonly tuple mappingtypes
 79 |     '''The creation argument ``mappingtypes``.
 80 |     ``()`` if ``False`` was specified.
 81 |     '''
 82 | 
 83 |     def __reduce__(self):
 84 |         cdef object args = tuple(filter(bool, (
 85 |             _options_reduce_arg('quotationmark', self.quotationmark, DEFAULT_QUOTATIONMARK),
 86 |             _options_reduce_arg('tojson', self.tojson, None),
 87 |             _options_reduce_arg('mappingtypes', self.mappingtypes, DEFAULT_MAPPINGTYPES),
 88 |         )))
 89 |         return (_UnpickleOptions if args else Options), args
 90 | 
 91 |     def __repr__(self):
 92 |         cdef object repr_options = _options_from_ascii(self)
 93 |         cdef object repr_cls = (
 94 |             ''
 95 |             if self.mappingtypes == DEFAULT_MAPPINGTYPES else
 96 |             repr(DEFAULT_MAPPINGTYPES)
 97 |         )
 98 |         return (f'Options('
 99 |             f'{repr_options}'
100 |             f'{repr_options and repr_cls and ", "}'
101 |             f'{repr_cls}'
102 |         ')')
103 | 
104 |     def __str__(self):
105 |         return self.__repr__()
106 | 
107 |     def __cinit__(self, *,
108 |                   quotationmark=None,
109 |                   tojson=None, posinfinity=None, neginfinity=None, nan=None,
110 |                   decimalformat=None, intformat=None,
111 |                   mappingtypes=None):
112 |         cdef object cls
113 |         cdef object ex
114 | 
115 |         if quotationmark is None:
116 |             quotationmark = DEFAULT_QUOTATIONMARK
117 |         if tojson is None:
118 |             tojson = DEFAULT_TOJSON
119 |         if mappingtypes is None:
120 |             mappingtypes = DEFAULT_MAPPINGTYPES
121 | 
122 |         self.quotationmark = _options_ascii(quotationmark)
123 |         self.tojson = _options_ascii(tojson, False)
124 | 
125 |         if self.quotationmark is None or PyUnicode_GET_LENGTH(self.quotationmark) != 1:
126 |             raise TypeError('quotationmark must be one ASCII character.')
127 | 
128 |         if mappingtypes is False:
129 |             self.mappingtypes = ()
130 |         else:
131 |             self.mappingtypes = tuple(mappingtypes)
132 |             for cls in self.mappingtypes:
133 |                 if not PyType_Check(cls):
134 |                     raise TypeError('mappingtypes must be a sequence of types '
135 |                                     'or False')
136 | 
137 |     def update(self, *args, **kw):
138 |         '''
139 |         Creates a new Options instance by modifying some members.
140 |         '''
141 |         if kw:
142 |             return _to_options(self, kw)
143 |         else:
144 |             return self
145 | 
146 | 
147 | cdef Options DEFAULT_OPTIONS_OBJECT = Options()
148 | 
149 | 
150 | def _UnpickleOptions(*args):
151 |     if args:
152 |         return _to_options(None, dict(args))
153 |     else:
154 |         return DEFAULT_OPTIONS_OBJECT
155 | 
156 | 
157 | cdef object _to_options(Options arg, dict kw):
158 |     if arg is None:
159 |         if not kw:
160 |             return DEFAULT_OPTIONS_OBJECT
161 |         else:
162 |             return Options(**kw)
163 |     elif not kw:
164 |         return arg
165 | 
166 |     PyDict_SetDefault(kw, 'quotationmark', (<Options> arg).quotationmark)
167 |     PyDict_SetDefault(kw, 'tojson', (<Options> arg).tojson)
168 |     PyDict_SetDefault(kw, 'mappingtypes', (<Options> arg).mappingtypes)
169 | 
170 |     return Options(**kw)
171 | 


--------------------------------------------------------------------------------
/src/_escape_dct.hpp:
--------------------------------------------------------------------------------
  1 | const EscapeDct::Items EscapeDct::items = {
  2 |    { 6, '\\', 'u', '0', '0', '0', '0', 0 },  /* 0x00 '\x00' */
  3 |    { 6, '\\', 'u', '0', '0', '0', '1', 0 },  /* 0x01 '\x01' */
  4 |    { 6, '\\', 'u', '0', '0', '0', '2', 0 },  /* 0x02 '\x02' */
  5 |    { 6, '\\', 'u', '0', '0', '0', '3', 0 },  /* 0x03 '\x03' */
  6 |    { 6, '\\', 'u', '0', '0', '0', '4', 0 },  /* 0x04 '\x04' */
  7 |    { 6, '\\', 'u', '0', '0', '0', '5', 0 },  /* 0x05 '\x05' */
  8 |    { 6, '\\', 'u', '0', '0', '0', '6', 0 },  /* 0x06 '\x06' */
  9 |    { 6, '\\', 'u', '0', '0', '0', '7', 0 },  /* 0x07 '\x07' */
 10 |    { 2, '\\', 'b', 0, 0, 0, 0, 0         },  /* 0x08 '\x08' */
 11 |    { 2, '\\', 't', 0, 0, 0, 0, 0         },  /* 0x09 '\t' */
 12 |    { 2, '\\', 'n', 0, 0, 0, 0, 0         },  /* 0x0a '\n' */
 13 |    { 6, '\\', 'u', '0', '0', '0', 'b', 0 },  /* 0x0b '\x0b' */
 14 |    { 2, '\\', 'f', 0, 0, 0, 0, 0         },  /* 0x0c '\x0c' */
 15 |    { 2, '\\', 'r', 0, 0, 0, 0, 0         },  /* 0x0d '\r' */
 16 |    { 6, '\\', 'u', '0', '0', '0', 'e', 0 },  /* 0x0e '\x0e' */
 17 |    { 6, '\\', 'u', '0', '0', '0', 'f', 0 },  /* 0x0f '\x0f' */
 18 |    { 6, '\\', 'u', '0', '0', '1', '0', 0 },  /* 0x10 '\x10' */
 19 |    { 6, '\\', 'u', '0', '0', '1', '1', 0 },  /* 0x11 '\x11' */
 20 |    { 6, '\\', 'u', '0', '0', '1', '2', 0 },  /* 0x12 '\x12' */
 21 |    { 6, '\\', 'u', '0', '0', '1', '3', 0 },  /* 0x13 '\x13' */
 22 |    { 6, '\\', 'u', '0', '0', '1', '4', 0 },  /* 0x14 '\x14' */
 23 |    { 6, '\\', 'u', '0', '0', '1', '5', 0 },  /* 0x15 '\x15' */
 24 |    { 6, '\\', 'u', '0', '0', '1', '6', 0 },  /* 0x16 '\x16' */
 25 |    { 6, '\\', 'u', '0', '0', '1', '7', 0 },  /* 0x17 '\x17' */
 26 |    { 6, '\\', 'u', '0', '0', '1', '8', 0 },  /* 0x18 '\x18' */
 27 |    { 6, '\\', 'u', '0', '0', '1', '9', 0 },  /* 0x19 '\x19' */
 28 |    { 6, '\\', 'u', '0', '0', '1', 'a', 0 },  /* 0x1a '\x1a' */
 29 |    { 6, '\\', 'u', '0', '0', '1', 'b', 0 },  /* 0x1b '\x1b' */
 30 |    { 6, '\\', 'u', '0', '0', '1', 'c', 0 },  /* 0x1c '\x1c' */
 31 |    { 6, '\\', 'u', '0', '0', '1', 'd', 0 },  /* 0x1d '\x1d' */
 32 |    { 6, '\\', 'u', '0', '0', '1', 'e', 0 },  /* 0x1e '\x1e' */
 33 |    { 6, '\\', 'u', '0', '0', '1', 'f', 0 },  /* 0x1f '\x1f' */
 34 |    { 1, ' ', 0, 0, 0, 0, 0, 0            },  /* 0x20 ' ' */
 35 |    { 1, '!', 0, 0, 0, 0, 0, 0            },  /* 0x21 '!' */
 36 |    { 2, '\\', '"', 0, 0, 0, 0, 0         },  /* 0x22 '"' */
 37 |    { 1, '#', 0, 0, 0, 0, 0, 0            },  /* 0x23 '#' */
 38 |    { 1, '$', 0, 0, 0, 0, 0, 0            },  /* 0x24 '$' */
 39 |    { 1, '%', 0, 0, 0, 0, 0, 0            },  /* 0x25 '%' */
 40 |    { 6, '\\', 'u', '0', '0', '2', '6', 0 },  /* 0x26 '&' */
 41 |    { 6, '\\', 'u', '0', '0', '2', '7', 0 },  /* 0x27 "'" */
 42 |    { 1, '(', 0, 0, 0, 0, 0, 0            },  /* 0x28 '(' */
 43 |    { 1, ')', 0, 0, 0, 0, 0, 0            },  /* 0x29 ')' */
 44 |    { 1, '*', 0, 0, 0, 0, 0, 0            },  /* 0x2a '*' */
 45 |    { 1, '+', 0, 0, 0, 0, 0, 0            },  /* 0x2b '+' */
 46 |    { 1, ',', 0, 0, 0, 0, 0, 0            },  /* 0x2c ',' */
 47 |    { 1, '-', 0, 0, 0, 0, 0, 0            },  /* 0x2d '-' */
 48 |    { 1, '.', 0, 0, 0, 0, 0, 0            },  /* 0x2e '.' */
 49 |    { 1, '/', 0, 0, 0, 0, 0, 0            },  /* 0x2f '/' */
 50 |    { 1, '0', 0, 0, 0, 0, 0, 0            },  /* 0x30 '0' */
 51 |    { 1, '1', 0, 0, 0, 0, 0, 0            },  /* 0x31 '1' */
 52 |    { 1, '2', 0, 0, 0, 0, 0, 0            },  /* 0x32 '2' */
 53 |    { 1, '3', 0, 0, 0, 0, 0, 0            },  /* 0x33 '3' */
 54 |    { 1, '4', 0, 0, 0, 0, 0, 0            },  /* 0x34 '4' */
 55 |    { 1, '5', 0, 0, 0, 0, 0, 0            },  /* 0x35 '5' */
 56 |    { 1, '6', 0, 0, 0, 0, 0, 0            },  /* 0x36 '6' */
 57 |    { 1, '7', 0, 0, 0, 0, 0, 0            },  /* 0x37 '7' */
 58 |    { 1, '8', 0, 0, 0, 0, 0, 0            },  /* 0x38 '8' */
 59 |    { 1, '9', 0, 0, 0, 0, 0, 0            },  /* 0x39 '9' */
 60 |    { 1, ':', 0, 0, 0, 0, 0, 0            },  /* 0x3a ':' */
 61 |    { 1, ';', 0, 0, 0, 0, 0, 0            },  /* 0x3b ';' */
 62 |    { 6, '\\', 'u', '0', '0', '3', 'c', 0 },  /* 0x3c '<' */
 63 |    { 1, '=', 0, 0, 0, 0, 0, 0            },  /* 0x3d '=' */
 64 |    { 6, '\\', 'u', '0', '0', '3', 'e', 0 },  /* 0x3e '>' */
 65 |    { 1, '?', 0, 0, 0, 0, 0, 0            },  /* 0x3f '?' */
 66 |    { 1, '@', 0, 0, 0, 0, 0, 0            },  /* 0x40 '@' */
 67 |    { 1, 'A', 0, 0, 0, 0, 0, 0            },  /* 0x41 'A' */
 68 |    { 1, 'B', 0, 0, 0, 0, 0, 0            },  /* 0x42 'B' */
 69 |    { 1, 'C', 0, 0, 0, 0, 0, 0            },  /* 0x43 'C' */
 70 |    { 1, 'D', 0, 0, 0, 0, 0, 0            },  /* 0x44 'D' */
 71 |    { 1, 'E', 0, 0, 0, 0, 0, 0            },  /* 0x45 'E' */
 72 |    { 1, 'F', 0, 0, 0, 0, 0, 0            },  /* 0x46 'F' */
 73 |    { 1, 'G', 0, 0, 0, 0, 0, 0            },  /* 0x47 'G' */
 74 |    { 1, 'H', 0, 0, 0, 0, 0, 0            },  /* 0x48 'H' */
 75 |    { 1, 'I', 0, 0, 0, 0, 0, 0            },  /* 0x49 'I' */
 76 |    { 1, 'J', 0, 0, 0, 0, 0, 0            },  /* 0x4a 'J' */
 77 |    { 1, 'K', 0, 0, 0, 0, 0, 0            },  /* 0x4b 'K' */
 78 |    { 1, 'L', 0, 0, 0, 0, 0, 0            },  /* 0x4c 'L' */
 79 |    { 1, 'M', 0, 0, 0, 0, 0, 0            },  /* 0x4d 'M' */
 80 |    { 1, 'N', 0, 0, 0, 0, 0, 0            },  /* 0x4e 'N' */
 81 |    { 1, 'O', 0, 0, 0, 0, 0, 0            },  /* 0x4f 'O' */
 82 |    { 1, 'P', 0, 0, 0, 0, 0, 0            },  /* 0x50 'P' */
 83 |    { 1, 'Q', 0, 0, 0, 0, 0, 0            },  /* 0x51 'Q' */
 84 |    { 1, 'R', 0, 0, 0, 0, 0, 0            },  /* 0x52 'R' */
 85 |    { 1, 'S', 0, 0, 0, 0, 0, 0            },  /* 0x53 'S' */
 86 |    { 1, 'T', 0, 0, 0, 0, 0, 0            },  /* 0x54 'T' */
 87 |    { 1, 'U', 0, 0, 0, 0, 0, 0            },  /* 0x55 'U' */
 88 |    { 1, 'V', 0, 0, 0, 0, 0, 0            },  /* 0x56 'V' */
 89 |    { 1, 'W', 0, 0, 0, 0, 0, 0            },  /* 0x57 'W' */
 90 |    { 1, 'X', 0, 0, 0, 0, 0, 0            },  /* 0x58 'X' */
 91 |    { 1, 'Y', 0, 0, 0, 0, 0, 0            },  /* 0x59 'Y' */
 92 |    { 1, 'Z', 0, 0, 0, 0, 0, 0            },  /* 0x5a 'Z' */
 93 |    { 1, '[', 0, 0, 0, 0, 0, 0            },  /* 0x5b '[' */
 94 |    { 2, '\\', '\\', 0, 0, 0, 0, 0        },  /* 0x5c '\\' */
 95 |    { 1, ']', 0, 0, 0, 0, 0, 0            },  /* 0x5d ']' */
 96 |    { 1, '^', 0, 0, 0, 0, 0, 0            },  /* 0x5e '^' */
 97 |    { 1, '_', 0, 0, 0, 0, 0, 0            },  /* 0x5f '_' */
 98 |    { 1, '`', 0, 0, 0, 0, 0, 0            },  /* 0x60 '`' */
 99 |    { 1, 'a', 0, 0, 0, 0, 0, 0            },  /* 0x61 'a' */
100 |    { 1, 'b', 0, 0, 0, 0, 0, 0            },  /* 0x62 'b' */
101 |    { 1, 'c', 0, 0, 0, 0, 0, 0            },  /* 0x63 'c' */
102 |    { 1, 'd', 0, 0, 0, 0, 0, 0            },  /* 0x64 'd' */
103 |    { 1, 'e', 0, 0, 0, 0, 0, 0            },  /* 0x65 'e' */
104 |    { 1, 'f', 0, 0, 0, 0, 0, 0            },  /* 0x66 'f' */
105 |    { 1, 'g', 0, 0, 0, 0, 0, 0            },  /* 0x67 'g' */
106 |    { 1, 'h', 0, 0, 0, 0, 0, 0            },  /* 0x68 'h' */
107 |    { 1, 'i', 0, 0, 0, 0, 0, 0            },  /* 0x69 'i' */
108 |    { 1, 'j', 0, 0, 0, 0, 0, 0            },  /* 0x6a 'j' */
109 |    { 1, 'k', 0, 0, 0, 0, 0, 0            },  /* 0x6b 'k' */
110 |    { 1, 'l', 0, 0, 0, 0, 0, 0            },  /* 0x6c 'l' */
111 |    { 1, 'm', 0, 0, 0, 0, 0, 0            },  /* 0x6d 'm' */
112 |    { 1, 'n', 0, 0, 0, 0, 0, 0            },  /* 0x6e 'n' */
113 |    { 1, 'o', 0, 0, 0, 0, 0, 0            },  /* 0x6f 'o' */
114 |    { 1, 'p', 0, 0, 0, 0, 0, 0            },  /* 0x70 'p' */
115 |    { 1, 'q', 0, 0, 0, 0, 0, 0            },  /* 0x71 'q' */
116 |    { 1, 'r', 0, 0, 0, 0, 0, 0            },  /* 0x72 'r' */
117 |    { 1, 's', 0, 0, 0, 0, 0, 0            },  /* 0x73 's' */
118 |    { 1, 't', 0, 0, 0, 0, 0, 0            },  /* 0x74 't' */
119 |    { 1, 'u', 0, 0, 0, 0, 0, 0            },  /* 0x75 'u' */
120 |    { 1, 'v', 0, 0, 0, 0, 0, 0            },  /* 0x76 'v' */
121 |    { 1, 'w', 0, 0, 0, 0, 0, 0            },  /* 0x77 'w' */
122 |    { 1, 'x', 0, 0, 0, 0, 0, 0            },  /* 0x78 'x' */
123 |    { 1, 'y', 0, 0, 0, 0, 0, 0            },  /* 0x79 'y' */
124 |    { 1, 'z', 0, 0, 0, 0, 0, 0            },  /* 0x7a 'z' */
125 |    { 1, '{', 0, 0, 0, 0, 0, 0            },  /* 0x7b '{' */
126 |    { 1, '|', 0, 0, 0, 0, 0, 0            },  /* 0x7c '|' */
127 |    { 1, '}', 0, 0, 0, 0, 0, 0            },  /* 0x7d '}' */
128 |    { 1, '~', 0, 0, 0, 0, 0, 0            },  /* 0x7e '~' */
129 |    { 6, '\\', 'u', '0', '0', '7', 'f', 0 },  /* 0x7f '\x7f' */
130 |    { 6, '\\', 'u', '0', '0', '8', '0', 0 },  /* 0x80 '\x80' */
131 |    { 6, '\\', 'u', '0', '0', '8', '1', 0 },  /* 0x81 '\x81' */
132 |    { 6, '\\', 'u', '0', '0', '8', '2', 0 },  /* 0x82 '\x82' */
133 |    { 6, '\\', 'u', '0', '0', '8', '3', 0 },  /* 0x83 '\x83' */
134 |    { 6, '\\', 'u', '0', '0', '8', '4', 0 },  /* 0x84 '\x84' */
135 |    { 6, '\\', 'u', '0', '0', '8', '5', 0 },  /* 0x85 '\x85' */
136 |    { 6, '\\', 'u', '0', '0', '8', '6', 0 },  /* 0x86 '\x86' */
137 |    { 6, '\\', 'u', '0', '0', '8', '7', 0 },  /* 0x87 '\x87' */
138 |    { 6, '\\', 'u', '0', '0', '8', '8', 0 },  /* 0x88 '\x88' */
139 |    { 6, '\\', 'u', '0', '0', '8', '9', 0 },  /* 0x89 '\x89' */
140 |    { 6, '\\', 'u', '0', '0', '8', 'a', 0 },  /* 0x8a '\x8a' */
141 |    { 6, '\\', 'u', '0', '0', '8', 'b', 0 },  /* 0x8b '\x8b' */
142 |    { 6, '\\', 'u', '0', '0', '8', 'c', 0 },  /* 0x8c '\x8c' */
143 |    { 6, '\\', 'u', '0', '0', '8', 'd', 0 },  /* 0x8d '\x8d' */
144 |    { 6, '\\', 'u', '0', '0', '8', 'e', 0 },  /* 0x8e '\x8e' */
145 |    { 6, '\\', 'u', '0', '0', '8', 'f', 0 },  /* 0x8f '\x8f' */
146 |    { 6, '\\', 'u', '0', '0', '9', '0', 0 },  /* 0x90 '\x90' */
147 |    { 6, '\\', 'u', '0', '0', '9', '1', 0 },  /* 0x91 '\x91' */
148 |    { 6, '\\', 'u', '0', '0', '9', '2', 0 },  /* 0x92 '\x92' */
149 |    { 6, '\\', 'u', '0', '0', '9', '3', 0 },  /* 0x93 '\x93' */
150 |    { 6, '\\', 'u', '0', '0', '9', '4', 0 },  /* 0x94 '\x94' */
151 |    { 6, '\\', 'u', '0', '0', '9', '5', 0 },  /* 0x95 '\x95' */
152 |    { 6, '\\', 'u', '0', '0', '9', '6', 0 },  /* 0x96 '\x96' */
153 |    { 6, '\\', 'u', '0', '0', '9', '7', 0 },  /* 0x97 '\x97' */
154 |    { 6, '\\', 'u', '0', '0', '9', '8', 0 },  /* 0x98 '\x98' */
155 |    { 6, '\\', 'u', '0', '0', '9', '9', 0 },  /* 0x99 '\x99' */
156 |    { 6, '\\', 'u', '0', '0', '9', 'a', 0 },  /* 0x9a '\x9a' */
157 |    { 6, '\\', 'u', '0', '0', '9', 'b', 0 },  /* 0x9b '\x9b' */
158 |    { 6, '\\', 'u', '0', '0', '9', 'c', 0 },  /* 0x9c '\x9c' */
159 |    { 6, '\\', 'u', '0', '0', '9', 'd', 0 },  /* 0x9d '\x9d' */
160 |    { 6, '\\', 'u', '0', '0', '9', 'e', 0 },  /* 0x9e '\x9e' */
161 |    { 6, '\\', 'u', '0', '0', '9', 'f', 0 },  /* 0x9f '\x9f' */
162 |    { 6, '\\', 'u', '0', '0', 'a', '0', 0 },  /* 0xa0 '\xa0' */
163 |    { 6, '\\', 'u', '0', '0', 'a', '1', 0 },  /* 0xa1 '¡' */
164 |    { 6, '\\', 'u', '0', '0', 'a', '2', 0 },  /* 0xa2 '¢' */
165 |    { 6, '\\', 'u', '0', '0', 'a', '3', 0 },  /* 0xa3 '£' */
166 |    { 6, '\\', 'u', '0', '0', 'a', '4', 0 },  /* 0xa4 '¤' */
167 |    { 6, '\\', 'u', '0', '0', 'a', '5', 0 },  /* 0xa5 '¥' */
168 |    { 6, '\\', 'u', '0', '0', 'a', '6', 0 },  /* 0xa6 '¦' */
169 |    { 6, '\\', 'u', '0', '0', 'a', '7', 0 },  /* 0xa7 '§' */
170 |    { 6, '\\', 'u', '0', '0', 'a', '8', 0 },  /* 0xa8 '¨' */
171 |    { 6, '\\', 'u', '0', '0', 'a', '9', 0 },  /* 0xa9 '©' */
172 |    { 6, '\\', 'u', '0', '0', 'a', 'a', 0 },  /* 0xaa 'ª' */
173 |    { 6, '\\', 'u', '0', '0', 'a', 'b', 0 },  /* 0xab '«' */
174 |    { 6, '\\', 'u', '0', '0', 'a', 'c', 0 },  /* 0xac '¬' */
175 |    { 6, '\\', 'u', '0', '0', 'a', 'd', 0 },  /* 0xad '\xad' */
176 |    { 6, '\\', 'u', '0', '0', 'a', 'e', 0 },  /* 0xae '®' */
177 |    { 6, '\\', 'u', '0', '0', 'a', 'f', 0 },  /* 0xaf '¯' */
178 |    { 6, '\\', 'u', '0', '0', 'b', '0', 0 },  /* 0xb0 '°' */
179 |    { 6, '\\', 'u', '0', '0', 'b', '1', 0 },  /* 0xb1 '±' */
180 |    { 6, '\\', 'u', '0', '0', 'b', '2', 0 },  /* 0xb2 '²' */
181 |    { 6, '\\', 'u', '0', '0', 'b', '3', 0 },  /* 0xb3 '³' */
182 |    { 6, '\\', 'u', '0', '0', 'b', '4', 0 },  /* 0xb4 '´' */
183 |    { 6, '\\', 'u', '0', '0', 'b', '5', 0 },  /* 0xb5 'µ' */
184 |    { 6, '\\', 'u', '0', '0', 'b', '6', 0 },  /* 0xb6 '¶' */
185 |    { 6, '\\', 'u', '0', '0', 'b', '7', 0 },  /* 0xb7 '·' */
186 |    { 6, '\\', 'u', '0', '0', 'b', '8', 0 },  /* 0xb8 '¸' */
187 |    { 6, '\\', 'u', '0', '0', 'b', '9', 0 },  /* 0xb9 '¹' */
188 |    { 6, '\\', 'u', '0', '0', 'b', 'a', 0 },  /* 0xba 'º' */
189 |    { 6, '\\', 'u', '0', '0', 'b', 'b', 0 },  /* 0xbb '»' */
190 |    { 6, '\\', 'u', '0', '0', 'b', 'c', 0 },  /* 0xbc '¼' */
191 |    { 6, '\\', 'u', '0', '0', 'b', 'd', 0 },  /* 0xbd '½' */
192 |    { 6, '\\', 'u', '0', '0', 'b', 'e', 0 },  /* 0xbe '¾' */
193 |    { 6, '\\', 'u', '0', '0', 'b', 'f', 0 },  /* 0xbf '¿' */
194 |    { 6, '\\', 'u', '0', '0', 'c', '0', 0 },  /* 0xc0 'À' */
195 |    { 6, '\\', 'u', '0', '0', 'c', '1', 0 },  /* 0xc1 'Á' */
196 |    { 6, '\\', 'u', '0', '0', 'c', '2', 0 },  /* 0xc2 'Â' */
197 |    { 6, '\\', 'u', '0', '0', 'c', '3', 0 },  /* 0xc3 'Ã' */
198 |    { 6, '\\', 'u', '0', '0', 'c', '4', 0 },  /* 0xc4 'Ä' */
199 |    { 6, '\\', 'u', '0', '0', 'c', '5', 0 },  /* 0xc5 'Å' */
200 |    { 6, '\\', 'u', '0', '0', 'c', '6', 0 },  /* 0xc6 'Æ' */
201 |    { 6, '\\', 'u', '0', '0', 'c', '7', 0 },  /* 0xc7 'Ç' */
202 |    { 6, '\\', 'u', '0', '0', 'c', '8', 0 },  /* 0xc8 'È' */
203 |    { 6, '\\', 'u', '0', '0', 'c', '9', 0 },  /* 0xc9 'É' */
204 |    { 6, '\\', 'u', '0', '0', 'c', 'a', 0 },  /* 0xca 'Ê' */
205 |    { 6, '\\', 'u', '0', '0', 'c', 'b', 0 },  /* 0xcb 'Ë' */
206 |    { 6, '\\', 'u', '0', '0', 'c', 'c', 0 },  /* 0xcc 'Ì' */
207 |    { 6, '\\', 'u', '0', '0', 'c', 'd', 0 },  /* 0xcd 'Í' */
208 |    { 6, '\\', 'u', '0', '0', 'c', 'e', 0 },  /* 0xce 'Î' */
209 |    { 6, '\\', 'u', '0', '0', 'c', 'f', 0 },  /* 0xcf 'Ï' */
210 |    { 6, '\\', 'u', '0', '0', 'd', '0', 0 },  /* 0xd0 'Ð' */
211 |    { 6, '\\', 'u', '0', '0', 'd', '1', 0 },  /* 0xd1 'Ñ' */
212 |    { 6, '\\', 'u', '0', '0', 'd', '2', 0 },  /* 0xd2 'Ò' */
213 |    { 6, '\\', 'u', '0', '0', 'd', '3', 0 },  /* 0xd3 'Ó' */
214 |    { 6, '\\', 'u', '0', '0', 'd', '4', 0 },  /* 0xd4 'Ô' */
215 |    { 6, '\\', 'u', '0', '0', 'd', '5', 0 },  /* 0xd5 'Õ' */
216 |    { 6, '\\', 'u', '0', '0', 'd', '6', 0 },  /* 0xd6 'Ö' */
217 |    { 6, '\\', 'u', '0', '0', 'd', '7', 0 },  /* 0xd7 '×' */
218 |    { 6, '\\', 'u', '0', '0', 'd', '8', 0 },  /* 0xd8 'Ø' */
219 |    { 6, '\\', 'u', '0', '0', 'd', '9', 0 },  /* 0xd9 'Ù' */
220 |    { 6, '\\', 'u', '0', '0', 'd', 'a', 0 },  /* 0xda 'Ú' */
221 |    { 6, '\\', 'u', '0', '0', 'd', 'b', 0 },  /* 0xdb 'Û' */
222 |    { 6, '\\', 'u', '0', '0', 'd', 'c', 0 },  /* 0xdc 'Ü' */
223 |    { 6, '\\', 'u', '0', '0', 'd', 'd', 0 },  /* 0xdd 'Ý' */
224 |    { 6, '\\', 'u', '0', '0', 'd', 'e', 0 },  /* 0xde 'Þ' */
225 |    { 6, '\\', 'u', '0', '0', 'd', 'f', 0 },  /* 0xdf 'ß' */
226 |    { 6, '\\', 'u', '0', '0', 'e', '0', 0 },  /* 0xe0 'à' */
227 |    { 6, '\\', 'u', '0', '0', 'e', '1', 0 },  /* 0xe1 'á' */
228 |    { 6, '\\', 'u', '0', '0', 'e', '2', 0 },  /* 0xe2 'â' */
229 |    { 6, '\\', 'u', '0', '0', 'e', '3', 0 },  /* 0xe3 'ã' */
230 |    { 6, '\\', 'u', '0', '0', 'e', '4', 0 },  /* 0xe4 'ä' */
231 |    { 6, '\\', 'u', '0', '0', 'e', '5', 0 },  /* 0xe5 'å' */
232 |    { 6, '\\', 'u', '0', '0', 'e', '6', 0 },  /* 0xe6 'æ' */
233 |    { 6, '\\', 'u', '0', '0', 'e', '7', 0 },  /* 0xe7 'ç' */
234 |    { 6, '\\', 'u', '0', '0', 'e', '8', 0 },  /* 0xe8 'è' */
235 |    { 6, '\\', 'u', '0', '0', 'e', '9', 0 },  /* 0xe9 'é' */
236 |    { 6, '\\', 'u', '0', '0', 'e', 'a', 0 },  /* 0xea 'ê' */
237 |    { 6, '\\', 'u', '0', '0', 'e', 'b', 0 },  /* 0xeb 'ë' */
238 |    { 6, '\\', 'u', '0', '0', 'e', 'c', 0 },  /* 0xec 'ì' */
239 |    { 6, '\\', 'u', '0', '0', 'e', 'd', 0 },  /* 0xed 'í' */
240 |    { 6, '\\', 'u', '0', '0', 'e', 'e', 0 },  /* 0xee 'î' */
241 |    { 6, '\\', 'u', '0', '0', 'e', 'f', 0 },  /* 0xef 'ï' */
242 |    { 6, '\\', 'u', '0', '0', 'f', '0', 0 },  /* 0xf0 'ð' */
243 |    { 6, '\\', 'u', '0', '0', 'f', '1', 0 },  /* 0xf1 'ñ' */
244 |    { 6, '\\', 'u', '0', '0', 'f', '2', 0 },  /* 0xf2 'ò' */
245 |    { 6, '\\', 'u', '0', '0', 'f', '3', 0 },  /* 0xf3 'ó' */
246 |    { 6, '\\', 'u', '0', '0', 'f', '4', 0 },  /* 0xf4 'ô' */
247 |    { 6, '\\', 'u', '0', '0', 'f', '5', 0 },  /* 0xf5 'õ' */
248 |    { 6, '\\', 'u', '0', '0', 'f', '6', 0 },  /* 0xf6 'ö' */
249 |    { 6, '\\', 'u', '0', '0', 'f', '7', 0 },  /* 0xf7 '÷' */
250 |    { 6, '\\', 'u', '0', '0', 'f', '8', 0 },  /* 0xf8 'ø' */
251 |    { 6, '\\', 'u', '0', '0', 'f', '9', 0 },  /* 0xf9 'ù' */
252 |    { 6, '\\', 'u', '0', '0', 'f', 'a', 0 },  /* 0xfa 'ú' */
253 |    { 6, '\\', 'u', '0', '0', 'f', 'b', 0 },  /* 0xfb 'û' */
254 |    { 6, '\\', 'u', '0', '0', 'f', 'c', 0 },  /* 0xfc 'ü' */
255 |    { 6, '\\', 'u', '0', '0', 'f', 'd', 0 },  /* 0xfd 'ý' */
256 |    { 6, '\\', 'u', '0', '0', 'f', 'e', 0 },  /* 0xfe 'þ' */
257 |    { 6, '\\', 'u', '0', '0', 'f', 'f', 0 },  /* 0xff 'ÿ' */
258 | };
259 | const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x500000c4ffffffff);
260 | const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x8000000010000000);
261 | 


--------------------------------------------------------------------------------
/src/_exceptions.pyx:
--------------------------------------------------------------------------------
 1 | @auto_pickle(False)
 2 | cdef class Json5Exception(Exception):
 3 |     '''
 4 |     Base class of any exception thrown by PyJSON5.
 5 |     '''
 6 |     def __init__(self, message=None, *args):
 7 |         super().__init__(message, *args)
 8 | 
 9 |     @property
10 |     def message(self):
11 |         '''Human readable error description'''
12 |         return self.args[0]
13 | 


--------------------------------------------------------------------------------
/src/_exceptions_decoder.pyx:
--------------------------------------------------------------------------------
 1 | @auto_pickle(False)
 2 | cdef class Json5DecoderException(Json5Exception):
 3 |     '''
 4 |     Base class of any exception thrown by the parser.
 5 |     '''
 6 |     def __init__(self, message=None, result=None, *args):
 7 |         super().__init__(message, result, *args)
 8 | 
 9 |     @property
10 |     def result(self):
11 |         '''Deserialized data up until now.'''
12 |         return self.args[1]
13 | 
14 | 
15 | @final
16 | @auto_pickle(False)
17 | cdef class Json5NestingTooDeep(Json5DecoderException):
18 |     '''
19 |     The maximum nesting level on the input data was exceeded.
20 |     '''
21 | 
22 | 
23 | @final
24 | @auto_pickle(False)
25 | cdef class Json5EOF(Json5DecoderException):
26 |     '''
27 |     The input ended prematurely.
28 |     '''
29 | 
30 | 
31 | @final
32 | @auto_pickle(False)
33 | cdef class Json5IllegalCharacter(Json5DecoderException):
34 |     '''
35 |     An unexpected character was encountered.
36 |     '''
37 |     def __init__(self, message=None, result=None, character=None, *args):
38 |         super().__init__(message, result, character, *args)
39 | 
40 |     @property
41 |     def character(self):
42 |         '''
43 |         Illegal character.
44 |         '''
45 |         return self.args[2]
46 | 
47 | 
48 | @final
49 | @auto_pickle(False)
50 | cdef class Json5ExtraData(Json5DecoderException):
51 |     '''
52 |     The input contained extranous data.
53 |     '''
54 |     def __init__(self, message=None, result=None, character=None, *args):
55 |         super().__init__(message, result, character, *args)
56 | 
57 |     @property
58 |     def character(self):
59 |         '''
60 |         Extranous character.
61 |         '''
62 |         return self.args[2]
63 | 
64 | 
65 | @final
66 | @auto_pickle(False)
67 | cdef class Json5IllegalType(Json5DecoderException):
68 |     '''
69 |     The user supplied callback function returned illegal data.
70 |     '''
71 |     def __init__(self, message=None, result=None, value=None, *args):
72 |         super().__init__(message, result, value, *args)
73 | 
74 |     @property
75 |     def value(self):
76 |         '''
77 |         Value that caused the problem.
78 |         '''
79 |         return self.args[2]
80 | 
81 | 
82 | @final
83 | @auto_pickle(False)
84 | cdef class _DecoderException(Exception):
85 |     cdef object cls
86 |     cdef object msg
87 |     cdef object extra
88 |     cdef object result
89 | 
90 |     def __cinit__(self, cls, msg, extra, result):
91 |         self.cls = cls
92 |         self.msg = msg
93 |         self.extra = extra
94 |         self.result = result
95 | 


--------------------------------------------------------------------------------
/src/_exceptions_encoder.pyx:
--------------------------------------------------------------------------------
 1 | @auto_pickle(False)
 2 | cdef class Json5EncoderException(Json5Exception):
 3 |     '''
 4 |     Base class of any exception thrown by the serializer.
 5 |     '''
 6 | 
 7 | 
 8 | @auto_pickle(False)
 9 | cdef class Json5UnstringifiableType(Json5EncoderException):
10 |     '''
11 |     The encoder was not able to stringify the input, or it was told not to by the supplied ``Options``.
12 |     '''
13 |     def __init__(self, message=None, unstringifiable=None):
14 |         super().__init__(message, unstringifiable)
15 | 
16 |     @property
17 |     def unstringifiable(self):
18 |         '''
19 |         The value that caused the problem.
20 |         '''
21 |         return self.args[1]
22 | 


--------------------------------------------------------------------------------
/src/_exports.pyx:
--------------------------------------------------------------------------------
  1 | global DEFAULT_MAX_NESTING_LEVEL, __all__, __author__, __doc__, __license__, __version__
  2 | 
  3 | DEFAULT_MAX_NESTING_LEVEL = 32
  4 | '''
  5 | Maximum nesting level of data to decode if no ``maxdepth`` argument is specified.
  6 | '''
  7 | 
  8 | __version__ = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, VERSION, VERSION_LENGTH)
  9 | '''
 10 | Current library version.
 11 | '''
 12 | 
 13 | 
 14 | def decode(object data, object maxdepth=None, object some=False):
 15 |     '''
 16 |     Decodes JSON5 serialized data from an :class:`str` object.
 17 | 
 18 |     .. code:: python
 19 | 
 20 |         decode('["Hello", "world!"]') == ['Hello', 'world!']
 21 | 
 22 |     Parameters
 23 |     ----------
 24 |     data : str
 25 |         JSON5 serialized data
 26 |     maxdepth : Optional[int]
 27 |         Maximum nesting level before are the parsing is aborted.
 28 | 
 29 |         * If ``None`` is supplied, then the value of the global variable \
 30 |         ``DEFAULT_MAX_NESTING_LEVEL`` is used instead.
 31 |         * If the value is ``0``, then only literals are accepted, e.g. ``false``, \
 32 |         ``47.11``, or ``"string"``.
 33 |         * If the value is negative, then the any nesting level is allowed until \
 34 |         Python's recursion limit is hit.
 35 |     some : bool
 36 |         Allow trailing junk.
 37 | 
 38 |     Raises
 39 |     ------
 40 |     Json5DecoderException
 41 |         An exception occured while decoding.
 42 |     TypeError
 43 |         An argument had a wrong type.
 44 | 
 45 |     Returns
 46 |     -------
 47 |     object
 48 |         Deserialized data.
 49 |     '''
 50 |     if maxdepth is None:
 51 |         maxdepth = DEFAULT_MAX_NESTING_LEVEL
 52 | 
 53 |     if isinstance(data, unicode):
 54 |         return _decode_unicode(data, maxdepth, bool(some))
 55 |     else:
 56 |         raise TypeError(f'type(data) == {type(data)!r} not supported')
 57 | 
 58 | 
 59 | def decode_latin1(object data, object maxdepth=None, object some=False):
 60 |     '''
 61 |     Decodes JSON5 serialized data from a :class:`bytes` object.
 62 | 
 63 |     .. code:: python
 64 | 
 65 |         decode_latin1(b'["Hello", "world!"]') == ['Hello', 'world!']
 66 | 
 67 |     Parameters
 68 |     ----------
 69 |     data : bytes
 70 |         JSON5 serialized data, encoded as Latin-1 or ASCII.
 71 |     maxdepth : Optional[int]
 72 |         see :func:`decode(…) <pyjson5.decode>`
 73 |     some : bool
 74 |         see :func:`decode(…) <pyjson5.decode>`
 75 | 
 76 |     Raises
 77 |     ------
 78 |     Json5DecoderException
 79 |         An exception occured while decoding.
 80 |     TypeError
 81 |         An argument had a wrong type.
 82 | 
 83 |     Returns
 84 |     -------
 85 |     object
 86 |         see :func:`decode(…) <pyjson5.decode>`
 87 |     '''
 88 |     return decode_buffer(data, maxdepth, bool(some), 1)
 89 | 
 90 | 
 91 | def decode_utf8(object data, object maxdepth=None, object some=False):
 92 |     '''
 93 |     Decodes JSON5 serialized data from a :class:`bytes` object.
 94 | 
 95 |     .. code:: python
 96 | 
 97 |         decode_utf8(b'["H\\xe2\\x82\\xacllo", "w\\xc3\\xb6rld!"]') == ['H€llo', 'wörld!']
 98 | 
 99 |     Parameters
100 |     ----------
101 |     data : bytes
102 |         JSON5 serialized data, encoded as UTF-8 or ASCII.
103 |     maxdepth : Optional[int]
104 |         see :func:`decode(…) <pyjson5.decode>`
105 |     some : bool
106 |         see :func:`decode(…) <pyjson5.decode>`
107 | 
108 |     Raises
109 |     ------
110 |     Json5DecoderException
111 |         An exception occured while decoding.
112 |     TypeError
113 |         An argument had a wrong type.
114 | 
115 |     Returns
116 |     -------
117 |     object
118 |         see :func:`decode(…) <pyjson5.decode>`
119 |     '''
120 |     return decode_buffer(data, maxdepth, bool(some), 0)
121 | 
122 | 
123 | def decode_buffer(object obj, object maxdepth=None, object some=False,
124 |                   object wordlength=None):
125 |     '''
126 |     Decodes JSON5 serialized data from an object that supports the buffer protocol,
127 |     e.g. :class:`bytearray`.
128 | 
129 |     .. code:: python
130 | 
131 |         obj = memoryview(b'["Hello", "world!"]')
132 | 
133 |         decode_buffer(obj) == ['Hello', 'world!']
134 | 
135 |     Parameters
136 |     ----------
137 |     data : object
138 |         JSON5 serialized data.
139 |         The argument must support Python's buffer protocol, i.e.
140 |         :class:`memoryview(…) <memoryview>` must work. The buffer must be contigious.
141 |     maxdepth : Optional[int]
142 |         see :func:`decode(…) <pyjson5.decode>`
143 |     some : bool
144 |         see :func:`decode(…) <pyjson5.decode>`
145 |     wordlength : Optional[int]
146 |         Must be 0, 1, 2, 4 to denote UTF-8, UCS1, USC2 or USC4 data, resp.
147 |         Surrogates are not supported. Decode the data to an :class:`str` if need be.
148 |         If ``None`` is supplied, then the buffer's ``itemsize`` is used.
149 | 
150 |     Raises
151 |     ------
152 |     Json5DecoderException
153 |         An exception occured while decoding.
154 |     TypeError
155 |         An argument had a wrong type.
156 |     ValueError
157 |         The value of ``wordlength`` was invalid.
158 | 
159 |     Returns
160 |     -------
161 |     object
162 |         see :func:`decode(…) <pyjson5.decode>`
163 |     '''
164 |     cdef Py_buffer view
165 | 
166 |     if maxdepth is None:
167 |         maxdepth = DEFAULT_MAX_NESTING_LEVEL
168 | 
169 |     PyObject_GetBuffer(obj, &view, PyBUF_CONTIG_RO)
170 |     try:
171 |         if wordlength is None:
172 |             wordlength = view.itemsize
173 |         return _decode_buffer(view, wordlength, maxdepth, bool(some))
174 |     finally:
175 |         PyBuffer_Release(&view)
176 | 
177 | 
178 | def decode_callback(object cb, object maxdepth=None, object some=False,
179 |                     object args=None):
180 |     '''
181 |     Decodes JSON5 serialized data by invoking a callback.
182 | 
183 |     .. code:: python
184 | 
185 |         cb = iter('["Hello","world!"]').__next__
186 | 
187 |         decode_callback(cb) == ['Hello', 'world!']
188 | 
189 |     Parameters
190 |     ----------
191 |     cb : Callable[Any, Union[str|bytes|bytearray|int|None]]
192 |         A function to get values from.
193 |         The functions is called like ``cb(*args)``, and it returns:
194 | 
195 |         * **str, bytes, bytearray:** \
196 |             ``len(…) == 0`` denotes exhausted input. \
197 |             ``len(…) == 1`` is the next character.
198 |         * **int:** \
199 |             ``< 0`` denotes exhausted input. \
200 |            ``>= 0`` is the ordinal value of the next character.
201 |         * **None:** \
202 |             input exhausted
203 |     maxdepth : Optional[int]
204 |         see :func:`decode(…) <pyjson5.decode>`
205 |     some : bool
206 |         see :func:`decode(…) <pyjson5.decode>`
207 |     args : Optional[Iterable[Any]]
208 |         Arguments to call ``cb`` with.
209 | 
210 |     Raises
211 |     ------
212 |     Json5DecoderException
213 |         An exception occured while decoding.
214 |     TypeError
215 |         An argument had a wrong type.
216 | 
217 |     Returns
218 |     -------
219 |     object
220 |         see :func:`decode(…) <pyjson5.decode>`
221 |     '''
222 |     if not callable(cb):
223 |         raise TypeError(f'type(cb)=={type(cb)!r} is not callable')
224 | 
225 |     if maxdepth is None:
226 |         maxdepth = DEFAULT_MAX_NESTING_LEVEL
227 | 
228 |     if args:
229 |         args = tuple(args)
230 |     else:
231 |         args = ()
232 | 
233 |     return _decode_callback(cb, args, maxdepth, bool(some))
234 | 
235 | 
236 | def decode_io(object fp, object maxdepth=None, object some=True):
237 |     '''
238 |     Decodes JSON5 serialized data from a file-like object.
239 | 
240 |     .. code:: python
241 | 
242 |         fp = io.StringIO("""
243 |             ['Hello', /* TODO look into specs whom to greet */]
244 |             'Wolrd' // FIXME: look for typos
245 |         """)
246 | 
247 |         decode_io(fp) == ['Hello']
248 |         decode_io(fp) == 'Wolrd'
249 | 
250 |         fp.seek(0)
251 | 
252 |         decode_io(fp, some=False)
253 |         # raises Json5ExtraData('Extra data U+0027 near 56', ['Hello'], "'")
254 | 
255 |     Parameters
256 |     ----------
257 |     fp : IOBase
258 |         A file-like object to parse from.
259 |     maxdepth : Optional[int] = None
260 |         see :func:`decode(…) <pyjson5.decode>`
261 |     some : bool
262 |         see :func:`decode(…) <pyjson5.decode>`
263 | 
264 |     Raises
265 |     ------
266 |     Json5DecoderException
267 |         An exception occured while decoding.
268 |     TypeError
269 |         An argument had a wrong type.
270 | 
271 |     Returns
272 |     -------
273 |     object
274 |         see :func:`decode(…) <pyjson5.decode>`
275 |     '''
276 |     if not isinstance(fp, IOBase):
277 |         raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible')
278 |     elif not fp.readable():
279 |         raise TypeError(f'fp is not readable')
280 |     elif fp.closed:
281 |         raise TypeError(f'fp is closed')
282 | 
283 |     if maxdepth is None:
284 |         maxdepth = DEFAULT_MAX_NESTING_LEVEL
285 | 
286 |     return _decode_callback(fp.read, (1,), maxdepth, bool(some))
287 | 
288 | 
289 | def encode(object data, *, options=None, **options_kw):
290 |     '''
291 |     Serializes a Python object as a JSON5 compatible string.
292 | 
293 |     .. code:: python
294 | 
295 |         encode(['Hello', 'world!']) == '["Hello","world!"]'
296 | 
297 |     Parameters
298 |     ----------
299 |     data : object
300 |         Python object to serialize.
301 |     options : Optional[Options]
302 |         Extra options for the encoder.
303 |         If ``options`` **and** ``options_kw`` are specified, then ``options.update(**options_kw)`` is used.
304 |     options_kw
305 |         See Option's arguments.
306 | 
307 |     Raises
308 |     ------
309 |     Json5EncoderException
310 |         An exception occured while encoding.
311 |     TypeError
312 |         An argument had a wrong type.
313 | 
314 |     Returns
315 |     -------
316 |     str
317 |         Unless ``float('inf')`` or ``float('nan')`` is encountered, the result
318 |         will be valid JSON data (as of RFC8259).
319 | 
320 |         The result is always ASCII. All characters outside of the ASCII range
321 |         are escaped.
322 | 
323 |         The result safe to use in an HTML template, e.g.
324 |         ``<a onclick='alert({{ encode(url) }})'>show message</a>``.
325 |         Apostrophes ``"'"`` are encoded as ``"\\u0027"``, less-than,
326 |         greater-than, and ampersand likewise.
327 |     '''
328 |     cdef void *temp
329 |     cdef object result
330 |     cdef Py_ssize_t start = (
331 |         <Py_ssize_t> <void*> &(<AsciiObject*> NULL).data[0]
332 |     )
333 |     cdef Py_ssize_t length
334 |     cdef object opts = _to_options(options, options_kw)
335 |     cdef WriterReallocatable writer = WriterReallocatable(
336 |         Writer(
337 |             _WriterReallocatable_reserve,
338 |             _WriterReallocatable_append_c,
339 |             _WriterReallocatable_append_s,
340 |             <PyObject*> opts,
341 |         ),
342 |         start, 0, NULL,
343 |     )
344 | 
345 |     try:
346 |         if expect(_encode(writer.base, data) < 0, False):
347 |             exception_thrown()
348 | 
349 |         length = writer.position - start
350 |         if length <= 0:
351 |             # impossible
352 |             return u''
353 | 
354 |         temp = ObjectRealloc(writer.obj, writer.position + 1)
355 |         if temp is not NULL:
356 |             writer.obj = temp
357 |         (<char*> writer.obj)[writer.position] = 0
358 | 
359 |         result = ObjectInit(<PyObject*> writer.obj, unicode)
360 |         writer.obj = NULL
361 | 
362 |         (<PyASCIIObject*> result).length = length
363 |         reset_hash(<PyASCIIObject*> result)
364 |         reset_wstr(<PyASCIIObject*> result)
365 |         (<PyASCIIObject*> result).state.interned = SSTATE_NOT_INTERNED
366 |         (<PyASCIIObject*> result).state.kind = PyUnicode_1BYTE_KIND
367 |         (<PyASCIIObject*> result).state.compact = True
368 |         set_ready(<PyASCIIObject*> result)
369 |         (<PyASCIIObject*> result).state.ascii = True
370 | 
371 |         return result
372 |     finally:
373 |         if writer.obj is not NULL:
374 |             ObjectFree(writer.obj)
375 | 
376 | 
377 | def encode_bytes(object data, *, options=None, **options_kw):
378 |     '''
379 |     Serializes a Python object to a JSON5 compatible bytes string.
380 | 
381 |     .. code:: python
382 | 
383 |         encode_bytes(['Hello', 'world!']) == b'["Hello","world!"]'
384 | 
385 |     Parameters
386 |     ----------
387 |     data : object
388 |         see :func:`encode(…) <pyjson5.encode>`
389 |     options : Optional[Options]
390 |         see :func:`encode(…) <pyjson5.encode>`
391 |     options_kw
392 |         see :func:`encode(…) <pyjson5.encode>`
393 | 
394 |     Raises
395 |     ------
396 |     Json5EncoderException
397 |         An exception occured while encoding.
398 |     TypeError
399 |         An argument had a wrong type.
400 | 
401 |     Returns
402 |     -------
403 |     bytes
404 |         see :func:`encode(…) <pyjson5.encode>`
405 |     '''
406 |     cdef void *temp
407 |     cdef object result
408 |     cdef Py_ssize_t start = (
409 |         <Py_ssize_t> <void*> &(<PyBytesObject*> NULL).ob_sval[0]
410 |     )
411 |     cdef Py_ssize_t length
412 |     cdef object opts = _to_options(options, options_kw)
413 |     cdef WriterReallocatable writer = WriterReallocatable(
414 |         Writer(
415 |             _WriterReallocatable_reserve,
416 |             _WriterReallocatable_append_c,
417 |             _WriterReallocatable_append_s,
418 |             <PyObject*> opts,
419 |         ),
420 |         start, 0, NULL,
421 |     )
422 | 
423 |     try:
424 |         if expect(_encode(writer.base, data) < 0, False):
425 |             exception_thrown()
426 | 
427 |         length = writer.position - start
428 |         if length <= 0:
429 |             # impossible
430 |             return b''
431 | 
432 |         temp = ObjectRealloc(writer.obj, writer.position + 1)
433 |         if temp is not NULL:
434 |             writer.obj = temp
435 |         (<char*> writer.obj)[writer.position] = 0
436 | 
437 |         result = <object> <PyObject*> ObjectInitVar(
438 |             (<PyVarObject*> writer.obj), bytes, length,
439 |         )
440 |         writer.obj = NULL
441 | 
442 |         reset_hash(<PyBytesObject*> result)
443 | 
444 |         return result
445 |     finally:
446 |         if writer.obj is not NULL:
447 |             ObjectFree(writer.obj)
448 | 
449 | 
450 | def encode_callback(object data, object cb, object supply_bytes=False, *,
451 |                     options=None, **options_kw):
452 |     '''
453 |     Serializes a Python object into a callback function.
454 | 
455 |     The callback function ``cb`` gets called with single characters and strings
456 |     until the input ``data`` is fully serialized.
457 | 
458 |     .. code:: python
459 | 
460 |         encode_callback(['Hello', 'world!'], print)
461 |         #prints:
462 |         # [
463 |         # "
464 |         # Hello
465 |         # "
466 |         # ,
467 |         # "
468 |         # world!
469 |         # "
470 |         " ]
471 | 
472 |     Parameters
473 |     ----------
474 |     data : object
475 |         see :func:`encode(…) <pyjson5.encode>`
476 |     cb : Callable[[Union[bytes|str]], None]
477 |         A callback function.
478 |         Depending on the truthyness of ``supply_bytes`` either :class:`bytes` or
479 |         :class:`str` is supplied.
480 |     supply_bytes : bool
481 |         Call ``cb(…)`` with a :class:`bytes` argument if true,
482 |         otherwise :class:`str`.
483 |     options : Optional[Options]
484 |         see :func:`encode(…) <pyjson5.encode>`
485 |     options_kw
486 |         see :func:`encode(…) <pyjson5.encode>`
487 | 
488 |     Raises
489 |     ------
490 |     Json5EncoderException
491 |         An exception occured while encoding.
492 |     TypeError
493 |         An argument had a wrong type.
494 | 
495 |     Returns
496 |     -------
497 |     Callable[[Union[bytes|str]], None]
498 |         The supplied argument ``cb``.
499 |     '''
500 |     cdef int (*encoder)(object obj, object cb, object options) except -1
501 |     cdef Options opts = _to_options(options, options_kw)
502 | 
503 |     if supply_bytes:
504 |         encoder = _encode_callback_bytes
505 |     else:
506 |         encoder = _encode_callback_str
507 | 
508 |     encoder(data, cb, options=opts)
509 | 
510 |     return cb
511 | 
512 | 
513 | def encode_io(object data, object fp, object supply_bytes=True, *,
514 |               options=None, **options_kw):
515 |     '''
516 |     Serializes a Python object into a file-object.
517 | 
518 |     The return value of :meth:`fp.write(…) <io.BufferedWriter.write>` is not checked.
519 |     If ``fp`` is unbuffered, then the result will be garbage!
520 | 
521 |     Parameters
522 |     ----------
523 |     data : object
524 |         see :func:`encode(…) <pyjson5.encode>`
525 |     fp : IOBase
526 |         A file-like object to serialize into.
527 |     supply_bytes : bool
528 |         Call :meth:`fp.write(…) <io.BufferedWriter.write>` with a :class:`bytes` argument if true,
529 |         otherwise :class:`str`.
530 |     options : Optional[Options]
531 |         see :func:`encode(…) <pyjson5.encode>`
532 |     options_kw
533 |         see :func:`encode(…) <pyjson5.encode>`
534 | 
535 |     Raises
536 |     ------
537 |     Json5EncoderException
538 |         An exception occured while encoding.
539 |     TypeError
540 |         An argument had a wrong type.
541 | 
542 |     Returns
543 |     -------
544 |     IOBase
545 |         The supplied argument ``fp``.
546 |     '''
547 |     cdef int (*encoder)(object obj, object cb, object options) except -1
548 |     cdef object opts = _to_options(options, options_kw)
549 | 
550 |     if not isinstance(fp, IOBase):
551 |         raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible')
552 |     elif not fp.writable():
553 |         raise TypeError(f'fp is not writable')
554 |     elif fp.closed:
555 |         raise TypeError(f'fp is closed')
556 | 
557 |     if supply_bytes:
558 |         encoder = _encode_callback_bytes
559 |     else:
560 |         encoder = _encode_callback_str
561 | 
562 |     encoder(data, fp.write, options=opts)
563 | 
564 |     return fp
565 | 
566 | 
567 | def encode_noop(object data, *, options=None, **options_kw):
568 |     '''
569 |     Test if the input is serializable.
570 | 
571 |     Most likely you want to serialize ``data`` directly, and catch exceptions
572 |     instead of using this function!
573 | 
574 |     .. code:: python
575 | 
576 |         encode_noop({47: 11}) == True
577 |         encode_noop({47: object()}) == False
578 | 
579 |     Parameters
580 |     ----------
581 |     data : object
582 |         see :func:`encode(…) <pyjson5.encode>`
583 |     options : Optional[Options]
584 |         see :func:`encode(…) <pyjson5.encode>`
585 |     options_kw
586 |         see :func:`encode(…) <pyjson5.encode>`
587 | 
588 |     Returns
589 |     -------
590 |     bool
591 |         ``True`` iff ``data`` is serializable.
592 |     '''
593 |     cdef object opts = _to_options(options, options_kw)
594 |     cdef Writer writer = Writer(
595 |         _WriterNoop_reserve,
596 |         _WriterNoop_append_c,
597 |         _WriterNoop_append_s,
598 |         <PyObject*> opts,
599 |     )
600 | 
601 |     if expect(_encode(writer, data) < 0, False):
602 |         exception_thrown()
603 | 
604 |     return True
605 | 
606 | 
607 | __all__ = (
608 |     # DECODE
609 |     'decode', 'decode_utf8', 'decode_latin1', 'decode_buffer', 'decode_callback', 'decode_io',
610 |     # ENCODE
611 |     'encode', 'encode_bytes', 'encode_callback', 'encode_io', 'encode_noop', 'Options',
612 |     # LEGACY
613 |     'loads', 'load', 'dumps', 'dump',
614 |     # EXCEPTIONS
615 |     'Json5Exception',
616 |     'Json5EncoderException', 'Json5UnstringifiableType',
617 |     'Json5DecoderException', 'Json5NestingTooDeep', 'Json5EOF', 'Json5IllegalCharacter', 'Json5ExtraData', 'Json5IllegalType',
618 | )
619 | 
620 | __doc__ = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, LONGDESCRIPTION, LONGDESCRIPTION_LENGTH)
621 | 
622 | __license__ = 'MIT OR Apache-2.0'
623 | 
624 | __author__ = '2018-2025 René Kijewski <pypi.org@k6i.de>'
625 | 


--------------------------------------------------------------------------------
/src/_imports.pyx:
--------------------------------------------------------------------------------
  1 | from cython import final, no_gc, auto_pickle, freelist
  2 | from cpython cimport dict, int, list, long, tuple, type
  3 | from cpython.bool cimport PyBool_Check
  4 | from cpython.buffer cimport PyObject_GetBuffer, PyBUF_CONTIG_RO, PyBuffer_Release
  5 | from cpython.bytes cimport (
  6 |     PyBytes_AsStringAndSize, PyBytes_FromStringAndSize, PyBytes_Check,
  7 | )
  8 | from cpython.dict cimport PyDict_SetItem
  9 | from cpython.float cimport PyFloat_Check, PyFloat_AsDouble, PyFloat_FromDouble
 10 | from cpython.list cimport PyList_Append
 11 | from cpython.long cimport PyLong_FromString, PyLong_Check
 12 | from cpython.object cimport PyObject, PyObject_GetIter
 13 | from cpython.type cimport PyType_Check
 14 | from cpython.unicode cimport PyUnicode_Check, PyUnicode_FromEncodedObject, PyUnicode_Format
 15 | from libcpp cimport bool as boolean
 16 | 
 17 | 
 18 | cdef extern from '<cstddef>' namespace 'std' nogil:
 19 |     ctypedef unsigned long size_t
 20 | 
 21 | 
 22 | cdef extern from '<cstdint>' namespace 'std' nogil:
 23 |     ctypedef unsigned char uint8_t
 24 |     ctypedef unsigned short uint16_t
 25 |     ctypedef unsigned long uint32_t
 26 |     ctypedef unsigned long long uint64_t
 27 | 
 28 |     ctypedef signed char int8_t
 29 |     ctypedef signed short int16_t
 30 |     ctypedef signed long int32_t
 31 |     ctypedef signed long long int64_t
 32 | 
 33 | 
 34 | cdef extern from '<cstdio>' namespace 'std' nogil:
 35 |     int snprintf(char *buffer, size_t buf_size, const char *format, ...)
 36 |     size_t strlen(const char *s)
 37 | 
 38 | 
 39 | cdef extern from '<cstring>' namespace 'std' nogil:
 40 |     void memcpy(void *dest, const void *std, size_t count)
 41 |     void memset(void *dest, char value, size_t count)
 42 |     size_t strlen(const char *s)
 43 | 
 44 | 
 45 | cdef extern from '<cmath>' nogil:
 46 |     enum:
 47 |         FP_INFINITE, FP_NAN, FP_NORMAL, FP_SUBNORMAL, FP_ZERO
 48 | 
 49 | cdef extern from '<cmath>' namespace 'std' nogil:
 50 |     int fpclassify(...)
 51 | 
 52 | 
 53 | cdef extern from '<utility>' namespace 'std' nogil:
 54 |     void swap[T](T&, T&)
 55 | 
 56 | 
 57 | cdef extern from 'Python.h':
 58 |     ctypedef signed char Py_UCS1
 59 |     ctypedef signed short Py_UCS2
 60 |     ctypedef signed long Py_UCS4
 61 | 
 62 | 
 63 | cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp' nogil:
 64 |     int32_t cast_to_int32(...)
 65 |     uint32_t cast_to_uint32(...)
 66 | 
 67 |     ctypedef boolean AlwaysTrue
 68 |     boolean obj_has_iter(object obj)
 69 | 
 70 |     ctypedef char EscapeDctItem[8]
 71 |     cppclass EscapeDct:
 72 |         EscapeDctItem items[0x100]
 73 |         boolean is_escaped(uint32_t c)
 74 |         Py_ssize_t find_unescaped_range(const Py_UCS1 *start, Py_ssize_t length)
 75 |         Py_ssize_t find_unescaped_range(const Py_UCS2 *start, Py_ssize_t length)
 76 |         Py_ssize_t find_unescaped_range(const Py_UCS4 *start, Py_ssize_t length)
 77 |     EscapeDct ESCAPE_DCT
 78 | 
 79 |     enum:
 80 |         VERSION_LENGTH
 81 |     const char VERSION[]
 82 | 
 83 |     enum:
 84 |         LONGDESCRIPTION_LENGTH
 85 |     const char LONGDESCRIPTION[]
 86 | 
 87 |     const char HEX[]
 88 | 
 89 |     boolean unicode_is_lo_surrogate(uint32_t ch)
 90 |     boolean unicode_is_hi_surrogate(uint32_t ch)
 91 |     uint32_t unicode_join_surrogates(uint32_t hi, uint32_t lo)
 92 | 
 93 |     void reset_hash[T](T *obj)
 94 |     void reset_wstr[T](T *obj)
 95 |     void set_ready[T](T *obj)
 96 |     AlwaysTrue exception_thrown() except True
 97 |     void unreachable()
 98 | 
 99 | 
100 | cdef extern from 'src/native.hpp' namespace 'JSON5EncoderCpp':
101 |     int iter_next(object iterator, PyObject **value) except -1
102 | 
103 | 
104 | cdef extern from 'src/native.hpp' nogil:
105 |     boolean expect 'JSON5EncoderCpp_expect'(boolean actual, boolean expected)
106 | 
107 | 
108 | cdef extern from 'src/_unicode_cat_of.hpp' namespace 'JSON5EncoderCpp' nogil:
109 |     unsigned unicode_cat_of(uint32_t codepoint)
110 | 
111 | 
112 | cdef extern from 'src/_stack_heap_string.hpp' namespace 'JSON5EncoderCpp' nogil:
113 |     cdef cppclass StackHeapString [T]:
114 |         const T *data()
115 |         Py_ssize_t size()
116 |         boolean push_back(T codepoint) except False
117 | 
118 | 
119 | cdef extern from 'src/_decoder_recursive_select.hpp' namespace 'JSON5EncoderCpp' nogil:
120 |     cdef enum DrsKind:
121 |         DRS_fail,
122 |         DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan,
123 |         DRS_string, DRS_number, DRS_recursive
124 | 
125 |     DrsKind drs_lookup[128]
126 | 
127 | 
128 | cdef extern from 'third-party/fast_double_parser/include/fast_double_parser.h' namespace 'fast_double_parser' nogil:
129 |     const char *parse_number(const char *p, double *outDouble)
130 | 
131 | 
132 | cdef extern from 'src/dragonbox.cc' namespace 'dragonbox' nogil:
133 |     char *Dtoa(char* buffer, double value)
134 | 
135 | 
136 | cdef extern from 'Python.h':
137 |     enum:
138 |         PyUnicode_WCHAR_KIND
139 |         PyUnicode_1BYTE_KIND
140 |         PyUnicode_2BYTE_KIND
141 |         PyUnicode_4BYTE_KIND
142 | 
143 |     int PyUnicode_READY(object o) except -1
144 |     Py_ssize_t PyUnicode_GET_LENGTH(object o) nogil
145 |     int PyUnicode_KIND(object o) nogil
146 |     boolean PyUnicode_IS_ASCII(object) nogil
147 |     Py_UCS1 *PyUnicode_1BYTE_DATA(object o) nogil
148 |     Py_UCS2 *PyUnicode_2BYTE_DATA(object o) nogil
149 |     Py_UCS4 *PyUnicode_4BYTE_DATA(object o) nogil
150 | 
151 |     boolean Py_EnterRecursiveCall(const char *where) except True
152 |     void Py_LeaveRecursiveCall()
153 | 
154 |     bint Py_UNICODE_ISALPHA(Py_UCS4 ch) nogil
155 |     bint Py_UNICODE_ISDIGIT(Py_UCS4 ch) nogil
156 | 
157 |     object PyUnicode_FromKindAndData(int kind, const void *buf, Py_ssize_t size)
158 |     const char *PyUnicode_AsUTF8AndSize(object o, Py_ssize_t *size) except NULL
159 | 
160 |     object PyDict_SetDefault(object p, object key, object value)
161 | 
162 |     object CallFunction 'PyObject_CallFunction'(PyObject *cb, const char *format, ...)
163 |     object CallObject 'PyObject_CallObject'(PyObject *cb, PyObject *args)
164 | 
165 |     ctypedef signed long Py_hash
166 |     ctypedef signed short wchar_t
167 | 
168 |     enum:
169 |         SSTATE_NOT_INTERNED
170 |         SSTATE_INTERNED_MORTAL
171 |         SSTATE_INTERNED_IMMORTAL
172 | 
173 |     ctypedef struct __ascii_object_state:
174 |         uint8_t interned
175 |         uint8_t kind
176 |         boolean compact
177 |         boolean ascii
178 |         boolean ready
179 | 
180 |     ctypedef struct PyASCIIObject:
181 |         Py_ssize_t length
182 |         Py_hash hash
183 |         wchar_t *wstr
184 |         __ascii_object_state state
185 | 
186 |     ctypedef struct PyVarObject:
187 |         pass
188 | 
189 |     ctypedef struct PyBytesObject:
190 |         PyVarObject ob_base
191 |         Py_hash ob_shash
192 |         char ob_sval[1]
193 | 
194 |     AlwaysTrue ErrNoMemory 'PyErr_NoMemory'() except True
195 |     void *ObjectRealloc 'PyObject_Realloc'(void *p, size_t n)
196 |     void ObjectFree 'PyObject_Free'(void *p)
197 |     object ObjectInit 'PyObject_INIT'(PyObject *obj, type cls)
198 |     PyVarObject *ObjectInitVar 'PyObject_InitVar'(PyVarObject *obj, type cls, Py_ssize_t size)
199 |     object PyObject_GenericGetDict(object o, void *context)
200 | 
201 |     object PyLong_FromString(const char *str, char **pend, int base)
202 | 
203 | 
204 | ctypedef struct AsciiObject:
205 |     PyASCIIObject base
206 |     char data[1]
207 | 
208 | 
209 | cdef extern from * nogil:
210 |     enum:
211 |         CYTHON_COMPILING_IN_PYPY
212 | 
213 | 
214 | cdef type datetime, date, time, Decimal, Mapping, IOBase
215 | cdef object saferepr
216 | 
217 | from collections.abc import Mapping
218 | from datetime import datetime, date, time
219 | from decimal import Decimal
220 | from io import IOBase
221 | from pprint import saferepr
222 | 


--------------------------------------------------------------------------------
/src/_legacy.pyx:
--------------------------------------------------------------------------------
  1 | def loads(s, *, encoding='UTF-8', **kw):
  2 |     '''
  3 |     Decodes JSON5 serialized data from a string.
  4 | 
  5 |     Use :func:`decode(…) <pyjson5.decode>` instead!
  6 | 
  7 |     .. code:: python
  8 | 
  9 |         loads(s) == decode(s)
 10 | 
 11 |     Parameters
 12 |     ----------
 13 |     s : object
 14 |         Unless the argument is an ``str``, it gets decoded according to the
 15 |         parameter ``encoding``.
 16 |     encoding : str
 17 |         Codec to use if ``s`` is not an ``str``.
 18 |     kw
 19 |         Silently ignored.
 20 | 
 21 |     Returns
 22 |     -------
 23 |     object
 24 |         see :func:`decode(…) <pyjson5.decode>`
 25 |     '''
 26 |     if not isinstance(s, unicode):
 27 |         s = PyUnicode_FromEncodedObject(s, encoding, 'strict')
 28 |     return decode(s)
 29 | 
 30 | 
 31 | def load(fp, **kw):
 32 |     '''
 33 |     Decodes JSON5 serialized data from a file-like object.
 34 | 
 35 |     Use :func:`decode_io(…) <pyjson5.decode_io>` instead!
 36 | 
 37 |     .. code:: python
 38 | 
 39 |         load(fp) == decode_io(fp, None, False)
 40 | 
 41 |     Parameters
 42 |     ----------
 43 |     fp : IOBase
 44 |         A file-like object to parse from.
 45 |     kw
 46 |         Silently ignored.
 47 | 
 48 |     Returns
 49 |     -------
 50 |     object
 51 |         see :func:`decode_io(…) <pyjson5.decode_io>`
 52 |     '''
 53 |     return decode_io(fp, None, False)
 54 | 
 55 | 
 56 | def dumps(obj, **kw):
 57 |     '''
 58 |     Serializes a Python object to a JSON5 compatible string.
 59 | 
 60 |     Use :func:`encode(…) <pyjson5.encode>` instead!
 61 | 
 62 |     .. code:: python
 63 | 
 64 |         dumps(obj) == encode(obj)
 65 | 
 66 |     Parameters
 67 |     ----------
 68 |     obj : object
 69 |         Python object to serialize.
 70 |     kw
 71 |         Silently ignored.
 72 | 
 73 |     Returns
 74 |     -------
 75 |     str
 76 |         see :func:`encode(…) <pyjson5.encode>`
 77 |     '''
 78 |     return encode(obj)
 79 | 
 80 | 
 81 | def dump(object obj, object fp, **kw):
 82 |     '''
 83 |     Serializes a Python object to a JSON5 compatible string.
 84 | 
 85 |     Use :func:`encode_io(…) <pyjson5.encode_io>` instead!
 86 | 
 87 |     .. code:: python
 88 | 
 89 |         dump(obj, fp) == encode_io(obj, fp)
 90 | 
 91 |     Parameters
 92 |     ----------
 93 |     obj : object
 94 |         Python object to serialize.
 95 |     fp : IOBase
 96 |         A file-like object to serialize into.
 97 |     kw
 98 |         Silently ignored.
 99 |     '''
100 |     encode_io(obj, fp)
101 | 


--------------------------------------------------------------------------------
/src/_raise_decoder.pyx:
--------------------------------------------------------------------------------
 1 | cdef AlwaysTrue _raise_decoder(cls, msg, extra=None, result=None) except True:
 2 |     raise _DecoderException(cls, msg, extra, result)
 3 | 
 4 | 
 5 | cdef AlwaysTrue _raise_unclosed(const char *what, Py_ssize_t start) except True:
 6 |     return _raise_decoder(
 7 |         Json5EOF,
 8 |         f'Unclosed {what} starting near {start}',
 9 |     )
10 | 
11 | 
12 | cdef AlwaysTrue _raise_no_data(Py_ssize_t where) except True:
13 |     return _raise_decoder(
14 |         Json5EOF,
15 |         f'No JSON data found near {where}',
16 |     )
17 | 
18 | 
19 | cdef AlwaysTrue _raise_stray_character(const char *what, Py_ssize_t where) except True:
20 |     return _raise_decoder(
21 |         Json5IllegalCharacter,
22 |         f'Stray {what} near {where}',
23 |         what,
24 |     )
25 | 
26 | 
27 | cdef AlwaysTrue _raise_expected_sc(const char *char_a, uint32_t char_b, Py_ssize_t near, uint32_t found) except True:
28 |     return _raise_decoder(
29 |         Json5IllegalCharacter,
30 |         f'Expected {char_a} or U+{char_b:04x} near {near}, found U+{found:04x}',
31 |         f'{found:c}',
32 |     )
33 | 
34 | 
35 | cdef AlwaysTrue _raise_expected_s(const char *char_a, Py_ssize_t near, uint32_t found) except True:
36 |     return _raise_decoder(
37 |         Json5IllegalCharacter,
38 |         f'Expected {char_a} near {near}, found U+{found:04x}',
39 |         f'{found:c}',
40 |     )
41 | 
42 | 
43 | cdef AlwaysTrue _raise_expected_c(uint32_t char_a, Py_ssize_t near, uint32_t found) except True:
44 |     return _raise_decoder(
45 |         Json5IllegalCharacter,
46 |         f'Expected U+{char_a:04x} near {near}, found U+{found:04x}',
47 |         f'{found:c}',
48 |     )
49 | 
50 | 
51 | cdef AlwaysTrue _raise_extra_data(uint32_t found, Py_ssize_t where) except True:
52 |     return _raise_decoder(
53 |         Json5ExtraData,
54 |         f'Extra data U+{found:04X} near {where}',
55 |         f'{found:c}',
56 |     )
57 | 
58 | 
59 | cdef AlwaysTrue _raise_unframed_data(uint32_t found, Py_ssize_t where) except True:
60 |     return _raise_decoder(
61 |         Json5ExtraData,
62 |         f'Lost unframed data near {where}',
63 |         f'{found:c}',
64 |     )
65 | 
66 | 
67 | cdef AlwaysTrue _raise_nesting(Py_ssize_t where, object result=None) except True:
68 |     return _raise_decoder(
69 |         Json5NestingTooDeep,
70 |         f'Maximum nesting level exceeded near {where}',
71 |         None,
72 |         result,
73 |     )
74 | 
75 | 
76 | cdef AlwaysTrue _raise_not_ord(object value, Py_ssize_t where) except True:
77 |     return _raise_decoder(
78 |         Json5IllegalType,
79 |         f'type(value)=={type(value)!r} not in (int, str, bytes) near {where} or the value is not valid.',
80 |         value,
81 |     )
82 | 


--------------------------------------------------------------------------------
/src/_raise_encoder.pyx:
--------------------------------------------------------------------------------
1 | cdef AlwaysTrue _raise_unstringifiable(object data) except True:
2 |     raise Json5UnstringifiableType(f'Unstringifiable type(data)={type(data)!r}', data)
3 | 
4 | 
5 | cdef AlwaysTrue _raise_illegal_wordlength(int32_t wordlength) except True:
6 |     raise ValueError(f'wordlength must be 1, 2 or 4, not {wordlength!r}')
7 | 


--------------------------------------------------------------------------------
/src/_reader_callback.pyx:
--------------------------------------------------------------------------------
 1 | cdef struct ReaderCallbackBase:
 2 |     Py_ssize_t position
 3 |     Py_ssize_t maxdepth
 4 |     
 5 | 
 6 | cdef struct ReaderCallback:
 7 |     ReaderCallbackBase base
 8 |     PyObject *callback
 9 |     PyObject *args
10 |     int32_t lookahead
11 | 
12 | ctypedef ReaderCallback &ReaderCallbackRef
13 | 
14 | 
15 | cdef inline uint32_t _reader_Callback_get(ReaderCallbackRef self):
16 |     cdef int32_t c = self.lookahead
17 | 
18 |     self.lookahead = -1
19 |     self.base.position += 1
20 | 
21 |     return cast_to_uint32(c)
22 | 
23 | 
24 | cdef int32_t _reader_Callback_good(ReaderCallbackRef self) except -1:
25 |     cdef Py_ssize_t c = -1
26 | 
27 |     if self.lookahead >= 0:
28 |         return True
29 | 
30 |     cdef object value = CallObject(self.callback, self.args)
31 |     if (value is None) or (value is False):
32 |         return False
33 | 
34 |     if isinstance(value, int):
35 |         c = value
36 |     elif isinstance(value, ORD_CLASSES):
37 |         if not value:
38 |             return False
39 |         c = ord(value)
40 |     else:
41 |         _raise_not_ord(value, self.base.position)
42 | 
43 |     if c < 0:
44 |         return False
45 |     elif c > 0x10ffff:
46 |         _raise_not_ord(value, self.base.position)
47 | 
48 |     self.lookahead = c
49 | 
50 |     return True
51 | 


--------------------------------------------------------------------------------
/src/_reader_ucs.pyx:
--------------------------------------------------------------------------------
 1 | cdef struct ReaderUCS:
 2 |     Py_ssize_t remaining
 3 |     Py_ssize_t position
 4 |     Py_ssize_t maxdepth
 5 | 
 6 | 
 7 | cdef struct ReaderUCS1:
 8 |     ReaderUCS base
 9 |     const Py_UCS1 *string
10 | 
11 | 
12 | cdef struct ReaderUCS2:
13 |     ReaderUCS base
14 |     const Py_UCS2 *string
15 | 
16 | 
17 | cdef struct ReaderUCS4:
18 |     ReaderUCS base
19 |     const Py_UCS4 *string
20 | 
21 | 
22 | cdef struct ReaderUTF8:
23 |     ReaderUCS base
24 |     const Py_UCS1 *string
25 | 
26 | 
27 | ctypedef ReaderUCS1 &ReaderUCS1Ref
28 | ctypedef ReaderUCS2 &ReaderUCS2Ref
29 | ctypedef ReaderUCS4 &ReaderUCS4Ref
30 | ctypedef ReaderUTF8 &ReaderUTF8Ref
31 | 
32 | ctypedef Py_UCS1 *UCS1String
33 | ctypedef Py_UCS2 *UCS2String
34 | ctypedef Py_UCS4 *UCS4String
35 | 
36 | ctypedef fused ReaderUCSRef:
37 |     ReaderUCS1Ref
38 |     ReaderUCS2Ref
39 |     ReaderUCS4Ref
40 |     ReaderUTF8Ref
41 | 
42 | ctypedef fused UCSString:
43 |     UCS1String
44 |     UCS2String
45 |     UCS4String
46 | 
47 | 
48 | cdef inline int32_t _reader_ucs_good(ReaderUCSRef self):
49 |     return self.base.remaining > 0
50 | 
51 | 
52 | cdef inline uint32_t _reader_ucs_get(ReaderUCSRef self):
53 |     cdef int32_t c = self.string[0]
54 | 
55 |     self.string += 1
56 |     self.base.remaining -= 1
57 |     self.base.position += 1
58 | 
59 |     return cast_to_uint32(c)
60 | 
61 | 
62 | cdef inline uint32_t _reader_utf8_get(ReaderUCSRef self):
63 |     cdef uint32_t c0 = _reader_ucs_get(self)
64 |     cdef unsigned int n
65 | 
66 |     if (c0 & 0b1_0000000) == 0b0_0000000:    # ASCII
67 |         return c0
68 |     elif (c0 & 0b11_000000) == 0b10_000000:  # broken continuation
69 |         return c0
70 |     elif (c0 & 0b111_00000) == 0b110_00000:  # 2 bytes
71 |         c0 = (c0 & 0b000_11111)
72 |         n = 1
73 |     elif (c0 & 0b1111_0000) == 0b1110_0000:  # 3 bytes
74 |         c0 = (c0 & 0b0000_1111)
75 |         n = 2
76 |     elif (c0 & 0b11111_000) == 0b11110_000:  # 4 bytes
77 |         c0 = (c0 & 0b00000_111)
78 |         n = 3
79 |     else:  # 5+ bytes, invalid
80 |         return c0
81 | 
82 |     for n in range(n, 0, -1):
83 |         if not _reader_ucs_good(self):
84 |             return c0
85 | 
86 |         c0 = (c0 << 6) | (_reader_ucs_get(self) & 0b00_111111)
87 | 
88 |     return c0
89 | 


--------------------------------------------------------------------------------
/src/_readers.pyx:
--------------------------------------------------------------------------------
 1 | ctypedef fused ReaderRef:
 2 |     ReaderUCSRef
 3 |     ReaderCallbackRef
 4 | 
 5 | 
 6 | cdef boolean _reader_enter(ReaderRef self) except False:
 7 |     if self.base.maxdepth == 0:
 8 |         _raise_nesting(_reader_tell(self))
 9 | 
10 |     Py_EnterRecursiveCall(' while decoding nested JSON5 object')
11 | 
12 |     self.base.maxdepth -= 1
13 | 
14 |     return True
15 | 
16 | 
17 | cdef void _reader_leave(ReaderRef self):
18 |     Py_LeaveRecursiveCall()
19 |     self.base.maxdepth += 1
20 | 
21 | 
22 | cdef inline Py_ssize_t _reader_tell(ReaderRef self):
23 |     return self.base.position
24 | 
25 | 
26 | cdef inline uint32_t _reader_get(ReaderRef self):
27 |     cdef uint32_t c0
28 |     if ReaderRef is ReaderUTF8Ref:
29 |         c0 = _reader_utf8_get(self)
30 |     elif ReaderRef in ReaderUCSRef:
31 |         c0 = _reader_ucs_get(self)
32 |     elif ReaderRef is ReaderCallbackRef:
33 |         c0 = _reader_Callback_get(self)
34 |     return c0
35 | 
36 | 
37 | cdef int32_t _reader_good(ReaderRef self) except -1:
38 |     if ReaderRef in ReaderUCSRef:
39 |         return _reader_ucs_good(self)
40 |     elif ReaderRef is ReaderCallbackRef:
41 |         return _reader_Callback_good(self)
42 | 


--------------------------------------------------------------------------------
/src/_stack_heap_string.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef JSON5EncoderCpp_StackHeapString
 2 | #define JSON5EncoderCpp_StackHeapString
 3 | 
 4 | #include <cstdint>
 5 | #include <cstring>
 6 | #include <Python.h>
 7 | 
 8 | 
 9 | namespace JSON5EncoderCpp {
10 | inline namespace {
11 | 
12 | static constexpr Py_ssize_t StackHeapStringStackSize = 64;
13 | static constexpr Py_ssize_t StackHeapStringHeapSize = 256;
14 | static constexpr Py_ssize_t StackHeapStringHeapFactor = 4;
15 | 
16 | 
17 | template <class T>
18 | class StackHeapString {
19 |     StackHeapString(const StackHeapString&) = delete;
20 |     StackHeapString(StackHeapString&&) = delete;
21 |     StackHeapString &operator =(const StackHeapString&) = delete;
22 |     StackHeapString &operator =(StackHeapString&&) = delete;
23 | 
24 | public:
25 |     StackHeapString() = default;
26 | 
27 |     ~StackHeapString() {
28 |         if (m_heap != nullptr) {
29 |             PyMem_RawFree(m_heap);
30 |         }
31 |     }
32 | 
33 |     const T *data() const& {
34 |         if (JSON5EncoderCpp_expect(m_heap == nullptr, true)) {
35 |             return m_stack;
36 |         } else {
37 |             return m_heap;
38 |         }
39 |     }
40 | 
41 |     Py_ssize_t size() const& {
42 |         return m_size;
43 |     }
44 | 
45 |     bool push_back(T c) {
46 |         if (JSON5EncoderCpp_expect(m_left == 0, false)) {
47 |             if (m_heap == nullptr) {
48 |                 void *new_ptr = PyMem_RawMalloc(sizeof(T) * StackHeapStringHeapSize);
49 |                 if (new_ptr == nullptr) {
50 |                     PyErr_NoMemory();
51 |                     return false;
52 |                 }
53 | 
54 |                 m_heap = reinterpret_cast<T*>(new_ptr);
55 |                 m_left = StackHeapStringHeapSize - StackHeapStringStackSize;
56 |                 std::memcpy(m_heap, m_stack, sizeof(T) * StackHeapStringStackSize);
57 |             } else {
58 |                 void *new_ptr = PyMem_RawRealloc(m_heap, sizeof(T) * (m_size * StackHeapStringHeapFactor));
59 |                 if (new_ptr == nullptr) {
60 |                     PyErr_NoMemory();
61 |                     return false;
62 |                 }
63 | 
64 |                 m_heap = reinterpret_cast<T*>(new_ptr);
65 |                 m_left = m_size * (StackHeapStringHeapFactor - 1);
66 |             }
67 |         }
68 | 
69 |         if (JSON5EncoderCpp_expect(m_heap == nullptr, true)) {
70 |             m_stack[m_size] = c;
71 |         } else {
72 |             m_heap[m_size] = c;
73 |         }
74 | 
75 |         ++m_size;
76 |         --m_left;
77 |         return true;
78 |     }
79 | 
80 | private:
81 |     Py_ssize_t m_size = 0;
82 |     Py_ssize_t m_left = StackHeapStringStackSize;
83 |     T *m_heap = nullptr;
84 |     T m_stack[StackHeapStringStackSize];
85 | };
86 | 
87 | }
88 | }
89 | 
90 | #endif  // ifndef JSON5EncoderCpp_StackHeapString
91 | 


--------------------------------------------------------------------------------
/src/_unicode.pyx:
--------------------------------------------------------------------------------
 1 | cdef boolean _is_line_terminator(uint32_t c) nogil:
 2 |     # https://www.ecma-international.org/ecma-262/5.1/#sec-7.3
 3 |     return c in (
 4 |         0x000A,  # Line Feed <LF>
 5 |         0x000D,  # Carriage Return <CR>
 6 |         0x2028,  # Line separator <LS>
 7 |         0x2029,  # Paragraph separator <PS>
 8 |     )
 9 | 
10 | cdef boolean _is_ws_zs(uint32_t c) nogil:
11 |     # https://spec.json5.org/#white-space
12 |     return unicode_cat_of(c) == 1
13 | 
14 | cdef boolean _is_identifier_start(uint32_t c) nogil:
15 |     # https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
16 |     return unicode_cat_of(c) == 2
17 | 
18 | cdef boolean _is_identifier_part(uint32_t c) nogil:
19 |     # https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
20 |     return unicode_cat_of(c) >= 2
21 | 
22 | cdef inline boolean _is_x(uint32_t c) nogil:
23 |     return (c | 0x20) == b'x'
24 | 
25 | cdef inline boolean _is_e(uint32_t c) nogil:
26 |     return (c | 0x20) == b'e'
27 | 
28 | cdef inline boolean _is_decimal(uint32_t c) nogil:
29 |     return b'0' <= c <= b'9'
30 | 
31 | cdef inline boolean _is_hex(uint32_t c) nogil:
32 |     return b'a' <= (c | 0x20) <= b'f'
33 | 
34 | cdef inline boolean _is_hexadecimal(uint32_t c) nogil:
35 |     return _is_decimal(c) or _is_hex(c)
36 | 
37 | cdef boolean _is_in_float_representation(uint32_t c) nogil:
38 |     if _is_decimal(c):
39 |         return True
40 |     if _is_e(c):
41 |         return True
42 |     elif c in b'.+-':
43 |         return True
44 |     else:
45 |         return False
46 | 


--------------------------------------------------------------------------------
/src/_writer_callback.pyx:
--------------------------------------------------------------------------------
 1 | cdef struct WriterCallback:
 2 |     Writer base
 3 |     PyObject *callback
 4 | 
 5 | 
 6 | cdef boolean _WriterCbBytes_append_c(Writer &writer_, char datum) except False:
 7 |     cdef WriterCallback *writer = <WriterCallback*> &writer_
 8 | 
 9 |     CallFunction(writer.callback, b'c', datum)
10 | 
11 |     return True
12 | 
13 | 
14 | cdef boolean _WriterCbBytes_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False:
15 |     cdef WriterCallback *writer = <WriterCallback*> &writer_
16 | 
17 |     if expect(length <= 0, False):
18 |         return True
19 | 
20 |     CallFunction(writer.callback, b'y#', s, <int> length)
21 | 
22 |     return True
23 | 
24 | 
25 | cdef boolean _WriterCbStr_append_c(Writer &writer_, char datum) except False:
26 |     cdef WriterCallback *writer = <WriterCallback*> &writer_
27 | 
28 |     CallFunction(writer.callback, b'C', datum)
29 | 
30 |     return True
31 | 
32 | 
33 | cdef boolean _WriterCbStr_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False:
34 |     cdef WriterCallback *writer = <WriterCallback*> &writer_
35 | 
36 |     if expect(length <= 0, False):
37 |         return True
38 | 
39 |     CallFunction(writer.callback, b'U#', s, <int> length)
40 | 
41 |     return True
42 | 


--------------------------------------------------------------------------------
/src/_writer_noop.pyx:
--------------------------------------------------------------------------------
 1 | cdef struct WriterNoop:
 2 |     Writer base
 3 | 
 4 | 
 5 | cdef boolean _WriterNoop_reserve(WriterRef writer_, size_t amount) except False:
 6 |     return True
 7 | 
 8 | 
 9 | cdef boolean _WriterNoop_append_c(Writer &writer_, char datum) except False:
10 |     return True
11 | 
12 | 
13 | cdef boolean _WriterNoop_append_s(Writer &writer_, const char *s,
14 |                                   Py_ssize_t length) except False:
15 |     return True
16 | 


--------------------------------------------------------------------------------
/src/_writer_reallocatable.pyx:
--------------------------------------------------------------------------------
 1 | cdef struct WriterReallocatable:
 2 |     Writer base
 3 |     size_t position
 4 |     size_t length
 5 |     void *obj
 6 | 
 7 | 
 8 | cdef boolean _WriterReallocatable_reserve(WriterRef writer_, size_t amount) except False:
 9 |     cdef size_t current_size
10 |     cdef size_t needed_size
11 |     cdef size_t new_size
12 |     cdef void *temp
13 |     cdef WriterReallocatable *writer = <WriterReallocatable*> &writer_
14 | 
15 |     if expect(amount <= 0, False):
16 |         return True
17 | 
18 |     needed_size = writer.position + amount
19 |     current_size = writer.length
20 |     if expect(needed_size < current_size, True):
21 |         return True
22 | 
23 |     new_size = current_size
24 |     while new_size <= needed_size:
25 |         new_size = (new_size + 32) + (new_size // 4)
26 |         if expect(new_size < current_size, False):
27 |             ErrNoMemory()
28 | 
29 |     temp = ObjectRealloc(writer.obj, new_size + 1)
30 |     if temp is NULL:
31 |         ErrNoMemory()
32 | 
33 |     writer.obj = temp
34 |     writer.length = new_size
35 | 
36 |     return True
37 | 
38 | 
39 | cdef boolean _WriterReallocatable_append_c(Writer &writer_, char datum) except False:
40 |     cdef WriterReallocatable *writer = <WriterReallocatable*> &writer_
41 | 
42 |     _WriterReallocatable_reserve(writer.base, 1)
43 |     (<char*> writer.obj)[writer.position] = datum
44 |     writer.position += 1
45 | 
46 |     return True
47 | 
48 | 
49 | cdef boolean _WriterReallocatable_append_s(Writer &writer_, const char *s, Py_ssize_t length) except False:
50 |     cdef WriterReallocatable *writer = <WriterReallocatable*> &writer_
51 | 
52 |     if expect(length <= 0, False):
53 |         return True
54 | 
55 |     _WriterReallocatable_reserve(writer.base, length)
56 |     memcpy(&(<char*> writer.obj)[writer.position], s, length)
57 |     writer.position += length
58 | 
59 |     return True
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/src/_writers.pyx:
--------------------------------------------------------------------------------
1 | cdef struct Writer:
2 |     boolean (*reserve)(Writer &writer, size_t amount) except False
3 |     boolean (*append_c)(Writer &writer, char datum) except False
4 |     boolean (*append_s)(Writer &writer, const char *s, Py_ssize_t length) except False
5 |     PyObject *options
6 | 
7 | 
8 | ctypedef Writer &WriterRef
9 | 


--------------------------------------------------------------------------------
/src/native.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef JSON5EncoderCpp_native
  2 | #define JSON5EncoderCpp_native
  3 | 
  4 | #include <array>
  5 | #include <cstdint>
  6 | #include <type_traits>
  7 | #include <utility>
  8 | 
  9 | namespace JSON5EncoderCpp {
 10 | 
 11 | template <class From>
 12 | constexpr std::uint32_t cast_to_uint32(
 13 |     const From &unsigned_from,
 14 |     typename std::enable_if<
 15 |         !std::is_signed<From>::value
 16 |     >::type* = nullptr
 17 | ) {
 18 |     return static_cast<std::uint32_t>(unsigned_from);
 19 | }
 20 | 
 21 | template <class From>
 22 | constexpr std::uint32_t cast_to_uint32(
 23 |     const From &from,
 24 |     typename std::enable_if<
 25 |         std::is_signed<From>::value
 26 |     >::type* = nullptr
 27 | ) {
 28 |     return cast_to_uint32(static_cast<typename std::make_unsigned<From>::type>(from));
 29 | }
 30 | 
 31 | template <class From>
 32 | constexpr std::int32_t cast_to_int32(const From &from) {
 33 |     return static_cast<std::int32_t>(cast_to_uint32(from));
 34 | }
 35 | 
 36 | struct AlwaysTrue {
 37 |     inline AlwaysTrue() = default;
 38 |     inline ~AlwaysTrue() = default;
 39 | 
 40 |     inline AlwaysTrue(const AlwaysTrue&) = default;
 41 |     inline AlwaysTrue(AlwaysTrue&&) = default;
 42 |     inline AlwaysTrue &operator =(const AlwaysTrue&) = default;
 43 |     inline AlwaysTrue &operator =(AlwaysTrue&&) = default;
 44 | 
 45 |     template <class T>
 46 |     inline AlwaysTrue(T&&) : AlwaysTrue() {}
 47 | 
 48 |     template <class T>
 49 |     inline bool operator ==(T&&) const { return true; }
 50 | 
 51 |     inline operator bool () const { return true; }
 52 | };
 53 | 
 54 | static inline bool obj_has_iter(const PyObject *obj) {
 55 |     const auto *cls = Py_TYPE(obj);
 56 |     return cls->tp_iter != nullptr;
 57 | }
 58 | 
 59 | constexpr char HEX[] = "0123456789abcdef";
 60 | 
 61 | struct EscapeDct {
 62 |     using Item = std::array<char, 8>;  // length, upto 6 characters, terminator (actually not needed)
 63 |     static constexpr std::size_t length = 0x100;
 64 |     using Items = Item[length];
 65 | 
 66 |     static const Items items;
 67 |     static const std::uint64_t is_escaped_lo;
 68 |     static const std::uint64_t is_escaped_hi;
 69 | 
 70 |     static inline bool is_escaped(std::uint32_t c) {
 71 |         if (c < 0x40) {
 72 |             return (is_escaped_lo & (static_cast<std::uint64_t>(1) << c)) != 0;
 73 |         } else if (c < 0x80) {
 74 |             return (is_escaped_hi & (static_cast<std::uint64_t>(1) << (c - 0x40))) != 0;
 75 |         } else {
 76 |             return true;
 77 |         }
 78 |     }
 79 | 
 80 |     template <class S>
 81 |     static inline std::size_t find_unescaped_range(const S *start, Py_ssize_t length) {
 82 |         Py_ssize_t index = 0;
 83 |         while ((index < length) && !is_escaped(start[index])) {
 84 |             ++index;
 85 |         }
 86 |         return index;
 87 |     }
 88 | };
 89 | 
 90 | static inline bool unicode_is_lo_surrogate(std::uint32_t ch) {
 91 |     return 0xDC00u <= ch && ch <= 0xDFFFu;
 92 | }
 93 | 
 94 | static inline bool unicode_is_hi_surrogate(std::uint32_t ch) {
 95 |     return 0xD800u <= ch && ch <= 0xDBFFu;
 96 | }
 97 | 
 98 | static inline std::uint32_t unicode_join_surrogates(std::uint32_t hi, std::uint32_t lo) {
 99 |     return (((hi & 0x03FFu) << 10) | (lo & 0x03FFu)) + 0x10000u;
100 | }
101 | 
102 | 
103 | template <class T>
104 | struct VoidT_ {
105 |     using Value = void*;
106 | };
107 | 
108 | 
109 | template <typename T>
110 | struct has_ob_shash {
111 |     template <typename C> static std::uint8_t test(typename VoidT_<decltype((std::declval<C>().ob_shash, true))>::Value);
112 |     template <typename C> static std::uint64_t test(...);
113 |     enum { value = sizeof(test<T>(0)) == sizeof(std::uint8_t) };
114 | };
115 | 
116 | template <typename T>
117 | struct has_hash {
118 |     template <typename C> static std::uint8_t test(typename VoidT_<decltype((std::declval<C>().hash, true))>::Value);
119 |     template <typename C> static std::uint64_t test(...);
120 |     enum { value = sizeof(test<T>(0)) == sizeof(std::uint8_t) };
121 | };
122 | 
123 | template<class T, bool ob_shash = has_ob_shash<T>::value, bool hash = has_hash<T>::value>
124 | struct ResetHash_;
125 | 
126 | template<class T>
127 | struct ResetHash_ <T, true, false> {
128 |     static inline void reset(T *obj) {
129 |         obj->ob_shash = -1;  // CPython: str
130 |     }
131 | };
132 | 
133 | template<class T>
134 | struct ResetHash_ <T, false, true> {
135 |     static inline void reset(T *obj) {
136 |         obj->hash = -1;  // CPython: bytes
137 |     }
138 | };
139 | 
140 | template<class T>
141 | struct ResetHash_ <T, false, false> {
142 |     static inline void reset(T *obj) {
143 |         (void) 0;  // PyPy
144 |     }
145 | };
146 | 
147 | template <class T>
148 | static inline void reset_hash(T *obj) {
149 |     ResetHash_<T>::reset(obj);
150 | }
151 | 
152 | 
153 | template <typename T>
154 | struct has_wstr {
155 |     template <typename C> static std::uint8_t test(typename VoidT_<decltype((std::declval<C>().wstr, true))>::Value);
156 |     template <typename C> static std::uint64_t test(...);
157 |     enum { value = sizeof(test<T>(0)) == sizeof(std::uint8_t) };
158 | };
159 | 
160 | template<class T, bool hash = has_wstr<T>::value>
161 | struct ResetWstr_;
162 | 
163 | template<class T>
164 | struct ResetWstr_ <T, true> {
165 |     static inline void reset(T *obj) {
166 |         obj->wstr = nullptr;  // CPython >= 3.12: absent
167 |     }
168 | };
169 | 
170 | template<class T>
171 | struct ResetWstr_ <T, false> {
172 |     static inline void reset(T *) {
173 |         (void) 0;
174 |     }
175 | };
176 | 
177 | template <class T>
178 | static inline void reset_wstr(T *obj) {
179 |     ResetWstr_<T>::reset(obj);
180 | }
181 | 
182 | template <typename T>
183 | struct has_ready {
184 |     template <typename C> static std::uint8_t test(typename VoidT_<decltype((std::declval<C>().state.ready, true))>::Value);
185 |     template <typename C> static std::uint64_t test(...);
186 |     enum { value = sizeof(test<T>(0)) == sizeof(std::uint8_t) };
187 | };
188 | 
189 | template<class T, bool hash = has_ready<T>::value>
190 | struct SetReady_;
191 | 
192 | template<class T>
193 | struct SetReady_ <T, true> {
194 |     static inline void set(T *obj) {
195 |         obj->state.ready = true;  // CPython >= 3.12: absent
196 |     }
197 | };
198 | 
199 | template<class T>
200 | struct SetReady_ <T, false> {
201 |     static inline void set(T *) {
202 |         (void) 0;
203 |     }
204 | };
205 | 
206 | template <class T>
207 | static inline void set_ready(T *obj) {
208 |     SetReady_<T>::set(obj);
209 | }
210 | 
211 | 
212 | static int iter_next(PyObject *iterator, PyObject **value) {
213 |     Py_XDECREF(*value);
214 |     PyObject *v = PyIter_Next(iterator);
215 |     *value = v;
216 |     if (v) {
217 |         return true;
218 |     } else if (!PyErr_Occurred()) {
219 |         return 0;
220 |     } else {
221 |         return -1;
222 |     }
223 | }
224 | 
225 | static inline AlwaysTrue exception_thrown() {
226 |     return true;
227 | }
228 | 
229 | // https://stackoverflow.com/a/65258501/416224
230 | #ifdef __GNUC__ // GCC 4.8+, Clang, Intel and other compilers compatible with GCC (-std=c++0x or above)
231 |     [[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); }
232 | #elif defined(_MSC_VER) // MSVC
233 |     [[noreturn]] __forceinline void unreachable() { __assume(false); }
234 | #else // ???
235 |     inline void unreachable() {}
236 | #endif
237 | 
238 | #include "./_escape_dct.hpp"
239 | 
240 | const EscapeDct ESCAPE_DCT;
241 | 
242 | const char VERSION[] =
243 | #   include "./VERSION.inc"
244 | ;
245 | static constexpr std::size_t VERSION_LENGTH = sizeof(VERSION) - 1;
246 | 
247 | const char LONGDESCRIPTION[] =
248 | #   include "./DESCRIPTION.inc"
249 | ;
250 | static constexpr std::size_t LONGDESCRIPTION_LENGTH = sizeof(LONGDESCRIPTION) - 1;
251 | 
252 | #ifdef __GNUC__
253 | #   define JSON5EncoderCpp_expect(cond, likely) __builtin_expect(!!(cond), !!(likely))
254 | #else
255 | #   define JSON5EncoderCpp_expect(cond, likely) !!(cond)
256 | #endif
257 | 
258 | }
259 | 
260 | #endif
261 | 


--------------------------------------------------------------------------------
/src/pyjson5/__init__.py:
--------------------------------------------------------------------------------
1 | from .pyjson5 import *
2 | from .pyjson5 import __all__, __author__, __doc__, __license__, __version__
3 | 


--------------------------------------------------------------------------------
/src/pyjson5/__init__.pyi:
--------------------------------------------------------------------------------
  1 | from typing import (
  2 |     Any,
  3 |     Callable,
  4 |     final,
  5 |     Final,
  6 |     Iterable,
  7 |     Literal,
  8 |     Optional,
  9 |     overload,
 10 |     Protocol,
 11 |     Tuple,
 12 |     TypeVar,
 13 |     Union,
 14 | )
 15 | 
 16 | _Data = TypeVar("_Data")
 17 | 
 18 | class _SupportsRead(Protocol):
 19 |     def read(self, size: int = ...) -> str: ...
 20 | 
 21 | class _SupportsWrite(Protocol[_Data]):
 22 |     def write(self, s: _Data) -> Any: ...
 23 | 
 24 | _CallbackStr = TypeVar("_CallbackStr", bound=Callable[[str], None])
 25 | 
 26 | _CallbackBytes = TypeVar("_CallbackBytes", bound=Callable[[bytes], None])
 27 | 
 28 | _SupportsWriteBytes = TypeVar("_SupportsWriteBytes", bound=_SupportsWrite[bytes])
 29 | 
 30 | _SupportsWriteStr = TypeVar("_SupportsWriteStr", bound=_SupportsWrite[str])
 31 | 
 32 | ###############################################################################
 33 | ### _exports.pyx
 34 | ###############################################################################
 35 | 
 36 | @final
 37 | class Options:
 38 |     """Customizations for the ``encoder_*(...)`` function family."""
 39 | 
 40 |     quotationmark: Final[str] = ...
 41 |     tojson: Final[Optional[str]] = ...
 42 |     mappingtypes: Final[Tuple[type, ...]] = ...
 43 | 
 44 |     def __init__(
 45 |         self,
 46 |         *,
 47 |         quotationmark: Optional[str] = ...,
 48 |         tojson: Optional[str] = ...,
 49 |         mappingtypes: Optional[Tuple[type, ...]] = ...,
 50 |     ) -> None: ...
 51 |     def update(
 52 |         self,
 53 |         *,
 54 |         quotationmark: Optional[str] = ...,
 55 |         tojson: Optional[str] = ...,
 56 |         mappingtypes: Optional[Tuple[type, ...]] = ...,
 57 |     ) -> Options:
 58 |         """Creates a new Options instance by modifying some members."""
 59 | 
 60 | def decode(data: str, maxdepth: Optional[int] = ..., some: bool = ...) -> Any:
 61 |     """Decodes JSON5 serialized data from an ``str`` object."""
 62 | 
 63 | def decode_latin1(
 64 |     data: bytes,
 65 |     maxdepth: Optional[int] = ...,
 66 |     some: bool = ...,
 67 | ) -> Any:
 68 |     """Decodes JSON5 serialized data from a ``bytes`` object."""
 69 | 
 70 | def decode_utf8(
 71 |     data: bytes,
 72 |     maxdepth: Optional[int] = ...,
 73 |     some: bool = ...,
 74 | ) -> Any:
 75 |     """Decodes JSON5 serialized data from a ``bytes`` object."""
 76 | 
 77 | def decode_buffer(
 78 |     data: bytes,
 79 |     maxdepth: Optional[int] = ...,
 80 |     some: bool = ...,
 81 |     wordlength: Optional[int] = ...,
 82 | ) -> Any:
 83 |     """Decodes JSON5 serialized data from an object that supports the buffer protocol, e.g. bytearray."""
 84 | 
 85 | def decode_callback(
 86 |     cb: Callable[..., Union[str, bytes, bytearray, int, None]],
 87 |     maxdepth: Optional[int] = ...,
 88 |     some: bool = ...,
 89 |     args: Optional[Iterable[Any]] = ...,
 90 | ) -> Any:
 91 |     """Decodes JSON5 serialized data by invoking a callback."""
 92 | 
 93 | def decode_io(
 94 |     fp: _SupportsRead,
 95 |     maxdepth: Optional[int] = ...,
 96 |     some: bool = ...,
 97 | ) -> Any:
 98 |     """Decodes JSON5 serialized data from a file-like object."""
 99 | 
100 | def encode(
101 |     data: Any,
102 |     *,
103 |     options: Optional[Options] = ...,
104 |     quotationmark: Optional[str] = ...,
105 |     tojson: Optional[str] = ...,
106 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
107 | ) -> str:
108 |     """Serializes a Python object to a JSON5 compatible string."""
109 |     ...
110 | 
111 | def encode_bytes(
112 |     data: Any,
113 |     *,
114 |     options: Optional[Options] = ...,
115 |     quotationmark: Optional[str] = ...,
116 |     tojson: Optional[str] = ...,
117 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
118 | ) -> bytes:
119 |     """Serializes a Python object to a JSON5 compatible bytes string."""
120 | 
121 | @overload
122 | def encode_callback(
123 |     data: Any,
124 |     cb: _CallbackStr,
125 |     supply_bytes: Literal[False] = ...,
126 |     *,
127 |     options: Optional[Options] = ...,
128 |     quotationmark: Optional[str] = ...,
129 |     tojson: Optional[str] = ...,
130 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
131 | ) -> _CallbackStr:
132 |     """Serializes a Python object into a callback function."""
133 | 
134 | @overload
135 | def encode_callback(
136 |     data: Any,
137 |     cb: _CallbackBytes,
138 |     supply_bytes: Literal[True],
139 |     *,
140 |     options: Optional[Options] = ...,
141 |     quotationmark: Optional[str] = ...,
142 |     tojson: Optional[str] = ...,
143 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
144 | ) -> _CallbackBytes: ...
145 | @overload
146 | def encode_io(
147 |     data: Any,
148 |     fp: _SupportsWriteBytes,
149 |     supply_bytes: Literal[True] = ...,
150 |     *,
151 |     options: Optional[Options] = ...,
152 |     quotationmark: Optional[str] = ...,
153 |     tojson: Optional[str] = ...,
154 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
155 | ) -> _SupportsWriteBytes:
156 |     """Serializes a Python object into a file-object."""
157 | 
158 | @overload
159 | def encode_io(
160 |     data: Any,
161 |     fp: _SupportsWriteStr,
162 |     supply_bytes: Literal[False],
163 |     *,
164 |     options: Optional[Options] = ...,
165 |     quotationmark: Optional[str] = ...,
166 |     tojson: Optional[str] = ...,
167 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
168 | ) -> _SupportsWriteStr: ...
169 | def encode_noop(
170 |     data: Any,
171 |     *,
172 |     options: Optional[Options] = ...,
173 |     quotationmark: Optional[str] = ...,
174 |     tojson: Optional[str] = ...,
175 |     mappingtypes: Optional[Tuple[type, ...]] = ...,
176 | ) -> bool:
177 |     """Test if the input is serializable."""
178 | 
179 | ###############################################################################
180 | ### _legacy.pyx
181 | ###############################################################################
182 | 
183 | def loads(s: str, *, encoding: str = ...) -> Any:
184 |     """Decodes JSON5 serialized data from a string."""
185 | 
186 | def load(fp: _SupportsRead) -> Any:
187 |     """Decodes JSON5 serialized data from a file-like object."""
188 | 
189 | def dumps(obj: Any) -> str:
190 |     """Serializes a Python object to a JSON5 compatible string."""
191 | 
192 | def dump(obj: Any, fp: _SupportsWrite[str]) -> None:
193 |     """Serializes a Python object to a JSON5 compatible string."""
194 | 
195 | ###############################################################################
196 | ### _exceptions.pyx
197 | ###############################################################################
198 | 
199 | class Json5Exception(Exception):
200 |     """Base class of any exception thrown by PyJSON5."""
201 | 
202 |     def __init__(self, message: Optional[str] = ..., *args: Any) -> None: ...
203 |     @property
204 |     def message(self) -> Optional[str]: ...
205 | 
206 | ###############################################################################
207 | ### _exceptions_encoder.pyx
208 | ###############################################################################
209 | 
210 | class Json5EncoderException(Json5Exception):
211 |     """Base class of any exception thrown by the serializer."""
212 | 
213 | @final
214 | class Json5UnstringifiableType(Json5EncoderException):
215 |     """The encoder was not able to stringify the input, or it was told not to by the supplied ``Options``."""
216 | 
217 |     def __init__(
218 |         self,
219 |         message: Optional[str] = ...,
220 |         unstringifiable: Any = ...,
221 |     ) -> None: ...
222 |     @property
223 |     def unstringifiable(self) -> Any:
224 |         """The value that caused the problem."""
225 | 
226 | ###############################################################################
227 | ### _exceptions_decoder.pyx
228 | ###############################################################################
229 | 
230 | class Json5DecoderException(Json5Exception):
231 |     """Base class of any exception thrown by the parser."""
232 | 
233 |     def __init__(
234 |         self,
235 |         message: Optional[str] = ...,
236 |         result: Any = ...,
237 |         *args: Any,
238 |     ) -> None: ...
239 |     @property
240 |     def result(self) -> Any:
241 |         """Deserialized data up until now."""
242 | 
243 | @final
244 | class Json5NestingTooDeep(Json5DecoderException):
245 |     """The maximum nesting level on the input data was exceeded."""
246 | 
247 | @final
248 | class Json5EOF(Json5DecoderException):
249 |     """The input ended prematurely."""
250 | 
251 | @final
252 | class Json5IllegalCharacter(Json5DecoderException):
253 |     """An unexpected character was encountered."""
254 | 
255 |     def __init__(
256 |         self,
257 |         message: Optional[str] = ...,
258 |         result: Any = ...,
259 |         character: Optional[str] = ...,
260 |         *args: Any,
261 |     ) -> None: ...
262 |     @property
263 |     def character(self) -> Optional[str]:
264 |         """Illegal character."""
265 | 
266 | @final
267 | class Json5ExtraData(Json5DecoderException):
268 |     """The input contained extranous data."""
269 | 
270 |     def __init__(
271 |         self,
272 |         message: Optional[str] = ...,
273 |         result: Any = ...,
274 |         character: Optional[str] = ...,
275 |         *args: Any,
276 |     ) -> None: ...
277 |     @property
278 |     def character(self) -> Optional[str]:
279 |         """Extranous character."""
280 | 
281 | @final
282 | class Json5IllegalType(Json5DecoderException):
283 |     """The user supplied callback function returned illegal data."""
284 | 
285 |     def __init__(
286 |         self,
287 |         message: Optional[str] = ...,
288 |         result: Any = ...,
289 |         value: Optional[Any] = ...,
290 |         *args: Any,
291 |     ) -> None: ...
292 |     @property
293 |     def value(self) -> Optional[Any]:
294 |         """Value that caused the problem."""
295 | 


--------------------------------------------------------------------------------
/src/pyjson5/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kijewski/pyjson5/574b56f322e497f041ed16ba2f715f2cc612f0a8/src/pyjson5/py.typed


--------------------------------------------------------------------------------