├── .gitattributes
├── .github
├── dependabot.yml
└── workflows
│ ├── ci.yml
│ ├── codeql-analysis.yml
│ └── wheels.yml
├── .gitignore
├── .gitmodules
├── .readthedocs.yaml
├── CHANGELOG.md
├── DerivedGeneralCategory.txt.sha
├── LICENSE.Apache
├── LICENSE.MIT
├── MANIFEST.in
├── Makefile
├── README.rst
├── docs
├── changelog.md
├── conf.py
├── decoder.rst
├── encoder.rst
├── exceptions.rst
├── index.rst
└── performance.rst
├── pyjson5.pyx
├── pyproject.toml
├── requirements-dev.txt
├── requirements-readthedocs.txt
├── scripts
├── make_decoder_recursive_select.py
├── make_escape_dct.py
├── make_unicode_categories.py
├── run-minefield-test.py
├── run-tests.py
├── sha512sum.py
└── transcode-to-json.py
├── setup.cfg
├── setup.py
└── src
├── DESCRIPTION.inc
├── VERSION.inc
├── _constants.pyx
├── _decoder.pyx
├── _decoder_recursive_select.hpp
├── _encoder.pyx
├── _encoder_options.pyx
├── _escape_dct.hpp
├── _exceptions.pyx
├── _exceptions_decoder.pyx
├── _exceptions_encoder.pyx
├── _exports.pyx
├── _imports.pyx
├── _legacy.pyx
├── _raise_decoder.pyx
├── _raise_encoder.pyx
├── _reader_callback.pyx
├── _reader_ucs.pyx
├── _readers.pyx
├── _stack_heap_string.hpp
├── _unicode.pyx
├── _unicode_cat_of.hpp
├── _writer_callback.pyx
├── _writer_noop.pyx
├── _writer_reallocatable.pyx
├── _writers.pyx
├── dragonbox.cc
├── native.hpp
└── pyjson5
├── __init__.py
├── __init__.pyi
└── py.typed
/.gitattributes:
--------------------------------------------------------------------------------
1 | src/dragonbox.cc linguist-vendored
2 | src/*.hpp linguist-vendored
3 | third-party/** linguist-vendored
4 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "pip"
4 | directory: "/"
5 | schedule:
6 | interval: "daily"
7 |
8 | - package-ecosystem: "github-actions"
9 | directory: "/"
10 | schedule:
11 | interval: "daily"
12 |
13 | - package-ecosystem: "gitsubmodule"
14 | directory: "/"
15 | schedule:
16 | interval: "daily"
17 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ${{ matrix.os }}
8 | strategy:
9 | matrix:
10 | os:
11 | - ubuntu-latest
12 | - macos-latest
13 | - windows-latest
14 | python:
15 | - '3.10'
16 | - '3.13'
17 |
18 | steps:
19 | - uses: actions/checkout@v4
20 | with:
21 | submodules: true
22 |
23 | - name: Cache pip
24 | uses: actions/cache@v4
25 | with:
26 | key: cache--${{ matrix.os }}--${{ matrix.python }}--${{ hashFiles('./requirements*.txt', './Makefile') }}
27 | restore-keys: cache--${{ matrix.os }}--${{ matrix.python }}--
28 | path: ~/.cache/pip
29 |
30 | - name: Setup python
31 | uses: actions/setup-python@v5
32 | with:
33 | python-version: ${{ matrix.python }}
34 |
35 | - name: Display Python version
36 | run: python -c 'import sys; print(sys.version)'
37 |
38 | - name: Update pip
39 | run: python -m pip install -U pip wheel setuptools
40 |
41 | - name: Install requirements
42 | run: python -m pip install -Ur requirements-dev.txt
43 |
44 | - name: Compile project
45 | run: make install
46 |
47 | - name: Run JSON5 tests suite
48 | run: python scripts/run-tests.py
49 |
50 | - name: Run "JSON is a Minefield" suite
51 | run: python scripts/run-minefield-test.py
52 |
53 | lint:
54 | runs-on: ubuntu-latest
55 |
56 | steps:
57 | - uses: actions/checkout@v4
58 | with:
59 | submodules: true
60 |
61 | - name: Cache pip
62 | uses: actions/cache@v4
63 | with:
64 | key: lint--${{ hashFiles('./requirements*.txt', './Makefile') }}
65 | restore-keys: lint--
66 | path: ~/.cache/pip
67 |
68 | - name: Setup python
69 | uses: actions/setup-python@v5
70 | with:
71 | python-version: '3.13'
72 |
73 | - name: Display Python version
74 | run: python -c 'import sys; print(sys.version)'
75 |
76 | - name: Update pip
77 | run: python -m pip install -U pip wheel setuptools
78 |
79 | - name: Install requirements
80 | run: python -m pip install -Ur requirements-dev.txt
81 |
82 | - name: Compile project
83 | run: make install
84 |
--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 | schedule:
9 | - cron: '41 21 * * 3'
10 |
11 | jobs:
12 | analyze:
13 | name: Analyze
14 | runs-on: ubuntu-latest
15 | permissions:
16 | actions: read
17 | contents: read
18 | security-events: write
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | language: [ 'cpp', 'python' ]
24 |
25 | steps:
26 | - name: Checkout repository
27 | uses: actions/checkout@v4
28 | with:
29 | submodules: true
30 |
31 | - name: Cache pip
32 | uses: actions/cache@v4
33 | with:
34 | key: codeql-analysis--${{ github.event.inputs.os }}--${{ github.event.inputs.python }}--${{ hashFiles('./requirements-dev.txt') }}
35 | path: ~/.cache/pip
36 |
37 | - name: Setup python
38 | uses: actions/setup-python@v5
39 | with:
 40 |           python-version: '3.13'
41 |
42 | - name: Initialize CodeQL
43 | uses: github/codeql-action/init@v3
44 | with:
45 | languages: ${{ matrix.language }}
46 |
47 | - name: Update pip
48 | run: python -m pip install -U pip wheel setuptools
49 |
50 | - name: Install requirements
51 | run: python -m pip install -Ur requirements-dev.txt
52 |
53 | - name: Compile
54 | run: make wheel
55 |
56 | - name: Perform CodeQL Analysis
57 | uses: github/codeql-action/analyze@v3
58 |
--------------------------------------------------------------------------------
/.github/workflows/wheels.yml:
--------------------------------------------------------------------------------
1 | name: Build Wheels
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | linux-define-matrix:
8 | runs-on: ubuntu-latest
9 | outputs:
10 | matrix: ${{ steps.set-matrix.outputs.matrix }}
11 | steps:
12 | - uses: actions/checkout@v4
13 | with:
14 | submodules: true
15 |
16 | - name: Setup python
17 | uses: actions/setup-python@v5
18 | with:
19 | python-version: '3.13'
20 |
21 | - run: python -m pip install -U pip wheel setuptools
22 | - run: python -m pip install -U 'cibuildwheel==2.*'
23 |
24 | - id: set-matrix
25 | run: |
26 | TARGETS="$(python -m cibuildwheel --archs "x86_64 i686 aarch64 ppc64le s390x armv7l" --print-build-identifiers)"
27 | echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT
28 | shell: bash
29 | env:
30 | CIBW_BUILD_FRONTEND: build
31 | CIBW_SKIP: 'cp27-* cp36-* pp*'
32 | CIBW_DEPENDENCY_VERSIONS: pinned
33 | CIBW_PLATFORM: linux
34 |
35 | macos-define-matrix:
36 | runs-on: macos-13
37 | outputs:
38 | matrix: ${{ steps.set-matrix.outputs.matrix }}
39 | steps:
40 | - uses: actions/checkout@v4
41 | with:
42 | submodules: true
43 |
44 | - name: Setup python
45 | uses: actions/setup-python@v5
46 | with:
47 | python-version: '3.13'
48 |
49 | - run: python -m pip install -U pip wheel setuptools
50 | - run: python -m pip install -U 'cibuildwheel==2.*'
51 |
52 | - id: set-matrix
53 | run: |
54 | TARGETS="$(python -m cibuildwheel --archs "x86_64 arm64 universal2" --print-build-identifiers)"
55 | echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT
56 | shell: bash
57 | env:
58 | CIBW_BUILD_FRONTEND: build
59 | CIBW_SKIP: 'cp27-* cp36-* pp*'
60 | CIBW_DEPENDENCY_VERSIONS: pinned
61 | CIBW_PLATFORM: macos
62 |
63 | windows-define-matrix:
64 | runs-on: windows-2022
65 | outputs:
66 | matrix: ${{ steps.set-matrix.outputs.matrix }}
67 | steps:
68 | - uses: actions/checkout@v4
69 | with:
70 | submodules: true
71 |
72 | - name: Setup python
73 | uses: actions/setup-python@v5
74 | with:
75 | python-version: '3.13'
76 |
77 | - run: python -m pip install -U pip wheel setuptools
78 | - run: python -m pip install -U 'cibuildwheel==2.*'
79 |
80 | - id: set-matrix
81 | run: |
82 | TARGETS="$(python -m cibuildwheel --archs "AMD64 x86 ARM64" --print-build-identifiers)"
83 | echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT
84 | shell: bash
85 | env:
86 | CIBW_BUILD_FRONTEND: build
87 | CIBW_SKIP: 'cp27-* cp36-* pp*'
88 | CIBW_DEPENDENCY_VERSIONS: pinned
89 | CIBW_PLATFORM: windows
90 |
91 | linux-build:
92 | runs-on: ubuntu-latest
93 |
94 | needs:
95 | - linux-define-matrix
96 | strategy:
97 | matrix:
98 | only: ${{ fromJSON(needs.linux-define-matrix.outputs.matrix) }}
99 |
100 | steps:
101 | - uses: actions/checkout@v4
102 | with:
103 | submodules: true
104 |
105 | - name: Set up QEMU
106 | uses: docker/setup-qemu-action@v3
107 | with:
108 | platforms: all
109 |
110 | - name: Cache pip
111 | uses: actions/cache@v4
112 | with:
113 | key: linux--${{ hashFiles('./requirements-dev.txt') }}
114 | path: ~/.cache/pip
115 |
116 | - name: Setup python
117 | uses: actions/setup-python@v5
118 | with:
119 | python-version: '3.13'
120 |
121 | - run: python -m pip install -U pip wheel setuptools
122 | - run: python -m pip install -Ur requirements-dev.txt
123 | - run: python -m pip install -U 'cibuildwheel==2.*'
124 |
125 | - run: make prepare
126 |
127 | - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }}
128 | env:
129 | CIBW_BUILD_FRONTEND: build
130 | CIBW_SKIP: 'cp27-* pp*'
131 | CIBW_DEPENDENCY_VERSIONS: pinned
132 | CIBW_PLATFORM: linux
133 | CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py
134 |
135 | - uses: actions/upload-artifact@v4
136 | with:
137 | name: ${{ matrix.only }}
138 | path: ./wheelhouse
139 | retention-days: 1
140 |
141 | macos-build:
142 | runs-on: macos-13
143 |
144 | needs:
145 | - macos-define-matrix
146 | strategy:
147 | matrix:
148 | only: ${{ fromJSON(needs.macos-define-matrix.outputs.matrix) }}
149 |
150 | steps:
151 | - uses: actions/checkout@v4
152 | with:
153 | submodules: true
154 |
155 | - name: Cache pip
156 | uses: actions/cache@v4
157 | with:
158 |           key: macos--${{ hashFiles('./requirements-dev.txt') }}
159 | path: ~/.cache/pip
160 |
161 | - name: Setup python
162 | uses: actions/setup-python@v5
163 | with:
164 | python-version: '3.13'
165 |
166 | - run: python -m pip install -U pip wheel setuptools
167 | - run: python -m pip install -Ur requirements-dev.txt
168 | - run: python -m pip install -U 'cibuildwheel==2.*'
169 |
170 | - run: make prepare
171 |
172 | - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }}
173 | env:
174 | CIBW_BUILD_FRONTEND: build
175 | CIBW_SKIP: 'cp27-* pp*'
176 | CIBW_DEPENDENCY_VERSIONS: pinned
177 | CIBW_PLATFORM: macos
178 | CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py
179 |
180 | - uses: actions/upload-artifact@v4
181 | with:
182 | name: ${{ matrix.only }}
183 | path: ./wheelhouse
184 | retention-days: 1
185 |
186 | windows-build:
187 | runs-on: windows-2022
188 |
189 | needs:
190 | - windows-define-matrix
191 | strategy:
192 | matrix:
193 | only: ${{ fromJSON(needs.windows-define-matrix.outputs.matrix) }}
194 |
195 | steps:
196 | - uses: actions/checkout@v4
197 | with:
198 | submodules: true
199 |
200 | - name: Cache pip
201 | uses: actions/cache@v4
202 | with:
203 | key: windows--${{ hashFiles('./requirements-dev.txt') }}
204 | path: ~/.cache/pip
205 |
206 | - name: Setup python
207 | uses: actions/setup-python@v5
208 | with:
209 | python-version: '3.13'
210 |
211 | - run: python -m pip install -U pip wheel setuptools
212 | - run: python -m pip install -Ur requirements-dev.txt
213 | - run: python -m pip install -U 'cibuildwheel==2.*'
214 |
215 | - run: make prepare
216 |
217 | - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }}
218 | env:
219 | CIBW_BUILD_FRONTEND: build
220 | CIBW_SKIP: 'cp27-* pp*'
221 | CIBW_DEPENDENCY_VERSIONS: pinned
222 | CIBW_PLATFORM: windows
223 | CIBW_TEST_COMMAND: python {project}/scripts/run-tests.py
224 |
225 | - uses: actions/upload-artifact@v4
226 | with:
227 | name: ${{ matrix.only }}
228 | path: ./wheelhouse
229 | retention-days: 1
230 |
231 | combine:
232 | runs-on: ubuntu-latest
233 | needs:
234 | - linux-build
235 | - macos-build
236 | - windows-build
237 | steps:
238 | - uses: actions/download-artifact@v4
239 | with:
240 | path: ./wheelhouse
241 | - run: |
242 | find -name '*.zip' -exec unzip '{}' ';'
243 | find -name '*.zip' -exec rm '{}' +
244 | find -name '*.whl' -exec mv -t. '{}' +
245 | find -type d -delete
246 | shell: bash
247 | working-directory: ./wheelhouse
248 | - uses: actions/upload-artifact@v4
249 | with:
250 | name: wheelhouse
251 | path: ./wheelhouse
252 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cdo]
2 | *.egg-info/
3 |
4 | /env*/
5 | /build/
6 | /cython_debug/
7 | /dist/
8 |
9 | *.c
10 | *.cpp
11 | *.so
12 | *.o
13 |
14 | run.cgi
15 |
16 | *.swp*
17 | *.nfs*
18 | *~
19 | *.~*
20 | ~*
21 | *.tmp
22 | *.old
23 | *.bak
24 | *.pid
25 |
26 | .*
27 | !.git*
28 | !.readthedocs.yaml
29 |
30 | *.orig
31 |
32 | /DerivedGeneralCategory.txt
33 | /citylots.json
34 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "third-party/json5-tests"]
2 | path = third-party/json5-tests
3 | url = https://github.com/json5/json5-tests.git
4 | [submodule "third-party/JSONTestSuite"]
5 | path = third-party/JSONTestSuite
6 | url = https://github.com/nst/JSONTestSuite.git
7 | [submodule "third-party/fast_double_parser"]
8 | path = third-party/fast_double_parser
9 | url = https://github.com/lemire/fast_double_parser.git
10 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
2 |
3 | version: 2
4 |
5 | build:
6 | os: ubuntu-24.04
7 | apt_packages:
8 | - graphviz
9 | tools:
10 | python: "3.13"
11 |
12 | sphinx:
13 | configuration: docs/conf.py
14 |
15 | python:
16 | install:
17 | - requirements: requirements-readthedocs.txt
18 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | **1.6.9 (2025-05-12)**
4 |
5 | * Remove unused import to fix installation on Termux (by veka0, [#105](https://github.com/Kijewski/pyjson5/pull/105))
6 |
7 | **1.6.8 (2025-01-03)**
8 |
9 | * Requires at least Python 3.7
10 | * Update dependencies
11 | * Relicense to MIT OR Apache-2.0
12 |
13 | **1.6.7 (2024-10-08)**
14 |
15 | * Update to Unicode 16.0.0
16 | * Update for Python 3.13
17 |
18 | **1.6.6 (2024-02-09)**
19 |
20 | * Fix return type of `load()` (by Q-ten, [#88](https://github.com/Kijewski/pyjson5/pull/88))
21 |
22 | **1.6.5 (2023-12-04)**
23 |
24 | * Fix type hints for optional arguments
25 |
26 | **1.6.4 (2023-07-31)**
27 |
28 | * Update to Cython 3
29 | * Update for Python 3.12
30 |
31 | **1.6.3 (2023-06-24)**
32 |
33 | * Fix typing for `dump()` ([#61](https://github.com/Kijewski/pyjson5/issues/61))
34 |
35 | **1.6.2 (2022-09-15)**
36 |
37 | * Update to Unicode 15.0.0
38 |
39 | **1.6.1 (2022-01-18)**
40 |
41 | * Fix [PEP 517](https://www.python.org/dev/peps/pep-0517/)-like installation using [build](https://github.com/pypa/build) (by [Tomasz Kłoczko](https://github.com/kloczek))
42 |
43 | **1.6.0 (2021-11-17)**
44 |
45 | * Fallback to encode `vars(obj)` if `obj` is not stringifyable, e.g. to serialize [dataclasses](https://docs.python.org/3/library/dataclasses.html)
46 | * Update documentation to use newer [sphinx](https://www.sphinx-doc.org/) version
47 | * Use [dependabot](https://github.com/dependabot) to keep dependencies current
48 | * Update [fast_double_parser](https://github.com/lemire/fast_double_parser)
49 |
50 | **1.5.3 (2021-11-16)**
51 |
52 | * Add [PEP 484](https://www.python.org/dev/peps/pep-0484/) type hints (by [Pascal Corpet](https://github.com/pcorpet))
53 | * Update [JSONTestSuite](https://github.com/nst/JSONTestSuite)
54 |
55 | **1.5.2 (2021-07-09)**
56 |
57 | * Add file extensions to fix compilation with current Apple SDKs
58 | * Update fast_double_parser to v0.5.0
59 | * Update to Unicode 14.0.0d18
60 |
61 | **1.5.1 (2021-05-01)**
62 |
 63 | * Update to Unicode 14.0.0d9
64 |
65 | **1.5.0 (2021-03-11)**
66 |
67 | * Faster floating-point number encoding using [Junekey Jeon's Dragonbox algorithm](https://github.com/abolz/Drachennest/blob/77f4889a4cd9d7f0b9da82a379f14beabcfba13e/src/dragonbox.cc) implemented by Alexander Bolz
68 | * Removed a lot of configuration options from pyjson5.Options()
69 |
70 | **1.4.9 (2021-03-03)**
71 |
72 | * Faster floating-point number decoding using [fast_double_parser](https://github.com/lemire/fast_double_parser) by Daniel Lemire
73 |
74 | **1.4.8 (2020-12-23)**
75 |
 76 | * Update to Unicode 13.0.0
77 | * Don't use non-standard ``__uint128``
78 | * Add PyPy compatibility
79 | * Add ``decode_utf8(byte-like)``
80 |
81 | **1.4.7 (2019-12-20)**
82 |
83 | * Allow ``\uXXXX`` sequences in identifier names
84 | * Update to Unicode 12.1.0
85 | * Optimized encoder and decoder for a little better speed
86 | * Setup basic CI environment
87 | * Parse ``\uXXXX`` in literal keys
88 | * Understand "0."
89 | * Add CI tests
90 | * Reject unescaped newlines in strings per spec
91 | * Allow overriding default quotation mark
92 | * Make Options objects pickle-able
93 | * Bump major version number
94 |
95 | **0.4.6 (2019-02-09)**
96 |
97 | * Fix PyUnicode_AsUTF8AndSize()'s signature
98 |
99 | **0.4.5 (2018-06-02)**
100 |
101 | * Don't use C++14 features, only C++11
102 |
103 | **0.4.4 (2018-05-19)**
104 |
105 | * Better documentation
106 | * Optimized encoder for a little better speed
107 |
108 | **0.4.3 (2018-05-03)**
109 |
110 | * Initial release
111 |
--------------------------------------------------------------------------------
/DerivedGeneralCategory.txt.sha:
--------------------------------------------------------------------------------
1 | 993261c82681a5580aaa42c6184d61a289a1eaa48022fded929d00487066b0ed1014f35cbc0890c6db5f3cbf8ca51dd99362e088aceedf548cfb3cc8d72bb14e DerivedGeneralCategory.txt
2 |
--------------------------------------------------------------------------------
/LICENSE.Apache:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/LICENSE.MIT:
--------------------------------------------------------------------------------
1 | Permission is hereby granted, free of charge, to any person obtaining a copy
2 | of this software and associated documentation files (the "Software"), to deal
3 | in the Software without restriction, including without limitation the rights
4 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
5 | copies of the Software, and to permit persons to whom the Software is
6 | furnished to do so, subject to the following conditions:
7 |
8 | The above copyright notice and this permission notice shall be included in all
9 | copies or substantial portions of the Software.
10 |
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | SOFTWARE.
18 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include CHANGELOG.md
2 | include LICENSE*
3 | include Makefile
4 | include pyjson5.cpp
5 | include pyjson5.pyx
6 | include pyproject.toml
7 | include third-party/fast_double_parser/include/fast_double_parser.h
8 | include requirements*.txt
9 | recursive-include docs **
10 | recursive-include scripts **
11 | recursive-include src **
12 | recursive-include third-party/json5-tests **
13 | recursive-include third-party/JSONTestSuite/test_parsing **
14 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: sdist wheel docs
2 |
3 | .DELETE_ON_ERROR:
4 |
5 | .PHONY: all sdist wheel clean docs prepare test install
6 |
7 | export PYTHONUTF8 := 1
8 | export PYTHONIOENCODING := UTF-8
9 |
10 | INCLUDES := \
11 | src/VERSION.inc src/DESCRIPTION.inc \
12 | src/_decoder_recursive_select.hpp src/_unicode_cat_of.hpp \
13 | src/_escape_dct.hpp src/_stack_heap_string.hpp src/native.hpp \
14 | src/dragonbox.cc
15 |
16 | FILES := Makefile MANIFEST.in pyjson5.pyx README.rst pyproject.toml ${INCLUDES}
17 |
18 | DerivedGeneralCategory.txt: DerivedGeneralCategory.txt.sha
19 | curl -s -o $@ https://www.unicode.org/Public/16.0.0/ucd/extracted/DerivedGeneralCategory.txt
20 | python scripts/sha512sum.py -c $@.sha
21 |
22 | src/_unicode_cat_of.hpp: DerivedGeneralCategory.txt scripts/make_unicode_categories.py
23 | python scripts/make_unicode_categories.py $< $@
24 |
25 | src/_decoder_recursive_select.hpp: scripts/make_decoder_recursive_select.py
26 | python $< $@
27 |
28 | src/_escape_dct.hpp: scripts/make_escape_dct.py
29 | python $< $@
30 |
31 | pyjson5.cpp: pyjson5.pyx $(wildcard src/*.pyx) $(wildcard src/*.hpp)
32 | python -m cython -f -o $@ $<
33 |
34 | prepare: pyjson5.cpp ${FILES}
35 |
36 | sdist: prepare
37 | -rm -- dist/pyjson5-*.tar.gz
38 | python -m build --sdist
39 |
40 | wheel: prepare
41 | -rm -- dist/pyjson5-*.whl
42 | python -m build --wheel
43 |
44 | install: wheel
45 | pip install --force dist/pyjson5-*.whl
46 |
47 | docs: install $(wildcard docs/* docs/*/*)
48 | python -m sphinx -M html docs/ dist/
49 |
50 | clean:
51 | [ ! -d build/ ] || rm -r -- build/
52 | [ ! -d dist/ ] || rm -r -- dist/
53 | [ ! -d pyjson5.egg-info/ ] || rm -r -- pyjson5.egg-info/
54 | 	-rm -- pyjson5.*.so pyjson5.cpp
55 |
56 | test: wheel
57 | pip install --force dist/pyjson5-*.whl
58 | python scripts/run-minefield-test.py
59 | python scripts/run-tests.py
60 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | PyJSON5
2 | ==========
3 |
4 | A JSON5 serializer and parser library for Python 3 written in
5 | `Cython `_.
6 |
7 |
8 | Serializer
9 | ----------
10 |
11 | The serializer returns ASCII data that can safely be used in an HTML template.
12 | Apostrophes, ampersands, greater-than, and less-than signs are encoded as
13 | unicode escaped sequences. E.g. this snippet is safe for any and all input:
14 |
15 | .. code:: html
16 |
17 | "show message"
18 |
19 | Unless the input contains infinite or NaN values, the result will be valid
20 | `JSON `_ data.
21 |
22 |
23 | Parser
24 | ------
25 |
26 | All valid `JSON5 1.0.0 `_ and
27 | `JSON `_ data can be read,
28 | unless the nesting level is absurdly high.
29 |
30 | Functions
31 | ---------
32 |
33 | You can find the full documentation online at https://pyjson5.readthedocs.io/en/latest/.
34 | Or simply call ``help(pyjson5)``. :-)
35 |
36 | The library supplies load(s) and dump(s) functions, so you can use it as a
37 | drop-in replacement for Python's builtin ``json`` module, but you *should*
38 | use the functions ``encode_*()`` and ``decode_*()`` instead.
39 |
40 | Compatibility
41 | -------------
42 |
43 | At least CPython 3.7 or a recent Pypy3 version is needed.
44 |
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | ../CHANGELOG.md
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# Sphinx configuration for the PyJSON5 documentation.

import ast
import os
import sys


# Make the repository root importable so autodoc can find the package.
sys.path.insert(0, os.path.abspath('..'))

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosectionlabel',
    'sphinx.ext.autosummary',
    'sphinx.ext.graphviz',
    'sphinx.ext.napoleon',
    'sphinx.ext.intersphinx',
    'sphinx.ext.inheritance_diagram',
    'sphinx_autodoc_typehints',
    'myst_parser',
]

templates_path = ['_templates']
source_suffix = '.rst'
master_doc = 'index'

project = 'PyJSON5'
copyright = '2018-2025, René Kijewski'
author = 'René Kijewski'

# VERSION.inc contains a quoted Python string literal, e.g. '1.6.9'.
# ast.literal_eval() parses it without eval()'s arbitrary-code-execution risk.
with open('../src/VERSION.inc', 'rt') as f:
    version = ast.literal_eval(f.read().strip())
release = version

language = 'en'
exclude_patterns = []
pygments_style = 'sphinx'
todo_include_todos = False

html_theme = 'furo'
htmlhelp_basename = 'PyJSON5doc'

display_toc = True
# autodoc_default_flags was removed in Sphinx 4; the dict form is the
# supported spelling for the pinned sphinx == 8.* (see requirements-dev.txt).
autodoc_default_options = {'members': True}
autosummary_generate = True

intersphinx_mapping = {
    'python': ('https://docs.python.org/3.13', None),
}

graphviz_output_format = 'svg'

inheritance_graph_attrs = {
    'size': '"8.0, 10.0"',
    'fontsize': 32,
    'bgcolor': 'lightgrey',
}
inheritance_node_attrs = {
    'color': 'black',
    'fillcolor': 'white',
    'style': '"filled,solid"',
}
inheritance_edge_attrs = {
    'penwidth': 1.5,
    'arrowsize': 1.2,
}
--------------------------------------------------------------------------------
/docs/decoder.rst:
--------------------------------------------------------------------------------
1 | Parser / Decoder
2 | ================
3 |
4 | All valid `JSON5 1.0.0 `_ and
5 | `JSON `_ data can be read,
6 | unless the nesting level is absurdly high.
7 |
8 |
9 | Quick Decoder Summary
10 | ---------------------
11 |
12 | .. autosummary::
13 |
14 | ~pyjson5.decode
15 | ~pyjson5.decode_latin1
16 | ~pyjson5.decode_buffer
17 | ~pyjson5.decode_callback
18 | ~pyjson5.decode_io
19 | ~pyjson5.load
20 | ~pyjson5.loads
21 | ~pyjson5.Json5DecoderException
22 | ~pyjson5.Json5NestingTooDeep
23 | ~pyjson5.Json5EOF
24 | ~pyjson5.Json5IllegalCharacter
25 | ~pyjson5.Json5ExtraData
26 | ~pyjson5.Json5IllegalType
27 |
28 |
29 | Full Decoder Description
30 | ------------------------
31 |
32 | .. autofunction:: pyjson5.decode
33 |
34 | .. autofunction:: pyjson5.decode_latin1
35 |
36 | .. autofunction:: pyjson5.decode_buffer
37 |
38 | .. autofunction:: pyjson5.decode_callback
39 |
40 | .. autofunction:: pyjson5.decode_io
41 |
42 |
43 | Decoder Compatibility Functions
44 | -------------------------------
45 |
46 | .. autofunction:: pyjson5.load
47 |
48 | .. autofunction:: pyjson5.loads
49 |
50 |
51 | Decoder Exceptions
52 | ------------------
53 |
54 | .. inheritance-diagram::
55 | pyjson5.Json5DecoderException
56 | pyjson5.Json5NestingTooDeep
57 | pyjson5.Json5EOF
58 | pyjson5.Json5IllegalCharacter
59 | pyjson5.Json5ExtraData
60 | pyjson5.Json5IllegalType
61 |
62 | .. autoexception:: pyjson5.Json5DecoderException
63 | :members:
64 | :inherited-members:
65 |
66 | .. autoexception:: pyjson5.Json5NestingTooDeep
67 | :members:
68 | :inherited-members:
69 |
70 | .. autoexception:: pyjson5.Json5EOF
71 | :members:
72 | :inherited-members:
73 |
74 | .. autoexception:: pyjson5.Json5IllegalCharacter
75 | :members:
76 | :inherited-members:
77 |
78 | .. autoexception:: pyjson5.Json5ExtraData
79 | :members:
80 | :inherited-members:
81 |
82 | .. autoexception:: pyjson5.Json5IllegalType
83 | :members:
84 | :inherited-members:
85 |
--------------------------------------------------------------------------------
/docs/encoder.rst:
--------------------------------------------------------------------------------
1 | Serializer / Encoder
2 | ====================
3 |
4 | The serializer returns ASCII data that can safely be used in an HTML template.
5 | Apostrophes, ampersands, greater-than, and less-then signs are encoded as
6 | unicode escaped sequences. E.g. this snippet is safe for any and all input:
7 |
8 | .. code:: html
9 |
10 | "show message"
11 |
12 | Unless the input contains infinite or NaN values, the result will be valid
13 | `JSON `_ data.
14 |
15 |
16 | Quick Encoder Summary
17 | ---------------------
18 |
19 | .. autosummary::
20 |
21 | ~pyjson5.encode
22 | ~pyjson5.encode_bytes
23 | ~pyjson5.encode_callback
24 | ~pyjson5.encode_io
25 | ~pyjson5.encode_noop
26 | ~pyjson5.dump
27 | ~pyjson5.dumps
28 | ~pyjson5.Options
29 | ~pyjson5.Json5EncoderException
30 | ~pyjson5.Json5UnstringifiableType
31 |
32 |
33 | Full Encoder Description
34 | ------------------------
35 |
36 | .. autofunction:: pyjson5.encode
37 |
38 | .. autofunction:: pyjson5.encode_bytes
39 |
40 | .. autofunction:: pyjson5.encode_callback
41 |
42 | .. autofunction:: pyjson5.encode_io
43 |
44 | .. autofunction:: pyjson5.encode_noop
45 |
46 | .. autoclass:: pyjson5.Options
47 | :members:
48 | :inherited-members:
49 |
50 |
51 | Encoder Compatibility Functions
52 | -------------------------------
53 |
54 | .. autofunction:: pyjson5.dump
55 |
56 | .. autofunction:: pyjson5.dumps
57 |
58 |
59 | Encoder Exceptions
60 | ------------------
61 |
62 | .. inheritance-diagram::
63 | pyjson5.Json5Exception
64 | pyjson5.Json5EncoderException
65 | pyjson5.Json5UnstringifiableType
66 |
67 | .. autoexception:: pyjson5.Json5EncoderException
68 | :members:
69 | :inherited-members:
70 |
71 | .. autoexception:: pyjson5.Json5UnstringifiableType
72 | :members:
73 | :inherited-members:
74 |
--------------------------------------------------------------------------------
/docs/exceptions.rst:
--------------------------------------------------------------------------------
1 | Exceptions
2 | ==========
3 |
4 | .. inheritance-diagram::
5 | pyjson5.Json5Exception
6 | pyjson5.Json5EncoderException
7 | pyjson5.Json5UnstringifiableType
8 | pyjson5.Json5DecoderException
9 | pyjson5.Json5NestingTooDeep
10 | pyjson5.Json5EOF
11 | pyjson5.Json5IllegalCharacter
12 | pyjson5.Json5ExtraData
13 | pyjson5.Json5IllegalType
14 |
15 | .. autoexception:: pyjson5.Json5Exception
16 | :members:
17 | :inherited-members:
18 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | PyJSON5
2 | =======
3 |
4 | A JSON5 serializer and parser library for Python 3.7 and later.
5 |
6 |
7 | The serializer returns ASCII data that can safely be used in an HTML template.
8 | Apostrophes, ampersands, greater-than, and less-than signs are encoded as
9 | unicode escaped sequences. E.g. this snippet is safe for any and all input:
10 |
11 | .. code:: html
12 |
13 | "show message"
14 |
15 | Unless the input contains infinite or NaN values, the result will be valid
16 | `JSON `_ data.
17 |
18 |
19 | All valid `JSON5 1.0.0 `_ and
20 | `JSON `_ data can be read,
21 | unless the nesting level is absurdly high.
22 |
23 |
24 | Installation
25 | ------------
26 |
27 | .. code:: bash
28 |
29 | $ pip install pyjson5
30 |
31 |
32 | Table of Contents
33 | -----------------
34 |
35 | .. toctree::
36 | :maxdepth: 2
37 |
38 | encoder.rst
39 | decoder.rst
40 | exceptions.rst
41 | performance.rst
42 | changelog.md
43 |
44 |
45 | Quick Summary
46 | -------------
47 |
48 | .. autosummary::
49 |
50 | ~pyjson5.decode
51 | ~pyjson5.decode_buffer
52 | ~pyjson5.decode_callback
53 | ~pyjson5.decode_io
54 | ~pyjson5.load
55 | ~pyjson5.loads
56 | ~pyjson5.encode
57 | ~pyjson5.encode_bytes
58 | ~pyjson5.encode_callback
59 | ~pyjson5.encode_io
60 | ~pyjson5.encode_noop
61 | ~pyjson5.dump
62 | ~pyjson5.dumps
63 | ~pyjson5.Options
64 | ~pyjson5.Json5EncoderException
65 | ~pyjson5.Json5DecoderException
66 |
67 |
68 | Compatibility
69 | -------------
70 |
71 | At least CPython / PyPy 3.7, and a C++11 compatible compiler (such as GCC 5.2+) are needed.
72 |
73 |
74 | -------------------------------------------------------------------------------
75 |
76 | :ref:`Glossary / Index `
77 |
--------------------------------------------------------------------------------
/docs/performance.rst:
--------------------------------------------------------------------------------
1 | Performance
2 | ===========
3 |
4 | This library is written in Cython for a better performance than a pure-Python implementation could give you.
5 |
6 |
7 | Decoder Performance
8 | -------------------
9 |
10 | The library has about the same speed as the shipped ``json`` module for *pure* JSON data.
11 |
12 | * Dataset: https://github.com/zemirco/sf-city-lots-json
13 | * Version: Python 3.9.1+ (default, Feb 5 2021, 13:46:56)
14 | * CPU: AMD Ryzen 7 2700 @ 3.7GHz
15 | * :func:`pyjson5.decode`: **2.08 s** ± 7.49 ms per loop *(lower is better)*
16 | * :func:`json.loads`: **2.71 s** ± 12.1 ms per loop
17 | * The decoder works correctly: ``json.loads(content) == pyjson5.loads(content)``
18 |
19 |
20 | Encoder Performance
21 | -------------------
22 |
23 | The encoder generates pure JSON data if there are no infinite or NaN values in the input, which are invalid in JSON.
24 | The serialized data is XML-safe, i.e. there are no chevrons ``<>``, ampersands ``&``, apostrophes ``'`` or control characters in the output.
25 | The output is always ASCII regardless if you call :func:`pyjson5.encode` or :func:`pyjson5.encode_bytes`.
26 |
27 | * Dataset: https://github.com/zemirco/sf-city-lots-json
28 | * Python 3.9.1+ (default, Feb 5 2021, 13:46:56)
29 | * CPU: AMD Ryzen 7 2700 @ 3.7GHz
30 | * :func:`pyjson5.encode`: **1.37** s ± 19.2 ms per loop *(lower is better)*
31 | * :func:`json.dumps`: **3.66** s ± 72.6 ms per loop
32 | * :func:`json.dumps` + :func:`xml.sax.saxutils.escape`: **4.01** s ± 21.3 ms per loop
33 | * The encoder works correctly: ``obj == json.loads(pyjson5.encode(obj))``
34 |
35 |
36 | Benchmark
37 | ---------
38 |
39 | Using `Ultrajson's benchmark `_
40 | you can tell for which kind of data PyJSON5 is fast, and for which data it is slow in comparison *(higher is better)*:
41 |
42 | +-----------------------------------------------------------+-------------+------------+------------+------------+
43 | | | json | pyjson5 | ujson | orjson |
44 | +===========================================================+=============+============+============+============+
45 | | **Array with 256 doubles** | | | | |
46 | +-----------------------------------------------------------+-------------+------------+------------+------------+
47 | | encode | 6,425 | 81,202 | 28,966 | 83,836 |
48 | +-----------------------------------------------------------+-------------+------------+------------+------------+
49 | | decode | 16,759 | 34,801 | 34,794 | 80,655 |
50 | +-----------------------------------------------------------+-------------+------------+------------+------------+
51 | | **Array with 256 strings** | | | | |
52 | +-----------------------------------------------------------+-------------+------------+------------+------------+
53 | | encode | 36,969 | 73,165 | 35,574 | 113,082 |
54 | +-----------------------------------------------------------+-------------+------------+------------+------------+
55 | | decode | 42,730 | 38,542 | 38,386 | 60,732 |
56 | +-----------------------------------------------------------+-------------+------------+------------+------------+
57 | | **Array with 256 UTF-8 strings** | | | | |
58 | +-----------------------------------------------------------+-------------+------------+------------+------------+
59 | | encode | 3,458 | 3,134 | 4,024 | 31,677 |
60 | +-----------------------------------------------------------+-------------+------------+------------+------------+
61 | | decode | 2,428 | 2,498 | 2,491 | 1,750 |
62 | +-----------------------------------------------------------+-------------+------------+------------+------------+
63 | | **Array with 256 True values** | | | | |
64 | +-----------------------------------------------------------+-------------+------------+------------+------------+
65 | | encode | 130,441 | 282,703 | 131,279 | 423,371 |
66 | +-----------------------------------------------------------+-------------+------------+------------+------------+
67 | | decode | 220,657 | 262,690 | 264,485 | 262,283 |
68 | +-----------------------------------------------------------+-------------+------------+------------+------------+
69 | | **Array with 256 dict{string, int} pairs** | | | | |
70 | +-----------------------------------------------------------+-------------+------------+------------+------------+
71 | | encode | 11,621 | 10,014 | 18,148 | 73,905 |
72 | +-----------------------------------------------------------+-------------+------------+------------+------------+
73 | | decode | 17,802 | 19,406 | 19,391 | 23,478 |
74 | +-----------------------------------------------------------+-------------+------------+------------+------------+
75 | | **Dict with 256 arrays with 256 dict{string, int} pairs** | | | | |
76 | +-----------------------------------------------------------+-------------+------------+------------+------------+
77 | | encode | 40 | 38 | 68 | 213 |
78 | +-----------------------------------------------------------+-------------+------------+------------+------------+
79 | | decode | 43 | 49 | 48 | 51 |
80 | +-----------------------------------------------------------+-------------+------------+------------+------------+
81 | | **Medium complex object** | | | | |
82 | +-----------------------------------------------------------+-------------+------------+------------+------------+
83 | | encode | 8,704 | 11,922 | 15,319 | 49,677 |
84 | +-----------------------------------------------------------+-------------+------------+------------+------------+
85 | | decode | 12,567 | 14,042 | 13,985 | 19,481 |
86 | +-----------------------------------------------------------+-------------+------------+------------+------------+
87 | | **Complex object** | | | | |
88 | +-----------------------------------------------------------+-------------+------------+------------+------------+
89 | | encode | 672 | 909 | 731 | |
90 | +-----------------------------------------------------------+-------------+------------+------------+------------+
91 | | decode | 462 | 700 | 700 | |
92 | +-----------------------------------------------------------+-------------+------------+------------+------------+
93 |
94 | * `ujson `_ == 4.0.3.dev9
95 | * `orjson `_ == 3.5.1
96 |
--------------------------------------------------------------------------------
/pyjson5.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c++
2 | # cython: embedsignature = True, language_level = 3, warn.undeclared = True, warn.unreachable = True, warn.maybe_uninitialized = True
3 |
4 | # SPDX-License-Identifier: MIT OR Apache-2.0
5 | # SPDX-FileCopyrightText: 2018-2025 René Kijewski
6 |
7 | include 'src/_imports.pyx'
8 | include 'src/_constants.pyx'
9 |
10 | include 'src/_exceptions.pyx'
11 | include 'src/_exceptions_decoder.pyx'
12 | include 'src/_exceptions_encoder.pyx'
13 | include 'src/_raise_decoder.pyx'
14 | include 'src/_raise_encoder.pyx'
15 |
16 | include 'src/_unicode.pyx'
17 |
18 | include 'src/_reader_ucs.pyx'
19 | include 'src/_reader_callback.pyx'
20 | include 'src/_readers.pyx'
21 | include 'src/_decoder.pyx'
22 |
23 | include 'src/_writers.pyx'
24 | include 'src/_writer_reallocatable.pyx'
25 | include 'src/_writer_callback.pyx'
26 | include 'src/_writer_noop.pyx'
27 | include 'src/_encoder_options.pyx'
28 | include 'src/_encoder.pyx'
29 |
30 | include 'src/_exports.pyx'
31 | include 'src/_legacy.pyx'
32 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: MIT OR Apache-2.0
2 | # SPDX-FileCopyrightText: 2018-2025 René Kijewski
3 |
4 | [build-system]
5 | requires = [
6 | "Cython < 4, >= 0.29",
7 | "setuptools >= 61",
8 | ]
9 | build-backend = "setuptools.build_meta"
10 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | build
2 | colorama == 0.4.*
3 | cython < 4, >= 0.29
4 | more_itertools == 10.*
5 | mypy
6 | setuptools >= 61
7 | wheel
8 |
9 | # keep synchronous to requirements-readthedocs.txt
10 | docutils == 0.21.*
11 | furo
12 | myst-parser == 4.*
13 | sphinx == 8.*
14 | sphinx-autodoc-typehints == 3.*
15 |
--------------------------------------------------------------------------------
/requirements-readthedocs.txt:
--------------------------------------------------------------------------------
1 | # keep synchronous to pyproject.toml
2 | # keep synchronous to src/VERSION.inc
3 | pyjson5 == 1.6.9
4 |
5 | # keep synchronous to requirements-dev.txt
6 | docutils == 0.21.*
7 | furo
8 | myst-parser == 4.*
9 | sphinx == 8.*
10 | sphinx-autodoc-typehints == 3.*
11 |
--------------------------------------------------------------------------------
/scripts/make_decoder_recursive_select.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Generate ``src/_decoder_recursive_select.hpp``.

The generated header contains a 128-entry lookup table mapping the first
(ASCII) character of a JSON5 value to the kind of value it can start.
"""

from argparse import ArgumentParser
from logging import basicConfig, DEBUG
from pathlib import Path


def generate(out):
    """Write the ``DrsKind`` enum and ``drs_lookup`` table to *out*."""
    # Default: any character not explicitly listed below cannot start a value.
    lst = ["DRS_fail"] * 128
    lst[ord("n")] = "DRS_null"
    lst[ord("t")] = "DRS_true"
    lst[ord("f")] = "DRS_false"
    lst[ord("I")] = "DRS_inf"
    lst[ord("N")] = "DRS_nan"
    lst[ord('"')] = "DRS_string"
    lst[ord("'")] = "DRS_string"
    lst[ord("{")] = "DRS_recursive"
    lst[ord("[")] = "DRS_recursive"
    for c in "+-.0123456789":
        lst[ord(c)] = "DRS_number"

    print("#ifndef JSON5EncoderCpp_decoder_recursive_select", file=out)
    print("#define JSON5EncoderCpp_decoder_recursive_select", file=out)
    print(file=out)
    print("// GENERATED FILE", file=out)
    print("// All changes will be lost.", file=out)
    print(file=out)
    # <cstdint> provides std::uint8_t, the enum's underlying type.
    print("#include <cstdint>", file=out)
    print(file=out)
    print("namespace JSON5EncoderCpp {", file=out)
    print("inline namespace {", file=out)
    print(file=out)
    print("enum DrsKind : std::uint8_t {", file=out)
    print(
        "    DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive",
        file=out,
    )
    print("};", file=out)
    print(file=out)
    print("static const DrsKind drs_lookup[128] = {", file=out)
    # Eight entries per line keeps the generated table readable.
    for start in range(0, len(lst), 8):
        cells = "".join(" {},".format(kind) for kind in lst[start:start + 8])
        print("   " + cells, file=out)
    print("};", file=out)
    print(file=out)
    print("} // anonymous inline namespace", file=out)
    print("} // namespace JSON5EncoderCpp", file=out)
    print(file=out)
    print("#endif", file=out)


argparser = ArgumentParser(description="Generate src/_decoder_recursive_select.hpp")
# The sole positional argument is the file to WRITE (it was previously
# misnamed "input"); by default the header is regenerated in place.
argparser.add_argument(
    "output", nargs="?", type=Path, default=Path("src/_decoder_recursive_select.hpp")
)

if __name__ == "__main__":
    basicConfig(level=DEBUG)
    args = argparser.parse_args()
    with open(str(args.output.resolve()), "wt") as out:
        generate(out)
--------------------------------------------------------------------------------
/scripts/make_escape_dct.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Generate ``src/_escape_dct.hpp``.

For every byte value the generated table stores the JSON escape sequence
(or the character itself) plus its length, and two 64-bit masks telling
which of the 128 ASCII characters need escaping at all.
"""

from argparse import ArgumentParser
from logging import basicConfig, DEBUG
from pathlib import Path


def generate(f):
    """Write ``EscapeDct::items`` and the ``is_escaped_*`` masks to *f*."""
    unescaped = 0  # ASCII bitset: 1 << c is set iff c needs NO escaping
    print("const EscapeDct::Items EscapeDct::items = {", file=f)
    for c in range(0x100):
        if c == ord("\\"):
            s = "\\\\"
        elif c == ord("\b"):
            s = "\\b"
        elif c == ord("\f"):
            s = "\\f"
        elif c == ord("\n"):
            s = "\\n"
        elif c == ord("\r"):
            s = "\\r"
        elif c == ord("\t"):
            s = "\\t"
        elif c == ord('"'):
            s = '\\"'
        elif (c < 0x20) or (c >= 0x7F) or (chr(c) in "'&<>\\"):
            # Control characters, non-ASCII bytes, and HTML/XML-sensitive
            # characters are emitted as \uXXXX escapes.
            s = f"\\u{c:04x}"
        else:
            s = f"{c:c}"
            # Only reachable for printable ASCII, so c < 128 always holds here.
            if c < 128:
                unescaped |= 1 << c

        # Entry layout: length, up to 6 characters, zero-padded to 8 cells.
        entry = (
            [str(len(s))]
            + ["'{}'".format(ch) if ch != "\\" else "'\\\\'" for ch in s]
            + ["0"] * 6
        )
        cells = ", ".join(entry[:8])
        print(f"    {{ {cells:35s} }}, /* 0x{c:02x} {chr(c)!r} */", file=f)
    print("};", file=f)

    # Everything that is not unescaped needs escaping; split the 128-bit
    # mask into two 64-bit halves for the C++ side.
    escaped = unescaped ^ ((1 << 128) - 1)
    print(
        f"const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x{(escaped & ((1 << 64) - 1)):016x});",
        file=f,
    )
    print(
        f"const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x{(escaped >> 64):016x});",
        file=f,
    )


argparser = ArgumentParser(description="Generate src/_escape_dct.hpp")
# The sole positional argument is the file to WRITE (it was previously
# misnamed "input"); by default the header is regenerated in place.
argparser.add_argument(
    "output", nargs="?", type=Path, default=Path("src/_escape_dct.hpp")
)

if __name__ == "__main__":
    basicConfig(level=DEBUG)
    args = argparser.parse_args()
    with open(str(args.output.resolve()), "wt") as out:
        generate(out)
--------------------------------------------------------------------------------
/scripts/make_unicode_categories.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Generate the Unicode category lookup used by the JSON5 decoder.

Reads ``DerivedGeneralCategory.txt`` and emits ``unicode_cat_of()``, a
two-level, cache-friendly lookup that classifies every codepoint as
nothing / white-space / identifier-start / identifier-part.
"""

from argparse import ArgumentParser
from collections import defaultdict, OrderedDict
from functools import reduce
from pathlib import Path
from re import match


def main(input_file, output_file):
    """Read category data from *input_file*; write the C++ header to *output_file*."""
    # 2-bit category codes as stored in the generated tables.
    Nothing = 0
    WhiteSpace = 1
    IdentifierStart = 2
    IdentifierPart = 3

    cat_indices = {
        "zs": WhiteSpace,
        "lc": IdentifierStart,
        "ll": IdentifierStart,
        "lm": IdentifierStart,
        "lo": IdentifierStart,
        "lt": IdentifierStart,
        "lu": IdentifierStart,
        "nl": IdentifierStart,
        "mc": IdentifierPart,
        "mn": IdentifierPart,
        "pc": IdentifierPart,
        "nd": IdentifierPart,
    }

    # planes[n] is one "demiplane": the categories of codepoints
    # [n * 0x100, (n + 1) * 0x100).
    planes = defaultdict(lambda: [0] * 0x100)

    for input_line in input_file:
        m = match(r"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z][a-z])", input_line)
        if not m:
            continue
        start, end, cat = m.groups()

        idx = cat_indices.get(cat.lower())
        if idx:
            end = int(end or start, 16)
            start = int(start, 16)
            for i in range(start, end + 1):
                planes[i // 0x100][i % 0x100] = idx

    # per: https://spec.json5.org/#white-space
    for i in (0x9, 0xA, 0xB, 0xC, 0xD, 0x20, 0xA0, 0x2028, 0x2029, 0xFEFF):
        planes[i // 0x100][i % 0x100] = WhiteSpace

    # per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
    for i in (ord("$"), ord("_"), ord("\\")):
        planes[i // 0x100][i % 0x100] = IdentifierStart

    # per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
    for i in (0x200C, 0x200D):
        planes[i // 0x100][i % 0x100] = IdentifierPart

    # 0x110000 == NO_EXTRA_DATA is spuriously used as input at the end of an item.
    # FIXME: this should not be needed. %s/18/17/g once the problem is fixed in the decoder.
    planes[0x0011_0000 // 0x100][0x0011_0000 % 0x100] = WhiteSpace

    print("#ifndef JSON5EncoderCpp_unicode_cat_of", file=output_file)
    print("#define JSON5EncoderCpp_unicode_cat_of", file=output_file)
    print(file=output_file)
    print("// GENERATED FILE", file=output_file)
    print("// All changes will be lost.", file=output_file)
    print(file=output_file)
    # <cstdint> provides std::uint8_t / std::uint32_t used below.
    print("#include <cstdint>", file=output_file)
    print(file=output_file)
    print("namespace JSON5EncoderCpp {", file=output_file)
    print("inline namespace {", file=output_file)
    print(file=output_file)
    print("static unsigned unicode_cat_of(std::uint32_t codepoint) {", file=output_file)

    demiplane_to_idx = OrderedDict()  # rendered demiplane data -> data_idx
    data_to_idx = [None] * (18 * 0x100)  # demiplane number -> data_idx
    print("    // A 'demiplane' is a 1/256th of a Unicode plane.", file=output_file)
    print("    // This way a 'demiplane' fits nicely into a cache line.", file=output_file)
    print(
        "    alignas(64) static const std::uint8_t demiplane_data[][0x100 / 4] = {",
        file=output_file,
    )
    for i in range(18 * 0x100):
        plane = planes[i]
        # Trailing zeroes can be dropped: C++ value-initializes the rest.
        while plane and plane[-1] == 0:
            plane.pop()

        plane_data = ""
        for row_start in range(0, len(plane), 4 * 16):
            row = plane[row_start:row_start + 4 * 16]
            plane_data += "       "
            for cell_start in range(0, len(row), 4):
                # Pack four 2-bit categories into one byte; the lowest
                # codepoint goes into the least significant bits.
                quad = row[cell_start:cell_start + 4]
                cell = reduce(lambda acc, bits: ((acc << 2) | bits), reversed(quad), 0)
                plane_data += " 0x{:02x}u,".format(cell)
            plane_data += "\n"

        # Deduplicate: identical demiplanes share one table entry.
        produced_idx = demiplane_to_idx.get(plane_data)
        if produced_idx is None:
            produced_idx = len(demiplane_to_idx)
            demiplane_to_idx[plane_data] = produced_idx
            print("        {{ // {} -> 0x{:02x}u".format(i, produced_idx), file=output_file)
            print(plane_data, file=output_file, end="")
            print("        },", file=output_file)

        data_to_idx[i] = produced_idx
    print("    };", file=output_file)
    print(file=output_file)

    snd_lookup_lines = OrderedDict()  # rendered 64-entry block -> its index
    snd_lookup_indices = OrderedDict()  # block number -> index into demiplane_snd_data
    print(
        "    alignas(64) static const std::uint8_t demiplane_snd_data[][64] = {",
        file=output_file,
    )
    for start in range(0, 18 * 0x100, 64):
        # Render one 64-entry block as four rows of 16 values.
        rows = []
        for row_start in range(start, start + 64, 16):
            cells = "".join(
                " 0x{:02x}u,".format(data_to_idx[i])
                for i in range(row_start, row_start + 16)
            )
            rows.append("       " + cells)
        snd_lookup_line = "\n".join(rows)

        # Deduplicate identical blocks, too.
        snd_lookup_idx = snd_lookup_lines.get(snd_lookup_line)
        if snd_lookup_idx is None:
            snd_lookup_idx = len(snd_lookup_lines)
            snd_lookup_lines[snd_lookup_line] = snd_lookup_idx
            print(
                "        {{ // {} -> 0x{:02x}u".format(start // 64, snd_lookup_idx),
                file=output_file,
            )
            print(snd_lookup_line, file=output_file)
            print("        },", file=output_file)
        snd_lookup_indices[start // 64] = snd_lookup_idx
    print("    };", file=output_file)
    print(file=output_file)

    print(
        "    alignas(64) static const std::uint8_t demiplane_snd[18 * 0x100 / 64] = {",
        end="",
        file=output_file,
    )
    for i in range(18 * 0x100 // 64):
        if i % 16 == 0:
            print("\n       ", end="", file=output_file)
        print(" 0x{:02x}u,".format(snd_lookup_indices[i]), end="", file=output_file)
    print(file=output_file)
    print("    };", file=output_file)
    print(file=output_file)

    print("    if (JSON5EncoderCpp_expect(codepoint < 256, true)) {", file=output_file)
    print(
        "        return (demiplane_data[0][codepoint / 4] >> (2 * (codepoint % 4))) % 4;",
        file=output_file,
    )
    print("    }", file=output_file)
    print(file=output_file)
    print("    if (codepoint > 0x110000) codepoint = 0x110000;", file=output_file)
    print("    std::uint32_t fst_row = codepoint / 0x100;", file=output_file)
    print("    std::uint32_t fst_col = codepoint % 0x100;", file=output_file)
    print("    std::uint32_t snd_row = fst_row / 64;", file=output_file)
    print("    std::uint32_t snd_col = fst_row % 64;", file=output_file)
    print(file=output_file)
    print(
        "    const std::uint8_t *cell = demiplane_data[demiplane_snd_data[demiplane_snd[snd_row]][snd_col]];",
        file=output_file,
    )
    print(
        "    return (cell[fst_col / 4] >> (2 * (fst_col % 4))) % 4;", file=output_file
    )
    print("}", file=output_file)
    print(file=output_file)
    print("}", file=output_file)
    print("}", file=output_file)
    print(file=output_file)
    print("#endif", file=output_file)


argparser = ArgumentParser(description="Generate Unicode Category Matcher(s)")
argparser.add_argument("input", nargs="?", type=Path, default=Path("/dev/stdin"))
argparser.add_argument("output", nargs="?", type=Path, default=Path("/dev/stdout"))

if __name__ == "__main__":
    args = argparser.parse_args()
    with open(str(args.input.resolve()), "rt") as input_file, open(
        str(args.output.resolve()), "wt"
    ) as output_file:
        raise SystemExit(main(input_file, output_file))
--------------------------------------------------------------------------------
/scripts/run-minefield-test.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from argparse import ArgumentParser
from logging import basicConfig, INFO, getLogger
from os import chdir, name
from pathlib import Path
from subprocess import Popen
from sys import executable


# Runs the JSONTestSuite "minefield" corpus through transcode-to-json.py and
# checks that every "y_*.json" fixture (expected to parse) actually parses.
argparser = ArgumentParser(description="Run JSON5 parser tests")
argparser.add_argument(
    "tests",
    nargs="?",
    type=Path,
    default=Path("third-party/JSONTestSuite/test_parsing"),
)

# (Removed the unused `suffix_implies_success` table copied from run-tests.py:
# its keys lacked the leading dot and it was never consulted in this script.)

if __name__ == "__main__":
    basicConfig(level=INFO)
    logger = getLogger(__name__)
    # Work relative to the repository root so the default tests path resolves.
    chdir(Path(__file__).absolute().parent.parent)

    good = bad = errors = severe = 0

    # Colored/emoji status markers if colorama is available, plain text otherwise.
    try:
        from colorama import init, Fore

        init()
    except Exception:
        code_severe = "SEVERE"
        code_good = "GOOD"
        code_bad = "BAD"
        code_ignored = "IGNORED"
        reset = ""
    else:
        if name != "nt":
            code_severe = Fore.RED + "😱"
            code_good = Fore.CYAN + "😄"
            code_bad = Fore.YELLOW + "😠"
            code_ignored = Fore.BLUE + "🙅"
        else:
            # Windows consoles frequently cannot display the emojis.
            code_severe = Fore.RED + "SEVERE"
            code_good = Fore.CYAN + "GOOD"
            code_bad = Fore.YELLOW + "BAD"
            code_ignored = Fore.BLUE + "IGNORED"
        reset = Fore.RESET

    script = str(Path(__file__).absolute().parent / "transcode-to-json.py")

    args = argparser.parse_args()
    index = 0
    for path in sorted(args.tests.glob("?_?*.json")):
        # Fixture names look like "y_object_empty.json":
        # y = must parse, n = must not parse, i = implementation defined.
        category, name = path.stem.split("_", 1)
        if category not in "yni":
            continue

        if category in "ni":
            # ignore anything but tests that are expected to pass for now
            continue

        try:
            # ignore any UTF-8 errors
            with open(str(path.resolve()), "rt") as f:
                f.read()
        except Exception:
            continue

        index += 1
        try:
            # Exit code contract of transcode-to-json.py:
            # 0 = parsed, 1 = rejected, anything else = crash/bug.
            p = Popen((executable, script, str(path)))
            outcome = p.wait(5)
        except Exception:
            logger.error("Error while testing: %s", path, exc_info=True)
            errors += 1
            continue

        if outcome not in (0, 1):
            code = code_severe
            severe += 1
        elif category == "y":
            if outcome == 0:
                code = code_good
                good += 1
            else:
                code = code_bad
                bad += 1
        else:
            code = code_ignored

        print(
            "#",
            index,
            " ",
            code,
            " | " "Category <",
            category,
            "> | " "Test <",
            name,
            "> | " "Actual <",
            "pass" if outcome == 0 else "FAIL",
            ">",
            reset,
            sep="",
        )

    # BUG FIX: `errors` (test runs that could not even be started/finished)
    # used to be counted but silently dropped, so a run whose subprocesses all
    # failed to launch still exited 0. Report them and treat them as severe.
    is_severe = (severe + errors) > 0
    is_good = bad == 0
    code = code_severe if is_severe else code_good if is_good else code_bad
    print()
    print(
        code,
        " | ",
        good,
        " correct outcomes | ",
        bad,
        " wrong outcomes | ",
        severe,
        " severe errors | ",
        errors,
        " runner errors",
        reset,
        sep="",
    )
    # Exit code: 0 = all good, 1 = wrong outcomes, 2 = severe/runner errors.
    raise SystemExit(2 if is_severe else 0 if is_good else 1)
130 |
--------------------------------------------------------------------------------
/scripts/run-tests.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from argparse import ArgumentParser
from logging import basicConfig, INFO, getLogger
from os import chdir, name
from pathlib import Path
from subprocess import Popen
from sys import executable


# Runs every fixture below third-party/json5-tests through
# scripts/transcode-to-json.py (as a subprocess) and compares the exit code
# against the expectation implied by the fixture's file extension.
argparser = ArgumentParser(description="Run JSON5 parser tests")
argparser.add_argument(
    "tests", nargs="?", type=Path, default=Path("third-party/json5-tests")
)

# File extension -> whether parsing is expected to succeed.
suffix_implies_success = {
    ".json": True,
    ".json5": True,
    ".txt": False,
}

if __name__ == "__main__":
    basicConfig(level=INFO)
    logger = getLogger(__name__)
    # Work relative to the repository root so the default tests path resolves.
    chdir(Path(__file__).absolute().parent.parent)

    # Colored/emoji status markers if colorama is available, plain text otherwise.
    try:
        from colorama import init, Fore

        init()
    except Exception:
        code_severe = "SEVERE"
        code_good = "GOOD"
        code_bad = "BAD"
        reset = ""
    else:
        if name != "nt":
            code_severe = Fore.RED + "😱"
            code_good = Fore.CYAN + "😄"
            code_bad = Fore.YELLOW + "😠"
        else:
            # Windows consoles frequently cannot display the emojis.
            code_severe = Fore.RED + "SEVERE"
            code_good = Fore.CYAN + "GOOD"
            code_bad = Fore.YELLOW + "BAD"
        reset = Fore.RESET

    good = 0
    bad = 0
    severe = 0

    script = str(Path(__file__).absolute().parent / "transcode-to-json.py")

    args = argparser.parse_args()
    index = 0
    for path in sorted(args.tests.glob("*/*.*")):
        kind = path.suffix.split(".")[-1]
        expect_success = suffix_implies_success.get(path.suffix)
        if expect_success is None:
            # Unknown extension: not part of the test corpus.
            continue

        index += 1
        category = path.parent.name
        name = path.stem  # NOTE: shadows os.name, which is no longer needed here
        try:
            # Exit code contract of transcode-to-json.py:
            # 0 = parsed, 1 = rejected, anything else = crash/bug.
            p = Popen((executable, script, str(path)))
            outcome = p.wait(5)
        except Exception:
            # Failure to launch/finish the subprocess counts as severe.
            logger.error("Error while testing: %s", path, exc_info=True)
            severe += 1
            continue

        is_success = outcome == 0
        is_failure = outcome == 1
        is_severe = outcome not in (0, 1)
        is_good = is_success if expect_success else is_failure
        code = code_severe if is_severe else code_good if is_good else code_bad
        print(
            "#",
            index,
            " ",
            code,
            " " "Category <",
            category,
            "> | " "Test <",
            name,
            "> | " "Data <",
            kind,
            "> | " "Expected <",
            "pass" if expect_success else "FAIL",
            "> | " "Actual <",
            "pass" if is_success else "FAIL",
            ">",
            reset,
            sep="",
        )
        if is_severe:
            severe += 1
        elif is_good:
            good += 1
        else:
            bad += 1

    is_severe = severe > 0
    is_good = bad == 0
    code = code_severe if is_severe else code_good if is_good else code_bad
    print()
    print(
        code,
        " ",
        good,
        " × correct outcome | ",
        bad,
        " × wrong outcome | ",
        severe,
        " × severe errors",
        reset,
        sep="",
    )
    # Exit code: 0 = all good, 1 = wrong outcomes, 2 = severe errors.
    raise SystemExit(2 if is_severe else 0 if is_good else 1)
120 |
--------------------------------------------------------------------------------
/scripts/sha512sum.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from argparse import ArgumentParser
from hashlib import sha512
from logging import basicConfig, DEBUG
from pathlib import Path
from sys import argv, exit


# Minimal stand-in for `sha512sum --check` on systems without coreutils.
argparser = ArgumentParser(
    description="sha512sum replacement if coreutils isn't installed"
)
argparser.add_argument("-c", "--check", type=Path, required=True)

if __name__ == "__main__":
    basicConfig(level=DEBUG)
    args = argparser.parse_args()
    errors = 0
    # FIX: the checksum file handle is no longer named `f`, so it is not
    # shadowed (and left pointing at a closed file) by the inner `with` below.
    with open(str(args.check.resolve()), "rt") as check_file:
        for line in check_file:
            # FIX: split on any whitespace run instead of a single space so
            # coreutils' canonical "<hash>  <file>" (two spaces) format is
            # accepted without leaving a leading space in the filename.
            expected_hash, filename = line.rstrip("\r\n").split(None, 1)
            with open(str(Path(filename).resolve()), "rb") as data_file:
                actual_hash = sha512(data_file.read()).hexdigest()

            if expected_hash == actual_hash:
                print(filename + ": OK")
            else:
                errors += 1
                print(filename + ": FAILED")

    if errors:
        print("%s: WARNING: %s computed checksum did NOT match" % (argv[0], errors))
        exit(1)
    else:
        exit(0)
36 |
--------------------------------------------------------------------------------
/scripts/transcode-to-json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from argparse import ArgumentParser
4 | from collections.abc import Mapping, Sequence
5 | from codecs import open as codecs_open
6 | from itertools import zip_longest
7 | from json import loads
8 | from logging import basicConfig, DEBUG, getLogger
9 | from math import isnan
10 | from pathlib import Path
11 |
12 | from pyjson5 import decode, encode
13 |
14 |
def eq_with_nans(left, right):
    """Deep equality between two decoded JSON values, treating NaN == NaN.

    Fixes over the previous version:
    * ``math.isnan`` is only applied to actual floats — it raises
      ``TypeError`` for lists/dicts/strings, so any unequal non-number
      arguments used to crash instead of returning False.
    * Mappings are compared via the ``Mapping`` ABC; a ``dict`` is not a
      ``Sequence``, so the old mapping branch was unreachable and dicts
      containing NaN always compared unequal.
    * The right-hand mapping is no longer mutated (``pop``) during comparison.
    """
    # NaN handling: only floats can be NaN.
    if isinstance(left, float) and isnan(left):
        return isinstance(right, float) and isnan(right)
    if isinstance(right, float) and isnan(right):
        return False

    if left == right:
        return True

    left_mapping = isinstance(left, Mapping)
    right_mapping = isinstance(right, Mapping)
    if left_mapping != right_mapping:
        return False

    sentinel = object()
    if left_mapping:
        if len(left) != len(right):
            # missing or extraneous keys
            return False
        for key, left_value in left.items():
            right_value = right.get(key, sentinel)
            if right_value is sentinel:
                return False
            if not eq_with_nans(left_value, right_value):
                return False
        return True

    # Only proper sequences are compared element-wise; unequal strings/bytes
    # (which are Sequences too) were already rejected by the == above.
    if not isinstance(left, Sequence) or not isinstance(right, Sequence):
        return False
    if isinstance(left, (str, bytes)) or isinstance(right, (str, bytes)):
        return False
    if len(left) != len(right):
        return False

    for l, r in zip_longest(left, right, fillvalue=sentinel):
        if not eq_with_nans(l, r):
            return False

    return True
48 |
49 |
argparser = ArgumentParser(description="Run JSON5 parser tests")
argparser.add_argument("input", type=Path)
argparser.add_argument("output", nargs="?", type=Path)

if __name__ == "__main__":
    basicConfig(level=DEBUG)
    logger = getLogger(__name__)

    # Exit code contract (consumed by run-tests.py / run-minefield-test.py):
    #   0 = parsed and re-encoded fine, 1 = input rejected by the parser,
    #   2 = wrong data or encoder failure, -1 = I/O problem.
    args = argparser.parse_args()
    try:
        with codecs_open(args.input.resolve(), "r", "UTF-8") as f:
            data = f.read()
    except Exception:
        logger.error("Could not even read file: %s", args.input, exc_info=True)
        raise SystemExit(-1)

    try:
        obj = decode(data)
    except Exception:
        logger.error("Could not parse content: %s", args.input)
        raise SystemExit(1)

    # If the stock json module can parse the input too, both parsers must
    # agree on the resulting data.
    try:
        json_obj = loads(data)
    except Exception:
        pass
    else:
        if not eq_with_nans(obj, json_obj):
            logger.error(
                "JSON and PyJSON5 did not read the same data: %s, %r != %r",
                args.input,
                obj,
                json_obj,
            )
            raise SystemExit(2)

    try:
        data = encode(obj)
    except Exception:
        # FIX: message read "Could open stringify content"
        logger.error("Could not stringify content: %s", args.input, exc_info=True)
        raise SystemExit(2)

    if args.output is not None:
        try:
            with codecs_open(args.output.resolve(), "w", "UTF-8") as f:
                f.write(data)
        except Exception:
            # FIX: message read "Could open output file"
            logger.error("Could not open output file: %s", args.output, exc_info=True)
            raise SystemExit(-1)

    raise SystemExit(0)
101 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
# keep in sync with requirements-readthedocs.txt
# keep in sync with src/VERSION.inc
4 | version = 1.6.9
5 |
6 | name = pyjson5
7 | description = JSON5 serializer and parser for Python 3 written in Cython.
8 | url = https://github.com/Kijewski/pyjson5
9 | project_urls =
10 | Changelog = https://github.com/Kijewski/pyjson5/blob/main/CHANGELOG.md
11 | Code = https://github.com/Kijewski/pyjson5
12 | Documentation = https://pyjson5.readthedocs.io/
13 | Download = https://pypi.org/project/pyjson5/
14 | Homepage = https://github.com/Kijewski/pyjson5
15 | Tracker = https://github.com/Kijewski/pyjson5/issues
16 |
17 | author = René Kijewski
18 | maintainer = René Kijewski
19 | author_email = pypi.org@k6i.de
20 | maintainer_email = pypi.org@k6i.de
21 |
22 | long_description = file: README.rst
23 | long_description_content_type = text/x-rst
24 |
25 | license = MIT OR Apache-2.0
26 | license_files = LICENSE.MIT, LICENSE.Apache
27 |
28 | classifiers =
29 | Development Status :: 5 - Production/Stable
30 | Intended Audience :: Developers
31 | Intended Audience :: System Administrators
32 | License :: OSI Approved :: Apache Software License
33 | License :: OSI Approved :: MIT License
34 | Operating System :: OS Independent
35 | Programming Language :: Cython
36 | Programming Language :: JavaScript
37 | Programming Language :: Python :: 3
38 | Programming Language :: Python :: 3.7
39 | Programming Language :: Python :: 3.8
40 | Programming Language :: Python :: 3.9
41 | Programming Language :: Python :: 3.10
42 | Programming Language :: Python :: 3.11
43 | Programming Language :: Python :: 3.12
44 | Programming Language :: Python :: 3.13
45 | Programming Language :: Python :: 3 :: Only
46 | Programming Language :: Python :: Implementation :: CPython
47 | Topic :: Text Processing :: General
48 |
49 | [options]
50 | zip_safe = False
51 |
52 | python_requires = ~= 3.7
53 | setup_requires =
54 | Cython
55 | setuptools
56 |
57 | include_package_data = True
58 | packages = pyjson5
59 | package_dir =
60 | = src
61 |
62 | [options.package_data]
63 | pyjson5 = __init__.pyi, py.typed
64 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

from setuptools import setup, Extension

# GCC/Clang flags for the Cython-generated C++ translation unit.
extra_compile_args = [
    "-std=c++11",
    "-O3",
    "-fPIC",
    "-g0",
    "-pipe",
    "-fomit-frame-pointer",
]

setup(
    ext_modules=[
        Extension(
            # Import path of the compiled module: pyjson5.pyjson5
            "pyjson5.pyjson5",
            sources=["pyjson5.pyx"],
            include_dirs=["src"],
            extra_compile_args=extra_compile_args,
            # NOTE(review): the compile flags are reused as link flags; the
            # linker is expected to ignore the irrelevant ones — confirm.
            extra_link_args=extra_compile_args,
            language="c++",
        )
    ],
)
26 |
--------------------------------------------------------------------------------
/src/DESCRIPTION.inc:
--------------------------------------------------------------------------------
1 | """\
2 | PyJSON5\n\
3 | =======\n\
4 | \n\
5 | A `JSON5 `_ serializer and parser library for Python 3 written in Cython.\n\
6 | \n\
7 | The serializer returns ASCII data that can safely be used in an HTML template.\n\
Apostrophes, ampersands, greater-than, and less-than signs are encoded as\n\
9 | unicode escaped sequences. E.g. this snippet is safe for any and all input:\n\
10 | \n\
11 | .. code:: python\n\
12 | \n\
13 | \"show message\"\n\
14 | \n\
15 | Unless the input contains infinite or NaN values, the result will be valid\n\
16 | JSON data.\n\
17 | \n\
18 | All valid JSON5 1.0.0 and `JSON `_ data can be read,\n\
19 | unless the nesting level is absurdly high.\n\
20 | """
21 |
--------------------------------------------------------------------------------
/src/VERSION.inc:
--------------------------------------------------------------------------------
1 | "1.6.9"
2 |
--------------------------------------------------------------------------------
/src/_constants.pyx:
--------------------------------------------------------------------------------
# Shared float singletons for the non-finite JSON5 literals.
cdef object CONST_POS_NAN = float('+NaN')
cdef object CONST_POS_INF = float('+Infinity')
cdef object CONST_NEG_NAN = float('-NaN')
cdef object CONST_NEG_INF = float('-Infinity')

cdef object DATETIME_CLASSES = (date, time,)  # issubclass(datetime, date) == True
cdef object ORD_CLASSES = (unicode, bytes, bytearray,)

# Codec names/aliases for ASCII and Latin-1, i.e. encodings whose code points
# coincide with the first 256 Unicode code points.
cdef object UCS1_COMPATIBLE_CODECS = frozenset((
    # ASCII
    'ascii', 646, '646', 'us-ascii',
    # Latin-1
    'latin_1', 'latin-1', 'iso-8859-1', 'iso8859-1',
    8859, '8859', 'cp819', 'latin', 'latin1', 'l1',
))

# Sample values — NOTE(review): presumably used elsewhere to probe how
# number-like types serialize; confirm against the encoder sources.
cdef object TEST_DECIMAL = Decimal('47.11')
cdef object TEST_FLOAT = 47.11
cdef object TEST_INT = 4711
20 |
--------------------------------------------------------------------------------
/src/_decoder.pyx:
--------------------------------------------------------------------------------
cdef enum:
    # Lookahead sentinel: one past the highest Unicode code point (0x10FFFF),
    # i.e. "no pending character to re-process".
    NO_EXTRA_DATA = 0x0011_0000
3 |
4 |
# Consume input up to and including the end of the current line
# (used for "//" comments).
cdef boolean _skip_single_line(ReaderRef reader) except False:
    cdef uint32_t c0
    while _reader_good(reader):
        c0 = _reader_get(reader)
        if _is_line_terminator(c0):
            break

    return True
13 |
14 |
# Consume a "/* ... */" comment; the leading "/*" has already been read.
# Raises if the input ends before the closing "*/".
cdef boolean _skip_multiline_comment(ReaderRef reader) except False:
    cdef uint32_t c0
    cdef boolean seen_asterisk = False
    cdef Py_ssize_t comment_start = _reader_tell(reader)

    while True:
        if expect(not _reader_good(reader), False):
            break

        c0 = _reader_get(reader)
        if c0 == b'*':
            seen_asterisk = True
        elif seen_asterisk:
            # previous character was '*': "*/" terminates the comment
            if c0 == b'/':
                return True
            seen_asterisk = False

    # input exhausted before "*/" was found
    _raise_unclosed(b'comment', comment_start)
    return False
34 |
35 |
# Skip whitespace and "//" / "/* */" comments starting at character c0.
# Returns:
# >= 0: first data character found
# -1: input exhausted
# -2: exception raised
cdef int32_t _skip_to_data_sub(ReaderRef reader, uint32_t c0) except -2:
    cdef int32_t c1 = 0  # silence warning
    cdef boolean seen_slash

    seen_slash = False
    while True:
        if c0 == b'/':
            if seen_slash:
                # "//": line comment
                _skip_single_line(reader)
                seen_slash = False
            else:
                # possibly the start of a comment
                seen_slash = True
        elif c0 == b'*':
            if expect(not seen_slash, False):
                # '*' without preceding '/'
                _raise_stray_character('asterisk', _reader_tell(reader))

            # "/*": block comment
            _skip_multiline_comment(reader)
            seen_slash = False
        elif not _is_ws_zs(c0):
            # found actual data
            c1 = cast_to_int32(c0)
            break
        elif expect(seen_slash, False):
            # whitespace directly after a lone '/'
            _raise_stray_character('slash', _reader_tell(reader))

        if not _reader_good(reader):
            c1 = -1
            break

        c0 = _reader_get(reader)

    if expect(seen_slash, False):
        # input ended directly after a lone '/'
        _raise_stray_character('slash', _reader_tell(reader))

    return c1
73 |
74 |
# Skip whitespace/comments from the current read position.
# >= 0: data character found
# -1: input exhausted
# -2: exception raised
cdef int32_t _skip_to_data(ReaderRef reader) except -2:
    cdef uint32_t c0
    cdef int32_t c1
    if _reader_good(reader):
        c0 = _reader_get(reader)
        c1 = _skip_to_data_sub(reader, c0)
    else:
        c1 = -1
    return c1
87 |
88 |
# Read exactly `length` hexadecimal digits and return the code point they
# denote. Raises on non-hex input or a value beyond U+10FFFF.
cdef int32_t _get_hex_character(ReaderRef reader, Py_ssize_t length) except -1:
    cdef Py_ssize_t start
    cdef uint32_t c0
    cdef uint32_t result
    cdef Py_ssize_t index

    start = _reader_tell(reader)
    result = 0
    for index in range(length):
        result <<= 4
        if expect(not _reader_good(reader), False):
            _raise_unclosed(b'escape sequence', start)

        c0 = _reader_get(reader)
        if b'0' <= c0 <= b'9':
            result |= c0 - b'0'
        elif b'a' <= c0 <= b'f':
            result |= c0 - b'a' + 10
        elif b'A' <= c0 <= b'F':
            result |= c0 - b'A' + 10
        else:
            _raise_expected_s('hexadecimal character', start, c0)

    if expect(result > 0x10ffff, False):
        # not a valid Unicode code point
        _raise_expected_s('Unicode code point', start, result)

    return cast_to_int32(result)
116 |
117 |
# Read a "\uXXXX" payload (the "\u" was already consumed). A high surrogate
# must be followed by a "\uXXXX" low surrogate; the pair is joined.
# >= 0: character to append
cdef int32_t _get_escaped_unicode_maybe_surrogate(ReaderRef reader, Py_ssize_t start) except -1:
    cdef uint32_t c0
    cdef uint32_t c1

    c0 = cast_to_uint32(_get_hex_character(reader, 4))
    if expect(unicode_is_lo_surrogate(c0), False):
        # a lone low surrogate is illegal
        _raise_expected_s('high surrogate before low surrogate', start, c0)
    elif not unicode_is_hi_surrogate(c0):
        return c0

    # high surrogate seen: the low surrogate escape must follow immediately
    _accept_string(reader, b'\\u')

    c1 = cast_to_uint32(_get_hex_character(reader, 4))
    if expect(not unicode_is_lo_surrogate(c1), False):
        _raise_expected_s('low surrogate', start, c1)

    return unicode_join_surrogates(c0, c1)
136 |
137 |
# Decode the character(s) after a backslash inside a string.
# >= 0: character to append
# -1: skip (escaped line terminator / line continuation)
# < -1: -(next character + 1), i.e. one character was consumed as lookahead
cdef int32_t _get_escape_sequence(ReaderRef reader,
                                  Py_ssize_t start) except 0x7ffffff:
    cdef uint32_t c0

    c0 = _reader_get(reader)
    if expect(not _reader_good(reader), False):
        _raise_unclosed(b'string', start)

    if c0 == b'b':
        return 0x0008
    elif c0 == b'f':
        return 0x000c
    elif c0 == b'n':
        return 0x000a
    elif c0 == b'r':
        return 0x000d
    elif c0 == b't':
        return 0x0009
    elif c0 == b'v':
        return 0x000b
    elif c0 == b'0':
        return 0x0000
    elif c0 == b'x':
        return _get_hex_character(reader, 2)
    elif c0 == b'u':
        return _get_escaped_unicode_maybe_surrogate(reader, start)
    elif c0 == b'U':
        return _get_hex_character(reader, 8)
    elif expect(b'1' <= c0 <= b'9', False):
        # "\1" .. "\9" are illegal (no octal escapes)
        _raise_expected_s('escape sequence', start, c0)
        return -2
    elif _is_line_terminator(c0):
        # line continuation: swallow the newline (and the LF of a CRLF pair)
        if c0 != 0x000D:
            return -1

        c0 = _reader_get(reader)
        if c0 == 0x000A:
            return -1

        # CR not followed by LF: hand back the consumed character as lookahead
        return -cast_to_int32(c0 + 1)
    else:
        # any other escaped character stands for itself
        return cast_to_int32(c0)
183 |
184 |
# Collect string characters into a UCS-4 buffer until the unescaped closing
# quote `delim` is found, then build the Python str.
cdef object _decode_string_sub(ReaderRef reader, uint32_t delim,
                               Py_ssize_t start, uint32_t c0):
    cdef int32_t c1
    cdef StackHeapString[uint32_t] buf

    while True:
        if expect(c0 == delim, False):
            break

        if expect(not _reader_good(reader), False):
            _raise_unclosed(b'string', start)

        if expect(c0 != b'\\', True):
            # unescaped CR/LF are not allowed inside a string
            if expect(c0 in (0xA, 0xD), False):
                _raise_unclosed(b'string', start)

            buf.push_back(c0)
            c0 = _reader_get(reader)
            continue

        c1 = _get_escape_sequence(reader, start)
        if c1 >= -1:
            if expect(not _reader_good(reader), False):
                _raise_unclosed(b'string', start)

            if c1 >= 0:
                # decoded escape; -1 means "skip" (line continuation)
                c0 = cast_to_uint32(c1)
                buf.push_back(c0)

            c0 = _reader_get(reader)
        else:
            # the escape handler already consumed the next character
            c0 = cast_to_uint32(-(c1 + 1))

    return PyUnicode_FromKindAndData(
        PyUnicode_4BYTE_KIND, buf.data(), buf.size(),
    )
221 |
222 |
# Decode a quoted string; on entry c_in_out holds the opening quote
# character, on exit it is NO_EXTRA_DATA (the closing quote was consumed).
cdef object _decode_string(ReaderRef reader, int32_t *c_in_out):
    cdef uint32_t delim
    cdef uint32_t c0
    cdef int32_t c1
    cdef Py_ssize_t start
    cdef object result

    c1 = c_in_out[0]
    delim = cast_to_uint32(c1)
    start = _reader_tell(reader)

    if expect(not _reader_good(reader), False):
        _raise_unclosed(b'string', start)

    c0 = _reader_get(reader)
    result = _decode_string_sub(reader, delim, start, c0)

    c_in_out[0] = NO_EXTRA_DATA
    return result
242 |
243 |
# Parse the NUL-terminated buffer as a double; the whole buffer must be
# consumed by the parser, otherwise the literal is malformed.
cdef object _decode_double(StackHeapString[char] &buf, Py_ssize_t start):
    cdef double d0
    cdef const char *end_of_double

    d0 = 0.0  # silence warning
    end_of_double = parse_number(buf.data(), &d0)
    if end_of_double != NULL and end_of_double[0] == b'\0':
        return PyFloat_FromDouble(d0)

    _raise_unclosed('NumericLiteral', start)
254 |
255 |
# Parse a number whose first digit is '0': hexadecimal ("0x..."), a float
# ("0." / "0e..."), or plain zero. `buf` already holds a sign, if any.
# On exit c_in_out holds the lookahead character (or -1 when exhausted).
cdef object _decode_number_leading_zero(ReaderRef reader, StackHeapString[char] &buf,
                                        int32_t *c_in_out, Py_ssize_t start):
    cdef uint32_t c0
    cdef int32_t c1 = 0  # silence warning

    if not _reader_good(reader):
        # input was exactly "0" (possibly signed)
        c_in_out[0] = -1
        return 0

    c0 = _reader_get(reader)
    if _is_x(c0):
        # hexadecimal integer literal
        while True:
            if not _reader_good(reader):
                c1 = -1
                break

            c0 = _reader_get(reader)
            if _is_hexadecimal(c0):
                # NOTE(review): "( c0)" looks like a cast was lost during
                # extraction — verify against the upstream sources.
                buf.push_back( c0)
            elif c0 != b'_':
                c1 = cast_to_int32(c0)
                break

        c_in_out[0] = c1

        buf.push_back(b'\0')
        try:
            return PyLong_FromString(buf.data(), NULL, 16)
        except Exception:
            _raise_unclosed('NumericLiteral', start)
    elif c0 == b'.':
        # fractional part follows: "0.xxx"
        buf.push_back(b'0')
        buf.push_back(b'.')

        while True:
            if not _reader_good(reader):
                c1 = -1
                break

            c0 = _reader_get(reader)
            if _is_in_float_representation(c0):
                buf.push_back( c0)
            elif c0 != b'_':
                c1 = cast_to_int32(c0)
                break

        c_in_out[0] = c1

        if buf.data()[buf.size() - 1] == b'.':
            # trailing point ("0."): terminate the buffer over it
            ( buf.data())[buf.size() - 1] = b'\0'
        else:
            buf.push_back(b'\0')

        return _decode_double(buf, start)
    elif _is_e(c0):
        # exponent directly after the zero ("0e5"): value is 0.0 either way,
        # so the exponent characters are consumed but not stored
        while True:
            if not _reader_good(reader):
                c1 = -1
                break

            c0 = _reader_get(reader)
            if _is_in_float_representation(c0):
                pass
            elif c0 == b'_':
                pass
            else:
                c1 = cast_to_int32(c0)
                break

        c_in_out[0] = c1
        return 0.0
    else:
        # plain zero; hand back the lookahead character
        c1 = cast_to_int32(c0)
        c_in_out[0] = c1
        return 0
331 |
332 |
# Parse a decimal number (integer or float) into `buf` and convert it.
# c_in_out holds the first character on entry and the lookahead on exit.
cdef object _decode_number_any(ReaderRef reader, StackHeapString[char] &buf,
                               int32_t *c_in_out, Py_ssize_t start):
    cdef uint32_t c0
    cdef int32_t c1
    cdef boolean is_float = False
    cdef boolean was_point = False
    cdef boolean leading_point = False
    cdef boolean leading_point = False

    c1 = c_in_out[0]
    c0 = cast_to_uint32(c1)

    if c0 == b'.':
        # ".5"-style literal: prepend the implied zero
        buf.push_back(b'0')
        is_float = True
        leading_point = True

    while True:
        if _is_decimal(c0):
            pass
        elif _is_in_float_representation(c0):
            # '.', 'e'/'E', '+', '-' make this a float
            is_float = True
        elif c0 != b'_':
            # end of the numeric literal
            c1 = cast_to_int32(c0)
            break

        if c0 == b'_':
            # digit group separators are skipped
            pass
        elif c0 != b'.':
            if was_point:
                # flush the deferred '.' unless an exponent follows it
                was_point = False
                if not _is_e(c0):
                    buf.push_back(b'.')
            buf.push_back( c0)
        elif not was_point:
            # defer the '.' until we know what follows it
            was_point = True
        else:
            # second '.' in a single literal
            _raise_unclosed('NumericLiteral', start)

        if not _reader_good(reader):
            c1 = -1
            break

        c0 = _reader_get(reader)

    c_in_out[0] = c1

    if leading_point and buf.size() == 1:  # single '.'
        _raise_unclosed('NumericLiteral', start)

    buf.push_back(b'\0')

    if not is_float:
        try:
            return PyLong_FromString(buf.data(), NULL, 10)
        except Exception:
            pass
        _raise_unclosed('NumericLiteral', start)
    else:
        return _decode_double(buf, start)
392 |
393 |
# Parse any number, including the sign and the Infinity/NaN literals.
cdef object _decode_number(ReaderRef reader, int32_t *c_in_out):
    cdef uint32_t c0
    cdef int32_t c1
    cdef Py_ssize_t start = _reader_tell(reader)
    cdef StackHeapString[char] buf

    c1 = c_in_out[0]
    c0 = cast_to_uint32(c1)

    if c0 == b'+':
        # "+Infinity" / "+NaN" / explicitly positive number
        if expect(not _reader_good(reader), False):
            _raise_unclosed(b'number', start)

        c0 = _reader_get(reader)
        if c0 == b'I':
            _accept_string(reader, b'nfinity')
            c_in_out[0] = NO_EXTRA_DATA
            return CONST_POS_INF
        elif c0 == b'N':
            _accept_string(reader, b'aN')
            c_in_out[0] = NO_EXTRA_DATA
            return CONST_POS_NAN
    elif c0 == b'-':
        # "-Infinity" / "-NaN" / negative number
        if expect(not _reader_good(reader), False):
            _raise_unclosed(b'number', start)

        c0 = _reader_get(reader)
        if c0 == b'I':
            _accept_string(reader, b'nfinity')
            c_in_out[0] = NO_EXTRA_DATA
            return CONST_NEG_INF
        elif c0 == b'N':
            _accept_string(reader, b'aN')
            c_in_out[0] = NO_EXTRA_DATA
            return CONST_NEG_NAN

        # only the minus sign is kept in the textual representation
        buf.push_back(b'-')

    if c0 == b'0':
        return _decode_number_leading_zero(reader, buf, c_in_out, start)
    else:
        c1 = cast_to_int32(c0)
        c_in_out[0] = c1
        return _decode_number_any(reader, buf, c_in_out, start)
438 |
439 |
# Skip the element separator inside an object or array.
# 1: terminator found (container done; trailing comma allowed)
# 0: next datum found, lookahead stored in c_in_out
# -1: exception (input exhausted)
cdef uint32_t _skip_comma(ReaderRef reader, Py_ssize_t start,
                          uint32_t terminator, const char *what,
                          int32_t *c_in_out) except -1:
    cdef int32_t c0
    cdef uint32_t c1
    cdef boolean needs_comma
    cdef uint32_t done
    cdef uint32_t done  # NOTE(review): unused

    c0 = c_in_out[0]
    c1 = cast_to_uint32(c0)

    needs_comma = True
    while True:
        c0 = _skip_to_data_sub(reader, c1)
        if c0 < 0:
            break

        c1 = cast_to_uint32(c0)
        if c1 == terminator:
            # also accepts a trailing comma before the terminator (JSON5)
            c_in_out[0] = NO_EXTRA_DATA
            return 1

        if c1 != b',':
            if expect(needs_comma, False):
                # data directly after data, without a separating comma
                _raise_expected_sc(
                    'comma', terminator, _reader_tell(reader), c1,
                )
            c_in_out[0] = c0
            return 0

        if expect(not needs_comma, False):
            # two commas in a row
            _raise_stray_character('comma', _reader_tell(reader))

        if expect(not _reader_good(reader), False):
            break

        c1 = _reader_get(reader)
        needs_comma = False

    _raise_unclosed(what, start)
    return -1
484 |
485 |
# Decode an ECMAScript IdentifierName (unquoted object key), which may
# contain "\uXXXX" / "\UXXXXXXXX" escapes. Lookahead protocol as elsewhere:
# c_in_out holds the first character on entry, the lookahead (or -1) on exit.
cdef unicode _decode_identifier_name(ReaderRef reader, int32_t *c_in_out):
    cdef int32_t c0
    cdef uint32_t c1
    cdef Py_ssize_t start
    cdef StackHeapString[uint32_t] buf

    start = _reader_tell(reader)

    c0 = c_in_out[0]
    c1 = cast_to_uint32(c0)
    if expect(not _is_identifier_start(c1), False):
        _raise_expected_s('IdentifierStart', _reader_tell(reader), c1)

    while True:
        if expect(c1 == b'\\', False):
            # escaped character inside the identifier
            if not _reader_good(reader):
                _raise_unclosed('IdentifierName', start)
                break

            c1 = _reader_get(reader)
            if c1 == b'u':
                c1 = cast_to_uint32(_get_escaped_unicode_maybe_surrogate(reader, _reader_tell(reader)))
            elif c1 == b'U':
                c1 = cast_to_uint32(_get_hex_character(reader, 8))
            else:
                _raise_expected_s('UnicodeEscapeSequence', _reader_tell(reader), c1)

        buf.push_back(c1)

        if not _reader_good(reader):
            c0 = -1
            break

        c1 = _reader_get(reader)
        if not _is_identifier_part(c1):
            # first character that is not part of the identifier: lookahead
            c0 = cast_to_int32(c1)
            break

    c_in_out[0] = c0
    return PyUnicode_FromKindAndData(
        PyUnicode_4BYTE_KIND, buf.data(), buf.size(),
    )
528 |
529 |
# Fill dict `result` with the members of the object being parsed;
# the opening '{' has already been consumed.
cdef boolean _decode_object(ReaderRef reader, object result) except False:
    cdef int32_t c0
    cdef uint32_t c1
    cdef Py_ssize_t start
    cdef boolean done
    cdef object key
    cdef object value
    cdef object ex

    start = _reader_tell(reader)

    c0 = _skip_to_data(reader)
    if expect(c0 >= 0, True):
        c1 = cast_to_uint32(c0)
        if c1 == b'}':
            # empty object
            return True

        while True:
            # key: quoted string or bare IdentifierName
            if c1 in b'"\'':
                key = _decode_string(reader, &c0)
            else:
                key = _decode_identifier_name(reader, &c0)
            if expect(c0 < 0, False):
                break

            # ':' between key and value
            c1 = cast_to_uint32(c0)
            c0 = _skip_to_data_sub(reader, c1)
            if expect(c0 < 0, False):
                break

            c1 = cast_to_uint32(c0)
            if expect(c1 != b':', False):
                _raise_expected_s('colon', _reader_tell(reader), c1)

            if expect(not _reader_good(reader), False):
                break

            c0 = _skip_to_data(reader)
            if expect(c0 < 0, False):
                break

            try:
                value = _decode_recursive(reader, &c0)
            except _DecoderException as ex:
                # record the partially decoded value on the in-flight exception
                PyDict_SetItem(result, key, (<_DecoderException> ex).result)
                raise

            if expect(c0 < 0, False):
                break

            PyDict_SetItem(result, key, value)

            done = _skip_comma(
                reader, start, b'}', b'object', &c0,
            )
            if done:
                return True

            c1 = cast_to_uint32(c0)

    _raise_unclosed(b'object', start)
    return False
592 |
593 |
# Fill list `result` with the elements of the array being parsed;
# the opening '[' has already been consumed.
cdef boolean _decode_array(ReaderRef reader, object result) except False:
    cdef int32_t c0
    cdef uint32_t c1
    cdef Py_ssize_t start
    cdef boolean done
    cdef object value
    cdef object ex

    start = _reader_tell(reader)

    c0 = _skip_to_data(reader)
    if expect(c0 >= 0, True):
        c1 = cast_to_uint32(c0)
        if c1 == b']':
            # empty array
            return True

        while True:
            try:
                value = _decode_recursive(reader, &c0)
            except _DecoderException as ex:
                # record the partially decoded value on the in-flight exception
                PyList_Append(result, (<_DecoderException> ex).result)
                raise

            if expect(c0 < 0, False):
                break

            PyList_Append(result, value)

            done = _skip_comma(
                reader, start, b']', b'array', &c0,
            )
            if done:
                return True

    _raise_unclosed(b'array', start)
629 |
630 |
# Require the next input characters to match the NUL-terminated `string`
# exactly (used for the fixed tails of literals like "null", "Infinity").
cdef boolean _accept_string(ReaderRef reader, const char *string) except False:
    cdef uint32_t c0
    cdef uint32_t c1
    cdef Py_ssize_t start

    start = _reader_tell(reader)
    while True:
        c0 = string[0]
        string += 1
        if not c0:
            break

        if expect(not _reader_good(reader), False):
            _raise_unclosed(b'literal', start)

        c1 = _reader_get(reader)
        if expect(c0 != c1, False):
            _raise_expected_c(c0, start, c1)

    return True
651 |
652 |
# The dispatcher consumed the leading 'n': accept the rest of the "null"
# literal and report that no lookahead character is buffered.
cdef object _decode_null(ReaderRef reader, int32_t *c_in_out):
    # n
    _accept_string(reader, b'ull')
    c_in_out[0] = NO_EXTRA_DATA
    return None
658 |
659 |
# The dispatcher consumed the leading 't': accept the rest of "true".
cdef object _decode_true(ReaderRef reader, int32_t *c_in_out):
    # t
    _accept_string(reader, b'rue')
    c_in_out[0] = NO_EXTRA_DATA
    return True
665 |
666 |
# The dispatcher consumed the leading 'f': accept the rest of "false".
cdef object _decode_false(ReaderRef reader, int32_t *c_in_out):
    # f
    _accept_string(reader, b'alse')
    c_in_out[0] = NO_EXTRA_DATA
    return False
672 |
673 |
# The dispatcher consumed the leading 'I': accept the rest of "Infinity".
cdef object _decode_inf(ReaderRef reader, int32_t *c_in_out):
    # I
    _accept_string(reader, b'nfinity')
    c_in_out[0] = NO_EXTRA_DATA
    return CONST_POS_INF
679 |
680 |
# The dispatcher consumed the leading 'N': accept the rest of "NaN".
cdef object _decode_nan(ReaderRef reader, int32_t *c_in_out):
    # N
    _accept_string(reader, b'aN')
    c_in_out[0] = NO_EXTRA_DATA
    return CONST_POS_NAN
686 |
687 |
# Decodes a container value: '{' starts an object (dict); otherwise the
# dispatch table guarantees '[' for an array (list).  Nesting depth is
# tracked via _reader_enter/_reader_leave so that overly deep documents
# raise instead of overflowing the C stack.
cdef object _decode_recursive_enter(ReaderRef reader, int32_t *c_in_out):
    cdef boolean (*fn)(ReaderRef reader, object result) except False
    cdef object result
    cdef int32_t c0
    cdef uint32_t c1
    cdef object ex

    c0 = c_in_out[0]
    c1 = cast_to_uint32(c0)

    if c1 == b'{':
        result = {}
        fn = _decode_object
    else:
        result = []
        fn = _decode_array

    # Bump the nesting depth; exceeding the maximum raises RecursionError.
    _reader_enter(reader)
    try:
        fn(reader, result)
    except RecursionError:
        _raise_nesting(_reader_tell(reader), result)
    except _DecoderException as ex:
        # Attach the partially decoded container for error reporting.
        (<_DecoderException> ex).result = result
        raise
    finally:
        _reader_leave(reader)

    c_in_out[0] = NO_EXTRA_DATA
    return result
718 |
719 |
# Fallback for characters that cannot start any JSON5 value: always raises.
cdef object _decoder_unknown(ReaderRef reader, int32_t *c_in_out):
    cdef int32_t c0
    cdef uint32_t c1
    cdef Py_ssize_t start

    c0 = c_in_out[0]
    c1 = cast_to_uint32(c0)
    start = _reader_tell(reader)

    _raise_expected_s('JSON5Value', start, c1)
730 |
731 |
# Decodes a single JSON5 value.  The first significant character is in
# ``c_in_out``; the generated table ``drs_lookup`` maps it to a handler.
# On return, ``c_in_out`` holds the first character after the value, or a
# negative value when no lookahead character is buffered.
cdef object _decode_recursive(ReaderRef reader, int32_t *c_in_out):
    cdef int32_t c0
    cdef uint32_t c1
    cdef Py_ssize_t start
    cdef DrsKind kind
    cdef object (*decoder)(ReaderRef, int32_t*)

    c0 = c_in_out[0]
    c1 = cast_to_uint32(c0)
    if c1 >= 128:
        # Non-ASCII characters never start a JSON5 value
        # (drs_lookup only covers 0..127).
        start = _reader_tell(reader)
        _raise_expected_s('JSON5Value', start, c1)

    kind = drs_lookup[c1]
    if kind == DRS_fail:
        decoder = _decoder_unknown
    elif kind == DRS_null:
        decoder = _decode_null
    elif kind == DRS_true:
        decoder = _decode_true
    elif kind == DRS_false:
        decoder = _decode_false
    elif kind == DRS_inf:
        decoder = _decode_inf
    elif kind == DRS_nan:
        decoder = _decode_nan
    elif kind == DRS_string:
        decoder = _decode_string
    elif kind == DRS_number:
        decoder = _decode_number
    elif kind == DRS_recursive:
        decoder = _decode_recursive_enter
    else:
        unreachable()
        decoder = _decoder_unknown  # not reached; silences the C compiler

    return decoder(reader, c_in_out)
769 |
770 |
# Decodes one document from the reader.  With ``some`` set (decode_some),
# trailing data after the value is permitted as long as the value is
# terminated by whitespace; otherwise only whitespace/comments may follow.
cdef object _decode_all_sub(ReaderRef reader, boolean some):
    cdef Py_ssize_t start
    cdef int32_t c0
    cdef uint32_t c1
    cdef object result
    cdef object ex

    start = _reader_tell(reader)
    c0 = _skip_to_data(reader)
    if expect(c0 < 0, False):
        # Nothing but whitespace/comments in the input.
        _raise_no_data(start)

    result = _decode_recursive(reader, &c0)
    try:
        if c0 < 0:
            # Input exhausted exactly at the end of the value.
            pass
        elif not some:
            # Strict mode: anything but whitespace/comments is extra data.
            start = _reader_tell(reader)
            c1 = cast_to_uint32(c0)
            c0 = _skip_to_data_sub(reader, c1)
            if expect(c0 >= 0, False):
                c1 = cast_to_uint32(c0)
                _raise_extra_data(c1, start)
        elif expect(not _is_ws_zs(c0), False):
            # decode_some: the value must at least end at whitespace.
            start = _reader_tell(reader)
            c1 = cast_to_uint32(c0)
            _raise_unframed_data(c1, start)
    except _DecoderException as ex:
        # The value itself decoded fine; keep it on the exception.
        (<_DecoderException> ex).result = result
        raise

    return result
803 |
804 |
# Entry point for all decoders: runs _decode_all_sub and converts the
# internal _DecoderException into the public exception instance it
# describes (constructed lazily, outside the hot decoding path).
cdef object _decode_all(ReaderRef reader, boolean some):
    cdef object ex, ex2
    try:
        return _decode_all_sub(reader, some)
    except _DecoderException as ex:
        ex2 = (<_DecoderException> ex).cls(
            (<_DecoderException> ex).msg,
            (<_DecoderException> ex).result,
            (<_DecoderException> ex).extra,
        )
        raise ex2
816 |
817 |
cdef object _decode_ucs1(const void *string, Py_ssize_t length,
                         Py_ssize_t maxdepth, boolean some):
    # Wrap a 1-byte-per-character buffer in a UCS1 reader and decode it.
    cdef ReaderUCS1 ucs1_reader = ReaderUCS1(
        ReaderUCS(length, 0, maxdepth),
        string,
    )
    return _decode_all(ucs1_reader, some)
825 |
826 |
cdef object _decode_ucs2(const void *string, Py_ssize_t length,
                         Py_ssize_t maxdepth, boolean some):
    # Wrap a 2-byte-per-character buffer in a UCS2 reader and decode it.
    cdef ReaderUCS2 ucs2_reader = ReaderUCS2(
        ReaderUCS(length, 0, maxdepth),
        string,
    )
    return _decode_all(ucs2_reader, some)
834 |
835 |
cdef object _decode_ucs4(const void *string, Py_ssize_t length,
                         Py_ssize_t maxdepth, boolean some):
    # Wrap a 4-byte-per-character buffer in a UCS4 reader and decode it.
    cdef ReaderUCS4 ucs4_reader = ReaderUCS4(
        ReaderUCS(length, 0, maxdepth),
        string,
    )
    return _decode_all(ucs4_reader, some)
843 |
844 |
cdef object _decode_utf8(const void *string, Py_ssize_t length,
                         Py_ssize_t maxdepth, boolean some):
    # Wrap a UTF-8 byte buffer in a UTF-8 decoding reader and decode it.
    cdef ReaderUTF8 utf8_reader = ReaderUTF8(
        ReaderUCS(length, 0, maxdepth),
        string,
    )
    return _decode_all(utf8_reader, some)
852 |
853 |
# Decodes a Python str by dispatching on its internal (PEP 393) storage
# kind, avoiding any copy of the character data on CPython.
cdef object _decode_unicode(object data, Py_ssize_t maxdepth, boolean some):
    cdef Py_ssize_t length
    cdef int kind
    cdef const char *s

    PyUnicode_READY(data)

    if CYTHON_COMPILING_IN_PYPY:
        # PyPy does not expose the PEP 393 internals; go through UTF-8.
        length = 0
        s = PyUnicode_AsUTF8AndSize(data, &length)
        return _decode_utf8(s, length, maxdepth, some)

    length = PyUnicode_GET_LENGTH(data)
    kind = PyUnicode_KIND(data)

    if kind == PyUnicode_1BYTE_KIND:
        return _decode_ucs1(PyUnicode_1BYTE_DATA(data), length, maxdepth, some)
    elif kind == PyUnicode_2BYTE_KIND:
        return _decode_ucs2(PyUnicode_2BYTE_DATA(data), length, maxdepth, some)
    elif kind == PyUnicode_4BYTE_KIND:
        return _decode_ucs4(PyUnicode_4BYTE_DATA(data), length, maxdepth, some)
    else:
        unreachable()
877 |
878 |
# Decodes a raw buffer.  ``wordlength`` selects the interpretation of the
# bytes: 0 -> UTF-8, 1/2/4 -> UCS1/UCS2/UCS4 code units; the byte length
# is divided by the word size accordingly.
cdef object _decode_buffer(Py_buffer &view, int32_t wordlength,
                           Py_ssize_t maxdepth, boolean some):
    cdef object (*decoder)(const void*, Py_ssize_t, Py_ssize_t, boolean)
    cdef Py_ssize_t length = 0

    if wordlength == 0:
        decoder = _decode_utf8
        length = view.len // 1
    elif wordlength == 1:
        decoder = _decode_ucs1
        length = view.len // 1
    elif wordlength == 2:
        decoder = _decode_ucs2
        length = view.len // 2
    elif wordlength == 4:
        decoder = _decode_ucs4
        length = view.len // 4
    else:
        _raise_illegal_wordlength(wordlength)
        # Not reached; the assignments only silence the C compiler.
        unreachable()
        length = 0
        decoder = NULL

    return decoder(view.buf, length, maxdepth, some)
903 |
904 |
# Decodes input that is supplied lazily by calling ``cb(*args)`` whenever
# more data is needed.
cdef object _decode_callback(object cb, object args, Py_ssize_t maxdepth,
                             boolean some):
    cdef ReaderCallback reader = ReaderCallback(
        ReaderCallbackBase(0, maxdepth),
        cb,
        args,
        -1,  # presumably "no lookahead character buffered yet" -- verify
    )
    return _decode_all(reader, some)
914 |
--------------------------------------------------------------------------------
/src/_decoder_recursive_select.hpp:
--------------------------------------------------------------------------------
#ifndef JSON5EncoderCpp_decoder_recursive_select
#define JSON5EncoderCpp_decoder_recursive_select

// GENERATED FILE
// All changes will be lost.

// BUG FIX: the include directive was missing its header name;
// <cstdint> is required for std::uint8_t below.
#include <cstdint>

namespace JSON5EncoderCpp {
inline namespace {

// Kind of JSON5 value introduced by a given start character.
enum DrsKind : std::uint8_t {
    DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive
};

// Dispatch table indexed by the ASCII code (0..127) of the first
// significant character of a value.
static const DrsKind drs_lookup[128] = {
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_string, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_string,
    DRS_fail, DRS_fail, DRS_fail, DRS_number, DRS_fail, DRS_number, DRS_number, DRS_fail,
    DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number, DRS_number,
    DRS_number, DRS_number, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_inf, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_nan, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_recursive, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_false, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_null, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_fail, DRS_true, DRS_fail, DRS_fail, DRS_fail,
    DRS_fail, DRS_fail, DRS_fail, DRS_recursive, DRS_fail, DRS_fail, DRS_fail, DRS_fail,
};

}  // anonymous inline namespace
}  // namespace JSON5EncoderCpp

#endif
39 |
--------------------------------------------------------------------------------
/src/_encoder.pyx:
--------------------------------------------------------------------------------
# Writes ``data`` (``length`` code points) as a quoted JSON5 string.
# Runs of characters that need no escaping are emitted in bulk; everything
# else is written as a short escape (from ESCAPE_DCT) or a \uXXXX sequence
# (a surrogate pair for non-BMP code points).
# NOTE(review): expressions like "( writer.options)" appear to be stripped
# "(<Options> writer.options)" casts in this copy -- verify upstream.
cdef int _encode_unicode_impl(WriterRef writer, UCSString data, Py_ssize_t length) except -1:
    cdef char buf[32]
    cdef uint32_t c
    cdef uint32_t s1, s2
    cdef const char *escaped_string
    cdef Py_ssize_t escaped_length
    cdef size_t unescaped_length, index
    cdef Py_ssize_t sublength

    if length > 0:
        writer.reserve(writer, 2 + length)
        writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0])
        while True:
            if UCSString is UCS1String:
                # UCS1 can be appended directly: scan the whole remainder.
                sublength = length
            else:
                # Wider characters are funneled through buf: bounded window.
                sublength = min(length, sizeof(buf))

            unescaped_length = ESCAPE_DCT.find_unescaped_range(data, sublength)
            if unescaped_length > 0:
                if UCSString is UCS1String:
                    writer.append_s(writer, data, unescaped_length)
                else:
                    # Narrow the characters into buf before appending.
                    for index in range(unescaped_length):
                        buf[index] = data[index]
                    writer.append_s(writer, buf, unescaped_length)

                data += unescaped_length
                length -= unescaped_length
                if length <= 0:
                    break

                if UCSString is not UCS1String:
                    # The run may merely have hit the window end; rescan.
                    continue

            # data[0] needs escaping now.
            c = data[0]
            if (UCSString is UCS1String) or (c < 0x100):
                # Single-character escape or \u00XX from the lookup table.
                escaped_length = ESCAPE_DCT.items[c][0]
                escaped_string = &ESCAPE_DCT.items[c][1]
                writer.append_s(writer, escaped_string, escaped_length)
            elif (UCSString is UCS2String) or (c <= 0xffff):
                # BMP code point: a single \uXXXX escape.
                buf[0] = b'\\';
                buf[1] = b'u';
                buf[2] = HEX[(c >> (4*3)) & 0xf];
                buf[3] = HEX[(c >> (4*2)) & 0xf];
                buf[4] = HEX[(c >> (4*1)) & 0xf];
                buf[5] = HEX[(c >> (4*0)) & 0xf];
                buf[6] = 0;

                writer.append_s(writer, buf, 6);
            else:
                # surrogate pair
                c -= 0x10000
                s1 = 0xd800 | ((c >> 10) & 0x3ff)
                s2 = 0xdc00 | (c & 0x3ff)

                buf[0x0] = b'\\';
                buf[0x1] = b'u';
                buf[0x2] = HEX[(s1 >> (4*3)) & 0xf];
                buf[0x3] = HEX[(s1 >> (4*2)) & 0xf];
                buf[0x4] = HEX[(s1 >> (4*1)) & 0xf];
                buf[0x5] = HEX[(s1 >> (4*0)) & 0xf];

                buf[0x6] = b'\\';
                buf[0x7] = b'u';
                buf[0x8] = HEX[(s2 >> (4*3)) & 0xf];
                buf[0x9] = HEX[(s2 >> (4*2)) & 0xf];
                buf[0xa] = HEX[(s2 >> (4*1)) & 0xf];
                buf[0xb] = HEX[(s2 >> (4*0)) & 0xf];

                buf[0xc] = 0;

                writer.append_s(writer, buf, 12);

            data += 1
            length -= 1
            if length <= 0:
                break
        writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0])
    else:
        # Empty string: always plain double quotes.
        writer.append_s(writer, b'""', 2)

    return True
84 |
85 |
# Encodes a Python str by dispatching on its internal (PEP 393) storage
# kind to the matching _encode_unicode_impl specialization.
cdef int _encode_unicode(WriterRef writer, object data) except -1:
    cdef Py_ssize_t length
    cdef int kind

    PyUnicode_READY(data)

    length = PyUnicode_GET_LENGTH(data)
    kind = PyUnicode_KIND(data)

    if kind == PyUnicode_1BYTE_KIND:
        _encode_unicode_impl(writer, PyUnicode_1BYTE_DATA(data), length)
    elif kind == PyUnicode_2BYTE_KIND:
        _encode_unicode_impl(writer, PyUnicode_2BYTE_DATA(data), length)
    elif kind == PyUnicode_4BYTE_KIND:
        _encode_unicode_impl(writer, PyUnicode_4BYTE_DATA(data), length)
    else:
        pass  # impossible
    
    return True
105 |
106 |
# Encodes a non-str dict key: the key is JSON5-encoded into a temporary
# in-memory writer first, then that text is emitted as a quoted string
# with '"' and '\' escaped.
cdef int _encode_nested_key(WriterRef writer, object data) except -1:
    cdef const char *string
    cdef char c
    cdef Py_ssize_t index, length
    cdef int result

    cdef WriterReallocatable sub_writer = WriterReallocatable(
        Writer(
            _WriterReallocatable_reserve,
            _WriterReallocatable_append_c,
            _WriterReallocatable_append_s,
            writer.options,
        ),
        0, 0, NULL,
    )
    try:
        result = _encode(sub_writer.base, data)
        if expect(result < 0, False):
            return result

        length = sub_writer.position
        string = sub_writer.obj

        writer.reserve(writer, 2 + length)
        writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0])
        for index in range(length):
            c = string[index]
            if c in b'\\"':
                # Escape quotes/backslashes of the nested encoding.
                writer.append_c(writer, b'\\')
            writer.append_c(writer, c)
        writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0])
    finally:
        # Free the temporary buffer even on error.
        if sub_writer.obj is not NULL:
            ObjectFree(sub_writer.obj)

    return True
143 |
144 |
# Appends pre-encoded JSON text that must be pure ASCII -- either a str or
# any contiguous buffer (bytes, bytearray, ...).  Raises TypeError on any
# non-ASCII content.
cdef int _append_ascii(WriterRef writer, object data) except -1:
    cdef Py_buffer view
    cdef const char *buf
    cdef Py_ssize_t index
    cdef unsigned char c

    if PyUnicode_Check(data):
        PyUnicode_READY(data)
        if not PyUnicode_IS_ASCII(data):
            raise TypeError('Expected ASCII data')
        writer.append_s(writer, PyUnicode_1BYTE_DATA(data), PyUnicode_GET_LENGTH(data))
    else:
        PyObject_GetBuffer(data, &view, PyBUF_CONTIG_RO)
        try:
            buf = view.buf
            # Validate every byte before any of it is written.
            for index in range(view.len):
                c = buf[index]
                if c & ~0x7f:
                    raise TypeError('Expected ASCII data')

            writer.append_s(writer, buf, view.len)
        finally:
            PyBuffer_Release(&view)

    return True
170 |
171 |
# If the object has the configured to-JSON member, use it: call it when it
# is callable, otherwise take the attribute value verbatim; the result is
# appended as ASCII.  Returns False when the object has no such member.
cdef int _encode_tojson(WriterRef writer, object data) except -1:
    cdef object value = getattr(data, ( writer.options).tojson, None)
    if value is None:
        return False

    if callable(value):
        # Guard against unbounded recursion in user-defined tojson hooks.
        Py_EnterRecursiveCall(' while encoding nested JSON5 object')
        try:
            value = value()
        finally:
            Py_LeaveRecursiveCall()

    _append_ascii(writer, value)
    return True
186 |
187 |
# Encodes any iterable as a JSON5 array.  Returns False (without output)
# when the object is not iterable at all.
cdef int _encode_sequence(WriterRef writer, object data) except -1:
    cdef boolean first
    cdef object iterator
    cdef object value
    cdef int result

    try:
        iterator = PyObject_GetIter(data)
    except TypeError:
        # Not iterable: let the caller try the next encoding strategy.
        return False

    Py_EnterRecursiveCall(' while encoding nested JSON5 object')
    try:
        writer.append_c(writer, b'[')
        first = True
        value = None
        while iter_next(iterator, & value):
            if not first:
                writer.append_c(writer, b',')
            else:
                first = False

            result = _encode(writer, value)
            if expect(result < 0, False):
                return result
        writer.append_c(writer, b']')
    finally:
        Py_LeaveRecursiveCall()

    return True
218 |
219 |
# Encodes a mapping as a JSON5 object.  The object must be an instance of
# one of the configured mapping types, iterate over its keys, and support
# ``data[key]``.  Returns False when it is not a configured mapping type.
cdef int _encode_mapping(WriterRef writer, object data) except -1:
    cdef boolean first
    cdef object iterator, key, value
    cdef int result

    if not isinstance(data, ( writer.options).mappingtypes):
        return False

    iterator = PyObject_GetIter(data)

    Py_EnterRecursiveCall(' while encoding nested JSON5 object')
    try:
        writer.append_c(writer, b'{')
        first = True
        key = None
        while iter_next(iterator, & key):
            if not first:
                writer.append_c(writer, b',')
            else:
                first = False
            value = data[key]

            # str keys are written directly; other keys are encoded and
            # wrapped in quotes by _encode_nested_key.
            if PyUnicode_Check(key):
                _encode_unicode(writer, key)
            else:
                _encode_nested_key(writer, key)

            writer.append_c(writer, b':')

            result = _encode(writer, value)
            if expect(result < 0, False):
                return result
        writer.append_c(writer, b'}')
    finally:
        Py_LeaveRecursiveCall()

    return True
257 |
258 |
# Emits the JSON5 ``null`` literal for Python ``None``.
cdef int _encode_none(WriterRef writer, object data) except -1:
    writer.append_s(writer, b'null', 4)
    return True
262 |
263 |
# Encodes bytes-like data by decoding it as strict UTF-8 first.
cdef int _encode_bytes(WriterRef writer, object data) except -1:
    _encode_unicode(writer, PyUnicode_FromEncodedObject(data, 'UTF-8', 'strict'))
    return True
267 |
268 |
# Encodes date/time instances as their quoted ISO-8601 representation.
# Returns False when ``data`` is not one of the known datetime classes.
cdef int _encode_datetime(WriterRef writer, object data) except -1:
    cdef object stringified
    cdef Py_ssize_t length
    cdef const char *string

    if not isinstance(data, DATETIME_CLASSES):
        return False

    stringified = data.isoformat()
    length = 0
    string = PyUnicode_AsUTF8AndSize(stringified, &length)

    writer.reserve(writer, 2 + length)
    writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0])
    writer.append_s(writer, string, length)
    writer.append_c(writer, PyUnicode_1BYTE_DATA(( writer.options).quotationmark)[0])

    return True
287 |
288 |
# Applies the %-style format ``fmt`` to ``data`` and appends the result
# as UTF-8.
cdef int _encode_format_string(WriterRef writer, object data, object fmt) except -1:
    cdef object formatted
    cdef const char *string
    cdef Py_ssize_t length = 0  # silence warning

    formatted = PyUnicode_Format(fmt, data)
    string = PyUnicode_AsUTF8AndSize(formatted, &length)
    writer.append_s(writer, string, length)

    return True
299 |
300 |
# Encodes a float: normal values via Dtoa (shortest round-trip
# representation), subnormals and zeros as "0.0", and non-finite values as
# the JSON5 literals NaN / Infinity / -Infinity.
cdef int _encode_float(WriterRef writer, object data) except -1:
    cdef double value = PyFloat_AsDouble(data)
    cdef int classification = fpclassify(value)
    cdef char buf[64]
    cdef char *end
    cdef char *string
    cdef Py_ssize_t length

    if classification == FP_NORMAL:
        end = Dtoa(buf, PyFloat_AsDouble(data))
        length = end - buf
        string = buf
    elif classification in (FP_SUBNORMAL, FP_ZERO):
        # Subnormals are deliberately flushed to zero.
        string = b'0.0'
        length = 3
    elif classification == FP_NAN:
        string = b'NaN'
        length = 3
    else:
        # classification == FP_INFINITE
        if value > 0.0:
            string = b'Infinity'
            length = 8
        else:
            string = b'-Infinity'
            length = 9

    writer.append_s(writer, string, length)
    return True
330 |
331 |
# Encodes an int.  bool is a subclass of int, so it is intercepted first
# and written as the true/false literals.
# NOTE(review): 'true'/'false' are str literals here while the other
# writers pass bytes (b'null', b'""') -- verify upstream uses b'true'.
cdef int _encode_long(WriterRef writer, object data) except -1:
    if PyBool_Check(data):
        if data is True:
            writer.append_s(writer, 'true', 4)
        else:
            writer.append_s(writer, 'false', 5)
    else:
        writer.append_s(writer, _encode_format_string(writer, data, DEFAULT_INTFORMAT)) if False else _encode_format_string(writer, data, DEFAULT_INTFORMAT)
    return True
341 |
342 |
# Encodes decimal.Decimal instances via str-formatting; returns False for
# any other type so the caller can try the next strategy.
cdef int _encode_decimal(WriterRef writer, object data) except -1:
    if not isinstance(data, Decimal):
        return False

    _encode_format_string(writer, data, DEFAULT_DECIMALFORMAT)
    return True
349 |
350 |
# Last-resort encoder: falsy objects become ``null``; otherwise the
# object's __dict__ is fetched via the generic getter (bypassing any
# overridden __getattribute__) and encoded as a mapping.  If that fails
# too, an "unstringifiable" error is raised.
cdef int _encode_unstringifiable(WriterRef writer, object data) except -1:
    if not data:
        # BUG FIX: must emit the JSON5 token ``null`` -- the previous
        # literal ``none`` is not parseable by any JSON/JSON5 decoder.
        writer.append_s(writer, b'null', 4)
        return True

    Py_EnterRecursiveCall(' while encoding JSON5 object with vars(obj) fallback')
    try:
        try:
            data = PyObject_GenericGetDict(data, NULL)
        # BUG FIX: narrowed from a bare ``except:`` so that BaseExceptions
        # such as KeyboardInterrupt/SystemExit still propagate; failure to
        # fetch __dict__ remains a deliberate best-effort fall-through.
        except Exception:
            pass
        else:
            if _encode_mapping(writer, data):
                return True
    finally:
        Py_LeaveRecursiveCall()

    _raise_unstringifiable(data)
369 |
370 |
# Fallback dispatcher for types without a dedicated encoder: tries each
# strategy in order and stops at the first one that reports success
# (a non-zero result).  The while-True loop runs at most once; it only
# provides an easy break-out.
# NOTE(review): expressions like "( _encode_tojson)" look like stripped
# "(<...>)" casts in this copy -- verify against upstream.
cdef int _encode_other(WriterRef writer, object data):
    cdef int result = 0

    while True:
        # User-provided to-JSON hook, if configured.
        if ( writer.options).tojson is not None:
            result = ( _encode_tojson)(writer, data)
            if result != 0:
                break

        if obj_has_iter(data):
            # Iterable: mapping first, then generic sequence.
            result = ( _encode_mapping)(writer, data)
            if result != 0:
                break

            result = ( _encode_sequence)(writer, data)
            if result != 0:
                break

        result = ( _encode_decimal)(writer, data)
        if result != 0:
            break

        result = ( _encode_datetime)(writer, data)
        if result != 0:
            break

        # Last resort: vars(obj) fallback or an error.
        result = ( _encode_unstringifiable)(writer, data)
        if result != 0:
            break

        break

    return result
404 |
405 |
# Top-level type dispatch: picks the encoder for the concrete type and
# invokes it.  Anything not handled explicitly goes to _encode_other.
cdef int _encode(WriterRef writer, object data):
    cdef int (*encoder)(WriterRef, object)

    if data is None:
        encoder = _encode_none
    elif PyUnicode_Check(data):
        encoder = _encode_unicode
    elif PyLong_Check(data):
        # Also covers bool (a subclass of int).
        encoder = _encode_long
    elif PyFloat_Check(data):
        encoder = _encode_float
    elif PyBytes_Check(data):
        encoder = _encode_bytes
    else:
        encoder = _encode_other

    return encoder(writer, data)
423 |
424 |
cdef int _encode_callback_bytes(object data, object cb, object options) except -1:
    # Serialize ``data`` through a writer that forwards every chunk to the
    # callback ``cb`` as bytes.
    cdef WriterCallback bytes_writer = WriterCallback(
        Writer(
            _WriterNoop_reserve,
            _WriterCbBytes_append_c,
            _WriterCbBytes_append_s,
            options,
        ),
        cb,
    )

    # Reject non-callable callbacks before any encoding work is done.
    if expect(not callable(cb), False):
        raise TypeError(f'type(cb)=={type(cb)!r} is not callable')

    return _encode(bytes_writer.base, data)
440 |
441 |
cdef int _encode_callback_str(object data, object cb, object options) except -1:
    # Serialize ``data`` through a writer that forwards every chunk to the
    # callback ``cb`` as str.
    cdef WriterCallback str_writer = WriterCallback(
        Writer(
            _WriterNoop_reserve,
            _WriterCbStr_append_c,
            _WriterCbStr_append_s,
            options,
        ),
        cb,
    )

    # Reject non-callable callbacks before any encoding work is done.
    if expect(not callable(cb), False):
        raise TypeError(f'type(cb)=={type(cb)!r} is not callable')

    return _encode(str_writer.base, data)
457 |
--------------------------------------------------------------------------------
/src/_encoder_options.pyx:
--------------------------------------------------------------------------------
# Defaults for Options; a field left at its default is omitted from
# __repr__ and from the pickle (__reduce__) arguments.
cdef object DEFAULT_TOJSON = False            # no to-JSON hook by default
cdef object DEFAULT_INTFORMAT = '%d'          # %-format for int values
cdef object DEFAULT_DECIMALFORMAT = '%s'      # %-format for Decimal values
cdef object DEFAULT_MAPPINGTYPES = (Mapping,) # classes encoded as objects
cdef object DEFAULT_QUOTATIONMARK = '"'       # string delimiter
6 |
7 |
# Normalizes a str-or-False option value: False -> None, bytes -> str
# (decoded as strict UTF-8); optionally enforces pure-ASCII content.
cdef object _options_ascii(object datum, boolean expect_ascii=True):
    if datum is False:
        # "Feature disabled" is stored as None.
        return None
    elif PyBytes_Check(datum):
        datum = unicode(datum, 'UTF-8', 'strict')
    elif not PyUnicode_Check(datum):
        raise TypeError('Expected str instance or False')

    PyUnicode_READY(datum)
    if expect_ascii and not PyUnicode_IS_ASCII(datum):
        raise ValueError('Expected ASCII data')

    return datum
21 |
22 |
cdef object _options_reduce_arg(object key, object value, object default):
    # Pickle helper: yield a (key, value) pair only for non-default
    # settings; "feature disabled" (None) is pickled as False.
    if value == default:
        return None
    return (key, value) if value is not None else (key, False)
29 |
30 |
cdef object _option_from_ascii(object name, object value, object default):
    # Render one "name=value" fragment for __repr__, or None when the
    # option is still at its default.
    if value == default:
        return None
    if value is None:
        return f'{name}=False'
    return f'{name}={value!r}'
38 |
39 |
cdef _options_from_ascii(Options self):
    # Join the non-default scalar options into "key=value, key=value".
    parts = (
        _option_from_ascii('quotationmark', self.quotationmark, DEFAULT_QUOTATIONMARK),
        _option_from_ascii('tojson', self.tojson, None),
    )
    return ', '.join(part for part in parts if part)
45 |
46 |
@final
@no_gc
@freelist(8)
@auto_pickle(False)
cdef class Options:
    '''
    Customizations for the :func:`encoder_*(...) <pyjson5.encode>` function family.

    Immutable. Use :meth:`Options.update(**kw) <pyjson5.Options.update>` to create a **new** Options instance.

    Parameters
    ----------
    quotationmark : str|None
        * **str**: One character string that is used to surround strings.
        * **None**: Use default: ``'"'``.
    tojson : str|False|None
        * **str:** A special method to call on objects to return a custom JSON encoded string. Must return ASCII data!
        * **False:** No such member exists. (Default.)
        * **None:** Use default.
    mappingtypes : Iterable[type]|False|None
        * **Iterable[type]:** Classes that should be encoded to objects. Must be iterable over their keys, and implement ``__getitem__``.
        * **False:** There are no objects. Any object will be encoded as list of keys as in list(obj).
        * **None:** Use default: ``[collections.abc.Mapping]``.
    '''
    cdef readonly unicode quotationmark
    '''The creation argument ``quotationmark``.
    '''
    cdef readonly unicode tojson
    '''The creation argument ``tojson``.
    ``None`` if ``False`` was specified.
    '''
    cdef readonly tuple mappingtypes
    '''The creation argument ``mappingtypes``.
    ``()`` if ``False`` was specified.
    '''

    def __reduce__(self):
        # Pickle only the non-default settings as (name, value) pairs.
        cdef object args = tuple(filter(bool, (
            _options_reduce_arg('quotationmark', self.quotationmark, DEFAULT_QUOTATIONMARK),
            _options_reduce_arg('tojson', self.tojson, None),
            _options_reduce_arg('mappingtypes', self.mappingtypes, DEFAULT_MAPPINGTYPES),
        )))
        return (_UnpickleOptions if args else Options), args

    def __repr__(self):
        cdef object repr_options = _options_from_ascii(self)
        cdef object repr_cls = (
            ''
            if self.mappingtypes == DEFAULT_MAPPINGTYPES else
            # BUG FIX: show the configured mappingtypes; the previous code
            # rendered repr(DEFAULT_MAPPINGTYPES), i.e. always the default
            # value even when a custom one was set.
            f'mappingtypes={self.mappingtypes!r}'
        )
        return (f'Options('
                f'{repr_options}'
                f'{repr_options and repr_cls and ", "}'
                f'{repr_cls}'
                ')')

    def __str__(self):
        return self.__repr__()

    def __cinit__(self, *,
                  quotationmark=None,
                  tojson=None, posinfinity=None, neginfinity=None, nan=None,
                  decimalformat=None, intformat=None,
                  mappingtypes=None):
        # NOTE(review): posinfinity/neginfinity/nan/decimalformat/intformat
        # are accepted but not stored -- presumably kept for backward
        # compatibility; verify against the documentation.
        cdef object cls
        cdef object ex

        # None means "use the default" for every option.
        if quotationmark is None:
            quotationmark = DEFAULT_QUOTATIONMARK
        if tojson is None:
            tojson = DEFAULT_TOJSON
        if mappingtypes is None:
            mappingtypes = DEFAULT_MAPPINGTYPES

        self.quotationmark = _options_ascii(quotationmark)
        self.tojson = _options_ascii(tojson, False)

        if self.quotationmark is None or PyUnicode_GET_LENGTH(self.quotationmark) != 1:
            raise TypeError('quotationmark must be one ASCII character.')

        if mappingtypes is False:
            self.mappingtypes = ()
        else:
            self.mappingtypes = tuple(mappingtypes)
            for cls in self.mappingtypes:
                if not PyType_Check(cls):
                    raise TypeError('mappingtypes must be a sequence of types '
                                    'or False')

    def update(self, *args, **kw):
        '''
        Creates a new Options instance by modifying some members.
        '''
        if kw:
            return _to_options(self, kw)
        else:
            return self
145 |
146 |
# Shared all-defaults instance, reused whenever no customization is asked for.
cdef Options DEFAULT_OPTIONS_OBJECT = Options()
148 |
149 |
def _UnpickleOptions(*args):
    # Unpickle helper: rebuild an Options instance from (name, value)
    # pairs; an empty argument list maps to the shared default instance.
    return _to_options(None, dict(args)) if args else DEFAULT_OPTIONS_OBJECT
155 |
156 |
# Merges keyword overrides into an existing Options instance (or into the
# defaults when ``arg`` is None), reusing shared instances when there is
# nothing to change.
# NOTE(review): "( arg)" below looks like a stripped "(<Options> arg)"
# cast in this copy of the file -- verify against upstream.
cdef object _to_options(Options arg, dict kw):
    if arg is None:
        if not kw:
            return DEFAULT_OPTIONS_OBJECT
        else:
            return Options(**kw)
    elif not kw:
        # No overrides: the immutable instance can be reused as-is.
        return arg

    # Inherit every setting the caller did not override.
    PyDict_SetDefault(kw, 'quotationmark', ( arg).quotationmark)
    PyDict_SetDefault(kw, 'tojson', ( arg).tojson)
    PyDict_SetDefault(kw, 'mappingtypes', ( arg).mappingtypes)

    return Options(**kw)
171 |
--------------------------------------------------------------------------------
/src/_escape_dct.hpp:
--------------------------------------------------------------------------------
1 | const EscapeDct::Items EscapeDct::items = {
2 | { 6, '\\', 'u', '0', '0', '0', '0', 0 }, /* 0x00 '\x00' */
3 | { 6, '\\', 'u', '0', '0', '0', '1', 0 }, /* 0x01 '\x01' */
4 | { 6, '\\', 'u', '0', '0', '0', '2', 0 }, /* 0x02 '\x02' */
5 | { 6, '\\', 'u', '0', '0', '0', '3', 0 }, /* 0x03 '\x03' */
6 | { 6, '\\', 'u', '0', '0', '0', '4', 0 }, /* 0x04 '\x04' */
7 | { 6, '\\', 'u', '0', '0', '0', '5', 0 }, /* 0x05 '\x05' */
8 | { 6, '\\', 'u', '0', '0', '0', '6', 0 }, /* 0x06 '\x06' */
9 | { 6, '\\', 'u', '0', '0', '0', '7', 0 }, /* 0x07 '\x07' */
10 | { 2, '\\', 'b', 0, 0, 0, 0, 0 }, /* 0x08 '\x08' */
11 | { 2, '\\', 't', 0, 0, 0, 0, 0 }, /* 0x09 '\t' */
12 | { 2, '\\', 'n', 0, 0, 0, 0, 0 }, /* 0x0a '\n' */
13 | { 6, '\\', 'u', '0', '0', '0', 'b', 0 }, /* 0x0b '\x0b' */
14 | { 2, '\\', 'f', 0, 0, 0, 0, 0 }, /* 0x0c '\x0c' */
15 | { 2, '\\', 'r', 0, 0, 0, 0, 0 }, /* 0x0d '\r' */
16 | { 6, '\\', 'u', '0', '0', '0', 'e', 0 }, /* 0x0e '\x0e' */
17 | { 6, '\\', 'u', '0', '0', '0', 'f', 0 }, /* 0x0f '\x0f' */
18 | { 6, '\\', 'u', '0', '0', '1', '0', 0 }, /* 0x10 '\x10' */
19 | { 6, '\\', 'u', '0', '0', '1', '1', 0 }, /* 0x11 '\x11' */
20 | { 6, '\\', 'u', '0', '0', '1', '2', 0 }, /* 0x12 '\x12' */
21 | { 6, '\\', 'u', '0', '0', '1', '3', 0 }, /* 0x13 '\x13' */
22 | { 6, '\\', 'u', '0', '0', '1', '4', 0 }, /* 0x14 '\x14' */
23 | { 6, '\\', 'u', '0', '0', '1', '5', 0 }, /* 0x15 '\x15' */
24 | { 6, '\\', 'u', '0', '0', '1', '6', 0 }, /* 0x16 '\x16' */
25 | { 6, '\\', 'u', '0', '0', '1', '7', 0 }, /* 0x17 '\x17' */
26 | { 6, '\\', 'u', '0', '0', '1', '8', 0 }, /* 0x18 '\x18' */
27 | { 6, '\\', 'u', '0', '0', '1', '9', 0 }, /* 0x19 '\x19' */
28 | { 6, '\\', 'u', '0', '0', '1', 'a', 0 }, /* 0x1a '\x1a' */
29 | { 6, '\\', 'u', '0', '0', '1', 'b', 0 }, /* 0x1b '\x1b' */
30 | { 6, '\\', 'u', '0', '0', '1', 'c', 0 }, /* 0x1c '\x1c' */
31 | { 6, '\\', 'u', '0', '0', '1', 'd', 0 }, /* 0x1d '\x1d' */
32 | { 6, '\\', 'u', '0', '0', '1', 'e', 0 }, /* 0x1e '\x1e' */
33 | { 6, '\\', 'u', '0', '0', '1', 'f', 0 }, /* 0x1f '\x1f' */
34 | { 1, ' ', 0, 0, 0, 0, 0, 0 }, /* 0x20 ' ' */
35 | { 1, '!', 0, 0, 0, 0, 0, 0 }, /* 0x21 '!' */
36 | { 2, '\\', '"', 0, 0, 0, 0, 0 }, /* 0x22 '"' */
37 | { 1, '#', 0, 0, 0, 0, 0, 0 }, /* 0x23 '#' */
38 | { 1, '$', 0, 0, 0, 0, 0, 0 }, /* 0x24 '$' */
39 | { 1, '%', 0, 0, 0, 0, 0, 0 }, /* 0x25 '%' */
40 | { 6, '\\', 'u', '0', '0', '2', '6', 0 }, /* 0x26 '&' */
41 | { 6, '\\', 'u', '0', '0', '2', '7', 0 }, /* 0x27 "'" */
42 | { 1, '(', 0, 0, 0, 0, 0, 0 }, /* 0x28 '(' */
43 | { 1, ')', 0, 0, 0, 0, 0, 0 }, /* 0x29 ')' */
44 | { 1, '*', 0, 0, 0, 0, 0, 0 }, /* 0x2a '*' */
45 | { 1, '+', 0, 0, 0, 0, 0, 0 }, /* 0x2b '+' */
46 | { 1, ',', 0, 0, 0, 0, 0, 0 }, /* 0x2c ',' */
47 | { 1, '-', 0, 0, 0, 0, 0, 0 }, /* 0x2d '-' */
48 | { 1, '.', 0, 0, 0, 0, 0, 0 }, /* 0x2e '.' */
49 | { 1, '/', 0, 0, 0, 0, 0, 0 }, /* 0x2f '/' */
50 | { 1, '0', 0, 0, 0, 0, 0, 0 }, /* 0x30 '0' */
51 | { 1, '1', 0, 0, 0, 0, 0, 0 }, /* 0x31 '1' */
52 | { 1, '2', 0, 0, 0, 0, 0, 0 }, /* 0x32 '2' */
53 | { 1, '3', 0, 0, 0, 0, 0, 0 }, /* 0x33 '3' */
54 | { 1, '4', 0, 0, 0, 0, 0, 0 }, /* 0x34 '4' */
55 | { 1, '5', 0, 0, 0, 0, 0, 0 }, /* 0x35 '5' */
56 | { 1, '6', 0, 0, 0, 0, 0, 0 }, /* 0x36 '6' */
57 | { 1, '7', 0, 0, 0, 0, 0, 0 }, /* 0x37 '7' */
58 | { 1, '8', 0, 0, 0, 0, 0, 0 }, /* 0x38 '8' */
59 | { 1, '9', 0, 0, 0, 0, 0, 0 }, /* 0x39 '9' */
60 | { 1, ':', 0, 0, 0, 0, 0, 0 }, /* 0x3a ':' */
61 | { 1, ';', 0, 0, 0, 0, 0, 0 }, /* 0x3b ';' */
62 | { 6, '\\', 'u', '0', '0', '3', 'c', 0 }, /* 0x3c '<' */
63 | { 1, '=', 0, 0, 0, 0, 0, 0 }, /* 0x3d '=' */
64 | { 6, '\\', 'u', '0', '0', '3', 'e', 0 }, /* 0x3e '>' */
65 | { 1, '?', 0, 0, 0, 0, 0, 0 }, /* 0x3f '?' */
66 | { 1, '@', 0, 0, 0, 0, 0, 0 }, /* 0x40 '@' */
67 | { 1, 'A', 0, 0, 0, 0, 0, 0 }, /* 0x41 'A' */
68 | { 1, 'B', 0, 0, 0, 0, 0, 0 }, /* 0x42 'B' */
69 | { 1, 'C', 0, 0, 0, 0, 0, 0 }, /* 0x43 'C' */
70 | { 1, 'D', 0, 0, 0, 0, 0, 0 }, /* 0x44 'D' */
71 | { 1, 'E', 0, 0, 0, 0, 0, 0 }, /* 0x45 'E' */
72 | { 1, 'F', 0, 0, 0, 0, 0, 0 }, /* 0x46 'F' */
73 | { 1, 'G', 0, 0, 0, 0, 0, 0 }, /* 0x47 'G' */
74 | { 1, 'H', 0, 0, 0, 0, 0, 0 }, /* 0x48 'H' */
75 | { 1, 'I', 0, 0, 0, 0, 0, 0 }, /* 0x49 'I' */
76 | { 1, 'J', 0, 0, 0, 0, 0, 0 }, /* 0x4a 'J' */
77 | { 1, 'K', 0, 0, 0, 0, 0, 0 }, /* 0x4b 'K' */
78 | { 1, 'L', 0, 0, 0, 0, 0, 0 }, /* 0x4c 'L' */
79 | { 1, 'M', 0, 0, 0, 0, 0, 0 }, /* 0x4d 'M' */
80 | { 1, 'N', 0, 0, 0, 0, 0, 0 }, /* 0x4e 'N' */
81 | { 1, 'O', 0, 0, 0, 0, 0, 0 }, /* 0x4f 'O' */
82 | { 1, 'P', 0, 0, 0, 0, 0, 0 }, /* 0x50 'P' */
83 | { 1, 'Q', 0, 0, 0, 0, 0, 0 }, /* 0x51 'Q' */
84 | { 1, 'R', 0, 0, 0, 0, 0, 0 }, /* 0x52 'R' */
85 | { 1, 'S', 0, 0, 0, 0, 0, 0 }, /* 0x53 'S' */
86 | { 1, 'T', 0, 0, 0, 0, 0, 0 }, /* 0x54 'T' */
87 | { 1, 'U', 0, 0, 0, 0, 0, 0 }, /* 0x55 'U' */
88 | { 1, 'V', 0, 0, 0, 0, 0, 0 }, /* 0x56 'V' */
89 | { 1, 'W', 0, 0, 0, 0, 0, 0 }, /* 0x57 'W' */
90 | { 1, 'X', 0, 0, 0, 0, 0, 0 }, /* 0x58 'X' */
91 | { 1, 'Y', 0, 0, 0, 0, 0, 0 }, /* 0x59 'Y' */
92 | { 1, 'Z', 0, 0, 0, 0, 0, 0 }, /* 0x5a 'Z' */
93 | { 1, '[', 0, 0, 0, 0, 0, 0 }, /* 0x5b '[' */
94 | { 2, '\\', '\\', 0, 0, 0, 0, 0 }, /* 0x5c '\\' */
95 | { 1, ']', 0, 0, 0, 0, 0, 0 }, /* 0x5d ']' */
96 | { 1, '^', 0, 0, 0, 0, 0, 0 }, /* 0x5e '^' */
97 | { 1, '_', 0, 0, 0, 0, 0, 0 }, /* 0x5f '_' */
98 | { 1, '`', 0, 0, 0, 0, 0, 0 }, /* 0x60 '`' */
99 | { 1, 'a', 0, 0, 0, 0, 0, 0 }, /* 0x61 'a' */
100 | { 1, 'b', 0, 0, 0, 0, 0, 0 }, /* 0x62 'b' */
101 | { 1, 'c', 0, 0, 0, 0, 0, 0 }, /* 0x63 'c' */
102 | { 1, 'd', 0, 0, 0, 0, 0, 0 }, /* 0x64 'd' */
103 | { 1, 'e', 0, 0, 0, 0, 0, 0 }, /* 0x65 'e' */
104 | { 1, 'f', 0, 0, 0, 0, 0, 0 }, /* 0x66 'f' */
105 | { 1, 'g', 0, 0, 0, 0, 0, 0 }, /* 0x67 'g' */
106 | { 1, 'h', 0, 0, 0, 0, 0, 0 }, /* 0x68 'h' */
107 | { 1, 'i', 0, 0, 0, 0, 0, 0 }, /* 0x69 'i' */
108 | { 1, 'j', 0, 0, 0, 0, 0, 0 }, /* 0x6a 'j' */
109 | { 1, 'k', 0, 0, 0, 0, 0, 0 }, /* 0x6b 'k' */
110 | { 1, 'l', 0, 0, 0, 0, 0, 0 }, /* 0x6c 'l' */
111 | { 1, 'm', 0, 0, 0, 0, 0, 0 }, /* 0x6d 'm' */
112 | { 1, 'n', 0, 0, 0, 0, 0, 0 }, /* 0x6e 'n' */
113 | { 1, 'o', 0, 0, 0, 0, 0, 0 }, /* 0x6f 'o' */
114 | { 1, 'p', 0, 0, 0, 0, 0, 0 }, /* 0x70 'p' */
115 | { 1, 'q', 0, 0, 0, 0, 0, 0 }, /* 0x71 'q' */
116 | { 1, 'r', 0, 0, 0, 0, 0, 0 }, /* 0x72 'r' */
117 | { 1, 's', 0, 0, 0, 0, 0, 0 }, /* 0x73 's' */
118 | { 1, 't', 0, 0, 0, 0, 0, 0 }, /* 0x74 't' */
119 | { 1, 'u', 0, 0, 0, 0, 0, 0 }, /* 0x75 'u' */
120 | { 1, 'v', 0, 0, 0, 0, 0, 0 }, /* 0x76 'v' */
121 | { 1, 'w', 0, 0, 0, 0, 0, 0 }, /* 0x77 'w' */
122 | { 1, 'x', 0, 0, 0, 0, 0, 0 }, /* 0x78 'x' */
123 | { 1, 'y', 0, 0, 0, 0, 0, 0 }, /* 0x79 'y' */
124 | { 1, 'z', 0, 0, 0, 0, 0, 0 }, /* 0x7a 'z' */
125 | { 1, '{', 0, 0, 0, 0, 0, 0 }, /* 0x7b '{' */
126 | { 1, '|', 0, 0, 0, 0, 0, 0 }, /* 0x7c '|' */
127 | { 1, '}', 0, 0, 0, 0, 0, 0 }, /* 0x7d '}' */
128 | { 1, '~', 0, 0, 0, 0, 0, 0 }, /* 0x7e '~' */
129 | { 6, '\\', 'u', '0', '0', '7', 'f', 0 }, /* 0x7f '\x7f' */
130 | { 6, '\\', 'u', '0', '0', '8', '0', 0 }, /* 0x80 '\x80' */
131 | { 6, '\\', 'u', '0', '0', '8', '1', 0 }, /* 0x81 '\x81' */
132 | { 6, '\\', 'u', '0', '0', '8', '2', 0 }, /* 0x82 '\x82' */
133 | { 6, '\\', 'u', '0', '0', '8', '3', 0 }, /* 0x83 '\x83' */
134 | { 6, '\\', 'u', '0', '0', '8', '4', 0 }, /* 0x84 '\x84' */
135 | { 6, '\\', 'u', '0', '0', '8', '5', 0 }, /* 0x85 '\x85' */
136 | { 6, '\\', 'u', '0', '0', '8', '6', 0 }, /* 0x86 '\x86' */
137 | { 6, '\\', 'u', '0', '0', '8', '7', 0 }, /* 0x87 '\x87' */
138 | { 6, '\\', 'u', '0', '0', '8', '8', 0 }, /* 0x88 '\x88' */
139 | { 6, '\\', 'u', '0', '0', '8', '9', 0 }, /* 0x89 '\x89' */
140 | { 6, '\\', 'u', '0', '0', '8', 'a', 0 }, /* 0x8a '\x8a' */
141 | { 6, '\\', 'u', '0', '0', '8', 'b', 0 }, /* 0x8b '\x8b' */
142 | { 6, '\\', 'u', '0', '0', '8', 'c', 0 }, /* 0x8c '\x8c' */
143 | { 6, '\\', 'u', '0', '0', '8', 'd', 0 }, /* 0x8d '\x8d' */
144 | { 6, '\\', 'u', '0', '0', '8', 'e', 0 }, /* 0x8e '\x8e' */
145 | { 6, '\\', 'u', '0', '0', '8', 'f', 0 }, /* 0x8f '\x8f' */
146 | { 6, '\\', 'u', '0', '0', '9', '0', 0 }, /* 0x90 '\x90' */
147 | { 6, '\\', 'u', '0', '0', '9', '1', 0 }, /* 0x91 '\x91' */
148 | { 6, '\\', 'u', '0', '0', '9', '2', 0 }, /* 0x92 '\x92' */
149 | { 6, '\\', 'u', '0', '0', '9', '3', 0 }, /* 0x93 '\x93' */
150 | { 6, '\\', 'u', '0', '0', '9', '4', 0 }, /* 0x94 '\x94' */
151 | { 6, '\\', 'u', '0', '0', '9', '5', 0 }, /* 0x95 '\x95' */
152 | { 6, '\\', 'u', '0', '0', '9', '6', 0 }, /* 0x96 '\x96' */
153 | { 6, '\\', 'u', '0', '0', '9', '7', 0 }, /* 0x97 '\x97' */
154 | { 6, '\\', 'u', '0', '0', '9', '8', 0 }, /* 0x98 '\x98' */
155 | { 6, '\\', 'u', '0', '0', '9', '9', 0 }, /* 0x99 '\x99' */
156 | { 6, '\\', 'u', '0', '0', '9', 'a', 0 }, /* 0x9a '\x9a' */
157 | { 6, '\\', 'u', '0', '0', '9', 'b', 0 }, /* 0x9b '\x9b' */
158 | { 6, '\\', 'u', '0', '0', '9', 'c', 0 }, /* 0x9c '\x9c' */
159 | { 6, '\\', 'u', '0', '0', '9', 'd', 0 }, /* 0x9d '\x9d' */
160 | { 6, '\\', 'u', '0', '0', '9', 'e', 0 }, /* 0x9e '\x9e' */
161 | { 6, '\\', 'u', '0', '0', '9', 'f', 0 }, /* 0x9f '\x9f' */
162 | { 6, '\\', 'u', '0', '0', 'a', '0', 0 }, /* 0xa0 '\xa0' */
163 | { 6, '\\', 'u', '0', '0', 'a', '1', 0 }, /* 0xa1 '¡' */
164 | { 6, '\\', 'u', '0', '0', 'a', '2', 0 }, /* 0xa2 '¢' */
165 | { 6, '\\', 'u', '0', '0', 'a', '3', 0 }, /* 0xa3 '£' */
166 | { 6, '\\', 'u', '0', '0', 'a', '4', 0 }, /* 0xa4 '¤' */
167 | { 6, '\\', 'u', '0', '0', 'a', '5', 0 }, /* 0xa5 '¥' */
168 | { 6, '\\', 'u', '0', '0', 'a', '6', 0 }, /* 0xa6 '¦' */
169 | { 6, '\\', 'u', '0', '0', 'a', '7', 0 }, /* 0xa7 '§' */
170 | { 6, '\\', 'u', '0', '0', 'a', '8', 0 }, /* 0xa8 '¨' */
171 | { 6, '\\', 'u', '0', '0', 'a', '9', 0 }, /* 0xa9 '©' */
172 | { 6, '\\', 'u', '0', '0', 'a', 'a', 0 }, /* 0xaa 'ª' */
173 | { 6, '\\', 'u', '0', '0', 'a', 'b', 0 }, /* 0xab '«' */
174 | { 6, '\\', 'u', '0', '0', 'a', 'c', 0 }, /* 0xac '¬' */
175 | { 6, '\\', 'u', '0', '0', 'a', 'd', 0 }, /* 0xad '\xad' */
176 | { 6, '\\', 'u', '0', '0', 'a', 'e', 0 }, /* 0xae '®' */
177 | { 6, '\\', 'u', '0', '0', 'a', 'f', 0 }, /* 0xaf '¯' */
178 | { 6, '\\', 'u', '0', '0', 'b', '0', 0 }, /* 0xb0 '°' */
179 | { 6, '\\', 'u', '0', '0', 'b', '1', 0 }, /* 0xb1 '±' */
180 | { 6, '\\', 'u', '0', '0', 'b', '2', 0 }, /* 0xb2 '²' */
181 | { 6, '\\', 'u', '0', '0', 'b', '3', 0 }, /* 0xb3 '³' */
182 | { 6, '\\', 'u', '0', '0', 'b', '4', 0 }, /* 0xb4 '´' */
183 | { 6, '\\', 'u', '0', '0', 'b', '5', 0 }, /* 0xb5 'µ' */
184 | { 6, '\\', 'u', '0', '0', 'b', '6', 0 }, /* 0xb6 '¶' */
185 | { 6, '\\', 'u', '0', '0', 'b', '7', 0 }, /* 0xb7 '·' */
186 | { 6, '\\', 'u', '0', '0', 'b', '8', 0 }, /* 0xb8 '¸' */
187 | { 6, '\\', 'u', '0', '0', 'b', '9', 0 }, /* 0xb9 '¹' */
188 | { 6, '\\', 'u', '0', '0', 'b', 'a', 0 }, /* 0xba 'º' */
189 | { 6, '\\', 'u', '0', '0', 'b', 'b', 0 }, /* 0xbb '»' */
190 | { 6, '\\', 'u', '0', '0', 'b', 'c', 0 }, /* 0xbc '¼' */
191 | { 6, '\\', 'u', '0', '0', 'b', 'd', 0 }, /* 0xbd '½' */
192 | { 6, '\\', 'u', '0', '0', 'b', 'e', 0 }, /* 0xbe '¾' */
193 | { 6, '\\', 'u', '0', '0', 'b', 'f', 0 }, /* 0xbf '¿' */
194 | { 6, '\\', 'u', '0', '0', 'c', '0', 0 }, /* 0xc0 'À' */
195 | { 6, '\\', 'u', '0', '0', 'c', '1', 0 }, /* 0xc1 'Á' */
196 | { 6, '\\', 'u', '0', '0', 'c', '2', 0 }, /* 0xc2 'Â' */
197 | { 6, '\\', 'u', '0', '0', 'c', '3', 0 }, /* 0xc3 'Ã' */
198 | { 6, '\\', 'u', '0', '0', 'c', '4', 0 }, /* 0xc4 'Ä' */
199 | { 6, '\\', 'u', '0', '0', 'c', '5', 0 }, /* 0xc5 'Å' */
200 | { 6, '\\', 'u', '0', '0', 'c', '6', 0 }, /* 0xc6 'Æ' */
201 | { 6, '\\', 'u', '0', '0', 'c', '7', 0 }, /* 0xc7 'Ç' */
202 | { 6, '\\', 'u', '0', '0', 'c', '8', 0 }, /* 0xc8 'È' */
203 | { 6, '\\', 'u', '0', '0', 'c', '9', 0 }, /* 0xc9 'É' */
204 | { 6, '\\', 'u', '0', '0', 'c', 'a', 0 }, /* 0xca 'Ê' */
205 | { 6, '\\', 'u', '0', '0', 'c', 'b', 0 }, /* 0xcb 'Ë' */
206 | { 6, '\\', 'u', '0', '0', 'c', 'c', 0 }, /* 0xcc 'Ì' */
207 | { 6, '\\', 'u', '0', '0', 'c', 'd', 0 }, /* 0xcd 'Í' */
208 | { 6, '\\', 'u', '0', '0', 'c', 'e', 0 }, /* 0xce 'Î' */
209 | { 6, '\\', 'u', '0', '0', 'c', 'f', 0 }, /* 0xcf 'Ï' */
210 | { 6, '\\', 'u', '0', '0', 'd', '0', 0 }, /* 0xd0 'Ð' */
211 | { 6, '\\', 'u', '0', '0', 'd', '1', 0 }, /* 0xd1 'Ñ' */
212 | { 6, '\\', 'u', '0', '0', 'd', '2', 0 }, /* 0xd2 'Ò' */
213 | { 6, '\\', 'u', '0', '0', 'd', '3', 0 }, /* 0xd3 'Ó' */
214 | { 6, '\\', 'u', '0', '0', 'd', '4', 0 }, /* 0xd4 'Ô' */
215 | { 6, '\\', 'u', '0', '0', 'd', '5', 0 }, /* 0xd5 'Õ' */
216 | { 6, '\\', 'u', '0', '0', 'd', '6', 0 }, /* 0xd6 'Ö' */
217 | { 6, '\\', 'u', '0', '0', 'd', '7', 0 }, /* 0xd7 '×' */
218 | { 6, '\\', 'u', '0', '0', 'd', '8', 0 }, /* 0xd8 'Ø' */
219 | { 6, '\\', 'u', '0', '0', 'd', '9', 0 }, /* 0xd9 'Ù' */
220 | { 6, '\\', 'u', '0', '0', 'd', 'a', 0 }, /* 0xda 'Ú' */
221 | { 6, '\\', 'u', '0', '0', 'd', 'b', 0 }, /* 0xdb 'Û' */
222 | { 6, '\\', 'u', '0', '0', 'd', 'c', 0 }, /* 0xdc 'Ü' */
223 | { 6, '\\', 'u', '0', '0', 'd', 'd', 0 }, /* 0xdd 'Ý' */
224 | { 6, '\\', 'u', '0', '0', 'd', 'e', 0 }, /* 0xde 'Þ' */
225 | { 6, '\\', 'u', '0', '0', 'd', 'f', 0 }, /* 0xdf 'ß' */
226 | { 6, '\\', 'u', '0', '0', 'e', '0', 0 }, /* 0xe0 'à' */
227 | { 6, '\\', 'u', '0', '0', 'e', '1', 0 }, /* 0xe1 'á' */
228 | { 6, '\\', 'u', '0', '0', 'e', '2', 0 }, /* 0xe2 'â' */
229 | { 6, '\\', 'u', '0', '0', 'e', '3', 0 }, /* 0xe3 'ã' */
230 | { 6, '\\', 'u', '0', '0', 'e', '4', 0 }, /* 0xe4 'ä' */
231 | { 6, '\\', 'u', '0', '0', 'e', '5', 0 }, /* 0xe5 'å' */
232 | { 6, '\\', 'u', '0', '0', 'e', '6', 0 }, /* 0xe6 'æ' */
233 | { 6, '\\', 'u', '0', '0', 'e', '7', 0 }, /* 0xe7 'ç' */
234 | { 6, '\\', 'u', '0', '0', 'e', '8', 0 }, /* 0xe8 'è' */
235 | { 6, '\\', 'u', '0', '0', 'e', '9', 0 }, /* 0xe9 'é' */
236 | { 6, '\\', 'u', '0', '0', 'e', 'a', 0 }, /* 0xea 'ê' */
237 | { 6, '\\', 'u', '0', '0', 'e', 'b', 0 }, /* 0xeb 'ë' */
238 | { 6, '\\', 'u', '0', '0', 'e', 'c', 0 }, /* 0xec 'ì' */
239 | { 6, '\\', 'u', '0', '0', 'e', 'd', 0 }, /* 0xed 'í' */
240 | { 6, '\\', 'u', '0', '0', 'e', 'e', 0 }, /* 0xee 'î' */
241 | { 6, '\\', 'u', '0', '0', 'e', 'f', 0 }, /* 0xef 'ï' */
242 | { 6, '\\', 'u', '0', '0', 'f', '0', 0 }, /* 0xf0 'ð' */
243 | { 6, '\\', 'u', '0', '0', 'f', '1', 0 }, /* 0xf1 'ñ' */
244 | { 6, '\\', 'u', '0', '0', 'f', '2', 0 }, /* 0xf2 'ò' */
245 | { 6, '\\', 'u', '0', '0', 'f', '3', 0 }, /* 0xf3 'ó' */
246 | { 6, '\\', 'u', '0', '0', 'f', '4', 0 }, /* 0xf4 'ô' */
247 | { 6, '\\', 'u', '0', '0', 'f', '5', 0 }, /* 0xf5 'õ' */
248 | { 6, '\\', 'u', '0', '0', 'f', '6', 0 }, /* 0xf6 'ö' */
249 | { 6, '\\', 'u', '0', '0', 'f', '7', 0 }, /* 0xf7 '÷' */
250 | { 6, '\\', 'u', '0', '0', 'f', '8', 0 }, /* 0xf8 'ø' */
251 | { 6, '\\', 'u', '0', '0', 'f', '9', 0 }, /* 0xf9 'ù' */
252 | { 6, '\\', 'u', '0', '0', 'f', 'a', 0 }, /* 0xfa 'ú' */
253 | { 6, '\\', 'u', '0', '0', 'f', 'b', 0 }, /* 0xfb 'û' */
254 | { 6, '\\', 'u', '0', '0', 'f', 'c', 0 }, /* 0xfc 'ü' */
255 | { 6, '\\', 'u', '0', '0', 'f', 'd', 0 }, /* 0xfd 'ý' */
256 | { 6, '\\', 'u', '0', '0', 'f', 'e', 0 }, /* 0xfe 'þ' */
257 | { 6, '\\', 'u', '0', '0', 'f', 'f', 0 }, /* 0xff 'ÿ' */
258 | };
/* Bitmask over code points 0x00..0x3F: bit n is set iff character n must be
 * escaped. Matches the table above: all of 0x00-0x1F, plus '"' (0x22),
 * '&' (0x26), "'" (0x27), '<' (0x3C) and '>' (0x3E). */
const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x500000c4ffffffff);
/* Bitmask over code points 0x40..0x7F: bit n is set iff character 0x40+n must
 * be escaped. Matches the table above: '\\' (0x5C) and DEL (0x7F). */
const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x8000000010000000);
261 |
--------------------------------------------------------------------------------
/src/_exceptions.pyx:
--------------------------------------------------------------------------------
# Pickling is disabled for all PyJSON5 exception types (Cython directive).
@auto_pickle(False)
cdef class Json5Exception(Exception):
    '''
    Base class of any exception thrown by PyJSON5.

    The human readable message is always stored as ``args[0]``.
    '''
    def __init__(self, message=None, *args):
        super().__init__(message, *args)

    @property
    def message(self):
        '''Human readable error description'''
        # By construction (see __init__) the message is the first argument.
        return self.args[0]
13 |
--------------------------------------------------------------------------------
/src/_exceptions_decoder.pyx:
--------------------------------------------------------------------------------
@auto_pickle(False)
cdef class Json5DecoderException(Json5Exception):
    '''
    Base class of any exception thrown by the parser.

    In addition to the message (``args[0]``), the partial parse result is
    always stored as ``args[1]``.
    '''
    def __init__(self, message=None, result=None, *args):
        super().__init__(message, result, *args)

    @property
    def result(self):
        '''Deserialized data up until now.'''
        return self.args[1]
13 |
14 |
@final
@auto_pickle(False)
cdef class Json5NestingTooDeep(Json5DecoderException):
    '''
    The maximum nesting level on the input data was exceeded.

    Raised when the decoder hits the ``maxdepth`` limit of the ``decode*``
    functions.
    '''
21 |
22 |
@final
@auto_pickle(False)
cdef class Json5EOF(Json5DecoderException):
    '''
    The input ended prematurely.
    '''
29 |
30 |
@final
@auto_pickle(False)
cdef class Json5IllegalCharacter(Json5DecoderException):
    '''
    An unexpected character was encountered.

    The offending character is stored as ``args[2]``.
    '''
    def __init__(self, message=None, result=None, character=None, *args):
        super().__init__(message, result, character, *args)

    @property
    def character(self):
        '''
        Illegal character.
        '''
        return self.args[2]
46 |
47 |
@final
@auto_pickle(False)
cdef class Json5ExtraData(Json5DecoderException):
    '''
    The input contained extraneous data.

    Raised when trailing, non-whitespace input remains after a complete
    document and trailing junk was not allowed (``some=False``).
    The first extraneous character is stored as ``args[2]``.
    '''
    def __init__(self, message=None, result=None, character=None, *args):
        super().__init__(message, result, character, *args)

    @property
    def character(self):
        '''
        Extraneous character.
        '''
        return self.args[2]
63 |
64 |
@final
@auto_pickle(False)
cdef class Json5IllegalType(Json5DecoderException):
    '''
    The user supplied callback function returned illegal data.

    The offending value is stored as ``args[2]``.
    '''
    def __init__(self, message=None, result=None, value=None, *args):
        super().__init__(message, result, value, *args)

    @property
    def value(self):
        '''
        Value that caused the problem.
        '''
        return self.args[2]
80 |
81 |
@final
@auto_pickle(False)
cdef class _DecoderException(Exception):
    # Internal carrier exception: bundles the concrete exception class to
    # raise together with its constructor arguments, so the error can be
    # re-raised as ``cls(msg, result, extra)`` later.
    # NOTE(review): presumably consumed by the raise helpers
    # (src/_raise_decoder.pyx) — confirm against that file.
    cdef object cls     # Json5DecoderException subclass to instantiate
    cdef object msg     # human readable error message
    cdef object extra   # extra payload (e.g. offending character/value)
    cdef object result  # partially deserialized data

    def __cinit__(self, cls, msg, extra, result):
        self.cls = cls
        self.msg = msg
        self.extra = extra
        self.result = result
95 |
--------------------------------------------------------------------------------
/src/_exceptions_encoder.pyx:
--------------------------------------------------------------------------------
@auto_pickle(False)
cdef class Json5EncoderException(Json5Exception):
    '''
    Base class of any exception thrown by the serializer.
    '''
7 |
@auto_pickle(False)
cdef class Json5UnstringifiableType(Json5EncoderException):
    '''
    The encoder was not able to stringify the input, or it was told not to by the supplied ``Options``.

    The offending value is stored as ``args[1]``.
    '''
    def __init__(self, message=None, unstringifiable=None):
        super().__init__(message, unstringifiable)

    @property
    def unstringifiable(self):
        '''
        The value that caused the problem.
        '''
        return self.args[1]
22 |
--------------------------------------------------------------------------------
/src/_exports.pyx:
--------------------------------------------------------------------------------
# Declare the public module-level names (this file is textually included into
# the main module, so ``global`` makes the assignments module attributes).
global DEFAULT_MAX_NESTING_LEVEL, __all__, __author__, __doc__, __license__, __version__

DEFAULT_MAX_NESTING_LEVEL = 32
'''
Maximum nesting level of data to decode if no ``maxdepth`` argument is specified.
'''

# Build the version string directly from the embedded 1-byte (Latin-1/ASCII)
# VERSION buffer of length VERSION_LENGTH.
__version__ = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, VERSION, VERSION_LENGTH)
'''
Current library version.
'''
12 |
13 |
def decode(object data, object maxdepth=None, object some=False):
    '''
    Decodes JSON5 serialized data from an :class:`str` object.

    .. code:: python

        decode('["Hello", "world!"]') == ['Hello', 'world!']

    Parameters
    ----------
    data : str
        JSON5 serialized data
    maxdepth : Optional[int]
        Maximum nesting level before the parsing is aborted.

        * If ``None`` is supplied, then the value of the global variable \
          ``DEFAULT_MAX_NESTING_LEVEL`` is used instead.
        * If the value is ``0``, then only literals are accepted, e.g. ``false``, \
          ``47.11``, or ``"string"``.
        * If the value is negative, then any nesting level is allowed until \
          Python's recursion limit is hit.
    some : bool
        Allow trailing junk.

    Raises
    ------
    Json5DecoderException
        An exception occurred while decoding.
    TypeError
        An argument had a wrong type.

    Returns
    -------
    object
        Deserialized data.
    '''
    # Guard clause: only str input is handled here; bytes-like data has its
    # own entry points (decode_utf8 / decode_latin1 / decode_buffer).
    if not isinstance(data, unicode):
        raise TypeError(f'type(data) == {type(data)!r} not supported')

    if maxdepth is None:
        maxdepth = DEFAULT_MAX_NESTING_LEVEL

    return _decode_unicode(data, maxdepth, bool(some))
57 |
58 |
def decode_latin1(object data, object maxdepth=None, object some=False):
    '''
    Decodes JSON5 serialized data from a :class:`bytes` object.

    .. code:: python

        decode_latin1(b'["Hello", "world!"]') == ['Hello', 'world!']

    Parameters
    ----------
    data : bytes
        JSON5 serialized data, encoded as Latin-1 or ASCII.
    maxdepth : Optional[int]
        see :func:`decode(…) <pyjson5.decode>`
    some : bool
        see :func:`decode(…) <pyjson5.decode>`

    Raises
    ------
    Json5DecoderException
        An exception occurred while decoding.
    TypeError
        An argument had a wrong type.

    Returns
    -------
    object
        see :func:`decode(…) <pyjson5.decode>`
    '''
    # Thin wrapper: a word length of 1 selects the Latin-1 (UCS1) code path.
    allow_trailing = bool(some)
    return decode_buffer(data, maxdepth, allow_trailing, 1)
89 |
90 |
def decode_utf8(object data, object maxdepth=None, object some=False):
    '''
    Decodes JSON5 serialized data from a :class:`bytes` object.

    .. code:: python

        decode_utf8(b'["H\\xe2\\x82\\xacllo", "w\\xc3\\xb6rld!"]') == ['H€llo', 'wörld!']

    Parameters
    ----------
    data : bytes
        JSON5 serialized data, encoded as UTF-8 or ASCII.
    maxdepth : Optional[int]
        see :func:`decode(…) <pyjson5.decode>`
    some : bool
        see :func:`decode(…) <pyjson5.decode>`

    Raises
    ------
    Json5DecoderException
        An exception occurred while decoding.
    TypeError
        An argument had a wrong type.

    Returns
    -------
    object
        see :func:`decode(…) <pyjson5.decode>`
    '''
    # Thin wrapper: a word length of 0 selects the UTF-8 code path.
    allow_trailing = bool(some)
    return decode_buffer(data, maxdepth, allow_trailing, 0)
121 |
122 |
def decode_buffer(object obj, object maxdepth=None, object some=False,
                  object wordlength=None):
    '''
    Decodes JSON5 serialized data from an object that supports the buffer protocol,
    e.g. :class:`bytearray`.

    .. code:: python

        obj = memoryview(b'["Hello", "world!"]')

        decode_buffer(obj) == ['Hello', 'world!']

    Parameters
    ----------
    data : object
        JSON5 serialized data.
        The argument must support Python's buffer protocol, i.e.
        :class:`memoryview(…) ` must work. The buffer must be contiguous.
    maxdepth : Optional[int]
        see :func:`decode(…) `
    some : bool
        see :func:`decode(…) `
    wordlength : Optional[int]
        Must be 0, 1, 2, 4 to denote UTF-8, UCS1, UCS2 or UCS4 data, resp.
        Surrogates are not supported. Decode the data to an :class:`str` if need be.
        If ``None`` is supplied, then the buffer's ``itemsize`` is used.

    Raises
    ------
    Json5DecoderException
        An exception occurred while decoding.
    TypeError
        An argument had a wrong type.
    ValueError
        The value of ``wordlength`` was invalid.

    Returns
    -------
    object
        see :func:`decode(…) `
    '''
    cdef Py_buffer view

    if maxdepth is None:
        maxdepth = DEFAULT_MAX_NESTING_LEVEL

    # Acquire a contiguous, read-only view; this raises for objects that do
    # not support the buffer protocol (or are not contiguous).
    PyObject_GetBuffer(obj, &view, PyBUF_CONTIG_RO)
    try:
        # Default the word length to the buffer's own item size, so e.g. an
        # array('I', …) is treated as UCS4 without an explicit argument.
        if wordlength is None:
            wordlength = view.itemsize
        return _decode_buffer(view, wordlength, maxdepth, bool(some))
    finally:
        # The buffer must be released even when decoding raises.
        PyBuffer_Release(&view)
176 |
177 |
def decode_callback(object cb, object maxdepth=None, object some=False,
                    object args=None):
    '''
    Decodes JSON5 serialized data by invoking a callback.

    .. code:: python

        cb = iter('["Hello","world!"]').__next__

        decode_callback(cb) == ['Hello', 'world!']

    Parameters
    ----------
    cb : Callable[Any, Union[str|bytes|bytearray|int|None]]
        A function to get values from.
        The functions is called like ``cb(*args)``, and it returns:

        * **str, bytes, bytearray:** \
          ``len(…) == 0`` denotes exhausted input. \
          ``len(…) == 1`` is the next character.
        * **int:** \
          ``< 0`` denotes exhausted input. \
          ``>= 0`` is the ordinal value of the next character.
        * **None:** \
          input exhausted
    maxdepth : Optional[int]
        see :func:`decode(…) <pyjson5.decode>`
    some : bool
        see :func:`decode(…) <pyjson5.decode>`
    args : Optional[Iterable[Any]]
        Arguments to call ``cb`` with.

    Raises
    ------
    Json5DecoderException
        An exception occurred while decoding.
    TypeError
        An argument had a wrong type.

    Returns
    -------
    object
        see :func:`decode(…) <pyjson5.decode>`
    '''
    if not callable(cb):
        raise TypeError(f'type(cb)=={type(cb)!r} is not callable')

    if maxdepth is None:
        maxdepth = DEFAULT_MAX_NESTING_LEVEL

    # Normalize: any truthy iterable becomes a tuple, everything else
    # (None, empty) becomes the empty tuple.
    call_args = tuple(args) if args else ()

    return _decode_callback(cb, call_args, maxdepth, bool(some))
234 |
235 |
def decode_io(object fp, object maxdepth=None, object some=True):
    '''
    Decodes JSON5 serialized data from a file-like object.

    .. code:: python

        fp = io.StringIO("""
            ['Hello', /* TODO look into specs whom to greet */]
            'Wolrd' // FIXME: look for typos
        """)

        decode_io(fp) == ['Hello']
        decode_io(fp) == 'Wolrd'

        fp.seek(0)

        decode_io(fp, some=False)
        # raises Json5ExtraData('Extra data U+0027 near 56', ['Hello'], "'")

    Parameters
    ----------
    fp : IOBase
        A file-like object to parse from.
    maxdepth : Optional[int] = None
        see :func:`decode(…) <pyjson5.decode>`
    some : bool
        see :func:`decode(…) <pyjson5.decode>`.
        Note: unlike the other decoders, the default here is ``True``.

    Raises
    ------
    Json5DecoderException
        An exception occurred while decoding.
    TypeError
        An argument had a wrong type.

    Returns
    -------
    object
        see :func:`decode(…) <pyjson5.decode>`
    '''
    if not isinstance(fp, IOBase):
        raise TypeError(f'type(fp)=={type(fp)!r} is not IOBase compatible')
    elif not fp.readable():
        # Constant messages: plain literals, no pointless f-string prefix.
        raise TypeError('fp is not readable')
    elif fp.closed:
        raise TypeError('fp is closed')

    if maxdepth is None:
        maxdepth = DEFAULT_MAX_NESTING_LEVEL

    # Reuse the callback decoder: fp.read(1) yields one character at a time,
    # and a zero-length result signals exhausted input (see decode_callback).
    return _decode_callback(fp.read, (1,), maxdepth, bool(some))
287 |
288 |
def encode(object data, *, options=None, **options_kw):
    '''
    Serializes a Python object as a JSON5 compatible string.

    .. code:: python

        encode(['Hello', 'world!']) == '["Hello","world!"]'

    Parameters
    ----------
    data : object
        Python object to serialize.
    options : Optional[Options]
        Extra options for the encoder.
        If ``options`` **and** ``options_kw`` are specified, then ``options.update(**options_kw)`` is used.
    options_kw
        See Option's arguments.

    Raises
    ------
    Json5EncoderException
        An exception occurred while encoding.
    TypeError
        An argument had a wrong type.

    Returns
    -------
    str
        Unless ``float('inf')`` or ``float('nan')`` is encountered, the result
        will be valid JSON data (as of RFC8259).

        The result is always ASCII. All characters outside of the ASCII range
        are escaped.

        The result is safe to use in an HTML template, e.g.
        ``show message``.
        Apostrophes ``"'"`` are encoded as ``"\\u0027"``, less-than,
        greater-than, and ampersand likewise.
    '''
    # NOTE(review): several Cython ``<type>`` casts in this function (and an
    # HTML snippet in the docstring above) appear to have been stripped by
    # the text extraction — e.g. ``&( NULL).data[0]`` and
    # ``( writer.obj)[…]``. Compare against the repository original before
    # relying on this text; the tokens below are kept verbatim.
    cdef void *temp
    cdef object result
    # Offset of the character data inside a compact ASCII str object,
    # computed from a NULL "instance" (offsetof-style trick).
    cdef Py_ssize_t start = (
        &( NULL).data[0]
    )
    cdef Py_ssize_t length
    cdef object opts = _to_options(options, options_kw)
    # Growable raw buffer that the encoder callbacks append into; the initial
    # position is ``start`` so the header bytes are reserved up front.
    cdef WriterReallocatable writer = WriterReallocatable(
        Writer(
            _WriterReallocatable_reserve,
            _WriterReallocatable_append_c,
            _WriterReallocatable_append_s,
            opts,
        ),
        start, 0, NULL,
    )

    try:
        if expect(_encode(writer.base, data) < 0, False):
            # Propagates the pending Python exception.
            exception_thrown()

        length = writer.position - start
        if length <= 0:
            # impossible
            return u''

        # Shrink the buffer to the exact size, +1 for the terminating NUL.
        # If the realloc fails, the original (larger) buffer is still valid.
        temp = ObjectRealloc(writer.obj, writer.position + 1)
        if temp is not NULL:
            writer.obj = temp
        ( writer.obj)[writer.position] = 0

        # Turn the raw buffer into a str object in place, then hand over
        # ownership (writer.obj = NULL prevents the ``finally`` free).
        result = ObjectInit( writer.obj, unicode)
        writer.obj = NULL

        # Fill in the unicode object's internal fields for a compact,
        # non-interned, ready ASCII string of the computed length.
        ( result).length = length
        reset_hash( result)
        reset_wstr( result)
        ( result).state.interned = SSTATE_NOT_INTERNED
        ( result).state.kind = PyUnicode_1BYTE_KIND
        ( result).state.compact = True
        set_ready( result)
        ( result).state.ascii = True

        return result
    finally:
        # Free the buffer unless ownership was transferred to ``result``.
        if writer.obj is not NULL:
            ObjectFree(writer.obj)
375 |
376 |
377 | def encode_bytes(object data, *, options=None, **options_kw):
378 | '''
379 | Serializes a Python object to a JSON5 compatible bytes string.
380 |
381 | .. code:: python
382 |
383 | encode_bytes(['Hello', 'world!']) == b'["Hello","world!"]'
384 |
385 | Parameters
386 | ----------
387 | data : object
388 | see :func:`encode(…) `
389 | options : Optional[Options]
390 | see :func:`encode(…) `
391 | options_kw
392 | see :func:`encode(…) `
393 |
394 | Raises
395 | ------
396 | Json5EncoderException
397 | An exception occured while encoding.
398 | TypeError
399 | An argument had a wrong type.
400 |
401 | Returns
402 | -------
403 | bytes
404 | see :func:`encode(…) `
405 | '''
406 | cdef void *temp
407 | cdef object result
408 | cdef Py_ssize_t start = (
409 | &( NULL).ob_sval[0]
410 | )
411 | cdef Py_ssize_t length
412 | cdef object opts = _to_options(options, options_kw)
413 | cdef WriterReallocatable writer = WriterReallocatable(
414 | Writer(
415 | _WriterReallocatable_reserve,
416 | _WriterReallocatable_append_c,
417 | _WriterReallocatable_append_s,
418 | opts,
419 | ),
420 | start, 0, NULL,
421 | )
422 |
423 | try:
424 | if expect(_encode(writer.base, data) < 0, False):
425 | exception_thrown()
426 |
427 | length = writer.position - start
428 | if length <= 0:
429 | # impossible
430 | return b''
431 |
432 | temp = ObjectRealloc(writer.obj, writer.position + 1)
433 | if temp is not NULL:
434 | writer.obj = temp
435 | ( writer.obj)[writer.position] = 0
436 |
437 | result =