├── .coveragerc ├── .github └── workflows │ ├── release.yml │ └── unittests.yml ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── QuickStart.rst ├── comments.rst ├── conf.py ├── docrequirements.txt ├── extending.rst ├── how.rst ├── index.rst └── make.bat ├── json5 ├── __init__.py ├── dumper.py ├── loader.py ├── model.py ├── parser.py ├── py.typed ├── tokenizer.py └── utils.py ├── requirements-dev.txt ├── setup.cfg ├── setup.py ├── tests ├── test_errors.py ├── test_json5_dump.py ├── test_json5_load.py ├── test_json5_official_tests.py ├── test_json_helpers.py ├── test_loads_options.py ├── test_model.py ├── test_model_loader_dumper.py ├── test_modelizer.py ├── test_regressions.py └── test_roundtrip.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = json5 3 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | - name: setup python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.11 18 | 19 | - name: build 20 | shell: bash 21 | run: | 22 | python -m pip install --upgrade wheel setuptools sly regex build 23 | python -m build 24 | 25 | - name: Release PyPI 26 | shell: bash 27 | env: 28 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 29 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 30 | run: | 31 | pip install --upgrade twine 32 | twine upload dist/* 33 | 34 | 35 | - name: Release GitHub 36 | uses: softprops/action-gh-release@v1 37 | with: 38 | files: "dist/*" 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/unittests.yml: -------------------------------------------------------------------------------- 1 | on: [ push, pull_request ] 2 | 3 | jobs: 4 | build: 5 | strategy: 6 | fail-fast: false 7 | matrix: 8 | python_version: ["3.10", "3.9", "3.8", "3.11"] 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v2 13 | - name: Setup Python 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python_version }} 17 | 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | python -m pip install -r requirements-dev.txt 22 | python -m pip install . 
23 | python -m pip install tox 24 | git clone https://github.com/json5/json5-tests.git 25 | 26 | - name: Test with coverage/pytest 27 | env: 28 | PYTHONUNBUFFERED: "1" 29 | run: | 30 | tox -e py 31 | - name: Coveralls 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | run: | 35 | pip install --upgrade coveralls 36 | coveralls --service=github 37 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | exclude: ^(tests/.*) 9 | - repo: https://github.com/asottile/reorder-python-imports 10 | rev: v3.12.0 11 | hooks: 12 | - id: reorder-python-imports 13 | 14 | - repo: https://github.com/psf/black 15 | rev: '23.10.1' 16 | hooks: 17 | - id: black 18 | args: 19 | - "-S" 20 | - "-l" 21 | - "120" 22 | 23 | - repo: https://github.com/asottile/pyupgrade 24 | rev: v3.15.0 25 | hooks: 26 | - id: pyupgrade 27 | args: ["--py38-plus"] 28 | 29 | - repo: https://github.com/pre-commit/mirrors-mypy 30 | rev: 'v1.6.1' 31 | hooks: 32 | - id: mypy 33 | args: 34 | - "--strict" 35 | - "--disable-error-code" 36 | - "name-defined" 37 | exclude: ^(tests/.*|setup.py|docs/.*) 38 | additional_dependencies: 39 | - types-regex 40 | 41 | - repo: https://github.com/pycqa/flake8 42 | rev: '6.1.0' # pick a git hash / tag to point to 43 | hooks: 44 | - id: flake8 45 | args: 46 | - "--ignore" 47 | - "E501,E704,E301,W503,F405,F811,F821,F403," 48 | exclude: ^(tests/.*) 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # json-five 2 | 3 | JSON5 for Python 4 | 5 | [![Documentation Status](https://readthedocs.org/projects/json-five/badge/?version=latest)](https://json-five.readthedocs.io/en/latest/?badge=latest) 6 | [![Build](https://github.com/spyoungtech/json-five/actions/workflows/unittests.yml/badge.svg)](https://github.com/spyoungtech/json-five/actions/workflows/unittests.yaml) 7 | [![version](https://img.shields.io/pypi/v/json-five.svg?colorB=blue)](https://pypi.org/project/json-five/) 8 | [![pyversion](https://img.shields.io/pypi/pyversions/json-five.svg?)](https://pypi.org/project/json-five/) 9 | [![Coverage](https://coveralls.io/repos/github/spyoungtech/json-five/badge.svg?branch=main)](https://coveralls.io/github/spyoungtech/json-five?branch=main) 10 | 11 | ## Installation 12 | 13 | ``` 14 | pip install json-five 15 | ``` 16 | 17 | This project requires Python 3.8+ 18 | 19 | 20 | ## Key features 21 | 22 | - Supports the JSON5 spec 23 | - Supports similar interfaces to stdlib `json` module 24 | - Provides an API for working with abstract model representations of JSON5 documents. 25 | - Supports round-trip loading, editing, and dumping, preserving non-data elements such as comments (in model-based load/dump) 26 | 27 | 28 | 29 | # Usage 30 | 31 | **NOTE:** the import name is `json5` which differs from the install name. 32 | 33 | 34 | For basic loading/dumping, the interface is nearly identical to that of the `json` module. 35 | ```python 36 | import json5 37 | json_text = """{ // This is a JSON5 comment 38 | "foo": "bar", /* this is a JSON5 block 39 | comment that can span lines */ 40 | bacon: "eggs" // unquoted Identifiers also work 41 | } 42 | """ 43 | print(json5.loads(json_text)) 44 | # {"foo": "bar", "bacon": "eggs"} 45 | 46 | with open('myfile.json5') as f: 47 | data = json5.load(f) 48 | ``` 49 | 50 | For loading JSON5, the same parameters `object_hook`, `object_pairs_hook` and `parse_*` keyword arguments are available 51 | here for `load`/`loads`. 52 | 53 | Additionally, a new hook, `parse_json5_identifiers`, is available to help users control the 54 | output of parsing identifiers. By default, JSON5 Identifiers in object keys are returned as a `JsonIdentifier` object, 55 | which is a subclass of `str` (meaning it's compatible anywhere `str` is accepted). 56 | This helps keep keys the same round-trip, rather than converting unquoted identifiers into 57 | strings: 58 | 59 | ```python 60 | >>> text = '{bacon: "eggs"}' 61 | >>> json5.dumps(json5.loads(text)) == text 62 | True 63 | ``` 64 | 65 | You can change this behavior with the `parse_json5_identifiers` argument with a callable that receives the `JsonIdentifier` object 66 | and its return value is used instead. For example, you can specify `parse_json5_identifiers=str` to convert identifiers 67 | to strings. 68 | 69 | ```python 70 | >>> json5.dumps(json5.loads(text, parse_json5_identifiers=str)) 71 | '{"bacon": "eggs"}' 72 | ``` 73 | 74 | 75 | ## Custom loaders; Abstract JSON5 Models 76 | 77 | **Note:** the underlying model API and tokens are not stable and are subject to breaking changes, even in minor releases. 
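The next paragraphs introduce this model API; as a quick preview, here is a minimal round-trip sketch adapted from the comments documentation in `docs/comments.rst`. Attribute names such as `wsc_before` belong to the unstable model API described in the note above and may change between releases:

```python
from json5.loader import loads, ModelLoader
from json5.dumper import dumps, ModelDumper
from json5.model import BlockComment

source = '{"foo": "bar"}'
# Parse to a model (instead of plain Python objects) so non-data elements survive
model = loads(source, loader=ModelLoader())
# Attach a block comment before the first key
model.value.key_value_pairs[0].key.wsc_before.append(BlockComment("/* comment */"))
dumps(model, dumper=ModelDumper())  # '{/* comment */"foo": "bar"}'
```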
78 | 79 | json-five also features an API for representing JSON5 as an abstract model. This enables a wide degree of capabilities for 80 | various use-cases, such as linters, formatters, custom serialization/deserialization, and more. 81 | 82 | 83 | Example: a simple model 84 | 85 | ```python 86 | from json5.loader import loads, ModelLoader 87 | json_string = """{foo: "bar"}""" 88 | model = loads(json_string, loader=ModelLoader()) 89 | ``` 90 | The resulting model object looks something like this: 91 | ```python 92 | JSONText( 93 | value=JSONObject( 94 | keys=[Identifier(name="foo", raw_value="foo")], 95 | values=[DoubleQuotedString(characters="bar", raw_value='"bar"')], 96 | trailing_comma=None, 97 | ) 98 | ) 99 | ``` 100 | 101 | 102 | It is possible to make edits to the model, which will affect the output when dumped using the model dumper. However, 103 | there is (currently) no validation to ensure your model edits won't result in invalid JSON5 when dumped. 104 | 105 | You may also implement custom loaders and dumpers to control serialization and deserialization. See the [full documentation](https://json-five.readthedocs.io/en/latest/extending.html#custom-loaders-and-dumpers) 106 | for more information. 107 | 108 | ## Tokenization 109 | 110 | You can also leverage tokenization of JSON5: 111 | 112 | ```python 113 | from json5.tokenizer import tokenize 114 | 115 | json_string = """{foo: "bar"}""" 116 | for tok in tokenize(json_string): 117 | print(tok.type) 118 | ``` 119 | Output would be: 120 | ```text 121 | LBRACE 122 | NAME 123 | COLON 124 | WHITESPACE 125 | DOUBLE_QUOTE_STRING 126 | RBRACE 127 | ``` 128 | 129 | # Status 130 | 131 | This project currently fully supports the JSON5 spec and its interfaces for loading and dumping JSON5 is stable as of v1.0.0. 132 | There is still active development underway, particularly for the underlying abstract JSON5 model representations and 133 | ability to perform edits using the abstract model. 134 | 135 | In the future, this project may take advantage of its sister project currently being developed in Rust: [json-five-rs](https://github.com/spyoungtech/json-five-rs). 136 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/QuickStart.rst: -------------------------------------------------------------------------------- 1 | QuickStart 2 | ========== 3 | 4 | Installation 5 | ------------ 6 | 7 | .. code-block:: 8 | 9 | pip install json-five 10 | 11 | 12 | Basic Usage 13 | ----------- 14 | 15 | Suppose you have a JSON5 compliant file ``my-json-file.json`` 16 | 17 | .. code-block:: 18 | 19 | // This is a JSON5 file! 
20 | {'foo': 'bar'} 21 | 22 | You can load this file to Python like so: 23 | 24 | .. code-block:: 25 | 26 | import json5 27 | with open('my-json-file.json') as f: 28 | data = json5.load(f) 29 | 30 | You can also work directly with strings 31 | 32 | .. code-block:: 33 | 34 | import json5 35 | json_string = '{json5 /* identifiers dont need quotes */: "values do though"}' 36 | data = json5.loads(json_string) 37 | 38 | 39 | Want to do more? Check out :doc:`/extending` to dive deeper! 40 | -------------------------------------------------------------------------------- /docs/comments.rst: -------------------------------------------------------------------------------- 1 | Working with comments; round-trip support 2 | ========================================= 3 | 4 | In order to work with comments, you must work with the raw model. 5 | 6 | Each node in the model has two special attributes: ``.wsc_before`` and ``.wsc_after``. These attributes are a list of 7 | any whitespace or comments that appear before or after the node. 8 | 9 | .. code-block:: 10 | 11 | from json5.loader import loads, ModelLoader 12 | from json5.dumper import dumps, ModelDumper 13 | from json5.model import BlockComment 14 | json_string = """{"foo": "bar"}""" 15 | model = loads(json_string, loader=ModelLoader()) 16 | print(model.value.key_value_pairs[0].value.wsc_before) # [' '] 17 | model.value.key_value_pairs[0].key.wsc_before.append(BlockComment("/* comment */")) 18 | dumps(model, dumper=ModelDumper()) # '{/* comment */"foo": "bar"}' 19 | 20 | 21 | This section will be expanded with time, the API for working with comments will likely change alot in future 22 | versions. 23 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | # -- Path setup -------------------------------------------------------------- 7 | # If extensions (or modules to document with autodoc) are in another directory, 8 | # add these directories to sys.path here. If the directory is relative to the 9 | # documentation root, use os.path.abspath to make it absolute, like shown here. 10 | # 11 | # import os 12 | # import sys 13 | # sys.path.insert(0, os.path.abspath('.')) 14 | # -- Project information ----------------------------------------------------- 15 | 16 | project = 'json-five' 17 | copyright = '2020, Spencer Phillip Young' 18 | author = 'Spencer Phillip Young' 19 | 20 | # The full version, including alpha/beta/rc tags 21 | release = '0.1.0' 22 | 23 | 24 | # -- General configuration --------------------------------------------------- 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be 27 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 28 | # ones. 29 | extensions = [] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # List of patterns, relative to source directory, that match files and 35 | # directories to ignore when looking for source files. 36 | # This pattern also affects html_static_path and html_extra_path. 
37 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 38 | 39 | 40 | # -- Options for HTML output ------------------------------------------------- 41 | 42 | # The theme to use for HTML and HTML Help pages. See the documentation for 43 | # a list of builtin themes. 44 | # 45 | html_theme = 'sphinx_rtd_theme' 46 | 47 | # Add any paths that contain custom static files (such as style sheets) here, 48 | # relative to this directory. They are copied after the builtin static files, 49 | # so a file named "default.css" will overwrite the builtin "default.css". 50 | html_static_path = ['_static'] 51 | -------------------------------------------------------------------------------- /docs/docrequirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==3.0.3 2 | sphinx_rtd_theme==0.4.3 3 | -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | Extending json-five 2 | =================== 3 | 4 | 5 | The ``json`` way 6 | ---------------- 7 | 8 | 9 | ``json5.load`` and ``json5.loads`` support a similar interface to the stdlib ``json`` module. Specifically, 10 | you can provide the following arguments that have the same meaning as in ``json.load``: 11 | 12 | - ``parse_int`` 13 | - ``parse_float`` 14 | - ``parse_constant`` 15 | - ``object_hook`` 16 | - ``object_pairs_hook`` 17 | 18 | This is convenient if you have existing code that uses these arguments with the ``json`` module, but want to also 19 | support JSON5. These options are also useful as a simple way to customize parsing of json types. 20 | 21 | Additionally, a new hook keyword argument, ``parse_json5_identifiers``, is available to help users control the 22 | output of parsing identifiers. By default, JSON5 Identifiers in object keys are returned as a ``JsonIdentifier`` object, 23 | which is a subclass of ``str`` (meaning it's compatible anywhere ``str`` is accepted). 24 | This helps keep keys the same round-trip, rather than converting unquoted identifiers into quoted strings, such that 25 | ``dumps(loads(text)) == text`` (in this case). 26 | 27 | You can change this behavior with the ``parse_json5_identifiers`` keyword argument with a callable that receives the `JsonIdentifier` object 28 | and its return value is used instead. For example, you can specify ``parse_json5_identifiers=str`` to convert identifiers 29 | to normal strings, such that ``dumps(loads('{foo: "bar"}')) == '{"foo": "bar"}'``. 30 | 31 | However, this package does not support the ``cls`` keyword found in the standard library ``json`` module. 32 | If you want to implement custom serializers/deserializers, read on about custom loaders/dumpers. 33 | 34 | 35 | Custom Loaders and Dumpers 36 | -------------------------- 37 | 38 | This package uses "Loaders" as part of the deserialization of JSON text to Python. "Dumpers" are used to 39 | serialize Python objects to JSON text. 40 | 41 | The entry points for loaders and dumpers are the ``load`` and ``dump`` methods, respectively. 42 | You can override these methods to implement custom loading of models or dumping of objects. 43 | 44 | Extending the default loader 45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 46 | 47 | The default loader takes in a model and produces, in the default case, Python objects. 48 | 49 | As a simple example, you can extend the default loader with your own to customize loading of lists. 
Here, 50 | I'll create a custom loader that, when it encounters an array (``json5.model.JSONArray``) with with only one value, it will return 51 | the single value, rather than a single-item array. 52 | 53 | .. code-block:: 54 | 55 | from json5.loader import DefaultLoader, loads 56 | from json5.model import JSONArray 57 | 58 | 59 | class MyCustomLoader(DefaultLoader): 60 | def load(self, node): 61 | if isinstance(node, JSONArray): 62 | return self.json_array_to_python(node) 63 | else: 64 | return super().load(node) 65 | 66 | def json_array_to_python(self, node): 67 | if len(node.values) == 1: 68 | return self.load(node.values[0]) 69 | else: 70 | return super().json_array_to_python(node) 71 | 72 | The ``loads`` function accepts a ``loader`` keyword argument, where the custom loader can be passed in. 73 | 74 | .. code-block:: 75 | 76 | json_string = "{foo: ['bar', 'baz'], bacon: ['eggs']}" 77 | loads(json_string) # Using the regular default loader 78 | # {'foo': ['bar', 'baz'], 'bacon': ['eggs']} 79 | 80 | loads(json_string, loader=MyCustomLoader()) # use the custom loader instead 81 | # {'foo': ['bar', 'baz'], 'bacon': 'eggs'} 82 | 83 | 84 | Extending the default dumper 85 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 86 | 87 | Extending the dumper follows a similar principle as extending the loader. 88 | 89 | As an example, I'll make a custom dumper that dumps booleans ``True`` and ``False`` to integers instead of the 90 | JSON ``true`` or ``false``. 91 | 92 | .. code-block:: 93 | 94 | from json5.dumper import DefaultDumper, dumps 95 | 96 | class MyCustomDumper(DefaultDumper): 97 | def dump(self, node): 98 | if isinstance(node, bool): 99 | return self.bool_to_json(node) 100 | else: 101 | return super().dump(node) 102 | 103 | def bool_to_json(self, node): 104 | super().dump(int(node.value)) 105 | 106 | And you can see the effects 107 | 108 | .. code-block:: 109 | 110 | >>> dumps([True, False]) 111 | '[true, false]' 112 | >>> dumps([True, False], dumper=MyCustomDumper()) 113 | '[1, 2]' 114 | 115 | 116 | Other loaders/dumpers and tools 117 | ------------------------------- 118 | 119 | Besides the default loader, there is also the ``ModelLoader`` which simply returns the raw model 120 | with no additional processing. 121 | 122 | Besides the default dumper, there is also the ``ModelDumper`` which takes a model and serializes it to JSON. 123 | 124 | The ``json5.dumper.modelize`` function can take python objects and convert them to a model. 125 | 126 | 127 | .. code-block:: 128 | 129 | from json5.dumper import modelize 130 | obj = ['foo', 123, True] 131 | modelize(obj) 132 | 133 | The resulting model: 134 | 135 | .. code-block:: 136 | 137 | JSONArray( 138 | values=[ 139 | SingleQuotedString(characters='foo', raw_value="'foo'"), 140 | Integer(raw_value='123', value=123, is_hex=False), 141 | BooleanLiteral(value=True), 142 | ], 143 | trailing_comma=None, 144 | ) 145 | -------------------------------------------------------------------------------- /docs/how.rst: -------------------------------------------------------------------------------- 1 | How this package works 2 | ====================== 3 | 4 | This is an overview of how the internals of this package work. The code demonstrated here is not 5 | necessarily intended to be used by users! 6 | 7 | If you're wondering how to use this package, see :doc:`/QuickStart` instead. 8 | 9 | 10 | 11 | Deserializing JSON to Python; the journey 12 | ----------------------------------------- 13 | 14 | The first step in deserialization is tokenizing. 
Text, assuming it is conforming to the JSON5 spec, 15 | is parsed into _tokens_. The tokens are then _parsed_ to produce a representative _model_ of the JSON structure. 16 | Finally, that model is _loaded_ where each node in the model is turned into an instance of a Python data type. 17 | 18 | Let's explore this process interactively. 19 | 20 | tokenizing 21 | ^^^^^^^^^^ 22 | 23 | Tokenizing is the first step in turning JSON text into Python objects. Let's look at tokenizing 24 | a very simple empty JSON object ``{}`` 25 | 26 | .. code-block:: 27 | 28 | >>> from json5.tokenizer import tokenize 29 | >>> json_string = "{}" 30 | >>> tokens = tokenize(json_string) 31 | >>> for token in tokens: 32 | ... print(token) 33 | ... 34 | Token(type='LBRACE', value='{', lineno=1, index=0) 35 | Token(type='RBRACE', value='}', lineno=1, index=1) 36 | 37 | As you can see, this broke down into two tokens: the left brace and the right brace. 38 | 39 | For good measure, let's see a slightly more complex tokenization 40 | 41 | .. code-block:: 42 | 43 | for token in tokenize("{foo: 'bar'}"): 44 | print(token) 45 | 46 | Token(type='LBRACE', value='{', lineno=1, index=0) 47 | Token(type='NAME', value='foo', lineno=1, index=1) 48 | Token(type='COLON', value=':', lineno=1, index=4) 49 | Token(type='WHITESPACE', value=' ', lineno=1, index=5) 50 | Token(type='SINGLE_QUOTE_STRING', value="'bar'", lineno=1, index=6) 51 | Token(type='RBRACE', value='}', lineno=1, index=11) 52 | 53 | These tokens will be used to build a model in the next step. 54 | 55 | 56 | Parsing and models 57 | ^^^^^^^^^^^^^^^^^^ 58 | 59 | As the text is processed into tokens, the stream of tokens is parsed into a model representing the JSON structure. 60 | 61 | Let's start with the same simple example of an empty JSON object ``{}`` 62 | 63 | .. code-block:: 64 | 65 | >>> from json5.tokenizer import tokenize 66 | >>> from json5.parser import parse_tokens 67 | >>> tokens = tokenize("{}") 68 | >>> model = parse_tokens(tokens) 69 | >>> model 70 | JSONText(value=JSONObject(key_value_pairs=[], trailing_comma=None)) 71 | 72 | The tokens were parsed to produce a model. Each production (part) in the model more or less represents a part of the 73 | `JSON5 grammar`_. ``JSONText`` is always the root production of the model for any JSON5 document. 74 | 75 | Let's look at a more complex model for the JSON text ``{foo: 0xC0FFEE}`` -- This model has been 'prettified' for this doc: 76 | 77 | .. code-block:: 78 | 79 | JSONText( 80 | value=JSONObject( 81 | key_value_pairs=[ 82 | KeyValuePair( 83 | key=Identifier(name='foo'), 84 | value=Integer(raw_value='0xC0FFEE', value=12648430, is_hex=True), 85 | ) 86 | ], 87 | trailing_comma=None, 88 | ) 89 | ) 90 | 91 | 92 | You can also build model objects 'manually' without any source text. 93 | 94 | .. code-block:: 95 | 96 | from json5.model import * 97 | model = JSONText(value=JSONObject(KeyValuePair(key=Identifier('bacon'), value=Infinity()))) 98 | 99 | 100 | Loading 101 | ^^^^^^^ 102 | 103 | Once we have a model in-hand, we can use it to generate Python object representation from the model. To do this, 104 | specialized classes, called Loaders, are used. Loaders take a model and produce something else, like Python data types. 105 | 106 | 107 | In this example, we'll just create a model instead of parsing one from text and turn it into Python using the 108 | default loader (the default loader is used when calling ``loads`` by default. 109 | 110 | .. 
code-block:: 111 | 112 | >>> from json5.loader import DefaultLoader 113 | >>> from json5.model import * 114 | >>> loader = DefaultLoader() 115 | >>> model = JSONText(value=JSONObject(KeyValuePair(key=Identifier('bacon'), value=Infinity()))) 116 | >>> loader.load(model) 117 | {'bacon': inf} 118 | 119 | 120 | 121 | Serializing to JSON 122 | ------------------- 123 | 124 | Objects can be serialized to JSON using _dumpers_. A dumper takes and object and writes JSON text representing the object. 125 | The default dumper dumps python objects directly to JSON text. 126 | 127 | .. code-block:: 128 | 129 | >>> from json5 import dumps 130 | >>> dumps(['foo', 'bar', 'baz']) 131 | '["foo", "bar", "baz"]' 132 | 133 | 134 | 135 | .. _JSON5 grammar: https://spec.json5.org/#grammar 136 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. json-five documentation master file, created by 2 | sphinx-quickstart on Tue May 19 18:32:47 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to json-five's documentation! 7 | ===================================== 8 | 9 | `GitHub`_ 10 | 11 | .. _GitHub: https://github.com/spyoungtech/json-five 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: Contents: 16 | 17 | QuickStart 18 | extending 19 | how 20 | comments 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /json5/__init__.py: -------------------------------------------------------------------------------- 1 | from .dumper import dump 2 | from .dumper import dumps 3 | from .loader import JsonIdentifier 4 | from .loader import load 5 | from .loader import loads 6 | from .utils import JSON5DecodeError 7 | 8 | __all__ = ['dump', 'dumps', 'load', 'loads', 'JSON5DecodeError', 'JsonIdentifier'] 9 | -------------------------------------------------------------------------------- /json5/dumper.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import io 4 | import json 5 | import math 6 | import typing 7 | from abc import abstractmethod 8 | from functools import singledispatchmethod 9 | from typing import Any 10 | 11 | from .loader import JsonIdentifier 12 | from .model import BlockComment 13 | from .model import BooleanLiteral 14 | from .model import Comment 15 | from .model import DoubleQuotedString 16 | from .model import Float 17 | from .model import Identifier 18 | from .model import Infinity 19 | from .model import Integer 20 | from .model import JSONArray 21 | from .model import JSONObject 22 | from .model import JSONText 23 | from .model import KeyValuePair 24 | from .model import LineComment 25 | from .model import NaN 26 | from .model import Node 27 | from .model import NullLiteral 28 | from .model import SingleQuotedString 29 | from .model import String 30 | from .model import TrailingComma 31 | from .model import UnaryOp 32 | from .model import Value 33 | 34 | 35 | class Environment: 36 | def __init__(self) -> None: 37 | self.outfile: typing.TextIO = io.StringIO() 38 | self.indent_level: int = 0 39 | self.indent: int = 0 40 | 41 | def write(self, s: str, indent: int | None = None) -> None: 42 | if indent is None: 43 | indent = self.indent_level 44 | whitespace = ' ' * self.indent * indent 45 | s = f'{whitespace}{s}' 46 | self.outfile.write(s) 47 | 48 | 49 | def dump(obj: Any, f: typing.TextIO, **kwargs: Any) -> int: 50 | text = dumps(obj, **kwargs) 51 | return f.write(text) 52 | 53 | 54 | def dumps(obj: Any, dumper: BaseDumper | None = None, indent: int = 0) -> str: 55 | env = Environment() 56 | env.indent = indent 57 | if dumper is None: 58 | dumper = DefaultDumper(env=env) 59 | dumper.dump(obj) 60 | dumper.env.outfile.seek(0) 61 | ret: str = dumper.env.outfile.read() 62 | return ret 63 | 64 | 65 | class BaseDumper: 66 | def __init__(self, env: Environment | None = None): 67 | if env is None: 68 | env = Environment() 69 | self.env = env 70 | 71 | @singledispatchmethod 72 | @abstractmethod 73 | def dump(self, obj: Any) -> Any: 74 | return NotImplemented 75 | 76 | 77 | class DefaultDumper(BaseDumper): 78 | """ 79 | Dump Python objects to a JSON string 80 | """ 81 | 82 | @singledispatchmethod 83 | def dump(self, obj: Any) -> Any: 84 | raise NotImplementedError(f"Cannot dump node {repr(obj)}") 85 | 86 | to_json = dump.register 87 | 88 | @to_json(dict) 89 | def dict_to_json(self, d: dict[Any, Any]) -> Any: 90 | self.env.write('{', indent=0) 91 | if self.env.indent: 92 | self.env.write('\n', indent=0) 93 | self.env.indent_level 
+= 1 94 | index = 0 95 | for index, (key, value) in enumerate(d.items(), start=1): 96 | if self.env.indent: 97 | self.env.write('') 98 | self.dump(key) 99 | self.env.write(': ', indent=0) 100 | self.dump(value) 101 | if index == len(d): 102 | break 103 | if self.env.indent: 104 | self.env.write(',', indent=0) 105 | self.env.write('\n', indent=0) 106 | else: 107 | self.env.write(', ', indent=0) 108 | 109 | if self.env.indent: 110 | self.env.indent_level -= 1 111 | if index != 0: 112 | self.env.write('\n', indent=0) 113 | self.env.write('}') 114 | else: 115 | self.env.write('}', indent=0) 116 | 117 | @to_json(int) 118 | def int_to_json(self, i: int) -> Any: 119 | self.env.write(str(i), indent=0) 120 | 121 | @to_json(JsonIdentifier) 122 | def identifier_to_json(self, s: JsonIdentifier) -> Any: 123 | self.env.write(s, indent=0) 124 | 125 | @to_json(str) 126 | def str_to_json(self, s: str) -> Any: 127 | self.env.write(json.dumps(s), indent=0) 128 | 129 | @to_json(list) 130 | def list_to_json(self, the_list: list[Any]) -> Any: 131 | self.env.write('[', indent=0) 132 | if self.env.indent: 133 | self.env.indent_level += 1 134 | self.env.write('\n', indent=0) 135 | list_length = len(the_list) 136 | for index, item in enumerate(the_list, start=1): 137 | if self.env.indent: 138 | self.env.write('') 139 | self.dump(item) 140 | if index != list_length: 141 | if self.env.indent: 142 | self.env.write(',', indent=0) 143 | else: 144 | self.env.write(', ', indent=0) 145 | if self.env.indent: 146 | self.env.write('\n', indent=0) 147 | if self.env.indent: 148 | self.env.indent_level -= 1 149 | self.env.write(']') 150 | 151 | @to_json(float) 152 | def float_to_json(self, f: float) -> Any: 153 | if f == math.inf: 154 | self.env.write('Infinity', indent=0) 155 | elif f == -math.inf: 156 | self.env.write('-Infinity', indent=0) 157 | elif f is math.nan: 158 | self.env.write('NaN', indent=0) 159 | else: 160 | self.env.write(str(f), indent=0) 161 | 162 | @to_json(bool) 163 | def bool_to_json(self, b: bool) -> Any: 164 | self.env.write(str(b).lower(), indent=0) 165 | 166 | @to_json(type(None)) 167 | def none_to_json(self, _: Any) -> Any: 168 | self.env.write('null', indent=0) 169 | 170 | 171 | class ModelDumper: 172 | """ 173 | Dump a model to a JSON string 174 | """ 175 | 176 | def __init__(self, env: Environment | None = None): 177 | # any provided environment is ignored 178 | self.env = Environment() 179 | 180 | def process_wsc_before(self, node: Node) -> None: 181 | for wsc in node.wsc_before: 182 | if isinstance(wsc, Comment): 183 | self.dump(wsc) 184 | elif isinstance(wsc, str): 185 | self.env.write(wsc) 186 | else: 187 | raise ValueError(f"Did not expect {type(node)}") 188 | 189 | def process_wsc_after(self, node: Node) -> None: 190 | for wsc in node.wsc_after: 191 | if isinstance(wsc, Comment): 192 | self.dump(wsc) 193 | elif isinstance(wsc, str): 194 | self.env.write(wsc) 195 | else: 196 | raise ValueError(f"Did not expect {type(wsc)}") 197 | 198 | def process_leading_wsc(self, node: JSONObject | JSONArray) -> None: 199 | for wsc in node.leading_wsc: 200 | if isinstance(wsc, Comment): 201 | self.dump(wsc) 202 | elif isinstance(wsc, str): 203 | self.env.write(wsc) 204 | else: 205 | raise ValueError(f"Did not expect {type(wsc)}") 206 | 207 | @singledispatchmethod 208 | def dump(self, node: Node) -> Any: 209 | raise NotImplementedError('foo') 210 | 211 | to_json = dump.register 212 | 213 | @to_json(JSONText) 214 | def json_model_to_json(self, node: JSONText) -> Any: 215 | self.process_wsc_before(node) 216 | 
self.dump(node.value) 217 | self.process_wsc_after(node) 218 | 219 | @to_json(JSONObject) 220 | def json_object_to_json(self, node: JSONObject) -> Any: 221 | self.process_wsc_before(node) 222 | self.env.write('{') 223 | if node.leading_wsc: 224 | self.process_leading_wsc(node) 225 | key_value_pairs = node.key_value_pairs 226 | num_pairs = len(key_value_pairs) 227 | for index, kvp in enumerate(key_value_pairs, start=1): 228 | self.dump(kvp.key) 229 | self.env.write(':') 230 | self.dump(kvp.value) 231 | if index != num_pairs: 232 | self.env.write(',') 233 | if node.trailing_comma: 234 | self.dump(node.trailing_comma) 235 | self.env.write('}') 236 | self.process_wsc_after(node) 237 | 238 | @to_json(JSONArray) 239 | def json_array_to_json(self, node: JSONArray) -> Any: 240 | self.process_wsc_before(node) 241 | self.env.write('[') 242 | if node.leading_wsc: 243 | self.process_leading_wsc(node) 244 | for index, value in enumerate(node.values, start=1): 245 | self.dump(value) 246 | if index != len(node.values): 247 | self.env.write(',') 248 | if node.trailing_comma: 249 | self.dump(node.trailing_comma) 250 | self.env.write(']') 251 | self.process_wsc_after(node) 252 | 253 | @to_json(Identifier) 254 | def identifier_to_json(self, node: Identifier) -> Any: 255 | self.process_wsc_before(node) 256 | self.env.write(node.raw_value) 257 | self.process_wsc_after(node) 258 | 259 | @to_json(Integer) 260 | def integer_to_json(self, node: Integer) -> Any: 261 | self.process_wsc_before(node) 262 | self.env.write(node.raw_value) 263 | self.process_wsc_after(node) 264 | 265 | @to_json(Float) 266 | def float_to_json(self, node: Float) -> Any: 267 | self.process_wsc_before(node) 268 | self.env.write(node.raw_value) 269 | self.process_wsc_after(node) 270 | 271 | @to_json(UnaryOp) 272 | def unary_to_json(self, node: UnaryOp) -> Any: 273 | self.process_wsc_before(node) 274 | self.env.write(node.op) 275 | self.dump(node.value) 276 | self.process_wsc_after(node) 277 | 278 | @to_json(String) 279 | def string_to_json(self, node: SingleQuotedString | DoubleQuotedString) -> Any: 280 | self.process_wsc_before(node) 281 | self.env.write(node.raw_value) # The original value, including any escape sequences or line continuations 282 | self.process_wsc_after(node) 283 | 284 | @to_json(NullLiteral) 285 | def null_to_json(self, node: NullLiteral) -> Any: 286 | self.process_wsc_before(node) 287 | self.env.write('null') 288 | self.process_wsc_after(node) 289 | 290 | @to_json(BooleanLiteral) 291 | def boolean_to_json(self, node: BooleanLiteral) -> Any: 292 | self.process_wsc_before(node) 293 | if node.value: 294 | self.env.write('true') 295 | else: 296 | self.env.write('false') 297 | self.process_wsc_after(node) 298 | 299 | @to_json(LineComment) 300 | def line_comment_to_json(self, node: LineComment) -> Any: 301 | self.process_wsc_before(node) 302 | self.env.write(node.value) 303 | self.process_wsc_after(node) 304 | 305 | @to_json(BlockComment) 306 | def block_comment_to_json(self, node: BlockComment) -> Any: 307 | self.process_wsc_before(node) 308 | self.env.write(node.value) 309 | self.process_wsc_after(node) 310 | 311 | @to_json(TrailingComma) 312 | def trailing_comma_to_json(self, node: TrailingComma) -> Any: 313 | self.process_wsc_before(node) 314 | self.env.write(',') 315 | self.process_wsc_after(node) 316 | 317 | @to_json(Infinity) 318 | def infinity_to_json(self, node: Infinity) -> Any: 319 | self.process_wsc_before(node) 320 | 321 | self.env.write('Infinity') 322 | self.process_wsc_after(node) 323 | 324 | @to_json(NaN) 325 
| def nan_to_json(self, node: NaN) -> Any: 326 | self.process_wsc_before(node) 327 | self.env.write('NaN') 328 | self.process_wsc_after(node) 329 | 330 | 331 | class Modelizer: 332 | """ 333 | Turn Python objects into a model 334 | """ 335 | 336 | @singledispatchmethod 337 | def modelize(self, obj: Any) -> Node: 338 | raise NotImplementedError(f"Cannot modelize object of type {type(obj)}") 339 | 340 | to_model = modelize.register 341 | 342 | @to_model(str) 343 | def str_to_model(self, s: str) -> SingleQuotedString | DoubleQuotedString: 344 | if repr(s).startswith("'"): 345 | return SingleQuotedString(s, raw_value=repr(s)) 346 | else: 347 | return DoubleQuotedString(s, raw_value=repr(s)) 348 | 349 | @to_model(dict) 350 | def dict_to_model(self, d: dict[Any, Any]) -> JSONObject: 351 | kvps: list[KeyValuePair] = [] 352 | for key, value in d.items(): 353 | kvp = KeyValuePair(key=self.modelize(key), value=self.modelize(value)) # type: ignore[arg-type] 354 | kvps.append(kvp) 355 | return JSONObject(*kvps) 356 | 357 | @to_model(list) 358 | def list_to_model(self, lst: list[Any]) -> JSONArray: 359 | list_values: list[Value] = [] 360 | for v in lst: 361 | list_values.append(self.modelize(v)) # type: ignore[arg-type] 362 | return JSONArray(*list_values) 363 | 364 | @to_model(int) 365 | def int_to_model(self, i: int) -> Integer: 366 | return Integer(str(i)) 367 | 368 | @to_model(float) 369 | def float_to_model(self, f: float) -> Infinity | NaN | Float | UnaryOp: 370 | if f == math.inf: 371 | return Infinity() 372 | elif f == -math.inf: 373 | return UnaryOp('-', Infinity()) 374 | elif f is math.nan: 375 | return NaN() 376 | else: 377 | return Float(str(f)) 378 | 379 | @to_model(bool) 380 | def bool_to_model(self, b: bool) -> BooleanLiteral: 381 | return BooleanLiteral(b) 382 | 383 | @to_model(type(None)) 384 | def none_to_model(self, _: Any) -> NullLiteral: 385 | return NullLiteral() 386 | 387 | 388 | def modelize(obj: Any) -> Node: 389 | """ 390 | 391 | :param obj: a python object 392 | :return: a model representing the python object 393 | """ 394 | return Modelizer().modelize(obj) 395 | -------------------------------------------------------------------------------- /json5/loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import typing 5 | from abc import abstractmethod 6 | from functools import singledispatchmethod 7 | from typing import Callable 8 | from typing import Literal 9 | 10 | from .model import BooleanLiteral 11 | from .model import Comment 12 | from .model import DoubleQuotedString 13 | from .model import Float 14 | from .model import Identifier 15 | from .model import Infinity 16 | from .model import Integer 17 | from .model import JSONArray 18 | from .model import JSONObject 19 | from .model import JSONText 20 | from .model import NaN 21 | from .model import Node 22 | from .model import NullLiteral 23 | from .model import SingleQuotedString 24 | from .model import String 25 | from .model import UnaryOp 26 | from .parser import parse_source 27 | 28 | logger = logging.getLogger(__name__) 29 | # logger.setLevel(level=logging.DEBUG) 30 | # logger.addHandler(logging.StreamHandler(stream=sys.stderr)) 31 | 32 | 33 | class Environment: 34 | def __init__( 35 | self, 36 | object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = None, 37 | parse_float: Callable[[str], typing.Any] | None = None, 38 | parse_int: Callable[[str], typing.Any] | None = None, 39 | parse_constant: 
Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = None, 40 | strict: bool = True, 41 | object_pairs_hook: Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] | None = None, 42 | parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = None, 43 | ): 44 | self.object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = object_hook 45 | self.parse_float: Callable[[str], typing.Any] | None = parse_float 46 | self.parse_int: Callable[[str], typing.Any] | None = parse_int 47 | self.parse_constant: Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = parse_constant 48 | self.strict: bool = strict 49 | self.object_pairs_hook: None | ( 50 | Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] 51 | ) = object_pairs_hook 52 | self.parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = parse_json5_identifiers 53 | 54 | 55 | class JsonIdentifier(str): 56 | ... 57 | 58 | 59 | def load( 60 | f: typing.TextIO, 61 | *, 62 | loader: LoaderBase | None = None, 63 | object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = None, 64 | parse_float: Callable[[str], typing.Any] | None = None, 65 | parse_int: Callable[[str], typing.Any] | None = None, 66 | parse_constant: Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = None, 67 | strict: bool = True, 68 | object_pairs_hook: Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] | None = None, 69 | parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = None, 70 | ) -> typing.Any: 71 | """ 72 | Like loads, but takes a file-like object with a read method. 73 | 74 | :param f: 75 | :param kwargs: 76 | :return: 77 | """ 78 | text = f.read() 79 | return loads( 80 | text, 81 | loader=loader, 82 | object_hook=object_hook, 83 | parse_float=parse_float, 84 | parse_int=parse_int, 85 | parse_constant=parse_constant, 86 | strict=strict, 87 | object_pairs_hook=object_pairs_hook, 88 | parse_json5_identifiers=parse_json5_identifiers, 89 | ) 90 | 91 | 92 | def loads( 93 | s: str, 94 | *, 95 | loader: LoaderBase | None = None, 96 | object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = None, 97 | parse_float: Callable[[str], typing.Any] | None = None, 98 | parse_int: Callable[[str], typing.Any] | None = None, 99 | parse_constant: Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = None, 100 | strict: bool = True, 101 | object_pairs_hook: Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] | None = None, 102 | parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = None, 103 | ) -> typing.Any: 104 | """ 105 | Take a string of JSON text and deserialize it 106 | 107 | :param s: 108 | :param loader: The loader class to use 109 | :param object_hook: same meaning as in ``json.loads`` 110 | :param parse_float: same meaning as in ``json.loads`` 111 | :param parse_int: same meaning as in ``json.loads`` 112 | :param parse_constant: same meaning as in ``json.loads`` 113 | :param strict: same meaning as in ``json.loads`` (currently has no effect) 114 | :param object_pairs_hook: same meaning as in ``json.loads`` 115 | :param parse_json5_identifiers: callable that is passed a JsonIdentifer. 
The return value of the callable is used to load JSON Identifiers (unquoted keys) in JSON5 objects 116 | :return: 117 | """ 118 | model = parse_source(s) 119 | # logger.debug('Model is %r', model) 120 | if loader is None: 121 | loader = DefaultLoader( 122 | object_hook=object_hook, 123 | parse_float=parse_float, 124 | parse_int=parse_int, 125 | parse_constant=parse_constant, 126 | strict=strict, 127 | object_pairs_hook=object_pairs_hook, 128 | parse_json5_identifiers=parse_json5_identifiers, 129 | ) 130 | return loader.load(model) 131 | 132 | 133 | class LoaderBase: 134 | def __init__(self, env: Environment | None = None, **env_kwargs: typing.Any): 135 | if env is None: 136 | env = Environment(**env_kwargs) 137 | self.env: Environment = env 138 | 139 | @singledispatchmethod 140 | @abstractmethod 141 | def load(self, node: Node) -> typing.Any: 142 | return NotImplemented 143 | 144 | 145 | class DefaultLoader(LoaderBase): 146 | @singledispatchmethod 147 | def load(self, node: Node) -> typing.Any: 148 | raise NotImplementedError(f"Can't load node {node}") 149 | 150 | to_python = load.register 151 | 152 | @to_python(JSONText) 153 | def json_model_to_python(self, node: JSONText) -> typing.Any: 154 | logger.debug('json_model_to_python evaluating node %r', node) 155 | return self.load(node.value) 156 | 157 | @to_python(JSONObject) 158 | def json_object_to_python(self, node: JSONObject) -> typing.Any: 159 | logger.debug('json_object_to_python evaluating node %r', node) 160 | d = {} 161 | for key_value_pair in node.key_value_pairs: 162 | key = self.load(key_value_pair.key) 163 | value = self.load(key_value_pair.value) 164 | d[key] = value 165 | if self.env.object_pairs_hook: 166 | return self.env.object_pairs_hook(list(d.items())) 167 | elif self.env.object_hook: 168 | return self.env.object_hook(d) 169 | else: 170 | return d 171 | 172 | @to_python(JSONArray) 173 | def json_array_to_python(self, node: JSONArray) -> list[typing.Any]: 174 | logger.debug('json_array_to_python evaluating node %r', node) 175 | return [self.load(value) for value in node.values] 176 | 177 | @to_python(Identifier) 178 | def identifier_to_python(self, node: Identifier) -> typing.Any: 179 | logger.debug('identifier_to_python evaluating node %r', node) 180 | res = JsonIdentifier(node.name) 181 | if self.env.parse_json5_identifiers: 182 | return self.env.parse_json5_identifiers(res) 183 | return res 184 | 185 | @to_python(Infinity) # NaN/Infinity are covered here 186 | def inf_to_python(self, node: Infinity) -> typing.Any: 187 | logger.debug('inf_to_python evaluating node %r', node) 188 | if self.env.parse_constant: 189 | return self.env.parse_constant(node.const) 190 | return node.value 191 | 192 | @to_python(NaN) # NaN/Infinity are covered here 193 | def nan_to_python(self, node: NaN) -> typing.Any: 194 | logger.debug('nan_to_python evaluating node %r', node) 195 | if self.env.parse_constant: 196 | return self.env.parse_constant(node.const) 197 | return node.value 198 | 199 | @to_python(Integer) 200 | def integer_to_python(self, node: Integer) -> typing.Any: 201 | if self.env.parse_int: 202 | return self.env.parse_int(node.raw_value) 203 | else: 204 | return node.value 205 | 206 | @to_python(Float) 207 | def float_to_python(self, node: Float) -> typing.Any: 208 | if self.env.parse_float: 209 | return self.env.parse_float(node.raw_value) 210 | else: 211 | return node.value 212 | 213 | @to_python(UnaryOp) 214 | def unary_to_python(self, node: UnaryOp) -> typing.Any: 215 | logger.debug('unary_to_python evaluating node %r', 
node) 216 | if isinstance(node.value, Infinity): 217 | return self.load(node.value) 218 | value = self.load(node.value) 219 | if node.op == '-': 220 | return value * -1 221 | else: 222 | return value 223 | 224 | @to_python(String) 225 | def string_to_python(self, node: DoubleQuotedString | SingleQuotedString) -> str: 226 | logger.debug('string_to_python evaluating node %r', node) 227 | ret: str = node.characters 228 | return ret 229 | 230 | @to_python(NullLiteral) 231 | def null_to_python(self, node: NullLiteral) -> None: 232 | logger.debug('null_to_python evaluating node %r', node) 233 | return None 234 | 235 | @to_python(BooleanLiteral) 236 | def boolean_to_python(self, node: BooleanLiteral) -> bool: 237 | logger.debug('boolean_to_python evaluating node %r', node) 238 | return node.value 239 | 240 | @to_python(Comment) 241 | def comment_or_whitespace_to_python(self, node: Comment) -> typing.NoReturn: 242 | raise RuntimeError("Comments are not supported in the default loader!") 243 | 244 | 245 | class ModelLoader(LoaderBase): 246 | @singledispatchmethod 247 | def load(self, node: Node) -> typing.Any: 248 | return node 249 | -------------------------------------------------------------------------------- /json5/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import math 4 | import typing 5 | from collections import deque 6 | from typing import Any 7 | from typing import Literal 8 | from typing import NamedTuple 9 | 10 | from .tokenizer import JSON5Token 11 | 12 | __all__ = [ 13 | 'Node', 14 | 'JSONText', 15 | 'Value', 16 | 'Key', 17 | 'JSONObject', 18 | 'JSONArray', 19 | 'KeyValuePair', 20 | 'Identifier', 21 | 'Number', 22 | 'Integer', 23 | 'Float', 24 | 'Infinity', 25 | 'NaN', 26 | 'String', 27 | 'DoubleQuotedString', 28 | 'SingleQuotedString', 29 | 'BooleanLiteral', 30 | 'NullLiteral', 31 | 'UnaryOp', 32 | 'TrailingComma', 33 | 'Comment', 34 | 'LineComment', 35 | 'BlockComment', 36 | ] 37 | 38 | 39 | class KeyValuePair(NamedTuple): 40 | key: Key 41 | value: Value 42 | 43 | 44 | def walk(root: Node) -> typing.Generator[Node, None, None]: 45 | todo = deque([root]) 46 | while todo: 47 | node: Node = todo.popleft() 48 | todo.extend(iter_child_nodes(node)) 49 | yield node 50 | 51 | 52 | def iter_child_nodes(node: Node) -> typing.Generator[Node, None, None]: 53 | for attr, value in iter_fields(node): 54 | if isinstance(value, Node): 55 | yield value 56 | elif isinstance(value, list): 57 | for item in value: 58 | if isinstance(item, Node): 59 | yield item 60 | 61 | 62 | def iter_fields(node: Node) -> typing.Generator[tuple[str, Any], None, None]: 63 | for field_name in node._fields: 64 | try: 65 | value = getattr(node, field_name) 66 | yield field_name, value 67 | except AttributeError: 68 | pass 69 | 70 | 71 | class Node: 72 | excluded_names = ['excluded_names', 'wsc_before', 'wsc_after', 'leading_wsc', 'tok', 'end_tok'] 73 | 74 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 75 | # Whitespace/Comments before/after the node 76 | self.wsc_before: list[str | Comment] = [] 77 | self.wsc_after: list[str | Comment] = [] 78 | self._tok: JSON5Token | None = tok 79 | self._end_tok: JSON5Token | None = end_tok 80 | 81 | @property 82 | def col_offset(self) -> int | None: 83 | if self._tok is None: 84 | return None 85 | return self._tok.colno 86 | 87 | @property 88 | def end_col_offset(self) -> int | None: 89 | if self._end_tok is None: 90 | return None 91 | return 
self._end_tok.end_colno 92 | 93 | @property 94 | def lineno(self) -> int | None: 95 | if self._tok is None: 96 | return None 97 | return self._tok.lineno 98 | 99 | @property 100 | def end_lineno(self) -> int | None: 101 | if self._end_tok is None: 102 | return None 103 | r = self._end_tok.end_lineno 104 | return r 105 | 106 | def __repr__(self) -> str: 107 | rep = ( 108 | f"{self.__class__.__name__}(" 109 | + ", ".join( 110 | f"{key}={repr(value)}" 111 | for key, value in self.__dict__.items() 112 | if not key.startswith('_') and key not in self.excluded_names 113 | ) 114 | + ")" 115 | ) 116 | return rep 117 | 118 | @property 119 | def _fields(self) -> list[str]: 120 | fields = [item for item in list(self.__dict__) if not item.startswith('_') and item not in self.excluded_names] 121 | fields.extend(['wsc_before', 'wsc_after']) 122 | return fields 123 | 124 | 125 | class JSONText(Node): 126 | def __init__(self, value: Value, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 127 | assert isinstance(value, Value) 128 | self.value: Value = value 129 | super().__init__(tok=tok, end_tok=tok) 130 | 131 | 132 | class Value(Node): 133 | pass 134 | 135 | 136 | class Key(Node): 137 | ... 138 | 139 | 140 | class JSONObject(Value): 141 | def __init__( 142 | self, 143 | *key_value_pairs: KeyValuePair, 144 | trailing_comma: TrailingComma | None = None, 145 | leading_wsc: list[str | Comment] | None = None, 146 | tok: JSON5Token | None = None, 147 | end_tok: JSON5Token | None = None, 148 | ): 149 | keys: list[Key] = [] 150 | values: list[Value] = [] 151 | for key, value in key_value_pairs: 152 | assert isinstance(key, Key) 153 | assert isinstance(value, Value) 154 | keys.append(key) 155 | values.append(value) 156 | assert len(keys) == len(values) 157 | self.keys: list[Key] = keys 158 | self.values: list[Value] = values 159 | assert leading_wsc is None or all(isinstance(item, str) or isinstance(item, Comment) for item in leading_wsc) 160 | self.trailing_comma: TrailingComma | None = trailing_comma 161 | self.leading_wsc: list[str | Comment] = leading_wsc or [] 162 | 163 | super().__init__(tok=tok, end_tok=end_tok) 164 | 165 | @property 166 | def key_value_pairs(self) -> list[KeyValuePair]: 167 | return list(KeyValuePair(key, value) for key, value in zip(self.keys, self.values)) 168 | 169 | 170 | class JSONArray(Value): 171 | def __init__( 172 | self, 173 | *values: Value, 174 | trailing_comma: TrailingComma | None = None, 175 | leading_wsc: list[str | Comment] | None = None, 176 | tok: JSON5Token | None = None, 177 | end_tok: JSON5Token | None = None, 178 | ): 179 | vals = list(values) 180 | for value in vals: 181 | assert isinstance(value, Value), f"Was expecting object with type Value. 
Got {type(value)}" 182 | assert leading_wsc is None or all(isinstance(item, str) or isinstance(item, Comment) for item in leading_wsc) 183 | self.values: list[Value] = vals 184 | self.trailing_comma: TrailingComma | None = trailing_comma 185 | self.leading_wsc: list[str | Comment] = leading_wsc or [] 186 | 187 | super().__init__(tok=tok, end_tok=end_tok) 188 | 189 | 190 | class Identifier(Key): 191 | def __init__( 192 | self, name: str, raw_value: str | None = None, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None 193 | ): 194 | assert isinstance(name, str) 195 | if raw_value is None: 196 | raw_value = name 197 | assert isinstance(raw_value, str) 198 | assert len(name) > 0 199 | self.name: str = name 200 | self.raw_value: str = raw_value 201 | 202 | super().__init__(tok=tok, end_tok=tok) 203 | 204 | def __hash__(self) -> int: 205 | return hash(self.name) 206 | 207 | def __eq__(self, other: Any) -> bool: 208 | return hash(self) == hash(other) 209 | 210 | 211 | class Number(Value): 212 | ... 213 | 214 | 215 | class Integer(Number): 216 | def __init__( 217 | self, 218 | raw_value: str, 219 | is_hex: bool = False, 220 | is_octal: bool = False, 221 | tok: JSON5Token | None = None, 222 | end_tok: JSON5Token | None = None, 223 | ): 224 | assert isinstance(raw_value, str) 225 | if is_hex and is_octal: 226 | raise ValueError("is_hex and is_octal are mutually exclusive") 227 | if is_hex: 228 | value = int(raw_value, 0) 229 | elif is_octal: 230 | if raw_value.startswith('0o'): 231 | value = int(raw_value, 8) 232 | else: 233 | value = int(raw_value.replace('0', '0o', 1), 8) 234 | else: 235 | value = int(raw_value) 236 | self.value: int = value 237 | self.raw_value: str = raw_value 238 | self.is_hex: bool = is_hex 239 | self.is_octal: bool = is_octal 240 | 241 | super().__init__(tok=tok, end_tok=end_tok or tok) 242 | 243 | 244 | class Float(Number): 245 | def __init__( 246 | self, 247 | raw_value: str, 248 | exp_notation: str | None = None, 249 | tok: JSON5Token | None = None, 250 | end_tok: JSON5Token | None = None, 251 | ): 252 | value = float(raw_value) 253 | assert exp_notation is None or exp_notation in ('e', 'E') 254 | self.raw_value: str = raw_value 255 | self.exp_notation: str | None = exp_notation 256 | 257 | self.value: float = value 258 | super().__init__(tok=tok, end_tok=end_tok or tok) 259 | 260 | 261 | class Infinity(Number): 262 | def __init__(self, negative: bool = False, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 263 | self.negative: bool = negative 264 | 265 | super().__init__(tok=tok, end_tok=tok) 266 | 267 | @property 268 | def value(self) -> float: 269 | return math.inf if not self.negative else -math.inf 270 | 271 | @property 272 | def const(self) -> Literal['Infinity', '-Infinity']: 273 | if self.negative: 274 | return '-Infinity' 275 | else: 276 | return 'Infinity' 277 | 278 | 279 | class NaN(Number): 280 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 281 | super().__init__(tok=tok, end_tok=tok) 282 | 283 | @property 284 | def value(self) -> float: 285 | return math.nan 286 | 287 | @property 288 | def const(self) -> Literal['NaN']: 289 | return 'NaN' 290 | 291 | 292 | class String(Value, Key): 293 | def __init__( 294 | self, characters: str, raw_value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None 295 | ): 296 | assert isinstance(raw_value, str) 297 | assert isinstance(characters, str) 298 | self.characters: str = characters 299 | self.raw_value: str = raw_value 300 | 301 | 
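        # Added note (illustrative): String derives from both Value and Key, so a
        # document such as '{"a": "b"}' is modeled with DoubleQuotedString nodes
        # for the key "a" and for the value "b" alike.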
super().__init__(tok=tok, end_tok=tok) 302 | 303 | 304 | class DoubleQuotedString(String): 305 | ... 306 | 307 | 308 | class SingleQuotedString(String): 309 | ... 310 | 311 | 312 | class BooleanLiteral(Value): 313 | def __init__(self, value: bool, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 314 | assert value in (True, False) 315 | self.value: bool = value 316 | 317 | super().__init__(tok=tok, end_tok=tok) 318 | 319 | 320 | class NullLiteral(Value): 321 | value = None 322 | 323 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 324 | super().__init__(tok=tok, end_tok=tok) 325 | 326 | 327 | class UnaryOp(Value): 328 | def __init__( 329 | self, op: Literal['-', '+'], value: Number, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None 330 | ): 331 | assert op in ('-', '+') 332 | assert isinstance(value, Number) 333 | self.op: Literal['-', '+'] = op 334 | self.value: Number = value 335 | 336 | super().__init__(tok=tok, end_tok=end_tok) 337 | 338 | 339 | class TrailingComma(Node): 340 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 341 | super().__init__(tok=tok, end_tok=tok) # Trailing comma is always a single COMMA token 342 | 343 | 344 | class Comment(Node): 345 | def __init__(self, value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 346 | assert isinstance(value, str), f"Expected str got {type(value)}" 347 | self.value: str = value 348 | super().__init__(tok=tok, end_tok=tok) # Comments are always a single token 349 | 350 | 351 | class LineComment(Comment): 352 | ... 353 | 354 | 355 | class BlockComment(Comment): 356 | ... 357 | -------------------------------------------------------------------------------- /json5/parser.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import ast 4 | import sys 5 | import typing 6 | from functools import lru_cache 7 | from typing import Any 8 | from typing import Literal 9 | from typing import Protocol 10 | 11 | import regex as re 12 | from sly import Parser # type: ignore 13 | from sly.yacc import SlyLogger # type: ignore 14 | 15 | from .model import BlockComment 16 | from .model import BooleanLiteral 17 | from .model import Comment 18 | from .model import DoubleQuotedString 19 | from .model import Float 20 | from .model import Identifier 21 | from .model import Infinity 22 | from .model import Integer 23 | from .model import JSONArray 24 | from .model import JSONObject 25 | from .model import JSONText 26 | from .model import Key 27 | from .model import KeyValuePair 28 | from .model import LineComment 29 | from .model import NaN 30 | from .model import NullLiteral 31 | from .model import SingleQuotedString 32 | from .model import TrailingComma 33 | from .model import UnaryOp 34 | from .model import Value 35 | from .tokenizer import JSON5Token 36 | from .tokenizer import JSONLexer 37 | from .tokenizer import tokenize 38 | from .utils import JSON5DecodeError 39 | 40 | 41 | class QuietSlyLogger(SlyLogger): # type: ignore[misc] 42 | def warning(self, *args: Any, **kwargs: Any) -> None: 43 | return 44 | 45 | debug = warning 46 | info = warning 47 | 48 | 49 | ESCAPE_SEQUENCES = { 50 | 'b': '\u0008', 51 | 'f': '\u000C', 52 | 'n': '\u000A', 53 | 'r': '\u000D', 54 | 't': '\u0009', 55 | 'v': '\u000B', 56 | '0': '\u0000', 57 | '\\': '\u005c', 58 | '"': '\u0022', 59 | "'": '\u0027', 60 | } 61 | 62 | # class TrailingComma: 63 | # pass 64 | 65 | 66 | 
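# Illustrative usage (added comment; the exact pattern passed to re.sub by the
# string productions below is an assumption, not copied from this module): the
# escape table above is applied through a regex substitution whose single
# capture group is the escaped character, for example
#
#     re.sub(r'\\(.)', replace_escape_literals, r'foo\nbar')
#
# yields 'foo', a real newline, then 'bar'. Unknown escapes fall back to the
# bare character via ESCAPE_SEQUENCES.get(seq, seq).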
def replace_escape_literals(matchobj: re.Match[str]) -> str: 67 | s = matchobj.group(0) 68 | if s.startswith('\\0') and len(s) == 3: 69 | raise JSON5DecodeError("'\\0' MUST NOT be followed by a decimal digit", None) 70 | seq = matchobj.group(1) 71 | return ESCAPE_SEQUENCES.get(seq, seq) 72 | 73 | 74 | @lru_cache(maxsize=1024) 75 | def _latin_escape_replace(s: str) -> str: 76 | if s.startswith('\\x') and len(s) != 4: 77 | raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None) 78 | val: str = ast.literal_eval(f'"{s}"') 79 | if val == '\\': 80 | val = '\\\\' # this is important; the subsequent regex will sub it back to \\ 81 | return val 82 | 83 | 84 | def latin_unicode_escape_replace(matchobj: re.Match[str]) -> str: 85 | s = matchobj.group(0) 86 | return _latin_escape_replace(s) 87 | 88 | 89 | def _unicode_escape_replace(s: str) -> str: 90 | ret: str = ast.literal_eval(f'"{s}"') 91 | return ret 92 | 93 | 94 | def unicode_escape_replace(matchobj: re.Match[str]) -> str: 95 | s = matchobj.group(0) 96 | return _unicode_escape_replace(s) 97 | 98 | 99 | class T_TokenSlice(Protocol): 100 | def __getitem__(self, item: int) -> JSON5Token: 101 | ... 102 | 103 | 104 | class T_AnyProduction(Protocol): 105 | _slice: T_TokenSlice 106 | 107 | 108 | class T_TextProduction(Protocol): 109 | wsc0: list[Comment | str] 110 | wsc1: list[Comment | str] 111 | value: Value 112 | 113 | def __getitem__(self, i: Literal[1]) -> Value: 114 | ... 115 | 116 | 117 | class T_FirstKeyValuePairProduction(Protocol): 118 | wsc0: list[Comment | str] 119 | wsc1: list[Comment | str] 120 | wsc2: list[Comment | str] 121 | key: Key 122 | value: Value 123 | _slice: T_TokenSlice 124 | 125 | def __getitem__(self, item: int) -> Key | Value: 126 | ... 127 | 128 | 129 | class T_WSCProduction(Protocol): 130 | _slice: T_TokenSlice 131 | 132 | def __getitem__(self, item: Literal[0]) -> str | Comment: 133 | ... 134 | 135 | 136 | class T_CommentProduction(Protocol): 137 | _slice: T_TokenSlice 138 | 139 | def __getitem__(self, item: Literal[0]) -> str: 140 | ... 141 | 142 | 143 | class T_KeyValuePairsProduction(Protocol): 144 | _slice: T_TokenSlice 145 | first_key_value_pair: KeyValuePair 146 | subsequent_key_value_pair: list[KeyValuePair] 147 | 148 | 149 | class T_JsonObjectProduction(Protocol): 150 | _slice: T_TokenSlice 151 | key_value_pairs: tuple[list[KeyValuePair], TrailingComma | None] | None 152 | wsc: list[Comment | str] 153 | 154 | 155 | class SubsequentKeyValuePairProduction(Protocol): 156 | _slice: T_TokenSlice 157 | wsc: list[Comment | str] 158 | first_key_value_pair: KeyValuePair | None 159 | 160 | 161 | class T_FirstArrayValueProduction(Protocol): 162 | _slice: T_TokenSlice 163 | 164 | def __getitem__(self, item: Literal[1]) -> Value: 165 | ... 166 | 167 | wsc: list[Comment | str] 168 | 169 | 170 | class T_SubsequentArrayValueProduction(Protocol): 171 | _slice: T_TokenSlice 172 | first_array_value: Value | None 173 | wsc: list[Comment | str] 174 | 175 | 176 | class T_ArrayValuesProduction(Protocol): 177 | _slice: T_TokenSlice 178 | first_array_value: Value 179 | subsequent_array_value: list[Value] 180 | 181 | 182 | class T_JsonArrayProduction(Protocol): 183 | _slice: T_TokenSlice 184 | array_values: tuple[list[Value], TrailingComma | None] | None 185 | wsc: list[Comment | str] 186 | 187 | 188 | class T_IdentifierProduction(Protocol): 189 | _slice: T_TokenSlice 190 | 191 | def __getitem__(self, item: Literal[0]) -> str: 192 | ... 
        ...
193 | 194 | 195 | class T_KeyProduction(Protocol): 196 | def __getitem__(self, item: Literal[1]) -> Identifier | DoubleQuotedString | SingleQuotedString: 197 | ... 198 | 199 | 200 | class T_NumberProduction(Protocol): 201 | _slice: T_TokenSlice 202 | 203 | def __getitem__(self, item: Literal[0]) -> str: 204 | ... 205 | 206 | 207 | class T_ValueNumberProduction(Protocol): 208 | _slice: T_TokenSlice 209 | number: Infinity | NaN | Float | Integer 210 | 211 | 212 | class T_ExponentNotationProduction(Protocol): 213 | _slice: T_TokenSlice 214 | 215 | def __getitem__(self, item: int) -> str: 216 | ... 217 | 218 | 219 | class T_StringTokenProduction(Protocol): 220 | _slice: T_TokenSlice 221 | 222 | def __getitem__(self, item: Literal[0]) -> str: 223 | ... 224 | 225 | 226 | class T_StringProduction(Protocol): 227 | _slice: T_TokenSlice 228 | 229 | def __getitem__(self, item: Literal[0]) -> DoubleQuotedString | SingleQuotedString: 230 | ... 231 | 232 | 233 | class T_ValueProduction(Protocol): 234 | _slice: T_TokenSlice 235 | 236 | def __getitem__( 237 | self, item: Literal[0] 238 | ) -> ( 239 | DoubleQuotedString 240 | | SingleQuotedString 241 | | JSONObject 242 | | JSONArray 243 | | BooleanLiteral 244 | | NullLiteral 245 | | Infinity 246 | | Integer 247 | | Float 248 | | NaN 249 | ): 250 | ... 251 | 252 | 253 | T_CallArg = typing.TypeVar('T_CallArg') 254 | _: typing.Callable[..., typing.Callable[[T_CallArg], T_CallArg]] 255 | 256 | 257 | class JSONParser(Parser): # type: ignore[misc] 258 | # debugfile = 'parser.out' 259 | tokens = JSONLexer.tokens 260 | log = QuietSlyLogger(sys.stderr) 261 | 262 | def __init__(self, *args: Any, **kwargs: Any): 263 | super().__init__(*args, **kwargs) 264 | self.errors: list[JSON5DecodeError] = [] 265 | self.last_token: JSON5Token | None = None 266 | self.seen_tokens: list[JSON5Token] = [] 267 | self.expecting: list[list[str]] = [] 268 | 269 | @_('{ wsc } value { wsc }') 270 | def text(self, p: T_TextProduction) -> JSONText: 271 | node = JSONText(value=p[1], tok=p.value._tok) 272 | for wsc in p.wsc0: 273 | node.wsc_before.append(wsc) 274 | for wsc in p.wsc1: 275 | node.wsc_after.append(wsc) 276 | return node 277 | 278 | @_('key { wsc } seen_colon COLON { wsc } object_value_seen value { wsc }') 279 | def first_key_value_pair(self, p: T_FirstKeyValuePairProduction) -> KeyValuePair: 280 | key = p[0] 281 | for wsc in p.wsc0: 282 | key.wsc_after.append(wsc) 283 | value = p[6] 284 | for wsc in p.wsc1: 285 | value.wsc_before.append(wsc) 286 | for wsc in p.wsc2: 287 | value.wsc_after.append(wsc) 288 | return KeyValuePair(key=p.key, value=p.value) 289 | 290 | @_('object_delimiter_seen COMMA { wsc } [ first_key_value_pair ]') 291 | def subsequent_key_value_pair(self, p: SubsequentKeyValuePairProduction) -> KeyValuePair | TrailingComma: 292 | node: KeyValuePair | TrailingComma 293 | if p.first_key_value_pair: 294 | node = p.first_key_value_pair 295 | for wsc in p.wsc: 296 | node.key.wsc_before.append(wsc) 297 | else: 298 | node = TrailingComma(tok=p._slice[1]) 299 | for wsc in p.wsc: 300 | node.wsc_after.append(wsc) 301 | return node 302 | 303 | @_('WHITESPACE', 'comment') 304 | def wsc(self, p: T_WSCProduction) -> str | Comment: 305 | return p[0] 306 | 307 | @_('BLOCK_COMMENT') 308 | def comment(self, p: T_CommentProduction) -> BlockComment: 309 | return BlockComment(p[0], tok=p._slice[0]) 310 | 311 | @_('LINE_COMMENT') # type: ignore[no-redef] 312 | def comment(self, p: T_CommentProduction): 313 | return LineComment(p[0], tok=p._slice[0]) 314 | 315 | @_('first_key_value_pair 
{ subsequent_key_value_pair }') 316 | def key_value_pairs(self, p: T_KeyValuePairsProduction) -> tuple[list[KeyValuePair], TrailingComma | None]: 317 | ret = [ 318 | p.first_key_value_pair, 319 | ] 320 | num_sqvp = len(p.subsequent_key_value_pair) 321 | for index, value in enumerate(p.subsequent_key_value_pair): 322 | if isinstance(value, TrailingComma): 323 | if index + 1 != num_sqvp: 324 | offending_token = value._tok 325 | self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", offending_token)) 326 | return ret, value 327 | else: 328 | ret.append(value) 329 | return ret, None 330 | 331 | @_('') 332 | def seen_LBRACE(self, p: Any) -> None: 333 | self.expecting.append(['RBRACE', 'key']) 334 | 335 | @_('') 336 | def seen_key(self, p: Any) -> None: 337 | self.expecting.pop() 338 | self.expecting.append(['COLON']) 339 | 340 | @_('') 341 | def seen_colon(self, p: Any) -> None: 342 | self.expecting.pop() 343 | self.expecting.append(['value']) 344 | 345 | @_('') 346 | def object_value_seen(self, p: Any) -> None: 347 | self.expecting.pop() 348 | self.expecting.append(['COMMA', 'RBRACE']) 349 | 350 | @_('') 351 | def object_delimiter_seen(self, p: Any) -> None: 352 | self.expecting.pop() 353 | self.expecting.append(['RBRACE', 'key']) 354 | 355 | @_('') 356 | def seen_RBRACE(self, p: Any) -> None: 357 | self.expecting.pop() 358 | 359 | @_('seen_LBRACE LBRACE { wsc } [ key_value_pairs ] seen_RBRACE RBRACE') 360 | def json_object(self, p: T_JsonObjectProduction) -> JSONObject: 361 | if not p.key_value_pairs: 362 | node = JSONObject(leading_wsc=list(p.wsc or []), tok=p._slice[1], end_tok=p._slice[5]) 363 | else: 364 | kvps, trailing_comma = p.key_value_pairs 365 | node = JSONObject( 366 | *kvps, 367 | trailing_comma=trailing_comma, 368 | leading_wsc=list(p.wsc or []), 369 | tok=p._slice[1], 370 | end_tok=p._slice[5], 371 | ) 372 | 373 | return node 374 | 375 | @_('array_value_seen value { wsc }') 376 | def first_array_value(self, p: T_FirstArrayValueProduction) -> Value: 377 | node = p[1] 378 | for wsc in p.wsc: 379 | node.wsc_after.append(wsc) 380 | return node 381 | 382 | @_('array_delimiter_seen COMMA { wsc } [ first_array_value ]') 383 | def subsequent_array_value(self, p: T_SubsequentArrayValueProduction) -> Value | TrailingComma: 384 | node: Value | TrailingComma 385 | if p.first_array_value: 386 | node = p.first_array_value 387 | for wsc in p.wsc: 388 | node.wsc_before.append(wsc) 389 | else: 390 | node = TrailingComma(tok=p._slice[1]) 391 | for wsc in p.wsc: 392 | node.wsc_after.append(wsc) 393 | return node 394 | 395 | @_('first_array_value { subsequent_array_value }') 396 | def array_values(self, p: T_ArrayValuesProduction) -> tuple[list[Value], TrailingComma | None]: 397 | ret = [ 398 | p.first_array_value, 399 | ] 400 | num_values = len(p.subsequent_array_value) 401 | for index, value in enumerate(p.subsequent_array_value): 402 | if isinstance(value, TrailingComma): 403 | if index + 1 != num_values: 404 | self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", value._tok)) 405 | return ret, value 406 | return ret, value 407 | else: 408 | ret.append(value) 409 | return ret, None 410 | 411 | @_('seen_LBRACKET LBRACKET { wsc } [ array_values ] seen_RBRACKET RBRACKET') 412 | def json_array(self, p: T_JsonArrayProduction) -> JSONArray: 413 | if not p.array_values: 414 | node = JSONArray(tok=p._slice[1], end_tok=p._slice[5]) 415 | else: 416 | values, trailing_comma = p.array_values 417 | node = JSONArray(*values, trailing_comma=trailing_comma, 
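                # (added note) per the production above, p._slice holds six symbols:
                # the empty seen_LBRACKET/seen_RBRACKET marker rules sit at slots 0 and 4,
                # so slot 1 is the LBRACKET token and slot 5 the RBRACKET token.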
tok=p._slice[1], end_tok=p._slice[5]) 418 | 419 | for wsc in p.wsc: 420 | node.leading_wsc.append(wsc) 421 | 422 | return node 423 | 424 | @_('') 425 | def seen_LBRACKET(self, p: Any) -> None: 426 | self.expecting.append(['RBRACKET', 'value']) 427 | 428 | @_('') 429 | def seen_RBRACKET(self, p: Any) -> None: 430 | self.expecting.pop() 431 | 432 | @_('') 433 | def array_delimiter_seen(self, p: Any) -> None: 434 | assert len(self.expecting[-1]) == 2 435 | self.expecting[-1].pop() 436 | self.expecting[-1].append('value') 437 | 438 | @_('') 439 | def array_value_seen(self, p: Any) -> None: 440 | assert len(self.expecting[-1]) == 2 441 | assert self.expecting[-1][-1] == 'value' 442 | self.expecting[-1].pop() 443 | self.expecting[-1].append('COMMA') 444 | 445 | @_('NAME') 446 | def identifier(self, p: T_IdentifierProduction) -> Identifier: 447 | raw_value = p[0] 448 | name = re.sub(r'\\u[0-9a-fA-F]{4}', unicode_escape_replace, raw_value) 449 | pattern = r'[\w_\$]([\w_\d\$\p{Pc}\p{Mn}\p{Mc}\u200C\u200D])*' 450 | if not re.fullmatch(pattern, name): 451 | self.errors.append(JSON5DecodeError("Invalid identifier name", p._slice[0])) 452 | return Identifier(name=name, raw_value=raw_value, tok=p._slice[0]) 453 | 454 | @_('seen_key identifier', 'seen_key string') 455 | def key(self, p: T_KeyProduction) -> Identifier | DoubleQuotedString | SingleQuotedString: 456 | node = p[1] 457 | return node 458 | 459 | @_('INTEGER') 460 | def number(self, p: T_NumberProduction): 461 | return Integer(p[0], tok=p._slice[0]) 462 | 463 | @_('FLOAT') # type: ignore[no-redef] 464 | def number(self, p: T_NumberProduction): 465 | return Float(p[0], tok=p._slice[0]) 466 | 467 | @_('OCTAL') # type: ignore[no-redef] 468 | def number(self, p: T_NumberProduction): 469 | self.errors.append(JSON5DecodeError("Invalid integer literal. Octals are not allowed", p._slice[0])) 470 | raw_value = p[0] 471 | if re.search(r'[89]+', raw_value): 472 | self.errors.append(JSON5DecodeError("Invalid octal format. 
Octal digits must be in range 0-7", p._slice[0])) 473 | return Integer(raw_value=oct(0), is_octal=True, tok=p._slice[0]) 474 | return Integer(raw_value, is_octal=True, tok=p._slice[0]) 475 | 476 | @_('INFINITY') # type: ignore[no-redef] 477 | def number(self, p: T_AnyProduction) -> Infinity: 478 | return Infinity(tok=p._slice[0]) 479 | 480 | @_('NAN') # type: ignore[no-redef] 481 | def number(self, p: T_AnyProduction) -> NaN: 482 | return NaN(tok=p._slice[0]) 483 | 484 | @_('MINUS number') 485 | def value(self, p: T_ValueNumberProduction) -> UnaryOp: 486 | if isinstance(p.number, Infinity): 487 | p.number.negative = True 488 | node = UnaryOp(op='-', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok) 489 | return node 490 | 491 | @_('PLUS number') # type: ignore[no-redef] 492 | def value(self, p: T_ValueNumberProduction): 493 | node = UnaryOp(op='+', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok) 494 | return node 495 | 496 | @_('INTEGER EXPONENT', 'FLOAT EXPONENT') # type: ignore[no-redef] 497 | def number(self, p: T_ExponentNotationProduction) -> Float: 498 | exp_notation = p[1][0] # e or E 499 | return Float(p[0] + p[1], exp_notation=exp_notation, tok=p._slice[0], end_tok=p._slice[1]) 500 | 501 | @_('HEXADECIMAL') # type: ignore[no-redef] 502 | def number(self, p: T_NumberProduction) -> Integer: 503 | return Integer(p[0], is_hex=True, tok=p._slice[0]) 504 | 505 | @_('DOUBLE_QUOTE_STRING') 506 | def double_quoted_string(self, p: T_StringTokenProduction) -> DoubleQuotedString: 507 | raw_value = p[0] 508 | contents = raw_value[1:-1] 509 | terminator_in_string = re.search(r'(? SingleQuotedString: 534 | raw_value = p[0] 535 | contents = raw_value[1:-1] 536 | terminator_in_string = re.search(r'(? SingleQuotedString | DoubleQuotedString: 561 | return p[0] 562 | 563 | @_('TRUE') 564 | def boolean(self, p: T_AnyProduction) -> BooleanLiteral: 565 | return BooleanLiteral(True, tok=p._slice[0]) 566 | 567 | @_('FALSE') # type: ignore[no-redef] 568 | def boolean(self, p: T_AnyProduction) -> BooleanLiteral: 569 | return BooleanLiteral(False, tok=p._slice[0]) 570 | 571 | @_('NULL') 572 | def null(self, p: T_AnyProduction) -> NullLiteral: 573 | return NullLiteral(tok=p._slice[0]) 574 | 575 | @_( # type: ignore[no-redef] 576 | 'string', 577 | 'json_object', 578 | 'json_array', 579 | 'boolean', 580 | 'null', 581 | 'number', 582 | ) 583 | def value( 584 | self, p: T_ValueProduction 585 | ) -> ( 586 | DoubleQuotedString 587 | | SingleQuotedString 588 | | JSONObject 589 | | JSONArray 590 | | BooleanLiteral 591 | | NullLiteral 592 | | Infinity 593 | | Integer 594 | | Float 595 | | NaN 596 | ): 597 | node = p[0] 598 | return node 599 | 600 | @_('UNTERMINATED_SINGLE_QUOTE_STRING', 'UNTERMINATED_DOUBLE_QUOTE_STRING') # type: ignore[no-redef] 601 | def string(self, p: T_StringTokenProduction) -> SingleQuotedString | DoubleQuotedString: 602 | self.error(p._slice[0]) 603 | raw = p[0] 604 | if raw.startswith('"'): 605 | return DoubleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0]) 606 | return SingleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0]) 607 | 608 | def error(self, token: JSON5Token | None) -> JSON5Token | None: 609 | if token: 610 | if self.expecting: 611 | expected = self.expecting[-1] 612 | 613 | message = f"Syntax Error. 
Was expecting {' or '.join(expected)}" 614 | else: 615 | message = 'Syntax Error' 616 | 617 | self.errors.append(JSON5DecodeError(message, token)) 618 | try: 619 | return next(self.tokens) # type: ignore 620 | except StopIteration: 621 | # EOF 622 | class tok: 623 | type = '$end' 624 | value = None 625 | lineno = None 626 | index = None 627 | end = None 628 | 629 | return JSON5Token(tok(), None) # type: ignore[arg-type] 630 | elif self.last_token: 631 | doc = self.last_token.doc 632 | pos = len(doc) 633 | lineno = doc.count('\n', 0, pos) + 1 634 | colno = pos - doc.rfind('\n', 0, pos) 635 | message = f'Expecting value. Unexpected EOF at: ' f'line {lineno} column {colno} (char {pos})' 636 | if self.expecting: 637 | expected = self.expecting[-1] 638 | message += f'. Was expecting {f" or ".join(expected)}' 639 | self.errors.append(JSON5DecodeError(message, None)) 640 | else: 641 | # Empty file 642 | self.errors.append(JSON5DecodeError('Expecting value. Received unexpected EOF', None)) 643 | return None 644 | 645 | def _token_gen(self, tokens: typing.Iterable[JSON5Token]) -> typing.Generator[JSON5Token, None, None]: 646 | for tok in tokens: 647 | self.last_token = tok 648 | self.seen_tokens.append(tok) 649 | yield tok 650 | 651 | def parse(self, tokens: typing.Iterable[JSON5Token]) -> JSONText: 652 | tokens = self._token_gen(tokens) 653 | model: JSONText = super().parse(tokens) 654 | if self.errors: 655 | if len(self.errors) > 1: 656 | primary_error = self.errors[0] 657 | msg = ( 658 | "There were multiple errors parsing the JSON5 document.\n" 659 | "The primary error was: \n\t{}\n" 660 | "Additionally, the following errors were also detected:\n\t{}" 661 | ) 662 | 663 | num_additional_errors = len(self.errors) - 1 664 | additional_errors = '\n\t'.join(err.args[0] for err in self.errors[1:6]) 665 | if num_additional_errors > 5: 666 | additional_errors += f'\n\t{num_additional_errors - 5} additional error(s) truncated' 667 | msg = msg.format(primary_error.args[0], additional_errors) 668 | err = JSON5DecodeError(msg, None) 669 | err.lineno = primary_error.lineno 670 | err.token = primary_error.token 671 | err.index = primary_error.index 672 | raise err 673 | else: 674 | raise self.errors[0] 675 | return model 676 | 677 | 678 | def parse_tokens(raw_tokens: typing.Iterable[JSON5Token]) -> JSONText: 679 | parser = JSONParser() 680 | return parser.parse(raw_tokens) 681 | 682 | 683 | def parse_source(text: str) -> JSONText: 684 | tokens = tokenize(text) 685 | model = parse_tokens(tokens) 686 | return model 687 | -------------------------------------------------------------------------------- /json5/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyoungtech/json-five/f95e527c7121113674c3621d8244c9c7162a0348/json5/py.typed -------------------------------------------------------------------------------- /json5/tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import typing 5 | from typing import Generator 6 | from typing import NoReturn 7 | 8 | import regex as re 9 | from sly import Lexer # type: ignore 10 | from sly.lex import Token # type: ignore 11 | 12 | from .utils import JSON5DecodeError 13 | 14 | logger = logging.getLogger(__name__) 15 | # logger.addHandler(logging.StreamHandler(stream=sys.stderr)) 16 | # logger.setLevel(level=logging.DEBUG) 17 | 18 | 19 | class JSON5Token(Token): # type: ignore[misc] 20 | ''' 
21 | Representation of a single token. 22 | ''' 23 | 24 | def __init__(self, tok: Token, doc: str): 25 | self.type: str | None = tok.type 26 | self.value: str = tok.value 27 | self.lineno: int = tok.lineno 28 | self.index: int = tok.index 29 | self.doc: str = doc 30 | self.end: int = tok.end 31 | 32 | @property 33 | def colno(self) -> int: 34 | line_start_index = self.doc.rfind('\n', 0, self.index) + 1 35 | return self.index - line_start_index 36 | 37 | @property 38 | def end_colno(self) -> int: 39 | return self.colno + self.end - self.index 40 | 41 | @property 42 | def end_lineno(self) -> int: 43 | return self.lineno + self.value.count('\n') 44 | 45 | __slots__ = ('type', 'value', 'lineno', 'index', 'doc', 'end') 46 | 47 | def __str__(self) -> str: 48 | if self.value: 49 | return self.value 50 | else: 51 | return '' 52 | 53 | def __repr__(self) -> str: 54 | return f'JSON5Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})' 55 | 56 | 57 | T_CallArg = typing.TypeVar('T_CallArg') 58 | _: typing.Callable[[str], typing.Callable[[T_CallArg], T_CallArg]] 59 | 60 | 61 | class JSONLexer(Lexer): # type: ignore[misc] 62 | regex_module = re 63 | reflags = re.DOTALL 64 | tokens = { 65 | LBRACE, 66 | RBRACE, 67 | LBRACKET, 68 | RBRACKET, 69 | DOUBLE_QUOTE_STRING, 70 | SINGLE_QUOTE_STRING, 71 | UNTERMINATED_DOUBLE_QUOTE_STRING, 72 | UNTERMINATED_SINGLE_QUOTE_STRING, 73 | NAME, 74 | COMMA, 75 | BLOCK_COMMENT, 76 | LINE_COMMENT, 77 | WHITESPACE, 78 | TRUE, 79 | FALSE, 80 | NULL, 81 | COLON, 82 | # Numbers 83 | PLUS, 84 | MINUS, 85 | FLOAT, 86 | INTEGER, 87 | INFINITY, 88 | NAN, 89 | EXPONENT, 90 | HEXADECIMAL, 91 | OCTAL, # Not allowed, but we capture as a token to raise error later 92 | } 93 | 94 | def tokenize(self, text: str, lineno: int = 1, index: int = 0) -> Generator[JSON5Token, None, None]: 95 | for tok in super().tokenize(text, lineno, index): 96 | tok = JSON5Token(tok, text) 97 | yield tok 98 | 99 | LBRACE = r'{' 100 | RBRACE = r'}' 101 | LBRACKET = r'\[' 102 | RBRACKET = r'\]' 103 | COLON = r"\:" 104 | COMMA = r"\," 105 | 106 | @_(r'"(?:[^"\\]|\\.)*"') 107 | def DOUBLE_QUOTE_STRING(self, tok: JSON5Token) -> JSON5Token: 108 | self.lineno += tok.value.count('\n') 109 | return tok 110 | 111 | @_(r"'(?:[^'\\]|\\.)*'") 112 | def SINGLE_QUOTE_STRING(self, tok: JSON5Token) -> JSON5Token: 113 | self.lineno += tok.value.count('\n') 114 | return tok 115 | 116 | LINE_COMMENT = r"//[^\n]*" 117 | 118 | @_(r'/\*((.|\n))*?\*/') 119 | def BLOCK_COMMENT(self, tok: JSON5Token) -> JSON5Token: 120 | self.lineno += tok.value.count('\n') 121 | return tok 122 | 123 | @_("[\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u2028\u2029\ufeff]+") 124 | def WHITESPACE(self, tok: JSON5Token) -> JSON5Token: 125 | self.lineno += tok.value.count('\n') 126 | return tok 127 | 128 | MINUS = r'\-' 129 | PLUS = r'\+' 130 | EXPONENT = r"(e|E)(\-|\+)?\d+" 131 | HEXADECIMAL = r'0(x|X)[0-9a-fA-F]+' 132 | OCTAL = r'(0\d+|0o\d+)' 133 | FLOAT = r'(\d+\.\d*)|(\d*\.\d+)' # 23.45 134 | INTEGER = r'\d+' 135 | NAME = r'[\w_\$\\]([\w_\d\$\\\p{Pc}\p{Mn}\p{Mc}\u200C\u200D])*' 136 | 137 | NAME['true'] = TRUE # type: ignore[index] 138 | NAME['false'] = FALSE # type: ignore[index] 139 | NAME['null'] = NULL # type: ignore[index] 140 | NAME['Infinity'] = INFINITY # type: ignore[index] 141 | NAME['NaN'] = NAN # type: ignore[index] 142 | 143 | UNTERMINATED_DOUBLE_QUOTE_STRING = r'"(?:[^"\\]|\\.)*' 144 | UNTERMINATED_SINGLE_QUOTE_STRING = r"'(?:[^'\\]|\\.)*" 145 | 146 | def error(self, t: JSON5Token) -> 
NoReturn: 147 | raise JSON5DecodeError(f'Illegal character {t.value[0]!r} at index {self.index}', None) 148 | 149 | 150 | def tokenize(text: str) -> Generator[JSON5Token, None, None]: 151 | lexer = JSONLexer() 152 | tokens = lexer.tokenize(text) 153 | return tokens 154 | 155 | 156 | def reversed_enumerate(tokens: typing.Sequence[JSON5Token]) -> typing.Generator[tuple[int, JSON5Token], None, None]: 157 | for i in reversed(range(len(tokens))): 158 | tok = tokens[i] 159 | yield i, tok 160 | -------------------------------------------------------------------------------- /json5/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import typing 4 | from json import JSONDecodeError 5 | 6 | __all__ = ['JSON5DecodeError'] 7 | 8 | if typing.TYPE_CHECKING: 9 | from .tokenizer import JSON5Token 10 | 11 | 12 | class JSON5DecodeError(JSONDecodeError): 13 | def __init__(self, msg: str, token: JSON5Token | None): 14 | lineno = getattr(token, 'lineno', 0) 15 | index = getattr(token, 'index', 0) 16 | doc = getattr(token, 'doc', None) 17 | self.token = token 18 | self.index = index 19 | if token and doc: 20 | errmsg = f'{msg} in or near token {token.type} at' 21 | super().__init__(errmsg, doc, index) 22 | else: 23 | ValueError.__init__(self, msg) 24 | self.msg = msg 25 | self.lineno = lineno 26 | 27 | def __reduce__(self) -> tuple[type[JSON5DecodeError], tuple[str, JSON5Token | None]]: 28 | return self.__class__, (self.msg, self.token) 29 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | sly>=0.5 2 | regex 3 | pytest 4 | mypy 5 | coverage 6 | types-regex 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = json-five 3 | version = 1.1.2 4 | url = https://github.com/spyoungtech/json-five 5 | license = Apache 6 | author = Spencer Phillip Young 7 | author_email = spencer.young@spyoung.com 8 | description = A JSON5 parser that, among other features, supports round-trip preservation of comments 9 | long_description = file: README.md 10 | long_description_content_type = text/markdown 11 | classifiers = 12 | License :: OSI Approved :: Apache Software License 13 | Programming Language :: Python :: 3 :: Only 14 | Programming Language :: Python :: 3.8 15 | Programming Language :: Python :: 3.9 16 | Programming Language :: Python :: 3.10 17 | Programming Language :: Python :: 3.11 18 | 19 | license_files = LICENSE 20 | 21 | [options] 22 | packages = json5 23 | python_requires = >=3.8.0 24 | install_requires = 25 | sly>=0.5 26 | regex 27 | 28 | [options.package_data] 29 | json5 = 30 | py.typed 31 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | 5 | from json5.dumper import DefaultDumper 6 | from json5.dumper import ModelDumper 7 | from json5.dumper import modelize 8 | from json5.loader import DefaultLoader 9 | from json5.loader import loads 10 | from 
json5.loader import ModelLoader 11 | from json5.model import Integer 12 | from json5.model import LineComment 13 | from json5.utils import JSON5DecodeError 14 | 15 | 16 | def test_loading_comment_raises_runtime_error_default_loader(): 17 | model = LineComment('// foo') 18 | with pytest.raises(RuntimeError): 19 | DefaultLoader().load(model) 20 | 21 | 22 | def test_loading_unknown_node_raises_error(): 23 | class Foo: 24 | ... 25 | 26 | f = Foo() 27 | with pytest.raises(NotImplementedError): 28 | DefaultLoader().load(f) 29 | 30 | 31 | def test_dumping_unknown_node_raises_error(): 32 | class Foo: 33 | ... 34 | 35 | f = Foo() 36 | with pytest.raises(NotImplementedError): 37 | DefaultDumper().dump(f) 38 | 39 | 40 | def test_known_type_in_wsc_raises_error(): 41 | class Foo: 42 | ... 43 | 44 | f = Foo() 45 | model = loads('{foo: "bar"}', loader=ModelLoader()) 46 | model.value.key_value_pairs[0].key.wsc_before.append(f) 47 | with pytest.raises(ValueError): 48 | ModelDumper().dump(model) 49 | model = loads('{foo: "bar"}', loader=ModelLoader()) 50 | model.value.key_value_pairs[0].key.wsc_after.append(f) 51 | with pytest.raises(ValueError): 52 | ModelDumper().dump(model) 53 | 54 | 55 | def test_modelizing_unknown_object_raises_error(): 56 | class Foo: 57 | ... 58 | 59 | f = Foo() 60 | with pytest.raises(NotImplementedError): 61 | modelize(f) 62 | 63 | 64 | def test_model_dumper_raises_error_for_unknown_node(): 65 | class Foo: 66 | ... 67 | 68 | f = Foo() 69 | with pytest.raises(NotImplementedError): 70 | ModelDumper().dump(f) 71 | 72 | 73 | def test_multiple_errors_all_surface_at_once(): 74 | json_string = """[\n"foo",\n"bar"\n"baz",\n"bacon"\n"eggs"]""" 75 | # 2 errors due to missing comma ^ ^ 76 | with pytest.raises(JSON5DecodeError) as exc_info: 77 | loads(json_string) 78 | assert str(exc_info.value).count('Syntax Error') == 2 79 | 80 | 81 | def test_linebreak_without_continuation_fails(): 82 | json_string = """'Hello \nworld!'""" 83 | with pytest.raises(JSON5DecodeError) as exc_info: 84 | loads(json_string) 85 | assert "Illegal" in str(exc_info.value) 86 | 87 | 88 | def test_linebreak_without_continuation_fails_double(): 89 | json_string = '''"Hello \nworld!"''' 90 | with pytest.raises(JSON5DecodeError) as exc_info: 91 | loads(json_string) 92 | assert "Illegal" in str(exc_info.value) 93 | 94 | 95 | def test_empty_input_raises_error(): 96 | with pytest.raises(JSON5DecodeError) as exc_info: 97 | loads("") 98 | assert "unexpected EOF" in str(exc_info.value) 99 | 100 | 101 | def test_backslash_x_without_two_hexadecimals_raises_error(): 102 | with pytest.raises(JSON5DecodeError) as exc_info: 103 | loads(r"'\x1'") 104 | assert "'\\x' MUST be followed by two hexadecimal digits" in str(exc_info.value) 105 | 106 | 107 | def test_null_escape_may_not_be_followed_by_decimal_digit(): 108 | with pytest.raises(JSON5DecodeError) as exc_info: 109 | loads(r"'\01'") 110 | assert "'\\0' MUST NOT be followed by a decimal digit" in str(exc_info.value) 111 | 112 | 113 | def test_backslash_x_without_two_hexadecimals_raises_error_but_for_double_quotes(): 114 | with pytest.raises(JSON5DecodeError) as exc_info: 115 | loads(r'"\x1"') 116 | assert "'\\x' MUST be followed by two hexadecimal digits" in str(exc_info.value) 117 | 118 | 119 | def test_null_escape_may_not_be_followed_by_decimal_digit_but_for_double_quotes(): 120 | with pytest.raises(JSON5DecodeError) as exc_info: 121 | loads(r'"\01"') 122 | assert "'\\0' MUST NOT be followed by a decimal digit" in str(exc_info.value) 123 | 124 | 125 | def 
test_integer_octal_hex_mutually_exclusive(): 126 | with pytest.raises(ValueError): 127 | Integer(raw_value='0o0', is_hex=True, is_octal=True) 128 | 129 | 130 | def test_invalid_identifier_via_escape_sequence(): 131 | json_string = """{\\u005Cfoo: 1}""" 132 | with pytest.raises(JSON5DecodeError) as exc_info: 133 | loads(json_string) 134 | assert "Invalid identifier name" in str(exc_info.value) 135 | 136 | 137 | @pytest.mark.parametrize( 138 | 'json_string', [""""foo \\\nbar baz \\\nbacon \neggs\"""", """'foo \\\nbar baz \\\nbacon \neggs'"""] 139 | ) 140 | def test_illegal_line_terminator_error_message(json_string): 141 | with pytest.raises(JSON5DecodeError) as exc_info: 142 | loads(json_string) 143 | 144 | exc_message = str(exc_info.value) 145 | exc_lineno_match = re.search(r'line (\d+)', exc_message) 146 | if exc_lineno_match: 147 | exc_lineno = int(exc_lineno_match.groups()[0]) 148 | else: 149 | exc_lineno = None 150 | exc_col_match = re.search(r'column (\d+)', exc_message) 151 | if exc_col_match: 152 | exc_col = int(exc_col_match.groups()[0]) 153 | else: 154 | exc_col = None 155 | exc_index_match = re.search(r'char (\d+)', exc_message) 156 | if exc_index_match: 157 | exc_index = int(exc_index_match.groups()[0]) 158 | else: 159 | exc_index = None 160 | assert (3, 7, 23) == (exc_lineno, exc_col, exc_index) 161 | 162 | 163 | def test_octals_are_rejected_by_default(): 164 | json_string = "0o123" 165 | with pytest.raises(JSON5DecodeError) as exc_info: 166 | loads(json_string) 167 | assert "Invalid integer literal" in str(exc_info.value) 168 | 169 | 170 | def test_malformed_octals_result_in_additional_error(): 171 | json_string = "058" 172 | with pytest.raises(JSON5DecodeError) as exc_info: 173 | loads(json_string) 174 | assert "Invalid octal format" in str(exc_info.value) 175 | 176 | 177 | @pytest.mark.parametrize('json_string', ['{foo: "bar}', "{foo: 'bar}"]) 178 | def test_unterminated_string(json_string): 179 | with pytest.raises(JSON5DecodeError) as exc_info: 180 | loads(json_string) 181 | assert "UNTERMINATED" in str(exc_info.value) 182 | assert "7" in str(exc_info.value) # The index where the underminated string begins 183 | 184 | 185 | def test_array_multiple_trailing_commas_raises_error(): 186 | with pytest.raises(JSON5DecodeError) as exc_info: 187 | loads('["foo",,]') 188 | assert "multiple trailing commas" in str(exc_info.value) 189 | 190 | 191 | def test_object_multiple_trailing_commas_raises_error(): 192 | with pytest.raises(JSON5DecodeError) as exc_info: 193 | loads('{foo: "bar",,}') 194 | assert "multiple trailing commas" in str(exc_info.value) 195 | 196 | 197 | def test_expecting_rbracket(): 198 | json_string = """[true, false""" 199 | with pytest.raises(JSON5DecodeError): 200 | loads(json_string) 201 | 202 | 203 | def test_array_expecting_value_or_bracket(): 204 | json_string = '[' 205 | with pytest.raises(JSON5DecodeError) as exc_info: 206 | loads(json_string) 207 | assert 'RBRACKET or value' in str(exc_info.value) 208 | 209 | 210 | def test_array_expecting_comma_or_bracket(): 211 | json_string = '[true' 212 | with pytest.raises(JSON5DecodeError) as exc_info: 213 | loads(json_string) 214 | assert "RBRACKET or COMMA" in str(exc_info.value) 215 | 216 | 217 | def test_array_expecting_value_or_bracket_trailing_comma(): 218 | json_string = '[true,' 219 | with pytest.raises(JSON5DecodeError) as exc_info: 220 | loads(json_string) 221 | 222 | assert 'RBRACKET or value' in str(exc_info.value) 223 | 224 | 225 | def test_object_expecting_value_or_brace(): 226 | json_string = '{' 227 | 
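    # Context note (added): the "RBRACE or key" wording asserted below comes from
    # the parser's `expecting` stack -- seen_LBRACE pushes ['RBRACE', 'key'] and
    # JSONParser.error() joins the innermost entry with ' or '.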
with pytest.raises(JSON5DecodeError) as exc_info: 228 | loads(json_string) 229 | assert 'RBRACE or key' in str(exc_info.value) 230 | 231 | 232 | def test_object_expecting_comma_or_brace(): 233 | json_string = '{foo: true' 234 | with pytest.raises(JSON5DecodeError) as exc_info: 235 | loads(json_string) 236 | assert "COMMA or RBRACE" in str(exc_info.value) 237 | 238 | 239 | def test_object_expecting_key_or_brace_trailing_comma(): 240 | json_string = '{foo: true,' 241 | with pytest.raises(JSON5DecodeError) as exc_info: 242 | loads(json_string) 243 | assert 'RBRACE or key' in str(exc_info.value) 244 | -------------------------------------------------------------------------------- /tests/test_json5_dump.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | from io import StringIO 4 | 5 | from json5 import dump 6 | from json5 import dumps 7 | from json5.dumper import ModelDumper 8 | from json5.model import Integer 9 | from json5.model import UnaryOp 10 | 11 | 12 | def test_json_dump_empty_object(): 13 | d = {} 14 | assert dumps(d) == '{}' 15 | 16 | 17 | def test_json_dump_empty_array(): 18 | d = [] 19 | assert dumps(d) == '[]' 20 | 21 | 22 | def test_single_key_value_dump(): 23 | d = {'foo': 'bar'} 24 | assert dumps(d) == json.dumps(d) 25 | 26 | 27 | def test_dump_same_as_json(): 28 | d = { 29 | "strings": ["foo", "bar", "baz"], 30 | "numbers": [1, -1, 1.0, math.inf, -math.inf, math.nan], 31 | "lists": ['foo', ['nested_list']], 32 | } 33 | assert dumps(d) == json.dumps(d) 34 | 35 | 36 | def test_dump_indent_same_as_json(): 37 | d = { 38 | "strings": ["foo", "bar", "baz"], 39 | "numbers": [1, -1, 1.0, math.inf, -math.inf, math.nan], 40 | "lists": ['foo', ['nested_list']], 41 | } 42 | assert dumps(d, indent=4) == json.dumps(d, indent=4) 43 | 44 | 45 | def test_dump_boolean(): 46 | d = {'foo': True} 47 | assert dumps(d) == json.dumps(d) 48 | 49 | 50 | def test_dump_bool_false(): 51 | d = {'foo': False} 52 | assert dumps(d) == json.dumps(d) 53 | 54 | 55 | def test_dump_none(): 56 | d = {'foo': None} 57 | assert dumps(d) == json.dumps(d) 58 | 59 | 60 | def test_dump_unary_plus(): 61 | assert dumps(UnaryOp('+', Integer('1')), dumper=ModelDumper()) == '+1' 62 | 63 | 64 | def test_dump_file(): 65 | f = StringIO() 66 | dump("foo", f) 67 | f.seek(0) 68 | assert f.read() == '"foo"' 69 | -------------------------------------------------------------------------------- /tests/test_json5_load.py: -------------------------------------------------------------------------------- 1 | import math 2 | from io import StringIO 3 | 4 | import pytest 5 | 6 | from json5.loader import JsonIdentifier 7 | from json5.loader import load 8 | from json5.loader import loads 9 | 10 | 11 | def test_object_string_key_value_pair(): 12 | json_string = """{"foo":"bar"}""" 13 | assert loads(json_string) == {"foo": "bar"} 14 | 15 | 16 | def test_object_string_key_value_pair_with_whitespace_before_value(): 17 | json_string = """{"foo": "bar"}""" 18 | assert loads(json_string) == {"foo": "bar"} 19 | 20 | 21 | def test_multiple_key_values(): 22 | json_string = """{"foo":"bar","bacon":"eggs"}""" 23 | assert loads(json_string) == {"foo": "bar", "bacon": "eggs"} 24 | 25 | 26 | def test_multiple_string_key_values_with_whitespace(): 27 | json_string = """{"foo": "bar", "bacon" : "eggs"}""" 28 | assert loads(json_string) == {"foo": "bar", "bacon": "eggs"} 29 | 30 | 31 | def test_array_load(): 32 | json_string = """["foo","bar","baz"]""" 33 | assert loads(json_string) == ["foo", 
"bar", "baz"] 34 | 35 | 36 | def test_array_load_with_whitespace(): 37 | json_string = """[ "foo", "bar" , "baz" ]""" 38 | assert loads(json_string) == ["foo", "bar", "baz"] 39 | 40 | 41 | def test_object_load_with_newlines(): 42 | json_string = """{"foo":\n "bar"\n}""" 43 | assert loads(json_string) == {"foo": "bar"} 44 | 45 | 46 | def test_object_load_with_line_comment(): 47 | json_string = """{ // line comment 48 | "foo": "bar" 49 | }""" 50 | assert loads(json_string) == {"foo": "bar"} 51 | 52 | 53 | def test_object_with_multiline_comment(): 54 | json_string = """{ /* foo bar 55 | */ "foo": "bar" 56 | }""" 57 | assert loads(json_string) == {"foo": "bar"} 58 | 59 | 60 | def test_array_load_with_line_comment(): 61 | json_string = """[ // line comment 62 | "foo", "bar" 63 | ]""" 64 | assert loads(json_string) == ["foo", "bar"] 65 | 66 | 67 | def test_array_with_multiline_comment(): 68 | json_string = """[ /* foo bar 69 | */ "foo", "bar" 70 | ]""" 71 | assert loads(json_string) == ["foo", "bar"] 72 | 73 | 74 | def test_nested_object(): 75 | json_string = """{"foo": {"bacon": "eggs"}}""" 76 | assert loads(json_string) == {"foo": {"bacon": "eggs"}} 77 | 78 | 79 | def test_leading_whitespace_object(): 80 | json_string = """ {"foo":"bar"}""" 81 | assert loads(json_string) == {"foo": "bar"} 82 | 83 | 84 | def test_trailing_whitespace_object(): 85 | json_string = """{"foo": "bar"} """ 86 | assert loads(json_string) == {"foo": "bar"} 87 | 88 | 89 | def test_single_quoted_string(): 90 | json_string = """{'foo': 'bar'}""" 91 | assert loads(json_string) == {"foo": "bar"} 92 | 93 | 94 | def test_mixed_usage_quotes(): 95 | json_string = """{"foo": 'bar'}""" 96 | assert loads(json_string) == {"foo": "bar"} 97 | 98 | 99 | def test_trailing_comma_object(): 100 | json_string = """{"foo": "bar", }""" 101 | assert loads(json_string) == {"foo": "bar"} 102 | 103 | 104 | def test_trailing_comma_array(): 105 | json_string = """["foo","bar", "baz",]""" 106 | assert loads(json_string) == ["foo", "bar", "baz"] 107 | 108 | 109 | def test_trailing_comma_array_with_trailing_whitespace(): 110 | json_string = """["foo", "bar", "baz", ]""" 111 | assert loads(json_string) == ["foo", "bar", "baz"] 112 | 113 | 114 | def test_trailing_comma_array_with_leading_whitespace_before_comma(): 115 | json_string = """["foo", "bar", "baz" ,]""" 116 | assert loads(json_string) == ["foo", "bar", "baz"] 117 | 118 | 119 | def test_nested_arrays(): 120 | json_string = """[["foo"], ["foo","bar"], "baz"]""" 121 | assert loads(json_string) == [["foo"], ["foo", "bar"], "baz"] 122 | 123 | 124 | def test_single_quote_with_escape_single_quote(): 125 | json_string = r"""{'fo\'o': 'bar'}""" 126 | assert loads(json_string) == {"fo'o": "bar"} 127 | 128 | 129 | def test_double_quote_with_escape_double_quote(): 130 | json_string = r"""{"fo\"o": "bar"}""" 131 | assert loads(json_string) == {'fo"o': "bar"} 132 | 133 | 134 | def test_escape_sequence_strings(): 135 | json_string = r"""'\A\C\/\D\C'""" 136 | assert loads(json_string) == "AC/DC" 137 | 138 | 139 | def test_line_continuations(): 140 | json_string = r"""'Hello \ 141 | world!'""" 142 | assert loads(json_string) == "Hello world!" 143 | 144 | 145 | @pytest.mark.parametrize("terminator", ["\r\n", "\n", "\u2028", "\u2029"]) 146 | def test_line_continuations_alternate_terminators(terminator): 147 | json_string = f"""'Hello \\{terminator}world!'""" 148 | assert loads(json_string) == "Hello world!" 
149 | 150 | 151 | def test_number_literals_inf_nan(): 152 | json_string = """{ 153 | "positiveInfinity": Infinity, 154 | "negativeInfinity": -Infinity, 155 | "notANumber": NaN,}""" 156 | assert loads(json_string) == { 157 | "positiveInfinity": math.inf, 158 | "negativeInfinity": -math.inf, 159 | "notANumber": math.nan, 160 | } 161 | 162 | 163 | def test_number_literals(): 164 | json_string = """{ 165 | "integer": 123, 166 | "withFractionPart": 123.456, 167 | "onlyFractionPart": .456, 168 | "withExponent": 123e-2}""" 169 | assert loads(json_string) == { 170 | "integer": 123, 171 | "withFractionPart": 123.456, 172 | "onlyFractionPart": 0.456, 173 | "withExponent": 123e-2, 174 | } 175 | 176 | 177 | def test_escape_sequences(): 178 | json_string = r"""{ 179 | "foo": "foo\nbar\nbaz", 180 | "bar": "foo\\bar\\baz", 181 | "baz": "foo\tbar\tbaz"}""" 182 | assert loads(json_string) == { 183 | "foo": "foo\nbar\nbaz", 184 | "bar": "foo\\bar\\baz", 185 | "baz": "foo\tbar\tbaz", 186 | } 187 | 188 | 189 | def test_empty_object(): 190 | json_string = "{}" 191 | assert loads(json_string) == {} 192 | 193 | 194 | def test_empty_array(): 195 | json_string = "[]" 196 | assert loads(json_string) == [] 197 | 198 | 199 | @pytest.mark.parametrize( 200 | "json_string", 201 | [ 202 | '{"foo": "bar", "bar" "baz"', 203 | '["foo" "bar"]', 204 | "[,]", 205 | "{,}", 206 | "!", 207 | '{"foo": "bar" "bacon": "eggs"}', 208 | ], 209 | ) 210 | def test_invalid_json(json_string): 211 | with pytest.raises(Exception): 212 | loads(json_string) 213 | 214 | 215 | def test_object_with_identifier_key(): 216 | json_string = """{unquoted: "foo"}""" 217 | assert loads(json_string) == {"unquoted": "foo"} 218 | 219 | 220 | def test_identifier_persists_load(): 221 | json_string = """{unquoted: "foo"}""" 222 | assert isinstance(list(loads(json_string).keys())[0], JsonIdentifier) 223 | 224 | 225 | def test_single_item_array(): 226 | json_string = """["foo"]""" 227 | assert loads(json_string) == ["foo"] 228 | 229 | 230 | def test_single_item_array_with_trailing_comma(): 231 | json_string = """["foo" , ]""" 232 | assert loads(json_string) == ["foo"] 233 | 234 | 235 | def test_hexadecimal_load(): 236 | json_string = """ 237 | { 238 | positiveHex: 0xdecaf, 239 | negativeHex: -0xC0FFEE ,}""" 240 | assert loads(json_string) == {"positiveHex": 0xDECAF, "negativeHex": -0xC0FFEE} 241 | 242 | 243 | def test_boolean_load_true(): 244 | json_string = """{foo: true}""" 245 | assert loads(json_string) == {'foo': True} 246 | 247 | 248 | def test_boolean_load_false(): 249 | json_string = """{foo: false}""" 250 | assert loads(json_string) == {'foo': False} 251 | 252 | 253 | def test_null_load(): 254 | json_string = """{foo: null}""" 255 | assert loads(json_string) == {'foo': None} 256 | 257 | 258 | def test_unary_plus_load(): 259 | json_string = """{foo: +12 }""" 260 | assert loads(json_string) == {'foo': 12} 261 | 262 | 263 | def test_load_from_file(): 264 | f = StringIO('{foo: 123}') 265 | assert load(f) == {'foo': 123} 266 | 267 | 268 | def test_load_empty_array_with_whitespace(): 269 | json_string = "{ }" 270 | assert loads(json_string) == {} 271 | 272 | 273 | def test_load_empty_object_wtih_whitespace(): 274 | json_string = "[ ]" 275 | assert loads(json_string) == [] 276 | 277 | 278 | def test_load_empty_object_with_comments(): 279 | json_string = "{ // foo \n}" 280 | assert loads(json_string) == {} 281 | 282 | 283 | def test_load_empty_array_with_comments(): 284 | json_string = "[ // foo \n]" 285 | assert loads(json_string) == [] 286 | 287 | 288 | def 
test_load_array_with_comment_before_additional_element(): 289 | json_string = "['foo',/* comment */ 'bar', // foo\n'baz']" 290 | assert loads(json_string) == ['foo', 'bar', 'baz'] 291 | 292 | 293 | def test_load_object_with_additional_comments(): 294 | json_string = """{ 295 | "foo": /* comment */ "bar", 296 | // another comment 297 | bacon /* breakfast */: "eggs" // better than spam 298 | } 299 | """ 300 | assert loads(json_string) == {'foo': 'bar', 'bacon': 'eggs'} 301 | 302 | 303 | def test_load_latin_escape(): 304 | json_string = r'"\x5C"' 305 | assert loads(json_string) == '\\' 306 | 307 | 308 | def test_latin_escape_backslash_is_not_real_backslash(): 309 | assert loads("""'\\x5C01'""") == "\\01" 310 | 311 | 312 | def test_escape_unicode(): 313 | json_string = """ 314 | { 315 | sig\\u03A3ma: "\\u03A3 is the sum of all things" 316 | } 317 | """ 318 | assert loads(json_string) == {"sig\u03A3ma": "\u03A3 is the sum of all things"} 319 | 320 | 321 | def test_load_identifier_with_connector_punctuation(): 322 | json_string = """{foo⁀bar: 1}""" 323 | assert loads(json_string) == {"foo⁀bar": 1} 324 | -------------------------------------------------------------------------------- /tests/test_json5_official_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from collections import namedtuple 4 | 5 | import pytest 6 | 7 | from json5 import dumps 8 | from json5 import JSON5DecodeError 9 | from json5 import load 10 | from json5 import loads 11 | from json5.dumper import ModelDumper 12 | from json5.loader import ModelLoader 13 | 14 | tests_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../json5-tests')) 15 | 16 | error_specs = [] 17 | specs = [] 18 | 19 | for root, dirs, files in os.walk(tests_path): 20 | for f in files: 21 | if f.endswith('.json5') or f.endswith('.json'): 22 | specs.append(os.path.join(root, f)) 23 | elif f.endswith('.txt') or f.endswith('.js'): 24 | error_spec = f.replace('.txt', '.errorSpec').replace('.js', '.errorSpec') 25 | error_specs.append((os.path.join(root, f), os.path.join(root, error_spec))) 26 | 27 | 28 | @pytest.mark.parametrize('fp', specs) 29 | def test_official_files(fp): 30 | if not os.path.exists(tests_path): 31 | pytest.skip("Tests repo was not present in expected location. Skipping.") 32 | return 33 | load(open(fp, encoding='utf-8')) 34 | 35 | 36 | @pytest.mark.parametrize('fp', specs) 37 | def test_official_files_rt_dumps_no_error(fp): 38 | if not os.path.exists(tests_path): 39 | pytest.skip("Tests repo was not present in expected location. Skipping.") 40 | with open(fp, encoding='utf-8') as f: 41 | json_string = f.read() 42 | dumps(loads(json_string)) 43 | 44 | 45 | @pytest.mark.parametrize('fp', specs) 46 | def test_official_files_rt_model(fp): 47 | if not os.path.exists(tests_path): 48 | pytest.skip("Tests repo was not present in expected location. Skipping.") 49 | with open(fp, encoding='utf-8') as f: 50 | json_string = f.read() 51 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 52 | 53 | 54 | @pytest.mark.parametrize(('input_file', 'expected'), error_specs) 55 | def test_official_error_specs(input_file, expected): 56 | if not os.path.exists(tests_path): 57 | pytest.skip("Tests repo was not present in expected location. 
Skipping.") 58 | return 59 | with pytest.raises(JSON5DecodeError): 60 | load(open(input_file, encoding='utf-8')) 61 | 62 | 63 | @pytest.mark.parametrize(('input_file', 'expected'), error_specs) 64 | def test_official_error_specs_positions(input_file, expected): 65 | ErrorInfo = namedtuple('ErrorInfo', field_names=['line', 'col', 'at']) 66 | if not os.path.exists(tests_path): 67 | pytest.skip("Tests repo was not present in expected location. Skipping.") 68 | return 69 | if any(name in input_file for name in ['top-level-inline-comment.txt', 'unescaped-multi-line-string.txt']): 70 | pytest.xfail("We make better error messages for these") 71 | if os.path.exists(expected): 72 | errorspec = load(open(expected, encoding='utf-8')) 73 | else: 74 | pytest.skip("No error spec") 75 | return 76 | 77 | with pytest.raises(JSON5DecodeError) as exc_info: 78 | load(open(input_file, encoding='utf-8')) 79 | 80 | at = errorspec['at'] 81 | lineno = errorspec['lineNumber'] 82 | col = errorspec['columnNumber'] 83 | # msg = errorspec['message'] 84 | exc_message = str(exc_info.value) 85 | exc_lineno_match = re.search(r'line (\d+)', exc_message) 86 | if exc_lineno_match: 87 | exc_lineno = int(exc_lineno_match.groups()[0]) 88 | else: 89 | exc_lineno = None 90 | exc_col_match = re.search(r'column (\d+)', exc_message) 91 | if exc_col_match: 92 | exc_col = int(exc_col_match.groups()[0]) 93 | else: 94 | exc_col = None 95 | exc_index_match = re.search(r'char (\d+)', exc_message) 96 | if exc_index_match: 97 | exc_index = int(exc_index_match.groups()[0]) 98 | else: 99 | exc_index = None 100 | assert ErrorInfo(exc_lineno, exc_col, exc_index) == ErrorInfo(lineno, col, at - 1), f"{input_file} {exc_message}" 101 | -------------------------------------------------------------------------------- /tests/test_json_helpers.py: -------------------------------------------------------------------------------- 1 | from json5.dumper import modelize 2 | from json5.model import Identifier 3 | 4 | 5 | def test_identifier_can_hash_like_string(): 6 | d = {Identifier('foo', raw_value='foo'): 'bar'} 7 | assert d['foo'] == 'bar' 8 | 9 | 10 | def test_identifier_equals_like_string(): 11 | assert Identifier('foo', raw_value='foo') == 'foo' 12 | 13 | 14 | def test_repr_does_not_contain_wsc(): 15 | model = modelize({'foo': 'bar'}) 16 | assert 'wsc' not in repr(model) 17 | 18 | 19 | def test_identifier_does_not_need_explicit_raw_value(): 20 | assert Identifier('foo').raw_value == 'foo' 21 | -------------------------------------------------------------------------------- /tests/test_loads_options.py: -------------------------------------------------------------------------------- 1 | import json 2 | from decimal import Decimal 3 | 4 | import json5 5 | 6 | 7 | def int_plus_one(int_string): 8 | assert isinstance(int_string, str) 9 | return int(int_string) + 1 10 | 11 | 12 | def float_to_decimal(float_string): 13 | assert isinstance(float_string, str) 14 | return Decimal(float_string) 15 | 16 | 17 | def const_to_silly(const_string): 18 | assert isinstance(const_string, str) 19 | return f'Something Silly {const_string}' 20 | 21 | 22 | def true_object_hook(d): 23 | return {k: True for k in d} 24 | 25 | 26 | def true_object_pair_hook(kvpairs): 27 | return {k: True for k, v in kvpairs} 28 | 29 | 30 | def test_parse_int(): 31 | json_string = """{"foo": 5}""" 32 | assert json5.loads(json_string, parse_int=int_plus_one) == json.loads(json_string, parse_int=int_plus_one) 33 | assert json5.loads(json_string, parse_int=int_plus_one)['foo'] == 6 34 | 35 | 36 | def 
test_parse_float(): 37 | json_string = """{"foo": 5.0}""" 38 | assert json5.loads(json_string, parse_float=float_to_decimal) == json.loads( 39 | json_string, parse_float=float_to_decimal 40 | ) 41 | 42 | 43 | def test_parse_constant_nan(): 44 | json_string = """{"foo": NaN}""" 45 | assert json5.loads(json_string, parse_constant=const_to_silly) == {'foo': 'Something Silly NaN'} 46 | assert json5.loads(json_string, parse_constant=const_to_silly) == json.loads( 47 | json_string, parse_constant=const_to_silly 48 | ) 49 | 50 | 51 | def test_parse_constant_positive_infinity(): 52 | json_string = """{"foo": Infinity}""" 53 | assert json5.loads(json_string, parse_constant=const_to_silly) == {'foo': 'Something Silly Infinity'} 54 | assert json5.loads(json_string, parse_constant=const_to_silly) == json.loads( 55 | json_string, parse_constant=const_to_silly 56 | ) 57 | 58 | 59 | def test_parse_constant_negative_infinity(): 60 | json_string = """{"foo": -Infinity}""" 61 | assert json5.loads(json_string, parse_constant=const_to_silly) == {'foo': 'Something Silly -Infinity'} 62 | assert json5.loads(json_string, parse_constant=const_to_silly) == json.loads( 63 | json_string, parse_constant=const_to_silly 64 | ) 65 | 66 | 67 | def test_object_hook(): 68 | json_string = """{"foo": "bar", "bacon": "eggs"}""" 69 | result = json5.loads(json_string, object_hook=true_object_hook) 70 | assert result == json.loads(json_string, object_hook=true_object_hook) 71 | assert all(value is True for key, value in result.items()) 72 | 73 | 74 | def test_object_pairs_hook(): 75 | json_string = """{"foo": "bar", "bacon": "eggs"}""" 76 | result = json5.loads(json_string, object_pairs_hook=true_object_pair_hook) 77 | assert result == json.loads(json_string, object_pairs_hook=true_object_pair_hook) 78 | assert all(value is True for key, value in result.items()) 79 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | import pytest 4 | 5 | import json5.loader 6 | import json5.model 7 | 8 | TEST_TEXT = '''\ 9 | { 10 | "string_on_same_line": "string on same line", 11 | "multiline_dq_string": "this line has a \ 12 | continuation", 13 | "leadingDecimalPoint": .8675309 , 14 | "andTrailing": 8675309., 15 | "trailingComma": 'in objects', 16 | "backwardsCompatible": "with JSON", 17 | } 18 | ''' 19 | 20 | model = json5.loads(TEST_TEXT, loader=json5.loader.ModelLoader()) 21 | tree = ast.parse(TEST_TEXT) 22 | ast_nodes = [ 23 | node for node in list(ast.walk(tree)) if not isinstance(node, (ast.Expr, ast.Load, ast.Module, ast.UnaryOp)) 24 | ] 25 | json5_nodes = [ 26 | node 27 | for node in list(json5.model.walk(model)) 28 | if not isinstance(node, (json5.model.TrailingComma, json5.model.JSONText)) 29 | ] 30 | 31 | assert len(ast_nodes) == len(json5_nodes) 32 | 33 | 34 | @pytest.mark.parametrize('ast_node, json5_node', list(zip(ast_nodes, json5_nodes))) 35 | @pytest.mark.parametrize( 36 | 'attr_name', 37 | [ 38 | 'col_offset', 39 | 'end_col_offset', 40 | 'lineno', 41 | 'end_lineno', 42 | ], 43 | ) 44 | def test_node_attribute_accuracy(attr_name: str, ast_node, json5_node): 45 | assert getattr(json5_node, attr_name) == getattr( 46 | ast_node, attr_name 47 | ), f'{attr_name} did not match {ast_node!r}, {json5_node!r}' 48 | -------------------------------------------------------------------------------- /tests/test_model_loader_dumper.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from json5.dumper import dumps 4 | from json5.dumper import ModelDumper 5 | from json5.loader import loads 6 | from json5.loader import ModelLoader 7 | 8 | 9 | @pytest.mark.parametrize( 10 | 'json_string', 11 | [ 12 | """{"foo":"bar"}""", 13 | """{"foo": "bar"}""", 14 | """{"foo":"bar","bacon":"eggs"}""", 15 | """{"foo": "bar", "bacon" : "eggs"}""", 16 | """["foo","bar","baz"]""", 17 | """[ "foo", "bar" , "baz" ]""", 18 | """{"foo":\n "bar"\n}""", 19 | """{"foo": {"bacon": "eggs"}}""", 20 | """ {"foo":"bar"}""", 21 | """{"foo": "bar"} """, 22 | """{'foo': 'bar'}""", 23 | """{"foo": 'bar'}""", 24 | """{"foo": "bar",}""", 25 | """["foo","bar", "baz",]""", 26 | """["foo", "bar", "baz", ]""", 27 | """["foo", "bar", "baz" ,]""", 28 | """[["foo"], ["foo","bar"], "baz"]""", 29 | """{unquoted: "foo"}""", 30 | """{unquoted: "foo"}""", 31 | """["foo"]""", 32 | """["foo" , ]""", 33 | ], 34 | ) 35 | def test_round_trip_model_load_dump(json_string): 36 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 37 | 38 | 39 | def test_object_load_with_line_comment(): 40 | json_string = """{ // line comment 41 | "foo": "bar" 42 | }""" 43 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 44 | 45 | 46 | def test_object_with_multiline_comment(): 47 | json_string = """{ /* foo bar 48 | */ "foo": "bar" // Foobar 49 | }""" 50 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 51 | 52 | 53 | def test_array_load_with_line_comment(): 54 | json_string = """[ // line comment 55 | "foo", "bar" 56 | ]""" 57 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 58 | 59 | 60 | def test_array_with_multiline_comment(): 61 | json_string = """[ /* foo bar 62 | */ "foo", "bar" 63 | ]""" 64 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 65 | 66 | 67 | def test_nested_object(): 68 | json_string = """{"foo": {"bacon": "eggs"}}""" 69 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 70 | 71 | 72 | def test_single_quote_with_escape_single_quote(): 73 | json_string = r"""{'fo\'o': 'bar'}""" 74 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 75 | 76 | 77 | def test_double_quote_with_escape_double_quote(): 78 | json_string = r"""{"fo\"o": "bar"}""" 79 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 80 | 81 | 82 | def test_escape_sequence_strings(): 83 | json_string = r"""'\A\C\/\D\C'""" 84 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 85 | 86 | 87 | def test_line_continuations(): 88 | json_string = r"""'Hello \ 89 | world!'""" 90 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 91 | 92 | 93 | @pytest.mark.parametrize("terminator", ["\r\n", "\n", "\u2028", "\u2029"]) 94 | def test_line_continuations_alternate_terminators(terminator): 95 | json_string = f"""'Hello \\{terminator}world!'""" 96 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 97 | 98 | 99 | def test_number_literals_inf_nan(): 100 | json_string = """{ 101 | "positiveInfinity": Infinity, 102 | "negativeInfinity": -Infinity, 103 | "notANumber": NaN,}""" 104 | assert dumps(loads(json_string, 
loader=ModelLoader()), dumper=ModelDumper()) == json_string 105 | 106 | 107 | def test_number_literals(): 108 | json_string = """{ 109 | "integer": 123, 110 | "withFractionPart": 123.456, 111 | "onlyFractionPart": .456, 112 | "withExponent": 123e-2}""" 113 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 114 | 115 | 116 | def test_escape_sequences(): 117 | json_string = r"""{ 118 | "foo": "foo\nbar\nbaz", 119 | "bar": "foo\\bar\\baz", 120 | "baz": "foo\tbar\tbaz"}""" 121 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 122 | 123 | 124 | def test_empty_object(): 125 | json_string = "{}" 126 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 127 | 128 | 129 | def test_empty_array(): 130 | json_string = "[]" 131 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 132 | 133 | 134 | def test_hexadecimal_load(): 135 | json_string = """ 136 | { 137 | positiveHex: 0xdecaf, 138 | negativeHex: -0xC0FFEE,}""" 139 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 140 | 141 | 142 | def test_load_empty_object_with_whitespace(): 143 | json_string = "{ }" 144 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 145 | 146 | 147 | def test_load_empty_array_with_whitespace(): 148 | json_string = "[ ]" 149 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 150 | 151 | 152 | def test_load_empty_object_with_comments(): 153 | json_string = "{ // foo \n}" 154 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 155 | 156 | 157 | def test_load_empty_array_with_comments(): 158 | json_string = "[ // foo \n]" 159 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 160 | -------------------------------------------------------------------------------- /tests/test_modelizer.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import pytest 4 | 5 | from json5.dumper import dumps 6 | from json5.dumper import ModelDumper 7 | from json5.dumper import modelize 8 | from json5.loader import loads 9 | 10 | 11 | @pytest.mark.parametrize( 12 | 'obj', 13 | [ 14 | {'foo': 'bar', 'bacon': 'eggs'}, 15 | ['foo', 'bar', 'baz'], 16 | {}, 17 | [], 18 | ['foo'], 19 | {'foo': 'bar'}, 20 | "Hello world!", 21 | 123, 22 | 1.0, 23 | -1.0, 24 | -2, 25 | math.inf, 26 | -math.inf, 27 | True, 28 | False, 29 | None, 30 | ], 31 | ) 32 | def test_modelize_objects(obj): 33 | assert loads(dumps(modelize(obj), dumper=ModelDumper())) == obj 34 | 35 | 36 | def test_modelize_nan(): 37 | obj = math.nan 38 | assert loads(dumps(modelize(obj), dumper=ModelDumper())) is obj 39 | 40 | 41 | def test_modelize_double_quote_string(): 42 | s = "'" 43 | assert loads(dumps(modelize(s), dumper=ModelDumper())) == s 44 | -------------------------------------------------------------------------------- /tests/test_regressions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from json5 import JSON5DecodeError 4 | from json5 import loads 5 | 6 | 7 | # These tests used to cause the program to hang indefinitely 8 | def test_no_hang(): 9 | json_string = '{"foo": ["foo", [0o11]}, ["baz"]]' 10 | with pytest.raises(JSON5DecodeError): 11 | loads(json_string) 12 | 13 | 14 | def 
test_no_hang2(): 15 | json_string = '[{foo:]}' 16 | with pytest.raises(JSON5DecodeError): 17 | loads(json_string) 18 | 19 | 20 | def test_no_hang3(): 21 | json_string = '[true, {foo:]false}' 22 | with pytest.raises(JSON5DecodeError): 23 | loads(json_string) 24 | -------------------------------------------------------------------------------- /tests/test_roundtrip.py: -------------------------------------------------------------------------------- 1 | # from json5.loader import RoundTripLoader, loads 2 | # from json5.dumper import dumps, RoundTripDumper 3 | # 4 | # 5 | # def test_load_string(): 6 | # json_string = """{"foo":"bar"}""" 7 | # data = loads(json_string, loader=RoundTripLoader()) 8 | # assert data['foo'] == 'bar' 9 | # 10 | # 11 | # def test_load_change_dump_string(): 12 | # json_string = """{"foo": "bar"}""" 13 | # data = loads(json_string, loader=RoundTripLoader()) 14 | # data['foo'] = 'baz' 15 | # new_json_string = dumps(data, dumper=RoundTripDumper()) 16 | # assert 'baz' in new_json_string 17 | # assert new_json_string == """{"foo": "baz"}""" 18 | # 19 | # 20 | # def test_load_change_whitespace_dump_string(): 21 | # json_string = """["foo" ]""" 22 | # data = loads(json_string, loader=RoundTripLoader()) 23 | # elem = data[0] 24 | # elem.wsc_after = [] 25 | # new_json_string = dumps(data, dumper=RoundTripDumper()) 26 | # assert new_json_string == """["foo"]""" 27 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py38,py39,py310,py311 3 | 4 | [testenv] 5 | deps = -rrequirements-dev.txt 6 | passenv = 7 | CI 8 | PYTHONUNBUFFERED 9 | commands = 10 | coverage run -m pytest -s -vvv 11 | mypy --strict --disable-error-code name-defined json5 12 | --------------------------------------------------------------------------------