├── .coveragerc ├── .github └── workflows │ ├── release.yml │ └── unittests.yml ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── QuickStart.rst ├── comments.rst ├── conf.py ├── docrequirements.txt ├── extending.rst ├── how.rst ├── index.rst └── make.bat ├── json5 ├── __init__.py ├── dumper.py ├── loader.py ├── model.py ├── parser.py ├── py.typed ├── tokenizer.py └── utils.py ├── requirements-dev.txt ├── setup.cfg ├── setup.py ├── tests ├── test_errors.py ├── test_json5_dump.py ├── test_json5_load.py ├── test_json5_official_tests.py ├── test_json_helpers.py ├── test_loads_options.py ├── test_model.py ├── test_model_loader_dumper.py ├── test_modelizer.py ├── test_regressions.py └── test_roundtrip.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = json5 3 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | - name: setup python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.11 18 | 19 | - name: build 20 | shell: bash 21 | run: | 22 | python -m pip install --upgrade wheel setuptools sly regex build 23 | python -m build 24 | 25 | - name: Release PyPI 26 | shell: bash 27 | env: 28 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 29 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 30 | run: | 31 | pip install --upgrade twine 32 | twine upload dist/* 33 | 34 | 35 | - name: Release GitHub 36 | uses: softprops/action-gh-release@v1 37 | with: 38 | files: "dist/*" 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/unittests.yml: -------------------------------------------------------------------------------- 1 | on: [ push, pull_request ] 2 | 3 | jobs: 4 | build: 5 | strategy: 6 | fail-fast: false 7 | matrix: 8 | python_version: ["3.10", "3.9", "3.8", "3.11"] 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v2 13 | - name: Setup Python 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python_version }} 17 | 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | python -m pip install -r requirements-dev.txt 22 | python -m pip install . 
23 | python -m pip install tox 24 | git clone https://github.com/json5/json5-tests.git 25 | 26 | - name: Test with coverage/pytest 27 | env: 28 | PYTHONUNBUFFERED: "1" 29 | run: | 30 | tox -e py 31 | - name: Coveralls 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | run: | 35 | pip install --upgrade coveralls 36 | coveralls --service=github 37 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | exclude: ^(tests/.*) 9 | - repo: https://github.com/asottile/reorder-python-imports 10 | rev: v3.12.0 11 | hooks: 12 | - id: reorder-python-imports 13 | 14 | - repo: https://github.com/psf/black 15 | rev: '23.10.1' 16 | hooks: 17 | - id: black 18 | args: 19 | - "-S" 20 | - "-l" 21 | - "120" 22 | 23 | - repo: https://github.com/asottile/pyupgrade 24 | rev: v3.15.0 25 | hooks: 26 | - id: pyupgrade 27 | args: ["--py38-plus"] 28 | 29 | - repo: https://github.com/pre-commit/mirrors-mypy 30 | rev: 'v1.6.1' 31 | hooks: 32 | - id: mypy 33 | args: 34 | - "--strict" 35 | - "--disable-error-code" 36 | - "name-defined" 37 | exclude: ^(tests/.*|setup.py|docs/.*) 38 | additional_dependencies: 39 | - types-regex 40 | 41 | - repo: https://github.com/pycqa/flake8 42 | rev: '6.1.0' # pick a git hash / tag to point to 43 | hooks: 44 | - id: flake8 45 | args: 46 | - "--ignore" 47 | - "E501,E704,E301,W503,F405,F811,F821,F403," 48 | exclude: ^(tests/.*) 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # json-five 2 | 3 | JSON5 for Python 4 | 5 | [![Documentation Status](https://readthedocs.org/projects/json-five/badge/?version=latest)](https://json-five.readthedocs.io/en/latest/?badge=latest) 6 | [![Build](https://github.com/spyoungtech/json-five/actions/workflows/unittests.yml/badge.svg)](https://github.com/spyoungtech/json-five/actions/workflows/unittests.yaml) 7 | [![version](https://img.shields.io/pypi/v/json-five.svg?colorB=blue)](https://pypi.org/project/json-five/) 8 | [![pyversion](https://img.shields.io/pypi/pyversions/json-five.svg?)](https://pypi.org/project/json-five/) 9 | [![Coverage](https://coveralls.io/repos/github/spyoungtech/json-five/badge.svg?branch=main)](https://coveralls.io/github/spyoungtech/json-five?branch=main) 10 | 11 | ## Installation 12 | 13 | ``` 14 | pip install json-five 15 | ``` 16 | 17 | This project requires Python 3.8+ 18 | 19 | 20 | ## Key features 21 | 22 | - Supports the JSON5 spec 23 | - Supports similar interfaces to stdlib `json` module 24 | - Provides an API for working with abstract model representations of JSON5 documents. 25 | - Supports round-trip loading, editing, and dumping, preserving non-data elements such as comments (in model-based load/dump) 26 | 27 | 28 | 29 | # Usage 30 | 31 | **NOTE:** the import name is `json5` which differs from the install name. 32 | 33 | 34 | For basic loading/dumping, the interface is nearly identical to that of the `json` module. 35 | ```python 36 | import json5 37 | json_text = """{ // This is a JSON5 comment 38 | "foo": "bar", /* this is a JSON5 block 39 | comment that can span lines */ 40 | bacon: "eggs" // unquoted Identifiers also work 41 | } 42 | """ 43 | print(json5.loads(json_text)) 44 | # {"foo": "bar", "bacon": "eggs"} 45 | 46 | with open('myfile.json5') as f: 47 | data = json5.load(f) 48 | ``` 49 | 50 | For loading JSON5, the same parameters `object_hook`, `object_pairs_hook` and `parse_*` keyword arguments are available 51 | here for `load`/`loads`. 52 | 53 | Additionally, a new hook, `parse_json5_identifiers`, is available to help users control the 54 | output of parsing identifiers. By default, JSON5 Identifiers in object keys are returned as a `JsonIdentifier` object, 55 | which is a subclass of `str` (meaning it's compatible anywhere `str` is accepted). 56 | This helps keep keys the same round-trip, rather than converting unquoted identifiers into 57 | strings: 58 | 59 | ```python 60 | >>> text = '{bacon: "eggs"}' 61 | >>> json5.dumps(json5.loads(text)) == text 62 | True 63 | ``` 64 | 65 | You can change this behavior with the `parse_json5_identifiers` argument with a callable that receives the `JsonIdentifier` object 66 | and its return value is used instead. For example, you can specify `parse_json5_identifiers=str` to convert identifiers 67 | to strings. 68 | 69 | ```python 70 | >>> json5.dumps(json5.loads(text, parse_json5_identifiers=str)) 71 | '{"bacon": "eggs"}' 72 | ``` 73 | 74 | 75 | ## Custom loaders; Abstract JSON5 Models 76 | 77 | **Note:** the underlying model API and tokens are not stable and are subject to breaking changes, even in minor releases. 
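The next paragraphs introduce this model API; as a quick preview, here is a minimal round-trip sketch adapted from the comments documentation in `docs/comments.rst`. Attribute names such as `wsc_before` belong to the unstable model API described in the note above and may change between releases:

```python
from json5.loader import loads, ModelLoader
from json5.dumper import dumps, ModelDumper
from json5.model import BlockComment

source = '{"foo": "bar"}'
# Parse to a model (instead of plain Python objects) so non-data elements survive
model = loads(source, loader=ModelLoader())
# Attach a block comment before the first key
model.value.key_value_pairs[0].key.wsc_before.append(BlockComment("/* comment */"))
dumps(model, dumper=ModelDumper())  # '{/* comment */"foo": "bar"}'
```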
78 | 79 | json-five also features an API for representing JSON5 as an abstract model. This enables a wide degree of capabilities for 80 | various use-cases, such as linters, formatters, custom serialization/deserialization, and more. 81 | 82 | 83 | Example: a simple model 84 | 85 | ```python 86 | from json5.loader import loads, ModelLoader 87 | json_string = """{foo: "bar"}""" 88 | model = loads(json_string, loader=ModelLoader()) 89 | ``` 90 | The resulting model object looks something like this: 91 | ```python 92 | JSONText( 93 | value=JSONObject( 94 | keys=[Identifier(name="foo", raw_value="foo")], 95 | values=[DoubleQuotedString(characters="bar", raw_value='"bar"')], 96 | trailing_comma=None, 97 | ) 98 | ) 99 | ``` 100 | 101 | 102 | It is possible to make edits to the model, which will affect the output when dumped using the model dumper. However, 103 | there is (currently) no validation to ensure your model edits won't result in invalid JSON5 when dumped. 104 | 105 | You may also implement custom loaders and dumpers to control serialization and deserialization. See the [full documentation](https://json-five.readthedocs.io/en/latest/extending.html#custom-loaders-and-dumpers) 106 | for more information. 107 | 108 | ## Tokenization 109 | 110 | You can also leverage tokenization of JSON5: 111 | 112 | ```python 113 | from json5.tokenizer import tokenize 114 | 115 | json_string = """{foo: "bar"}""" 116 | for tok in tokenize(json_string): 117 | print(tok.type) 118 | ``` 119 | Output would be: 120 | ```text 121 | LBRACE 122 | NAME 123 | COLON 124 | WHITESPACE 125 | DOUBLE_QUOTE_STRING 126 | RBRACE 127 | ``` 128 | 129 | # Status 130 | 131 | This project currently fully supports the JSON5 spec and its interfaces for loading and dumping JSON5 is stable as of v1.0.0. 132 | There is still active development underway, particularly for the underlying abstract JSON5 model representations and 133 | ability to perform edits using the abstract model. 134 | 135 | In the future, this project may take advantage of its sister project currently being developed in Rust: [json-five-rs](https://github.com/spyoungtech/json-five-rs). 136 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/QuickStart.rst: -------------------------------------------------------------------------------- 1 | QuickStart 2 | ========== 3 | 4 | Installation 5 | ------------ 6 | 7 | .. code-block:: 8 | 9 | pip install json-five 10 | 11 | 12 | Basic Usage 13 | ----------- 14 | 15 | Suppose you have a JSON5 compliant file ``my-json-file.json`` 16 | 17 | .. code-block:: 18 | 19 | // This is a JSON5 file! 
20 | {'foo': 'bar'} 21 | 22 | You can load this file to Python like so: 23 | 24 | .. code-block:: 25 | 26 | import json5 27 | with open('my-json-file.json') as f: 28 | data = json5.load(f) 29 | 30 | You can also work directly with strings 31 | 32 | .. code-block:: 33 | 34 | import json5 35 | json_string = '{json5 /* identifiers dont need quotes */: "values do though"}' 36 | data = json5.loads(json_string) 37 | 38 | 39 | Want to do more? Check out :doc:`/extending` to dive deeper! 40 | -------------------------------------------------------------------------------- /docs/comments.rst: -------------------------------------------------------------------------------- 1 | Working with comments; round-trip support 2 | ========================================= 3 | 4 | In order to work with comments, you must work with the raw model. 5 | 6 | Each node in the model has two special attributes: ``.wsc_before`` and ``.wsc_after``. These attributes are a list of 7 | any whitespace or comments that appear before or after the node. 8 | 9 | .. code-block:: 10 | 11 | from json5.loader import loads, ModelLoader 12 | from json5.dumper import dumps, ModelDumper 13 | from json5.model import BlockComment 14 | json_string = """{"foo": "bar"}""" 15 | model = loads(json_string, loader=ModelLoader()) 16 | print(model.value.key_value_pairs[0].value.wsc_before) # [' '] 17 | model.value.key_value_pairs[0].key.wsc_before.append(BlockComment("/* comment */")) 18 | dumps(model, dumper=ModelDumper()) # '{/* comment */"foo": "bar"}' 19 | 20 | 21 | This section will be expanded with time, the API for working with comments will likely change alot in future 22 | versions. 23 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | # -- Path setup -------------------------------------------------------------- 7 | # If extensions (or modules to document with autodoc) are in another directory, 8 | # add these directories to sys.path here. If the directory is relative to the 9 | # documentation root, use os.path.abspath to make it absolute, like shown here. 10 | # 11 | # import os 12 | # import sys 13 | # sys.path.insert(0, os.path.abspath('.')) 14 | # -- Project information ----------------------------------------------------- 15 | 16 | project = 'json-five' 17 | copyright = '2020, Spencer Phillip Young' 18 | author = 'Spencer Phillip Young' 19 | 20 | # The full version, including alpha/beta/rc tags 21 | release = '0.1.0' 22 | 23 | 24 | # -- General configuration --------------------------------------------------- 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be 27 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 28 | # ones. 29 | extensions = [] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # List of patterns, relative to source directory, that match files and 35 | # directories to ignore when looking for source files. 36 | # This pattern also affects html_static_path and html_extra_path. 
37 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 38 | 39 | 40 | # -- Options for HTML output ------------------------------------------------- 41 | 42 | # The theme to use for HTML and HTML Help pages. See the documentation for 43 | # a list of builtin themes. 44 | # 45 | html_theme = 'sphinx_rtd_theme' 46 | 47 | # Add any paths that contain custom static files (such as style sheets) here, 48 | # relative to this directory. They are copied after the builtin static files, 49 | # so a file named "default.css" will overwrite the builtin "default.css". 50 | html_static_path = ['_static'] 51 | -------------------------------------------------------------------------------- /docs/docrequirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==3.0.3 2 | sphinx_rtd_theme==0.4.3 3 | -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | Extending json-five 2 | =================== 3 | 4 | 5 | The ``json`` way 6 | ---------------- 7 | 8 | 9 | ``json5.load`` and ``json5.loads`` support a similar interface to the stdlib ``json`` module. Specifically, 10 | you can provide the following arguments that have the same meaning as in ``json.load``: 11 | 12 | - ``parse_int`` 13 | - ``parse_float`` 14 | - ``parse_constant`` 15 | - ``object_hook`` 16 | - ``object_pairs_hook`` 17 | 18 | This is convenient if you have existing code that uses these arguments with the ``json`` module, but want to also 19 | support JSON5. These options are also useful as a simple way to customize parsing of json types. 20 | 21 | Additionally, a new hook keyword argument, ``parse_json5_identifiers``, is available to help users control the 22 | output of parsing identifiers. By default, JSON5 Identifiers in object keys are returned as a ``JsonIdentifier`` object, 23 | which is a subclass of ``str`` (meaning it's compatible anywhere ``str`` is accepted). 24 | This helps keep keys the same round-trip, rather than converting unquoted identifiers into quoted strings, such that 25 | ``dumps(loads(text)) == text`` (in this case). 26 | 27 | You can change this behavior with the ``parse_json5_identifiers`` keyword argument with a callable that receives the `JsonIdentifier` object 28 | and its return value is used instead. For example, you can specify ``parse_json5_identifiers=str`` to convert identifiers 29 | to normal strings, such that ``dumps(loads('{foo: "bar"}')) == '{"foo": "bar"}'``. 30 | 31 | However, this package does not support the ``cls`` keyword found in the standard library ``json`` module. 32 | If you want to implement custom serializers/deserializers, read on about custom loaders/dumpers. 33 | 34 | 35 | Custom Loaders and Dumpers 36 | -------------------------- 37 | 38 | This package uses "Loaders" as part of the deserialization of JSON text to Python. "Dumpers" are used to 39 | serialize Python objects to JSON text. 40 | 41 | The entry points for loaders and dumpers are the ``load`` and ``dump`` methods, respectively. 42 | You can override these methods to implement custom loading of models or dumping of objects. 43 | 44 | Extending the default loader 45 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 46 | 47 | The default loader takes in a model and produces, in the default case, Python objects. 48 | 49 | As a simple example, you can extend the default loader with your own to customize loading of lists. 
Here, 50 | I'll create a custom loader that, when it encounters an array (``json5.model.JSONArray``) with with only one value, it will return 51 | the single value, rather than a single-item array. 52 | 53 | .. code-block:: 54 | 55 | from json5.loader import DefaultLoader, loads 56 | from json5.model import JSONArray 57 | 58 | 59 | class MyCustomLoader(DefaultLoader): 60 | def load(self, node): 61 | if isinstance(node, JSONArray): 62 | return self.json_array_to_python(node) 63 | else: 64 | return super().load(node) 65 | 66 | def json_array_to_python(self, node): 67 | if len(node.values) == 1: 68 | return self.load(node.values[0]) 69 | else: 70 | return super().json_array_to_python(node) 71 | 72 | The ``loads`` function accepts a ``loader`` keyword argument, where the custom loader can be passed in. 73 | 74 | .. code-block:: 75 | 76 | json_string = "{foo: ['bar', 'baz'], bacon: ['eggs']}" 77 | loads(json_string) # Using the regular default loader 78 | # {'foo': ['bar', 'baz'], 'bacon': ['eggs']} 79 | 80 | loads(json_string, loader=MyCustomLoader()) # use the custom loader instead 81 | # {'foo': ['bar', 'baz'], 'bacon': 'eggs'} 82 | 83 | 84 | Extending the default dumper 85 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 86 | 87 | Extending the dumper follows a similar principle as extending the loader. 88 | 89 | As an example, I'll make a custom dumper that dumps booleans ``True`` and ``False`` to integers instead of the 90 | JSON ``true`` or ``false``. 91 | 92 | .. code-block:: 93 | 94 | from json5.dumper import DefaultDumper, dumps 95 | 96 | class MyCustomDumper(DefaultDumper): 97 | def dump(self, node): 98 | if isinstance(node, bool): 99 | return self.bool_to_json(node) 100 | else: 101 | return super().dump(node) 102 | 103 | def bool_to_json(self, node): 104 | super().dump(int(node.value)) 105 | 106 | And you can see the effects 107 | 108 | .. code-block:: 109 | 110 | >>> dumps([True, False]) 111 | '[true, false]' 112 | >>> dumps([True, False], dumper=MyCustomDumper()) 113 | '[1, 2]' 114 | 115 | 116 | Other loaders/dumpers and tools 117 | ------------------------------- 118 | 119 | Besides the default loader, there is also the ``ModelLoader`` which simply returns the raw model 120 | with no additional processing. 121 | 122 | Besides the default dumper, there is also the ``ModelDumper`` which takes a model and serializes it to JSON. 123 | 124 | The ``json5.dumper.modelize`` function can take python objects and convert them to a model. 125 | 126 | 127 | .. code-block:: 128 | 129 | from json5.dumper import modelize 130 | obj = ['foo', 123, True] 131 | modelize(obj) 132 | 133 | The resulting model: 134 | 135 | .. code-block:: 136 | 137 | JSONArray( 138 | values=[ 139 | SingleQuotedString(characters='foo', raw_value="'foo'"), 140 | Integer(raw_value='123', value=123, is_hex=False), 141 | BooleanLiteral(value=True), 142 | ], 143 | trailing_comma=None, 144 | ) 145 | -------------------------------------------------------------------------------- /docs/how.rst: -------------------------------------------------------------------------------- 1 | How this package works 2 | ====================== 3 | 4 | This is an overview of how the internals of this package work. The code demonstrated here is not 5 | necessarily intended to be used by users! 6 | 7 | If you're wondering how to use this package, see :doc:`/QuickStart` instead. 8 | 9 | 10 | 11 | Deserializing JSON to Python; the journey 12 | ----------------------------------------- 13 | 14 | The first step in deserialization is tokenizing. 
Text, assuming it is conforming to the JSON5 spec, 15 | is parsed into _tokens_. The tokens are then _parsed_ to produce a representative _model_ of the JSON structure. 16 | Finally, that model is _loaded_ where each node in the model is turned into an instance of a Python data type. 17 | 18 | Let's explore this process interactively. 19 | 20 | tokenizing 21 | ^^^^^^^^^^ 22 | 23 | Tokenizing is the first step in turning JSON text into Python objects. Let's look at tokenizing 24 | a very simple empty JSON object ``{}`` 25 | 26 | .. code-block:: 27 | 28 | >>> from json5.tokenizer import tokenize 29 | >>> json_string = "{}" 30 | >>> tokens = tokenize(json_string) 31 | >>> for token in tokens: 32 | ... print(token) 33 | ... 34 | Token(type='LBRACE', value='{', lineno=1, index=0) 35 | Token(type='RBRACE', value='}', lineno=1, index=1) 36 | 37 | As you can see, this broke down into two tokens: the left brace and the right brace. 38 | 39 | For good measure, let's see a slightly more complex tokenization 40 | 41 | .. code-block:: 42 | 43 | for token in tokenize("{foo: 'bar'}"): 44 | print(token) 45 | 46 | Token(type='LBRACE', value='{', lineno=1, index=0) 47 | Token(type='NAME', value='foo', lineno=1, index=1) 48 | Token(type='COLON', value=':', lineno=1, index=4) 49 | Token(type='WHITESPACE', value=' ', lineno=1, index=5) 50 | Token(type='SINGLE_QUOTE_STRING', value="'bar'", lineno=1, index=6) 51 | Token(type='RBRACE', value='}', lineno=1, index=11) 52 | 53 | These tokens will be used to build a model in the next step. 54 | 55 | 56 | Parsing and models 57 | ^^^^^^^^^^^^^^^^^^ 58 | 59 | As the text is processed into tokens, the stream of tokens is parsed into a model representing the JSON structure. 60 | 61 | Let's start with the same simple example of an empty JSON object ``{}`` 62 | 63 | .. code-block:: 64 | 65 | >>> from json5.tokenizer import tokenize 66 | >>> from json5.parser import parse_tokens 67 | >>> tokens = tokenize("{}") 68 | >>> model = parse_tokens(tokens) 69 | >>> model 70 | JSONText(value=JSONObject(key_value_pairs=[], trailing_comma=None)) 71 | 72 | The tokens were parsed to produce a model. Each production (part) in the model more or less represents a part of the 73 | `JSON5 grammar`_. ``JSONText`` is always the root production of the model for any JSON5 document. 74 | 75 | Let's look at a more complex model for the JSON text ``{foo: 0xC0FFEE}`` -- This model has been 'prettified' for this doc: 76 | 77 | .. code-block:: 78 | 79 | JSONText( 80 | value=JSONObject( 81 | key_value_pairs=[ 82 | KeyValuePair( 83 | key=Identifier(name='foo'), 84 | value=Integer(raw_value='0xC0FFEE', value=12648430, is_hex=True), 85 | ) 86 | ], 87 | trailing_comma=None, 88 | ) 89 | ) 90 | 91 | 92 | You can also build model objects 'manually' without any source text. 93 | 94 | .. code-block:: 95 | 96 | from json5.model import * 97 | model = JSONText(value=JSONObject(KeyValuePair(key=Identifier('bacon'), value=Infinity()))) 98 | 99 | 100 | Loading 101 | ^^^^^^^ 102 | 103 | Once we have a model in-hand, we can use it to generate Python object representation from the model. To do this, 104 | specialized classes, called Loaders, are used. Loaders take a model and produce something else, like Python data types. 105 | 106 | 107 | In this example, we'll just create a model instead of parsing one from text and turn it into Python using the 108 | default loader (the default loader is used when calling ``loads`` by default. 109 | 110 | .. 
code-block:: 111 | 112 | >>> from json5.loader import DefaultLoader 113 | >>> from json5.model import * 114 | >>> loader = DefaultLoader() 115 | >>> model = JSONText(value=JSONObject(KeyValuePair(key=Identifier('bacon'), value=Infinity()))) 116 | >>> loader.load(model) 117 | {'bacon': inf} 118 | 119 | 120 | 121 | Serializing to JSON 122 | ------------------- 123 | 124 | Objects can be serialized to JSON using _dumpers_. A dumper takes and object and writes JSON text representing the object. 125 | The default dumper dumps python objects directly to JSON text. 126 | 127 | .. code-block:: 128 | 129 | >>> from json5 import dumps 130 | >>> dumps(['foo', 'bar', 'baz']) 131 | '["foo", "bar", "baz"]' 132 | 133 | 134 | 135 | .. _JSON5 grammar: https://spec.json5.org/#grammar 136 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. json-five documentation master file, created by 2 | sphinx-quickstart on Tue May 19 18:32:47 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to json-five's documentation! 7 | ===================================== 8 | 9 | `GitHub`_ 10 | 11 | .. _GitHub: https://github.com/spyoungtech/json-five 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: Contents: 16 | 17 | QuickStart 18 | extending 19 | how 20 | comments 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /json5/__init__.py: -------------------------------------------------------------------------------- 1 | from .dumper import dump 2 | from .dumper import dumps 3 | from .loader import JsonIdentifier 4 | from .loader import load 5 | from .loader import loads 6 | from .utils import JSON5DecodeError 7 | 8 | __all__ = ['dump', 'dumps', 'load', 'loads', 'JSON5DecodeError', 'JsonIdentifier'] 9 | -------------------------------------------------------------------------------- /json5/dumper.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import io 4 | import json 5 | import math 6 | import typing 7 | from abc import abstractmethod 8 | from functools import singledispatchmethod 9 | from typing import Any 10 | 11 | from .loader import JsonIdentifier 12 | from .model import BlockComment 13 | from .model import BooleanLiteral 14 | from .model import Comment 15 | from .model import DoubleQuotedString 16 | from .model import Float 17 | from .model import Identifier 18 | from .model import Infinity 19 | from .model import Integer 20 | from .model import JSONArray 21 | from .model import JSONObject 22 | from .model import JSONText 23 | from .model import KeyValuePair 24 | from .model import LineComment 25 | from .model import NaN 26 | from .model import Node 27 | from .model import NullLiteral 28 | from .model import SingleQuotedString 29 | from .model import String 30 | from .model import TrailingComma 31 | from .model import UnaryOp 32 | from .model import Value 33 | 34 | 35 | class Environment: 36 | def __init__(self) -> None: 37 | self.outfile: typing.TextIO = io.StringIO() 38 | self.indent_level: int = 0 39 | self.indent: int = 0 40 | 41 | def write(self, s: str, indent: int | None = None) -> None: 42 | if indent is None: 43 | indent = self.indent_level 44 | whitespace = ' ' * self.indent * indent 45 | s = f'{whitespace}{s}' 46 | self.outfile.write(s) 47 | 48 | 49 | def dump(obj: Any, f: typing.TextIO, **kwargs: Any) -> int: 50 | text = dumps(obj, **kwargs) 51 | return f.write(text) 52 | 53 | 54 | def dumps(obj: Any, dumper: BaseDumper | None = None, indent: int = 0) -> str: 55 | env = Environment() 56 | env.indent = indent 57 | if dumper is None: 58 | dumper = DefaultDumper(env=env) 59 | dumper.dump(obj) 60 | dumper.env.outfile.seek(0) 61 | ret: str = dumper.env.outfile.read() 62 | return ret 63 | 64 | 65 | class BaseDumper: 66 | def __init__(self, env: Environment | None = None): 67 | if env is None: 68 | env = Environment() 69 | self.env = env 70 | 71 | @singledispatchmethod 72 | @abstractmethod 73 | def dump(self, obj: Any) -> Any: 74 | return NotImplemented 75 | 76 | 77 | class DefaultDumper(BaseDumper): 78 | """ 79 | Dump Python objects to a JSON string 80 | """ 81 | 82 | @singledispatchmethod 83 | def dump(self, obj: Any) -> Any: 84 | raise NotImplementedError(f"Cannot dump node {repr(obj)}") 85 | 86 | to_json = dump.register 87 | 88 | @to_json(dict) 89 | def dict_to_json(self, d: dict[Any, Any]) -> Any: 90 | self.env.write('{', indent=0) 91 | if self.env.indent: 92 | self.env.write('\n', indent=0) 93 | self.env.indent_level 
+= 1 94 | index = 0 95 | for index, (key, value) in enumerate(d.items(), start=1): 96 | if self.env.indent: 97 | self.env.write('') 98 | self.dump(key) 99 | self.env.write(': ', indent=0) 100 | self.dump(value) 101 | if index == len(d): 102 | break 103 | if self.env.indent: 104 | self.env.write(',', indent=0) 105 | self.env.write('\n', indent=0) 106 | else: 107 | self.env.write(', ', indent=0) 108 | 109 | if self.env.indent: 110 | self.env.indent_level -= 1 111 | if index != 0: 112 | self.env.write('\n', indent=0) 113 | self.env.write('}') 114 | else: 115 | self.env.write('}', indent=0) 116 | 117 | @to_json(int) 118 | def int_to_json(self, i: int) -> Any: 119 | self.env.write(str(i), indent=0) 120 | 121 | @to_json(JsonIdentifier) 122 | def identifier_to_json(self, s: JsonIdentifier) -> Any: 123 | self.env.write(s, indent=0) 124 | 125 | @to_json(str) 126 | def str_to_json(self, s: str) -> Any: 127 | self.env.write(json.dumps(s), indent=0) 128 | 129 | @to_json(list) 130 | def list_to_json(self, the_list: list[Any]) -> Any: 131 | self.env.write('[', indent=0) 132 | if self.env.indent: 133 | self.env.indent_level += 1 134 | self.env.write('\n', indent=0) 135 | list_length = len(the_list) 136 | for index, item in enumerate(the_list, start=1): 137 | if self.env.indent: 138 | self.env.write('') 139 | self.dump(item) 140 | if index != list_length: 141 | if self.env.indent: 142 | self.env.write(',', indent=0) 143 | else: 144 | self.env.write(', ', indent=0) 145 | if self.env.indent: 146 | self.env.write('\n', indent=0) 147 | if self.env.indent: 148 | self.env.indent_level -= 1 149 | self.env.write(']') 150 | 151 | @to_json(float) 152 | def float_to_json(self, f: float) -> Any: 153 | if f == math.inf: 154 | self.env.write('Infinity', indent=0) 155 | elif f == -math.inf: 156 | self.env.write('-Infinity', indent=0) 157 | elif f is math.nan: 158 | self.env.write('NaN', indent=0) 159 | else: 160 | self.env.write(str(f), indent=0) 161 | 162 | @to_json(bool) 163 | def bool_to_json(self, b: bool) -> Any: 164 | self.env.write(str(b).lower(), indent=0) 165 | 166 | @to_json(type(None)) 167 | def none_to_json(self, _: Any) -> Any: 168 | self.env.write('null', indent=0) 169 | 170 | 171 | class ModelDumper: 172 | """ 173 | Dump a model to a JSON string 174 | """ 175 | 176 | def __init__(self, env: Environment | None = None): 177 | # any provided environment is ignored 178 | self.env = Environment() 179 | 180 | def process_wsc_before(self, node: Node) -> None: 181 | for wsc in node.wsc_before: 182 | if isinstance(wsc, Comment): 183 | self.dump(wsc) 184 | elif isinstance(wsc, str): 185 | self.env.write(wsc) 186 | else: 187 | raise ValueError(f"Did not expect {type(node)}") 188 | 189 | def process_wsc_after(self, node: Node) -> None: 190 | for wsc in node.wsc_after: 191 | if isinstance(wsc, Comment): 192 | self.dump(wsc) 193 | elif isinstance(wsc, str): 194 | self.env.write(wsc) 195 | else: 196 | raise ValueError(f"Did not expect {type(wsc)}") 197 | 198 | def process_leading_wsc(self, node: JSONObject | JSONArray) -> None: 199 | for wsc in node.leading_wsc: 200 | if isinstance(wsc, Comment): 201 | self.dump(wsc) 202 | elif isinstance(wsc, str): 203 | self.env.write(wsc) 204 | else: 205 | raise ValueError(f"Did not expect {type(wsc)}") 206 | 207 | @singledispatchmethod 208 | def dump(self, node: Node) -> Any: 209 | raise NotImplementedError('foo') 210 | 211 | to_json = dump.register 212 | 213 | @to_json(JSONText) 214 | def json_model_to_json(self, node: JSONText) -> Any: 215 | self.process_wsc_before(node) 216 | 
self.dump(node.value) 217 | self.process_wsc_after(node) 218 | 219 | @to_json(JSONObject) 220 | def json_object_to_json(self, node: JSONObject) -> Any: 221 | self.process_wsc_before(node) 222 | self.env.write('{') 223 | if node.leading_wsc: 224 | self.process_leading_wsc(node) 225 | key_value_pairs = node.key_value_pairs 226 | num_pairs = len(key_value_pairs) 227 | for index, kvp in enumerate(key_value_pairs, start=1): 228 | self.dump(kvp.key) 229 | self.env.write(':') 230 | self.dump(kvp.value) 231 | if index != num_pairs: 232 | self.env.write(',') 233 | if node.trailing_comma: 234 | self.dump(node.trailing_comma) 235 | self.env.write('}') 236 | self.process_wsc_after(node) 237 | 238 | @to_json(JSONArray) 239 | def json_array_to_json(self, node: JSONArray) -> Any: 240 | self.process_wsc_before(node) 241 | self.env.write('[') 242 | if node.leading_wsc: 243 | self.process_leading_wsc(node) 244 | for index, value in enumerate(node.values, start=1): 245 | self.dump(value) 246 | if index != len(node.values): 247 | self.env.write(',') 248 | if node.trailing_comma: 249 | self.dump(node.trailing_comma) 250 | self.env.write(']') 251 | self.process_wsc_after(node) 252 | 253 | @to_json(Identifier) 254 | def identifier_to_json(self, node: Identifier) -> Any: 255 | self.process_wsc_before(node) 256 | self.env.write(node.raw_value) 257 | self.process_wsc_after(node) 258 | 259 | @to_json(Integer) 260 | def integer_to_json(self, node: Integer) -> Any: 261 | self.process_wsc_before(node) 262 | self.env.write(node.raw_value) 263 | self.process_wsc_after(node) 264 | 265 | @to_json(Float) 266 | def float_to_json(self, node: Float) -> Any: 267 | self.process_wsc_before(node) 268 | self.env.write(node.raw_value) 269 | self.process_wsc_after(node) 270 | 271 | @to_json(UnaryOp) 272 | def unary_to_json(self, node: UnaryOp) -> Any: 273 | self.process_wsc_before(node) 274 | self.env.write(node.op) 275 | self.dump(node.value) 276 | self.process_wsc_after(node) 277 | 278 | @to_json(String) 279 | def string_to_json(self, node: SingleQuotedString | DoubleQuotedString) -> Any: 280 | self.process_wsc_before(node) 281 | self.env.write(node.raw_value) # The original value, including any escape sequences or line continuations 282 | self.process_wsc_after(node) 283 | 284 | @to_json(NullLiteral) 285 | def null_to_json(self, node: NullLiteral) -> Any: 286 | self.process_wsc_before(node) 287 | self.env.write('null') 288 | self.process_wsc_after(node) 289 | 290 | @to_json(BooleanLiteral) 291 | def boolean_to_json(self, node: BooleanLiteral) -> Any: 292 | self.process_wsc_before(node) 293 | if node.value: 294 | self.env.write('true') 295 | else: 296 | self.env.write('false') 297 | self.process_wsc_after(node) 298 | 299 | @to_json(LineComment) 300 | def line_comment_to_json(self, node: LineComment) -> Any: 301 | self.process_wsc_before(node) 302 | self.env.write(node.value) 303 | self.process_wsc_after(node) 304 | 305 | @to_json(BlockComment) 306 | def block_comment_to_json(self, node: BlockComment) -> Any: 307 | self.process_wsc_before(node) 308 | self.env.write(node.value) 309 | self.process_wsc_after(node) 310 | 311 | @to_json(TrailingComma) 312 | def trailing_comma_to_json(self, node: TrailingComma) -> Any: 313 | self.process_wsc_before(node) 314 | self.env.write(',') 315 | self.process_wsc_after(node) 316 | 317 | @to_json(Infinity) 318 | def infinity_to_json(self, node: Infinity) -> Any: 319 | self.process_wsc_before(node) 320 | 321 | self.env.write('Infinity') 322 | self.process_wsc_after(node) 323 | 324 | @to_json(NaN) 325 
| def nan_to_json(self, node: NaN) -> Any: 326 | self.process_wsc_before(node) 327 | self.env.write('NaN') 328 | self.process_wsc_after(node) 329 | 330 | 331 | class Modelizer: 332 | """ 333 | Turn Python objects into a model 334 | """ 335 | 336 | @singledispatchmethod 337 | def modelize(self, obj: Any) -> Node: 338 | raise NotImplementedError(f"Cannot modelize object of type {type(obj)}") 339 | 340 | to_model = modelize.register 341 | 342 | @to_model(str) 343 | def str_to_model(self, s: str) -> SingleQuotedString | DoubleQuotedString: 344 | if repr(s).startswith("'"): 345 | return SingleQuotedString(s, raw_value=repr(s)) 346 | else: 347 | return DoubleQuotedString(s, raw_value=repr(s)) 348 | 349 | @to_model(dict) 350 | def dict_to_model(self, d: dict[Any, Any]) -> JSONObject: 351 | kvps: list[KeyValuePair] = [] 352 | for key, value in d.items(): 353 | kvp = KeyValuePair(key=self.modelize(key), value=self.modelize(value)) # type: ignore[arg-type] 354 | kvps.append(kvp) 355 | return JSONObject(*kvps) 356 | 357 | @to_model(list) 358 | def list_to_model(self, lst: list[Any]) -> JSONArray: 359 | list_values: list[Value] = [] 360 | for v in lst: 361 | list_values.append(self.modelize(v)) # type: ignore[arg-type] 362 | return JSONArray(*list_values) 363 | 364 | @to_model(int) 365 | def int_to_model(self, i: int) -> Integer: 366 | return Integer(str(i)) 367 | 368 | @to_model(float) 369 | def float_to_model(self, f: float) -> Infinity | NaN | Float | UnaryOp: 370 | if f == math.inf: 371 | return Infinity() 372 | elif f == -math.inf: 373 | return UnaryOp('-', Infinity()) 374 | elif f is math.nan: 375 | return NaN() 376 | else: 377 | return Float(str(f)) 378 | 379 | @to_model(bool) 380 | def bool_to_model(self, b: bool) -> BooleanLiteral: 381 | return BooleanLiteral(b) 382 | 383 | @to_model(type(None)) 384 | def none_to_model(self, _: Any) -> NullLiteral: 385 | return NullLiteral() 386 | 387 | 388 | def modelize(obj: Any) -> Node: 389 | """ 390 | 391 | :param obj: a python object 392 | :return: a model representing the python object 393 | """ 394 | return Modelizer().modelize(obj) 395 | -------------------------------------------------------------------------------- /json5/loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import typing 5 | from abc import abstractmethod 6 | from functools import singledispatchmethod 7 | from typing import Callable 8 | from typing import Literal 9 | 10 | from .model import BooleanLiteral 11 | from .model import Comment 12 | from .model import DoubleQuotedString 13 | from .model import Float 14 | from .model import Identifier 15 | from .model import Infinity 16 | from .model import Integer 17 | from .model import JSONArray 18 | from .model import JSONObject 19 | from .model import JSONText 20 | from .model import NaN 21 | from .model import Node 22 | from .model import NullLiteral 23 | from .model import SingleQuotedString 24 | from .model import String 25 | from .model import UnaryOp 26 | from .parser import parse_source 27 | 28 | logger = logging.getLogger(__name__) 29 | # logger.setLevel(level=logging.DEBUG) 30 | # logger.addHandler(logging.StreamHandler(stream=sys.stderr)) 31 | 32 | 33 | class Environment: 34 | def __init__( 35 | self, 36 | object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = None, 37 | parse_float: Callable[[str], typing.Any] | None = None, 38 | parse_int: Callable[[str], typing.Any] | None = None, 39 | parse_constant: 
Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = None, 40 | strict: bool = True, 41 | object_pairs_hook: Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] | None = None, 42 | parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = None, 43 | ): 44 | self.object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = object_hook 45 | self.parse_float: Callable[[str], typing.Any] | None = parse_float 46 | self.parse_int: Callable[[str], typing.Any] | None = parse_int 47 | self.parse_constant: Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = parse_constant 48 | self.strict: bool = strict 49 | self.object_pairs_hook: None | ( 50 | Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] 51 | ) = object_pairs_hook 52 | self.parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = parse_json5_identifiers 53 | 54 | 55 | class JsonIdentifier(str): 56 | ... 57 | 58 | 59 | def load( 60 | f: typing.TextIO, 61 | *, 62 | loader: LoaderBase | None = None, 63 | object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = None, 64 | parse_float: Callable[[str], typing.Any] | None = None, 65 | parse_int: Callable[[str], typing.Any] | None = None, 66 | parse_constant: Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = None, 67 | strict: bool = True, 68 | object_pairs_hook: Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] | None = None, 69 | parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = None, 70 | ) -> typing.Any: 71 | """ 72 | Like loads, but takes a file-like object with a read method. 73 | 74 | :param f: 75 | :param kwargs: 76 | :return: 77 | """ 78 | text = f.read() 79 | return loads( 80 | text, 81 | loader=loader, 82 | object_hook=object_hook, 83 | parse_float=parse_float, 84 | parse_int=parse_int, 85 | parse_constant=parse_constant, 86 | strict=strict, 87 | object_pairs_hook=object_pairs_hook, 88 | parse_json5_identifiers=parse_json5_identifiers, 89 | ) 90 | 91 | 92 | def loads( 93 | s: str, 94 | *, 95 | loader: LoaderBase | None = None, 96 | object_hook: Callable[[dict[typing.Any, typing.Any]], typing.Any] | None = None, 97 | parse_float: Callable[[str], typing.Any] | None = None, 98 | parse_int: Callable[[str], typing.Any] | None = None, 99 | parse_constant: Callable[[Literal['-Infinity', 'Infinity', 'NaN']], typing.Any] | None = None, 100 | strict: bool = True, 101 | object_pairs_hook: Callable[[list[tuple[str | JsonIdentifier, typing.Any]]], typing.Any] | None = None, 102 | parse_json5_identifiers: Callable[[JsonIdentifier], typing.Any] | None = None, 103 | ) -> typing.Any: 104 | """ 105 | Take a string of JSON text and deserialize it 106 | 107 | :param s: 108 | :param loader: The loader class to use 109 | :param object_hook: same meaning as in ``json.loads`` 110 | :param parse_float: same meaning as in ``json.loads`` 111 | :param parse_int: same meaning as in ``json.loads`` 112 | :param parse_constant: same meaning as in ``json.loads`` 113 | :param strict: same meaning as in ``json.loads`` (currently has no effect) 114 | :param object_pairs_hook: same meaning as in ``json.loads`` 115 | :param parse_json5_identifiers: callable that is passed a JsonIdentifer. 
The return value of the callable is used to load JSON Identifiers (unquoted keys) in JSON5 objects 116 | :return: 117 | """ 118 | model = parse_source(s) 119 | # logger.debug('Model is %r', model) 120 | if loader is None: 121 | loader = DefaultLoader( 122 | object_hook=object_hook, 123 | parse_float=parse_float, 124 | parse_int=parse_int, 125 | parse_constant=parse_constant, 126 | strict=strict, 127 | object_pairs_hook=object_pairs_hook, 128 | parse_json5_identifiers=parse_json5_identifiers, 129 | ) 130 | return loader.load(model) 131 | 132 | 133 | class LoaderBase: 134 | def __init__(self, env: Environment | None = None, **env_kwargs: typing.Any): 135 | if env is None: 136 | env = Environment(**env_kwargs) 137 | self.env: Environment = env 138 | 139 | @singledispatchmethod 140 | @abstractmethod 141 | def load(self, node: Node) -> typing.Any: 142 | return NotImplemented 143 | 144 | 145 | class DefaultLoader(LoaderBase): 146 | @singledispatchmethod 147 | def load(self, node: Node) -> typing.Any: 148 | raise NotImplementedError(f"Can't load node {node}") 149 | 150 | to_python = load.register 151 | 152 | @to_python(JSONText) 153 | def json_model_to_python(self, node: JSONText) -> typing.Any: 154 | logger.debug('json_model_to_python evaluating node %r', node) 155 | return self.load(node.value) 156 | 157 | @to_python(JSONObject) 158 | def json_object_to_python(self, node: JSONObject) -> typing.Any: 159 | logger.debug('json_object_to_python evaluating node %r', node) 160 | d = {} 161 | for key_value_pair in node.key_value_pairs: 162 | key = self.load(key_value_pair.key) 163 | value = self.load(key_value_pair.value) 164 | d[key] = value 165 | if self.env.object_pairs_hook: 166 | return self.env.object_pairs_hook(list(d.items())) 167 | elif self.env.object_hook: 168 | return self.env.object_hook(d) 169 | else: 170 | return d 171 | 172 | @to_python(JSONArray) 173 | def json_array_to_python(self, node: JSONArray) -> list[typing.Any]: 174 | logger.debug('json_array_to_python evaluating node %r', node) 175 | return [self.load(value) for value in node.values] 176 | 177 | @to_python(Identifier) 178 | def identifier_to_python(self, node: Identifier) -> typing.Any: 179 | logger.debug('identifier_to_python evaluating node %r', node) 180 | res = JsonIdentifier(node.name) 181 | if self.env.parse_json5_identifiers: 182 | return self.env.parse_json5_identifiers(res) 183 | return res 184 | 185 | @to_python(Infinity) # NaN/Infinity are covered here 186 | def inf_to_python(self, node: Infinity) -> typing.Any: 187 | logger.debug('inf_to_python evaluating node %r', node) 188 | if self.env.parse_constant: 189 | return self.env.parse_constant(node.const) 190 | return node.value 191 | 192 | @to_python(NaN) # NaN/Infinity are covered here 193 | def nan_to_python(self, node: NaN) -> typing.Any: 194 | logger.debug('nan_to_python evaluating node %r', node) 195 | if self.env.parse_constant: 196 | return self.env.parse_constant(node.const) 197 | return node.value 198 | 199 | @to_python(Integer) 200 | def integer_to_python(self, node: Integer) -> typing.Any: 201 | if self.env.parse_int: 202 | return self.env.parse_int(node.raw_value) 203 | else: 204 | return node.value 205 | 206 | @to_python(Float) 207 | def float_to_python(self, node: Float) -> typing.Any: 208 | if self.env.parse_float: 209 | return self.env.parse_float(node.raw_value) 210 | else: 211 | return node.value 212 | 213 | @to_python(UnaryOp) 214 | def unary_to_python(self, node: UnaryOp) -> typing.Any: 215 | logger.debug('unary_to_python evaluating node %r', 
node) 216 | if isinstance(node.value, Infinity): 217 | return self.load(node.value) 218 | value = self.load(node.value) 219 | if node.op == '-': 220 | return value * -1 221 | else: 222 | return value 223 | 224 | @to_python(String) 225 | def string_to_python(self, node: DoubleQuotedString | SingleQuotedString) -> str: 226 | logger.debug('string_to_python evaluating node %r', node) 227 | ret: str = node.characters 228 | return ret 229 | 230 | @to_python(NullLiteral) 231 | def null_to_python(self, node: NullLiteral) -> None: 232 | logger.debug('null_to_python evaluating node %r', node) 233 | return None 234 | 235 | @to_python(BooleanLiteral) 236 | def boolean_to_python(self, node: BooleanLiteral) -> bool: 237 | logger.debug('boolean_to_python evaluating node %r', node) 238 | return node.value 239 | 240 | @to_python(Comment) 241 | def comment_or_whitespace_to_python(self, node: Comment) -> typing.NoReturn: 242 | raise RuntimeError("Comments are not supported in the default loader!") 243 | 244 | 245 | class ModelLoader(LoaderBase): 246 | @singledispatchmethod 247 | def load(self, node: Node) -> typing.Any: 248 | return node 249 | -------------------------------------------------------------------------------- /json5/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import math 4 | import typing 5 | from collections import deque 6 | from typing import Any 7 | from typing import Literal 8 | from typing import NamedTuple 9 | 10 | from .tokenizer import JSON5Token 11 | 12 | __all__ = [ 13 | 'Node', 14 | 'JSONText', 15 | 'Value', 16 | 'Key', 17 | 'JSONObject', 18 | 'JSONArray', 19 | 'KeyValuePair', 20 | 'Identifier', 21 | 'Number', 22 | 'Integer', 23 | 'Float', 24 | 'Infinity', 25 | 'NaN', 26 | 'String', 27 | 'DoubleQuotedString', 28 | 'SingleQuotedString', 29 | 'BooleanLiteral', 30 | 'NullLiteral', 31 | 'UnaryOp', 32 | 'TrailingComma', 33 | 'Comment', 34 | 'LineComment', 35 | 'BlockComment', 36 | ] 37 | 38 | 39 | class KeyValuePair(NamedTuple): 40 | key: Key 41 | value: Value 42 | 43 | 44 | def walk(root: Node) -> typing.Generator[Node, None, None]: 45 | todo = deque([root]) 46 | while todo: 47 | node: Node = todo.popleft() 48 | todo.extend(iter_child_nodes(node)) 49 | yield node 50 | 51 | 52 | def iter_child_nodes(node: Node) -> typing.Generator[Node, None, None]: 53 | for attr, value in iter_fields(node): 54 | if isinstance(value, Node): 55 | yield value 56 | elif isinstance(value, list): 57 | for item in value: 58 | if isinstance(item, Node): 59 | yield item 60 | 61 | 62 | def iter_fields(node: Node) -> typing.Generator[tuple[str, Any], None, None]: 63 | for field_name in node._fields: 64 | try: 65 | value = getattr(node, field_name) 66 | yield field_name, value 67 | except AttributeError: 68 | pass 69 | 70 | 71 | class Node: 72 | excluded_names = ['excluded_names', 'wsc_before', 'wsc_after', 'leading_wsc', 'tok', 'end_tok'] 73 | 74 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 75 | # Whitespace/Comments before/after the node 76 | self.wsc_before: list[str | Comment] = [] 77 | self.wsc_after: list[str | Comment] = [] 78 | self._tok: JSON5Token | None = tok 79 | self._end_tok: JSON5Token | None = end_tok 80 | 81 | @property 82 | def col_offset(self) -> int | None: 83 | if self._tok is None: 84 | return None 85 | return self._tok.colno 86 | 87 | @property 88 | def end_col_offset(self) -> int | None: 89 | if self._end_tok is None: 90 | return None 91 | return 
self._end_tok.end_colno 92 | 93 | @property 94 | def lineno(self) -> int | None: 95 | if self._tok is None: 96 | return None 97 | return self._tok.lineno 98 | 99 | @property 100 | def end_lineno(self) -> int | None: 101 | if self._end_tok is None: 102 | return None 103 | r = self._end_tok.end_lineno 104 | return r 105 | 106 | def __repr__(self) -> str: 107 | rep = ( 108 | f"{self.__class__.__name__}(" 109 | + ", ".join( 110 | f"{key}={repr(value)}" 111 | for key, value in self.__dict__.items() 112 | if not key.startswith('_') and key not in self.excluded_names 113 | ) 114 | + ")" 115 | ) 116 | return rep 117 | 118 | @property 119 | def _fields(self) -> list[str]: 120 | fields = [item for item in list(self.__dict__) if not item.startswith('_') and item not in self.excluded_names] 121 | fields.extend(['wsc_before', 'wsc_after']) 122 | return fields 123 | 124 | 125 | class JSONText(Node): 126 | def __init__(self, value: Value, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 127 | assert isinstance(value, Value) 128 | self.value: Value = value 129 | super().__init__(tok=tok, end_tok=tok) 130 | 131 | 132 | class Value(Node): 133 | pass 134 | 135 | 136 | class Key(Node): 137 | ... 138 | 139 | 140 | class JSONObject(Value): 141 | def __init__( 142 | self, 143 | *key_value_pairs: KeyValuePair, 144 | trailing_comma: TrailingComma | None = None, 145 | leading_wsc: list[str | Comment] | None = None, 146 | tok: JSON5Token | None = None, 147 | end_tok: JSON5Token | None = None, 148 | ): 149 | keys: list[Key] = [] 150 | values: list[Value] = [] 151 | for key, value in key_value_pairs: 152 | assert isinstance(key, Key) 153 | assert isinstance(value, Value) 154 | keys.append(key) 155 | values.append(value) 156 | assert len(keys) == len(values) 157 | self.keys: list[Key] = keys 158 | self.values: list[Value] = values 159 | assert leading_wsc is None or all(isinstance(item, str) or isinstance(item, Comment) for item in leading_wsc) 160 | self.trailing_comma: TrailingComma | None = trailing_comma 161 | self.leading_wsc: list[str | Comment] = leading_wsc or [] 162 | 163 | super().__init__(tok=tok, end_tok=end_tok) 164 | 165 | @property 166 | def key_value_pairs(self) -> list[KeyValuePair]: 167 | return list(KeyValuePair(key, value) for key, value in zip(self.keys, self.values)) 168 | 169 | 170 | class JSONArray(Value): 171 | def __init__( 172 | self, 173 | *values: Value, 174 | trailing_comma: TrailingComma | None = None, 175 | leading_wsc: list[str | Comment] | None = None, 176 | tok: JSON5Token | None = None, 177 | end_tok: JSON5Token | None = None, 178 | ): 179 | vals = list(values) 180 | for value in vals: 181 | assert isinstance(value, Value), f"Was expecting object with type Value. 
Got {type(value)}" 182 | assert leading_wsc is None or all(isinstance(item, str) or isinstance(item, Comment) for item in leading_wsc) 183 | self.values: list[Value] = vals 184 | self.trailing_comma: TrailingComma | None = trailing_comma 185 | self.leading_wsc: list[str | Comment] = leading_wsc or [] 186 | 187 | super().__init__(tok=tok, end_tok=end_tok) 188 | 189 | 190 | class Identifier(Key): 191 | def __init__( 192 | self, name: str, raw_value: str | None = None, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None 193 | ): 194 | assert isinstance(name, str) 195 | if raw_value is None: 196 | raw_value = name 197 | assert isinstance(raw_value, str) 198 | assert len(name) > 0 199 | self.name: str = name 200 | self.raw_value: str = raw_value 201 | 202 | super().__init__(tok=tok, end_tok=tok) 203 | 204 | def __hash__(self) -> int: 205 | return hash(self.name) 206 | 207 | def __eq__(self, other: Any) -> bool: 208 | return hash(self) == hash(other) 209 | 210 | 211 | class Number(Value): 212 | ... 213 | 214 | 215 | class Integer(Number): 216 | def __init__( 217 | self, 218 | raw_value: str, 219 | is_hex: bool = False, 220 | is_octal: bool = False, 221 | tok: JSON5Token | None = None, 222 | end_tok: JSON5Token | None = None, 223 | ): 224 | assert isinstance(raw_value, str) 225 | if is_hex and is_octal: 226 | raise ValueError("is_hex and is_octal are mutually exclusive") 227 | if is_hex: 228 | value = int(raw_value, 0) 229 | elif is_octal: 230 | if raw_value.startswith('0o'): 231 | value = int(raw_value, 8) 232 | else: 233 | value = int(raw_value.replace('0', '0o', 1), 8) 234 | else: 235 | value = int(raw_value) 236 | self.value: int = value 237 | self.raw_value: str = raw_value 238 | self.is_hex: bool = is_hex 239 | self.is_octal: bool = is_octal 240 | 241 | super().__init__(tok=tok, end_tok=end_tok or tok) 242 | 243 | 244 | class Float(Number): 245 | def __init__( 246 | self, 247 | raw_value: str, 248 | exp_notation: str | None = None, 249 | tok: JSON5Token | None = None, 250 | end_tok: JSON5Token | None = None, 251 | ): 252 | value = float(raw_value) 253 | assert exp_notation is None or exp_notation in ('e', 'E') 254 | self.raw_value: str = raw_value 255 | self.exp_notation: str | None = exp_notation 256 | 257 | self.value: float = value 258 | super().__init__(tok=tok, end_tok=end_tok or tok) 259 | 260 | 261 | class Infinity(Number): 262 | def __init__(self, negative: bool = False, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 263 | self.negative: bool = negative 264 | 265 | super().__init__(tok=tok, end_tok=tok) 266 | 267 | @property 268 | def value(self) -> float: 269 | return math.inf if not self.negative else -math.inf 270 | 271 | @property 272 | def const(self) -> Literal['Infinity', '-Infinity']: 273 | if self.negative: 274 | return '-Infinity' 275 | else: 276 | return 'Infinity' 277 | 278 | 279 | class NaN(Number): 280 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 281 | super().__init__(tok=tok, end_tok=tok) 282 | 283 | @property 284 | def value(self) -> float: 285 | return math.nan 286 | 287 | @property 288 | def const(self) -> Literal['NaN']: 289 | return 'NaN' 290 | 291 | 292 | class String(Value, Key): 293 | def __init__( 294 | self, characters: str, raw_value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None 295 | ): 296 | assert isinstance(raw_value, str) 297 | assert isinstance(characters, str) 298 | self.characters: str = characters 299 | self.raw_value: str = raw_value 300 | 301 | 
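        # Added note (illustrative): String derives from both Value and Key, so a
        # document such as '{"a": "b"}' is modeled with DoubleQuotedString nodes
        # for the key "a" and for the value "b" alike.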
super().__init__(tok=tok, end_tok=tok) 302 | 303 | 304 | class DoubleQuotedString(String): 305 | ... 306 | 307 | 308 | class SingleQuotedString(String): 309 | ... 310 | 311 | 312 | class BooleanLiteral(Value): 313 | def __init__(self, value: bool, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 314 | assert value in (True, False) 315 | self.value: bool = value 316 | 317 | super().__init__(tok=tok, end_tok=tok) 318 | 319 | 320 | class NullLiteral(Value): 321 | value = None 322 | 323 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 324 | super().__init__(tok=tok, end_tok=tok) 325 | 326 | 327 | class UnaryOp(Value): 328 | def __init__( 329 | self, op: Literal['-', '+'], value: Number, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None 330 | ): 331 | assert op in ('-', '+') 332 | assert isinstance(value, Number) 333 | self.op: Literal['-', '+'] = op 334 | self.value: Number = value 335 | 336 | super().__init__(tok=tok, end_tok=end_tok) 337 | 338 | 339 | class TrailingComma(Node): 340 | def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 341 | super().__init__(tok=tok, end_tok=tok) # Trailing comma is always a single COMMA token 342 | 343 | 344 | class Comment(Node): 345 | def __init__(self, value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None): 346 | assert isinstance(value, str), f"Expected str got {type(value)}" 347 | self.value: str = value 348 | super().__init__(tok=tok, end_tok=tok) # Comments are always a single token 349 | 350 | 351 | class LineComment(Comment): 352 | ... 353 | 354 | 355 | class BlockComment(Comment): 356 | ... 357 | -------------------------------------------------------------------------------- /json5/parser.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import ast 4 | import sys 5 | import typing 6 | from functools import lru_cache 7 | from typing import Any 8 | from typing import Literal 9 | from typing import Protocol 10 | 11 | import regex as re 12 | from sly import Parser # type: ignore 13 | from sly.yacc import SlyLogger # type: ignore 14 | 15 | from .model import BlockComment 16 | from .model import BooleanLiteral 17 | from .model import Comment 18 | from .model import DoubleQuotedString 19 | from .model import Float 20 | from .model import Identifier 21 | from .model import Infinity 22 | from .model import Integer 23 | from .model import JSONArray 24 | from .model import JSONObject 25 | from .model import JSONText 26 | from .model import Key 27 | from .model import KeyValuePair 28 | from .model import LineComment 29 | from .model import NaN 30 | from .model import NullLiteral 31 | from .model import SingleQuotedString 32 | from .model import TrailingComma 33 | from .model import UnaryOp 34 | from .model import Value 35 | from .tokenizer import JSON5Token 36 | from .tokenizer import JSONLexer 37 | from .tokenizer import tokenize 38 | from .utils import JSON5DecodeError 39 | 40 | 41 | class QuietSlyLogger(SlyLogger): # type: ignore[misc] 42 | def warning(self, *args: Any, **kwargs: Any) -> None: 43 | return 44 | 45 | debug = warning 46 | info = warning 47 | 48 | 49 | ESCAPE_SEQUENCES = { 50 | 'b': '\u0008', 51 | 'f': '\u000C', 52 | 'n': '\u000A', 53 | 'r': '\u000D', 54 | 't': '\u0009', 55 | 'v': '\u000B', 56 | '0': '\u0000', 57 | '\\': '\u005c', 58 | '"': '\u0022', 59 | "'": '\u0027', 60 | } 61 | 62 | # class TrailingComma: 63 | # pass 64 | 65 | 66 | 
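# Illustrative usage (added comment; the exact pattern passed to re.sub by the
# string productions below is an assumption, not copied from this module): the
# escape table above is applied through a regex substitution whose single
# capture group is the escaped character, for example
#
#     re.sub(r'\\(.)', replace_escape_literals, r'foo\nbar')
#
# yields 'foo', a real newline, then 'bar'. Unknown escapes fall back to the
# bare character via ESCAPE_SEQUENCES.get(seq, seq).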
def replace_escape_literals(matchobj: re.Match[str]) -> str: 67 | s = matchobj.group(0) 68 | if s.startswith('\\0') and len(s) == 3: 69 | raise JSON5DecodeError("'\\0' MUST NOT be followed by a decimal digit", None) 70 | seq = matchobj.group(1) 71 | return ESCAPE_SEQUENCES.get(seq, seq) 72 | 73 | 74 | @lru_cache(maxsize=1024) 75 | def _latin_escape_replace(s: str) -> str: 76 | if s.startswith('\\x') and len(s) != 4: 77 | raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None) 78 | val: str = ast.literal_eval(f'"{s}"') 79 | if val == '\\': 80 | val = '\\\\' # this is important; the subsequent regex will sub it back to \\ 81 | return val 82 | 83 | 84 | def latin_unicode_escape_replace(matchobj: re.Match[str]) -> str: 85 | s = matchobj.group(0) 86 | return _latin_escape_replace(s) 87 | 88 | 89 | def _unicode_escape_replace(s: str) -> str: 90 | ret: str = ast.literal_eval(f'"{s}"') 91 | return ret 92 | 93 | 94 | def unicode_escape_replace(matchobj: re.Match[str]) -> str: 95 | s = matchobj.group(0) 96 | return _unicode_escape_replace(s) 97 | 98 | 99 | class T_TokenSlice(Protocol): 100 | def __getitem__(self, item: int) -> JSON5Token: 101 | ... 102 | 103 | 104 | class T_AnyProduction(Protocol): 105 | _slice: T_TokenSlice 106 | 107 | 108 | class T_TextProduction(Protocol): 109 | wsc0: list[Comment | str] 110 | wsc1: list[Comment | str] 111 | value: Value 112 | 113 | def __getitem__(self, i: Literal[1]) -> Value: 114 | ... 115 | 116 | 117 | class T_FirstKeyValuePairProduction(Protocol): 118 | wsc0: list[Comment | str] 119 | wsc1: list[Comment | str] 120 | wsc2: list[Comment | str] 121 | key: Key 122 | value: Value 123 | _slice: T_TokenSlice 124 | 125 | def __getitem__(self, item: int) -> Key | Value: 126 | ... 127 | 128 | 129 | class T_WSCProduction(Protocol): 130 | _slice: T_TokenSlice 131 | 132 | def __getitem__(self, item: Literal[0]) -> str | Comment: 133 | ... 134 | 135 | 136 | class T_CommentProduction(Protocol): 137 | _slice: T_TokenSlice 138 | 139 | def __getitem__(self, item: Literal[0]) -> str: 140 | ... 141 | 142 | 143 | class T_KeyValuePairsProduction(Protocol): 144 | _slice: T_TokenSlice 145 | first_key_value_pair: KeyValuePair 146 | subsequent_key_value_pair: list[KeyValuePair] 147 | 148 | 149 | class T_JsonObjectProduction(Protocol): 150 | _slice: T_TokenSlice 151 | key_value_pairs: tuple[list[KeyValuePair], TrailingComma | None] | None 152 | wsc: list[Comment | str] 153 | 154 | 155 | class SubsequentKeyValuePairProduction(Protocol): 156 | _slice: T_TokenSlice 157 | wsc: list[Comment | str] 158 | first_key_value_pair: KeyValuePair | None 159 | 160 | 161 | class T_FirstArrayValueProduction(Protocol): 162 | _slice: T_TokenSlice 163 | 164 | def __getitem__(self, item: Literal[1]) -> Value: 165 | ... 166 | 167 | wsc: list[Comment | str] 168 | 169 | 170 | class T_SubsequentArrayValueProduction(Protocol): 171 | _slice: T_TokenSlice 172 | first_array_value: Value | None 173 | wsc: list[Comment | str] 174 | 175 | 176 | class T_ArrayValuesProduction(Protocol): 177 | _slice: T_TokenSlice 178 | first_array_value: Value 179 | subsequent_array_value: list[Value] 180 | 181 | 182 | class T_JsonArrayProduction(Protocol): 183 | _slice: T_TokenSlice 184 | array_values: tuple[list[Value], TrailingComma | None] | None 185 | wsc: list[Comment | str] 186 | 187 | 188 | class T_IdentifierProduction(Protocol): 189 | _slice: T_TokenSlice 190 | 191 | def __getitem__(self, item: Literal[0]) -> str: 192 | ... 
        ...
193 | 194 | 195 | class T_KeyProduction(Protocol): 196 | def __getitem__(self, item: Literal[1]) -> Identifier | DoubleQuotedString | SingleQuotedString: 197 | ... 198 | 199 | 200 | class T_NumberProduction(Protocol): 201 | _slice: T_TokenSlice 202 | 203 | def __getitem__(self, item: Literal[0]) -> str: 204 | ... 205 | 206 | 207 | class T_ValueNumberProduction(Protocol): 208 | _slice: T_TokenSlice 209 | number: Infinity | NaN | Float | Integer 210 | 211 | 212 | class T_ExponentNotationProduction(Protocol): 213 | _slice: T_TokenSlice 214 | 215 | def __getitem__(self, item: int) -> str: 216 | ... 217 | 218 | 219 | class T_StringTokenProduction(Protocol): 220 | _slice: T_TokenSlice 221 | 222 | def __getitem__(self, item: Literal[0]) -> str: 223 | ... 224 | 225 | 226 | class T_StringProduction(Protocol): 227 | _slice: T_TokenSlice 228 | 229 | def __getitem__(self, item: Literal[0]) -> DoubleQuotedString | SingleQuotedString: 230 | ... 231 | 232 | 233 | class T_ValueProduction(Protocol): 234 | _slice: T_TokenSlice 235 | 236 | def __getitem__( 237 | self, item: Literal[0] 238 | ) -> ( 239 | DoubleQuotedString 240 | | SingleQuotedString 241 | | JSONObject 242 | | JSONArray 243 | | BooleanLiteral 244 | | NullLiteral 245 | | Infinity 246 | | Integer 247 | | Float 248 | | NaN 249 | ): 250 | ... 251 | 252 | 253 | T_CallArg = typing.TypeVar('T_CallArg') 254 | _: typing.Callable[..., typing.Callable[[T_CallArg], T_CallArg]] 255 | 256 | 257 | class JSONParser(Parser): # type: ignore[misc] 258 | # debugfile = 'parser.out' 259 | tokens = JSONLexer.tokens 260 | log = QuietSlyLogger(sys.stderr) 261 | 262 | def __init__(self, *args: Any, **kwargs: Any): 263 | super().__init__(*args, **kwargs) 264 | self.errors: list[JSON5DecodeError] = [] 265 | self.last_token: JSON5Token | None = None 266 | self.seen_tokens: list[JSON5Token] = [] 267 | self.expecting: list[list[str]] = [] 268 | 269 | @_('{ wsc } value { wsc }') 270 | def text(self, p: T_TextProduction) -> JSONText: 271 | node = JSONText(value=p[1], tok=p.value._tok) 272 | for wsc in p.wsc0: 273 | node.wsc_before.append(wsc) 274 | for wsc in p.wsc1: 275 | node.wsc_after.append(wsc) 276 | return node 277 | 278 | @_('key { wsc } seen_colon COLON { wsc } object_value_seen value { wsc }') 279 | def first_key_value_pair(self, p: T_FirstKeyValuePairProduction) -> KeyValuePair: 280 | key = p[0] 281 | for wsc in p.wsc0: 282 | key.wsc_after.append(wsc) 283 | value = p[6] 284 | for wsc in p.wsc1: 285 | value.wsc_before.append(wsc) 286 | for wsc in p.wsc2: 287 | value.wsc_after.append(wsc) 288 | return KeyValuePair(key=p.key, value=p.value) 289 | 290 | @_('object_delimiter_seen COMMA { wsc } [ first_key_value_pair ]') 291 | def subsequent_key_value_pair(self, p: SubsequentKeyValuePairProduction) -> KeyValuePair | TrailingComma: 292 | node: KeyValuePair | TrailingComma 293 | if p.first_key_value_pair: 294 | node = p.first_key_value_pair 295 | for wsc in p.wsc: 296 | node.key.wsc_before.append(wsc) 297 | else: 298 | node = TrailingComma(tok=p._slice[1]) 299 | for wsc in p.wsc: 300 | node.wsc_after.append(wsc) 301 | return node 302 | 303 | @_('WHITESPACE', 'comment') 304 | def wsc(self, p: T_WSCProduction) -> str | Comment: 305 | return p[0] 306 | 307 | @_('BLOCK_COMMENT') 308 | def comment(self, p: T_CommentProduction) -> BlockComment: 309 | return BlockComment(p[0], tok=p._slice[0]) 310 | 311 | @_('LINE_COMMENT') # type: ignore[no-redef] 312 | def comment(self, p: T_CommentProduction): 313 | return LineComment(p[0], tok=p._slice[0]) 314 | 315 | @_('first_key_value_pair 
{ subsequent_key_value_pair }') 316 | def key_value_pairs(self, p: T_KeyValuePairsProduction) -> tuple[list[KeyValuePair], TrailingComma | None]: 317 | ret = [ 318 | p.first_key_value_pair, 319 | ] 320 | num_sqvp = len(p.subsequent_key_value_pair) 321 | for index, value in enumerate(p.subsequent_key_value_pair): 322 | if isinstance(value, TrailingComma): 323 | if index + 1 != num_sqvp: 324 | offending_token = value._tok 325 | self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", offending_token)) 326 | return ret, value 327 | else: 328 | ret.append(value) 329 | return ret, None 330 | 331 | @_('') 332 | def seen_LBRACE(self, p: Any) -> None: 333 | self.expecting.append(['RBRACE', 'key']) 334 | 335 | @_('') 336 | def seen_key(self, p: Any) -> None: 337 | self.expecting.pop() 338 | self.expecting.append(['COLON']) 339 | 340 | @_('') 341 | def seen_colon(self, p: Any) -> None: 342 | self.expecting.pop() 343 | self.expecting.append(['value']) 344 | 345 | @_('') 346 | def object_value_seen(self, p: Any) -> None: 347 | self.expecting.pop() 348 | self.expecting.append(['COMMA', 'RBRACE']) 349 | 350 | @_('') 351 | def object_delimiter_seen(self, p: Any) -> None: 352 | self.expecting.pop() 353 | self.expecting.append(['RBRACE', 'key']) 354 | 355 | @_('') 356 | def seen_RBRACE(self, p: Any) -> None: 357 | self.expecting.pop() 358 | 359 | @_('seen_LBRACE LBRACE { wsc } [ key_value_pairs ] seen_RBRACE RBRACE') 360 | def json_object(self, p: T_JsonObjectProduction) -> JSONObject: 361 | if not p.key_value_pairs: 362 | node = JSONObject(leading_wsc=list(p.wsc or []), tok=p._slice[1], end_tok=p._slice[5]) 363 | else: 364 | kvps, trailing_comma = p.key_value_pairs 365 | node = JSONObject( 366 | *kvps, 367 | trailing_comma=trailing_comma, 368 | leading_wsc=list(p.wsc or []), 369 | tok=p._slice[1], 370 | end_tok=p._slice[5], 371 | ) 372 | 373 | return node 374 | 375 | @_('array_value_seen value { wsc }') 376 | def first_array_value(self, p: T_FirstArrayValueProduction) -> Value: 377 | node = p[1] 378 | for wsc in p.wsc: 379 | node.wsc_after.append(wsc) 380 | return node 381 | 382 | @_('array_delimiter_seen COMMA { wsc } [ first_array_value ]') 383 | def subsequent_array_value(self, p: T_SubsequentArrayValueProduction) -> Value | TrailingComma: 384 | node: Value | TrailingComma 385 | if p.first_array_value: 386 | node = p.first_array_value 387 | for wsc in p.wsc: 388 | node.wsc_before.append(wsc) 389 | else: 390 | node = TrailingComma(tok=p._slice[1]) 391 | for wsc in p.wsc: 392 | node.wsc_after.append(wsc) 393 | return node 394 | 395 | @_('first_array_value { subsequent_array_value }') 396 | def array_values(self, p: T_ArrayValuesProduction) -> tuple[list[Value], TrailingComma | None]: 397 | ret = [ 398 | p.first_array_value, 399 | ] 400 | num_values = len(p.subsequent_array_value) 401 | for index, value in enumerate(p.subsequent_array_value): 402 | if isinstance(value, TrailingComma): 403 | if index + 1 != num_values: 404 | self.errors.append(JSON5DecodeError("Syntax Error: multiple trailing commas", value._tok)) 405 | return ret, value 406 | return ret, value 407 | else: 408 | ret.append(value) 409 | return ret, None 410 | 411 | @_('seen_LBRACKET LBRACKET { wsc } [ array_values ] seen_RBRACKET RBRACKET') 412 | def json_array(self, p: T_JsonArrayProduction) -> JSONArray: 413 | if not p.array_values: 414 | node = JSONArray(tok=p._slice[1], end_tok=p._slice[5]) 415 | else: 416 | values, trailing_comma = p.array_values 417 | node = JSONArray(*values, trailing_comma=trailing_comma, 
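                # (added note) per the production above, p._slice holds six symbols:
                # the empty seen_LBRACKET/seen_RBRACKET marker rules sit at slots 0 and 4,
                # so slot 1 is the LBRACKET token and slot 5 the RBRACKET token.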
tok=p._slice[1], end_tok=p._slice[5]) 418 | 419 | for wsc in p.wsc: 420 | node.leading_wsc.append(wsc) 421 | 422 | return node 423 | 424 | @_('') 425 | def seen_LBRACKET(self, p: Any) -> None: 426 | self.expecting.append(['RBRACKET', 'value']) 427 | 428 | @_('') 429 | def seen_RBRACKET(self, p: Any) -> None: 430 | self.expecting.pop() 431 | 432 | @_('') 433 | def array_delimiter_seen(self, p: Any) -> None: 434 | assert len(self.expecting[-1]) == 2 435 | self.expecting[-1].pop() 436 | self.expecting[-1].append('value') 437 | 438 | @_('') 439 | def array_value_seen(self, p: Any) -> None: 440 | assert len(self.expecting[-1]) == 2 441 | assert self.expecting[-1][-1] == 'value' 442 | self.expecting[-1].pop() 443 | self.expecting[-1].append('COMMA') 444 | 445 | @_('NAME') 446 | def identifier(self, p: T_IdentifierProduction) -> Identifier: 447 | raw_value = p[0] 448 | name = re.sub(r'\\u[0-9a-fA-F]{4}', unicode_escape_replace, raw_value) 449 | pattern = r'[\w_\$]([\w_\d\$\p{Pc}\p{Mn}\p{Mc}\u200C\u200D])*' 450 | if not re.fullmatch(pattern, name): 451 | self.errors.append(JSON5DecodeError("Invalid identifier name", p._slice[0])) 452 | return Identifier(name=name, raw_value=raw_value, tok=p._slice[0]) 453 | 454 | @_('seen_key identifier', 'seen_key string') 455 | def key(self, p: T_KeyProduction) -> Identifier | DoubleQuotedString | SingleQuotedString: 456 | node = p[1] 457 | return node 458 | 459 | @_('INTEGER') 460 | def number(self, p: T_NumberProduction): 461 | return Integer(p[0], tok=p._slice[0]) 462 | 463 | @_('FLOAT') # type: ignore[no-redef] 464 | def number(self, p: T_NumberProduction): 465 | return Float(p[0], tok=p._slice[0]) 466 | 467 | @_('OCTAL') # type: ignore[no-redef] 468 | def number(self, p: T_NumberProduction): 469 | self.errors.append(JSON5DecodeError("Invalid integer literal. Octals are not allowed", p._slice[0])) 470 | raw_value = p[0] 471 | if re.search(r'[89]+', raw_value): 472 | self.errors.append(JSON5DecodeError("Invalid octal format. 
Octal digits must be in range 0-7", p._slice[0])) 473 | return Integer(raw_value=oct(0), is_octal=True, tok=p._slice[0]) 474 | return Integer(raw_value, is_octal=True, tok=p._slice[0]) 475 | 476 | @_('INFINITY') # type: ignore[no-redef] 477 | def number(self, p: T_AnyProduction) -> Infinity: 478 | return Infinity(tok=p._slice[0]) 479 | 480 | @_('NAN') # type: ignore[no-redef] 481 | def number(self, p: T_AnyProduction) -> NaN: 482 | return NaN(tok=p._slice[0]) 483 | 484 | @_('MINUS number') 485 | def value(self, p: T_ValueNumberProduction) -> UnaryOp: 486 | if isinstance(p.number, Infinity): 487 | p.number.negative = True 488 | node = UnaryOp(op='-', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok) 489 | return node 490 | 491 | @_('PLUS number') # type: ignore[no-redef] 492 | def value(self, p: T_ValueNumberProduction): 493 | node = UnaryOp(op='+', value=p.number, tok=p._slice[0], end_tok=p.number._end_tok) 494 | return node 495 | 496 | @_('INTEGER EXPONENT', 'FLOAT EXPONENT') # type: ignore[no-redef] 497 | def number(self, p: T_ExponentNotationProduction) -> Float: 498 | exp_notation = p[1][0] # e or E 499 | return Float(p[0] + p[1], exp_notation=exp_notation, tok=p._slice[0], end_tok=p._slice[1]) 500 | 501 | @_('HEXADECIMAL') # type: ignore[no-redef] 502 | def number(self, p: T_NumberProduction) -> Integer: 503 | return Integer(p[0], is_hex=True, tok=p._slice[0]) 504 | 505 | @_('DOUBLE_QUOTE_STRING') 506 | def double_quoted_string(self, p: T_StringTokenProduction) -> DoubleQuotedString: 507 | raw_value = p[0] 508 | contents = raw_value[1:-1] 509 | terminator_in_string = re.search(r'(? SingleQuotedString: 534 | raw_value = p[0] 535 | contents = raw_value[1:-1] 536 | terminator_in_string = re.search(r'(? SingleQuotedString | DoubleQuotedString: 561 | return p[0] 562 | 563 | @_('TRUE') 564 | def boolean(self, p: T_AnyProduction) -> BooleanLiteral: 565 | return BooleanLiteral(True, tok=p._slice[0]) 566 | 567 | @_('FALSE') # type: ignore[no-redef] 568 | def boolean(self, p: T_AnyProduction) -> BooleanLiteral: 569 | return BooleanLiteral(False, tok=p._slice[0]) 570 | 571 | @_('NULL') 572 | def null(self, p: T_AnyProduction) -> NullLiteral: 573 | return NullLiteral(tok=p._slice[0]) 574 | 575 | @_( # type: ignore[no-redef] 576 | 'string', 577 | 'json_object', 578 | 'json_array', 579 | 'boolean', 580 | 'null', 581 | 'number', 582 | ) 583 | def value( 584 | self, p: T_ValueProduction 585 | ) -> ( 586 | DoubleQuotedString 587 | | SingleQuotedString 588 | | JSONObject 589 | | JSONArray 590 | | BooleanLiteral 591 | | NullLiteral 592 | | Infinity 593 | | Integer 594 | | Float 595 | | NaN 596 | ): 597 | node = p[0] 598 | return node 599 | 600 | @_('UNTERMINATED_SINGLE_QUOTE_STRING', 'UNTERMINATED_DOUBLE_QUOTE_STRING') # type: ignore[no-redef] 601 | def string(self, p: T_StringTokenProduction) -> SingleQuotedString | DoubleQuotedString: 602 | self.error(p._slice[0]) 603 | raw = p[0] 604 | if raw.startswith('"'): 605 | return DoubleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0]) 606 | return SingleQuotedString(raw[1:], raw_value=raw, tok=p._slice[0]) 607 | 608 | def error(self, token: JSON5Token | None) -> JSON5Token | None: 609 | if token: 610 | if self.expecting: 611 | expected = self.expecting[-1] 612 | 613 | message = f"Syntax Error. 
Was expecting {' or '.join(expected)}" 614 | else: 615 | message = 'Syntax Error' 616 | 617 | self.errors.append(JSON5DecodeError(message, token)) 618 | try: 619 | return next(self.tokens) # type: ignore 620 | except StopIteration: 621 | # EOF 622 | class tok: 623 | type = '$end' 624 | value = None 625 | lineno = None 626 | index = None 627 | end = None 628 | 629 | return JSON5Token(tok(), None) # type: ignore[arg-type] 630 | elif self.last_token: 631 | doc = self.last_token.doc 632 | pos = len(doc) 633 | lineno = doc.count('\n', 0, pos) + 1 634 | colno = pos - doc.rfind('\n', 0, pos) 635 | message = f'Expecting value. Unexpected EOF at: ' f'line {lineno} column {colno} (char {pos})' 636 | if self.expecting: 637 | expected = self.expecting[-1] 638 | message += f'. Was expecting {f" or ".join(expected)}' 639 | self.errors.append(JSON5DecodeError(message, None)) 640 | else: 641 | # Empty file 642 | self.errors.append(JSON5DecodeError('Expecting value. Received unexpected EOF', None)) 643 | return None 644 | 645 | def _token_gen(self, tokens: typing.Iterable[JSON5Token]) -> typing.Generator[JSON5Token, None, None]: 646 | for tok in tokens: 647 | self.last_token = tok 648 | self.seen_tokens.append(tok) 649 | yield tok 650 | 651 | def parse(self, tokens: typing.Iterable[JSON5Token]) -> JSONText: 652 | tokens = self._token_gen(tokens) 653 | model: JSONText = super().parse(tokens) 654 | if self.errors: 655 | if len(self.errors) > 1: 656 | primary_error = self.errors[0] 657 | msg = ( 658 | "There were multiple errors parsing the JSON5 document.\n" 659 | "The primary error was: \n\t{}\n" 660 | "Additionally, the following errors were also detected:\n\t{}" 661 | ) 662 | 663 | num_additional_errors = len(self.errors) - 1 664 | additional_errors = '\n\t'.join(err.args[0] for err in self.errors[1:6]) 665 | if num_additional_errors > 5: 666 | additional_errors += f'\n\t{num_additional_errors - 5} additional error(s) truncated' 667 | msg = msg.format(primary_error.args[0], additional_errors) 668 | err = JSON5DecodeError(msg, None) 669 | err.lineno = primary_error.lineno 670 | err.token = primary_error.token 671 | err.index = primary_error.index 672 | raise err 673 | else: 674 | raise self.errors[0] 675 | return model 676 | 677 | 678 | def parse_tokens(raw_tokens: typing.Iterable[JSON5Token]) -> JSONText: 679 | parser = JSONParser() 680 | return parser.parse(raw_tokens) 681 | 682 | 683 | def parse_source(text: str) -> JSONText: 684 | tokens = tokenize(text) 685 | model = parse_tokens(tokens) 686 | return model 687 | -------------------------------------------------------------------------------- /json5/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spyoungtech/json-five/f95e527c7121113674c3621d8244c9c7162a0348/json5/py.typed -------------------------------------------------------------------------------- /json5/tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import typing 5 | from typing import Generator 6 | from typing import NoReturn 7 | 8 | import regex as re 9 | from sly import Lexer # type: ignore 10 | from sly.lex import Token # type: ignore 11 | 12 | from .utils import JSON5DecodeError 13 | 14 | logger = logging.getLogger(__name__) 15 | # logger.addHandler(logging.StreamHandler(stream=sys.stderr)) 16 | # logger.setLevel(level=logging.DEBUG) 17 | 18 | 19 | class JSON5Token(Token): # type: ignore[misc] 20 | ''' 
21 | Representation of a single token. 22 | ''' 23 | 24 | def __init__(self, tok: Token, doc: str): 25 | self.type: str | None = tok.type 26 | self.value: str = tok.value 27 | self.lineno: int = tok.lineno 28 | self.index: int = tok.index 29 | self.doc: str = doc 30 | self.end: int = tok.end 31 | 32 | @property 33 | def colno(self) -> int: 34 | line_start_index = self.doc.rfind('\n', 0, self.index) + 1 35 | return self.index - line_start_index 36 | 37 | @property 38 | def end_colno(self) -> int: 39 | return self.colno + self.end - self.index 40 | 41 | @property 42 | def end_lineno(self) -> int: 43 | return self.lineno + self.value.count('\n') 44 | 45 | __slots__ = ('type', 'value', 'lineno', 'index', 'doc', 'end') 46 | 47 | def __str__(self) -> str: 48 | if self.value: 49 | return self.value 50 | else: 51 | return '' 52 | 53 | def __repr__(self) -> str: 54 | return f'JSON5Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})' 55 | 56 | 57 | T_CallArg = typing.TypeVar('T_CallArg') 58 | _: typing.Callable[[str], typing.Callable[[T_CallArg], T_CallArg]] 59 | 60 | 61 | class JSONLexer(Lexer): # type: ignore[misc] 62 | regex_module = re 63 | reflags = re.DOTALL 64 | tokens = { 65 | LBRACE, 66 | RBRACE, 67 | LBRACKET, 68 | RBRACKET, 69 | DOUBLE_QUOTE_STRING, 70 | SINGLE_QUOTE_STRING, 71 | UNTERMINATED_DOUBLE_QUOTE_STRING, 72 | UNTERMINATED_SINGLE_QUOTE_STRING, 73 | NAME, 74 | COMMA, 75 | BLOCK_COMMENT, 76 | LINE_COMMENT, 77 | WHITESPACE, 78 | TRUE, 79 | FALSE, 80 | NULL, 81 | COLON, 82 | # Numbers 83 | PLUS, 84 | MINUS, 85 | FLOAT, 86 | INTEGER, 87 | INFINITY, 88 | NAN, 89 | EXPONENT, 90 | HEXADECIMAL, 91 | OCTAL, # Not allowed, but we capture as a token to raise error later 92 | } 93 | 94 | def tokenize(self, text: str, lineno: int = 1, index: int = 0) -> Generator[JSON5Token, None, None]: 95 | for tok in super().tokenize(text, lineno, index): 96 | tok = JSON5Token(tok, text) 97 | yield tok 98 | 99 | LBRACE = r'{' 100 | RBRACE = r'}' 101 | LBRACKET = r'\[' 102 | RBRACKET = r'\]' 103 | COLON = r"\:" 104 | COMMA = r"\," 105 | 106 | @_(r'"(?:[^"\\]|\\.)*"') 107 | def DOUBLE_QUOTE_STRING(self, tok: JSON5Token) -> JSON5Token: 108 | self.lineno += tok.value.count('\n') 109 | return tok 110 | 111 | @_(r"'(?:[^'\\]|\\.)*'") 112 | def SINGLE_QUOTE_STRING(self, tok: JSON5Token) -> JSON5Token: 113 | self.lineno += tok.value.count('\n') 114 | return tok 115 | 116 | LINE_COMMENT = r"//[^\n]*" 117 | 118 | @_(r'/\*((.|\n))*?\*/') 119 | def BLOCK_COMMENT(self, tok: JSON5Token) -> JSON5Token: 120 | self.lineno += tok.value.count('\n') 121 | return tok 122 | 123 | @_("[\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u2028\u2029\ufeff]+") 124 | def WHITESPACE(self, tok: JSON5Token) -> JSON5Token: 125 | self.lineno += tok.value.count('\n') 126 | return tok 127 | 128 | MINUS = r'\-' 129 | PLUS = r'\+' 130 | EXPONENT = r"(e|E)(\-|\+)?\d+" 131 | HEXADECIMAL = r'0(x|X)[0-9a-fA-F]+' 132 | OCTAL = r'(0\d+|0o\d+)' 133 | FLOAT = r'(\d+\.\d*)|(\d*\.\d+)' # 23.45 134 | INTEGER = r'\d+' 135 | NAME = r'[\w_\$\\]([\w_\d\$\\\p{Pc}\p{Mn}\p{Mc}\u200C\u200D])*' 136 | 137 | NAME['true'] = TRUE # type: ignore[index] 138 | NAME['false'] = FALSE # type: ignore[index] 139 | NAME['null'] = NULL # type: ignore[index] 140 | NAME['Infinity'] = INFINITY # type: ignore[index] 141 | NAME['NaN'] = NAN # type: ignore[index] 142 | 143 | UNTERMINATED_DOUBLE_QUOTE_STRING = r'"(?:[^"\\]|\\.)*' 144 | UNTERMINATED_SINGLE_QUOTE_STRING = r"'(?:[^'\\]|\\.)*" 145 | 146 | def error(self, t: JSON5Token) -> 
NoReturn: 147 | raise JSON5DecodeError(f'Illegal character {t.value[0]!r} at index {self.index}', None) 148 | 149 | 150 | def tokenize(text: str) -> Generator[JSON5Token, None, None]: 151 | lexer = JSONLexer() 152 | tokens = lexer.tokenize(text) 153 | return tokens 154 | 155 | 156 | def reversed_enumerate(tokens: typing.Sequence[JSON5Token]) -> typing.Generator[tuple[int, JSON5Token], None, None]: 157 | for i in reversed(range(len(tokens))): 158 | tok = tokens[i] 159 | yield i, tok 160 | -------------------------------------------------------------------------------- /json5/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import typing 4 | from json import JSONDecodeError 5 | 6 | __all__ = ['JSON5DecodeError'] 7 | 8 | if typing.TYPE_CHECKING: 9 | from .tokenizer import JSON5Token 10 | 11 | 12 | class JSON5DecodeError(JSONDecodeError): 13 | def __init__(self, msg: str, token: JSON5Token | None): 14 | lineno = getattr(token, 'lineno', 0) 15 | index = getattr(token, 'index', 0) 16 | doc = getattr(token, 'doc', None) 17 | self.token = token 18 | self.index = index 19 | if token and doc: 20 | errmsg = f'{msg} in or near token {token.type} at' 21 | super().__init__(errmsg, doc, index) 22 | else: 23 | ValueError.__init__(self, msg) 24 | self.msg = msg 25 | self.lineno = lineno 26 | 27 | def __reduce__(self) -> tuple[type[JSON5DecodeError], tuple[str, JSON5Token | None]]: 28 | return self.__class__, (self.msg, self.token) 29 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | sly>=0.5 2 | regex 3 | pytest 4 | mypy 5 | coverage 6 | types-regex 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = json-five 3 | version = 1.1.2 4 | url = https://github.com/spyoungtech/json-five 5 | license = Apache 6 | author = Spencer Phillip Young 7 | author_email = spencer.young@spyoung.com 8 | description = A JSON5 parser that, among other features, supports round-trip preservation of comments 9 | long_description = file: README.md 10 | long_description_content_type = text/markdown 11 | classifiers = 12 | License :: OSI Approved :: Apache Software License 13 | Programming Language :: Python :: 3 :: Only 14 | Programming Language :: Python :: 3.8 15 | Programming Language :: Python :: 3.9 16 | Programming Language :: Python :: 3.10 17 | Programming Language :: Python :: 3.11 18 | 19 | license_files = LICENSE 20 | 21 | [options] 22 | packages = json5 23 | python_requires = >=3.8.0 24 | install_requires = 25 | sly>=0.5 26 | regex 27 | 28 | [options.package_data] 29 | json5 = 30 | py.typed 31 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | 5 | from json5.dumper import DefaultDumper 6 | from json5.dumper import ModelDumper 7 | from json5.dumper import modelize 8 | from json5.loader import DefaultLoader 9 | from json5.loader import loads 10 | from 
json5.loader import ModelLoader 11 | from json5.model import Integer 12 | from json5.model import LineComment 13 | from json5.utils import JSON5DecodeError 14 | 15 | 16 | def test_loading_comment_raises_runtime_error_default_loader(): 17 | model = LineComment('// foo') 18 | with pytest.raises(RuntimeError): 19 | DefaultLoader().load(model) 20 | 21 | 22 | def test_loading_unknown_node_raises_error(): 23 | class Foo: 24 | ... 25 | 26 | f = Foo() 27 | with pytest.raises(NotImplementedError): 28 | DefaultLoader().load(f) 29 | 30 | 31 | def test_dumping_unknown_node_raises_error(): 32 | class Foo: 33 | ... 34 | 35 | f = Foo() 36 | with pytest.raises(NotImplementedError): 37 | DefaultDumper().dump(f) 38 | 39 | 40 | def test_known_type_in_wsc_raises_error(): 41 | class Foo: 42 | ... 43 | 44 | f = Foo() 45 | model = loads('{foo: "bar"}', loader=ModelLoader()) 46 | model.value.key_value_pairs[0].key.wsc_before.append(f) 47 | with pytest.raises(ValueError): 48 | ModelDumper().dump(model) 49 | model = loads('{foo: "bar"}', loader=ModelLoader()) 50 | model.value.key_value_pairs[0].key.wsc_after.append(f) 51 | with pytest.raises(ValueError): 52 | ModelDumper().dump(model) 53 | 54 | 55 | def test_modelizing_unknown_object_raises_error(): 56 | class Foo: 57 | ... 58 | 59 | f = Foo() 60 | with pytest.raises(NotImplementedError): 61 | modelize(f) 62 | 63 | 64 | def test_model_dumper_raises_error_for_unknown_node(): 65 | class Foo: 66 | ... 67 | 68 | f = Foo() 69 | with pytest.raises(NotImplementedError): 70 | ModelDumper().dump(f) 71 | 72 | 73 | def test_multiple_errors_all_surface_at_once(): 74 | json_string = """[\n"foo",\n"bar"\n"baz",\n"bacon"\n"eggs"]""" 75 | # 2 errors due to missing comma ^ ^ 76 | with pytest.raises(JSON5DecodeError) as exc_info: 77 | loads(json_string) 78 | assert str(exc_info.value).count('Syntax Error') == 2 79 | 80 | 81 | def test_linebreak_without_continuation_fails(): 82 | json_string = """'Hello \nworld!'""" 83 | with pytest.raises(JSON5DecodeError) as exc_info: 84 | loads(json_string) 85 | assert "Illegal" in str(exc_info.value) 86 | 87 | 88 | def test_linebreak_without_continuation_fails_double(): 89 | json_string = '''"Hello \nworld!"''' 90 | with pytest.raises(JSON5DecodeError) as exc_info: 91 | loads(json_string) 92 | assert "Illegal" in str(exc_info.value) 93 | 94 | 95 | def test_empty_input_raises_error(): 96 | with pytest.raises(JSON5DecodeError) as exc_info: 97 | loads("") 98 | assert "unexpected EOF" in str(exc_info.value) 99 | 100 | 101 | def test_backslash_x_without_two_hexadecimals_raises_error(): 102 | with pytest.raises(JSON5DecodeError) as exc_info: 103 | loads(r"'\x1'") 104 | assert "'\\x' MUST be followed by two hexadecimal digits" in str(exc_info.value) 105 | 106 | 107 | def test_null_escape_may_not_be_followed_by_decimal_digit(): 108 | with pytest.raises(JSON5DecodeError) as exc_info: 109 | loads(r"'\01'") 110 | assert "'\\0' MUST NOT be followed by a decimal digit" in str(exc_info.value) 111 | 112 | 113 | def test_backslash_x_without_two_hexadecimals_raises_error_but_for_double_quotes(): 114 | with pytest.raises(JSON5DecodeError) as exc_info: 115 | loads(r'"\x1"') 116 | assert "'\\x' MUST be followed by two hexadecimal digits" in str(exc_info.value) 117 | 118 | 119 | def test_null_escape_may_not_be_followed_by_decimal_digit_but_for_double_quotes(): 120 | with pytest.raises(JSON5DecodeError) as exc_info: 121 | loads(r'"\01"') 122 | assert "'\\0' MUST NOT be followed by a decimal digit" in str(exc_info.value) 123 | 124 | 125 | def 
test_integer_octal_hex_mutually_exclusive(): 126 | with pytest.raises(ValueError): 127 | Integer(raw_value='0o0', is_hex=True, is_octal=True) 128 | 129 | 130 | def test_invalid_identifier_via_escape_sequence(): 131 | json_string = """{\\u005Cfoo: 1}""" 132 | with pytest.raises(JSON5DecodeError) as exc_info: 133 | loads(json_string) 134 | assert "Invalid identifier name" in str(exc_info.value) 135 | 136 | 137 | @pytest.mark.parametrize( 138 | 'json_string', [""""foo \\\nbar baz \\\nbacon \neggs\"""", """'foo \\\nbar baz \\\nbacon \neggs'"""] 139 | ) 140 | def test_illegal_line_terminator_error_message(json_string): 141 | with pytest.raises(JSON5DecodeError) as exc_info: 142 | loads(json_string) 143 | 144 | exc_message = str(exc_info.value) 145 | exc_lineno_match = re.search(r'line (\d+)', exc_message) 146 | if exc_lineno_match: 147 | exc_lineno = int(exc_lineno_match.groups()[0]) 148 | else: 149 | exc_lineno = None 150 | exc_col_match = re.search(r'column (\d+)', exc_message) 151 | if exc_col_match: 152 | exc_col = int(exc_col_match.groups()[0]) 153 | else: 154 | exc_col = None 155 | exc_index_match = re.search(r'char (\d+)', exc_message) 156 | if exc_index_match: 157 | exc_index = int(exc_index_match.groups()[0]) 158 | else: 159 | exc_index = None 160 | assert (3, 7, 23) == (exc_lineno, exc_col, exc_index) 161 | 162 | 163 | def test_octals_are_rejected_by_default(): 164 | json_string = "0o123" 165 | with pytest.raises(JSON5DecodeError) as exc_info: 166 | loads(json_string) 167 | assert "Invalid integer literal" in str(exc_info.value) 168 | 169 | 170 | def test_malformed_octals_result_in_additional_error(): 171 | json_string = "058" 172 | with pytest.raises(JSON5DecodeError) as exc_info: 173 | loads(json_string) 174 | assert "Invalid octal format" in str(exc_info.value) 175 | 176 | 177 | @pytest.mark.parametrize('json_string', ['{foo: "bar}', "{foo: 'bar}"]) 178 | def test_unterminated_string(json_string): 179 | with pytest.raises(JSON5DecodeError) as exc_info: 180 | loads(json_string) 181 | assert "UNTERMINATED" in str(exc_info.value) 182 | assert "7" in str(exc_info.value) # The index where the underminated string begins 183 | 184 | 185 | def test_array_multiple_trailing_commas_raises_error(): 186 | with pytest.raises(JSON5DecodeError) as exc_info: 187 | loads('["foo",,]') 188 | assert "multiple trailing commas" in str(exc_info.value) 189 | 190 | 191 | def test_object_multiple_trailing_commas_raises_error(): 192 | with pytest.raises(JSON5DecodeError) as exc_info: 193 | loads('{foo: "bar",,}') 194 | assert "multiple trailing commas" in str(exc_info.value) 195 | 196 | 197 | def test_expecting_rbracket(): 198 | json_string = """[true, false""" 199 | with pytest.raises(JSON5DecodeError): 200 | loads(json_string) 201 | 202 | 203 | def test_array_expecting_value_or_bracket(): 204 | json_string = '[' 205 | with pytest.raises(JSON5DecodeError) as exc_info: 206 | loads(json_string) 207 | assert 'RBRACKET or value' in str(exc_info.value) 208 | 209 | 210 | def test_array_expecting_comma_or_bracket(): 211 | json_string = '[true' 212 | with pytest.raises(JSON5DecodeError) as exc_info: 213 | loads(json_string) 214 | assert "RBRACKET or COMMA" in str(exc_info.value) 215 | 216 | 217 | def test_array_expecting_value_or_bracket_trailing_comma(): 218 | json_string = '[true,' 219 | with pytest.raises(JSON5DecodeError) as exc_info: 220 | loads(json_string) 221 | 222 | assert 'RBRACKET or value' in str(exc_info.value) 223 | 224 | 225 | def test_object_expecting_value_or_brace(): 226 | json_string = '{' 227 | 
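    # Context note (added): the "RBRACE or key" wording asserted below comes from
    # the parser's `expecting` stack -- seen_LBRACE pushes ['RBRACE', 'key'] and
    # JSONParser.error() joins the innermost entry with ' or '.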
with pytest.raises(JSON5DecodeError) as exc_info: 228 | loads(json_string) 229 | assert 'RBRACE or key' in str(exc_info.value) 230 | 231 | 232 | def test_object_expecting_comma_or_brace(): 233 | json_string = '{foo: true' 234 | with pytest.raises(JSON5DecodeError) as exc_info: 235 | loads(json_string) 236 | assert "COMMA or RBRACE" in str(exc_info.value) 237 | 238 | 239 | def test_object_expecting_key_or_brace_trailing_comma(): 240 | json_string = '{foo: true,' 241 | with pytest.raises(JSON5DecodeError) as exc_info: 242 | loads(json_string) 243 | assert 'RBRACE or key' in str(exc_info.value) 244 | -------------------------------------------------------------------------------- /tests/test_json5_dump.py: -------------------------------------------------------------------------------- 1 | import json 2 | import math 3 | from io import StringIO 4 | 5 | from json5 import dump 6 | from json5 import dumps 7 | from json5.dumper import ModelDumper 8 | from json5.model import Integer 9 | from json5.model import UnaryOp 10 | 11 | 12 | def test_json_dump_empty_object(): 13 | d = {} 14 | assert dumps(d) == '{}' 15 | 16 | 17 | def test_json_dump_empty_array(): 18 | d = [] 19 | assert dumps(d) == '[]' 20 | 21 | 22 | def test_single_key_value_dump(): 23 | d = {'foo': 'bar'} 24 | assert dumps(d) == json.dumps(d) 25 | 26 | 27 | def test_dump_same_as_json(): 28 | d = { 29 | "strings": ["foo", "bar", "baz"], 30 | "numbers": [1, -1, 1.0, math.inf, -math.inf, math.nan], 31 | "lists": ['foo', ['nested_list']], 32 | } 33 | assert dumps(d) == json.dumps(d) 34 | 35 | 36 | def test_dump_indent_same_as_json(): 37 | d = { 38 | "strings": ["foo", "bar", "baz"], 39 | "numbers": [1, -1, 1.0, math.inf, -math.inf, math.nan], 40 | "lists": ['foo', ['nested_list']], 41 | } 42 | assert dumps(d, indent=4) == json.dumps(d, indent=4) 43 | 44 | 45 | def test_dump_boolean(): 46 | d = {'foo': True} 47 | assert dumps(d) == json.dumps(d) 48 | 49 | 50 | def test_dump_bool_false(): 51 | d = {'foo': False} 52 | assert dumps(d) == json.dumps(d) 53 | 54 | 55 | def test_dump_none(): 56 | d = {'foo': None} 57 | assert dumps(d) == json.dumps(d) 58 | 59 | 60 | def test_dump_unary_plus(): 61 | assert dumps(UnaryOp('+', Integer('1')), dumper=ModelDumper()) == '+1' 62 | 63 | 64 | def test_dump_file(): 65 | f = StringIO() 66 | dump("foo", f) 67 | f.seek(0) 68 | assert f.read() == '"foo"' 69 | -------------------------------------------------------------------------------- /tests/test_json5_load.py: -------------------------------------------------------------------------------- 1 | import math 2 | from io import StringIO 3 | 4 | import pytest 5 | 6 | from json5.loader import JsonIdentifier 7 | from json5.loader import load 8 | from json5.loader import loads 9 | 10 | 11 | def test_object_string_key_value_pair(): 12 | json_string = """{"foo":"bar"}""" 13 | assert loads(json_string) == {"foo": "bar"} 14 | 15 | 16 | def test_object_string_key_value_pair_with_whitespace_before_value(): 17 | json_string = """{"foo": "bar"}""" 18 | assert loads(json_string) == {"foo": "bar"} 19 | 20 | 21 | def test_multiple_key_values(): 22 | json_string = """{"foo":"bar","bacon":"eggs"}""" 23 | assert loads(json_string) == {"foo": "bar", "bacon": "eggs"} 24 | 25 | 26 | def test_multiple_string_key_values_with_whitespace(): 27 | json_string = """{"foo": "bar", "bacon" : "eggs"}""" 28 | assert loads(json_string) == {"foo": "bar", "bacon": "eggs"} 29 | 30 | 31 | def test_array_load(): 32 | json_string = """["foo","bar","baz"]""" 33 | assert loads(json_string) == ["foo", 
"bar", "baz"] 34 | 35 | 36 | def test_array_load_with_whitespace(): 37 | json_string = """[ "foo", "bar" , "baz" ]""" 38 | assert loads(json_string) == ["foo", "bar", "baz"] 39 | 40 | 41 | def test_object_load_with_newlines(): 42 | json_string = """{"foo":\n "bar"\n}""" 43 | assert loads(json_string) == {"foo": "bar"} 44 | 45 | 46 | def test_object_load_with_line_comment(): 47 | json_string = """{ // line comment 48 | "foo": "bar" 49 | }""" 50 | assert loads(json_string) == {"foo": "bar"} 51 | 52 | 53 | def test_object_with_multiline_comment(): 54 | json_string = """{ /* foo bar 55 | */ "foo": "bar" 56 | }""" 57 | assert loads(json_string) == {"foo": "bar"} 58 | 59 | 60 | def test_array_load_with_line_comment(): 61 | json_string = """[ // line comment 62 | "foo", "bar" 63 | ]""" 64 | assert loads(json_string) == ["foo", "bar"] 65 | 66 | 67 | def test_array_with_multiline_comment(): 68 | json_string = """[ /* foo bar 69 | */ "foo", "bar" 70 | ]""" 71 | assert loads(json_string) == ["foo", "bar"] 72 | 73 | 74 | def test_nested_object(): 75 | json_string = """{"foo": {"bacon": "eggs"}}""" 76 | assert loads(json_string) == {"foo": {"bacon": "eggs"}} 77 | 78 | 79 | def test_leading_whitespace_object(): 80 | json_string = """ {"foo":"bar"}""" 81 | assert loads(json_string) == {"foo": "bar"} 82 | 83 | 84 | def test_trailing_whitespace_object(): 85 | json_string = """{"foo": "bar"} """ 86 | assert loads(json_string) == {"foo": "bar"} 87 | 88 | 89 | def test_single_quoted_string(): 90 | json_string = """{'foo': 'bar'}""" 91 | assert loads(json_string) == {"foo": "bar"} 92 | 93 | 94 | def test_mixed_usage_quotes(): 95 | json_string = """{"foo": 'bar'}""" 96 | assert loads(json_string) == {"foo": "bar"} 97 | 98 | 99 | def test_trailing_comma_object(): 100 | json_string = """{"foo": "bar", }""" 101 | assert loads(json_string) == {"foo": "bar"} 102 | 103 | 104 | def test_trailing_comma_array(): 105 | json_string = """["foo","bar", "baz",]""" 106 | assert loads(json_string) == ["foo", "bar", "baz"] 107 | 108 | 109 | def test_trailing_comma_array_with_trailing_whitespace(): 110 | json_string = """["foo", "bar", "baz", ]""" 111 | assert loads(json_string) == ["foo", "bar", "baz"] 112 | 113 | 114 | def test_trailing_comma_array_with_leading_whitespace_before_comma(): 115 | json_string = """["foo", "bar", "baz" ,]""" 116 | assert loads(json_string) == ["foo", "bar", "baz"] 117 | 118 | 119 | def test_nested_arrays(): 120 | json_string = """[["foo"], ["foo","bar"], "baz"]""" 121 | assert loads(json_string) == [["foo"], ["foo", "bar"], "baz"] 122 | 123 | 124 | def test_single_quote_with_escape_single_quote(): 125 | json_string = r"""{'fo\'o': 'bar'}""" 126 | assert loads(json_string) == {"fo'o": "bar"} 127 | 128 | 129 | def test_double_quote_with_escape_double_quote(): 130 | json_string = r"""{"fo\"o": "bar"}""" 131 | assert loads(json_string) == {'fo"o': "bar"} 132 | 133 | 134 | def test_escape_sequence_strings(): 135 | json_string = r"""'\A\C\/\D\C'""" 136 | assert loads(json_string) == "AC/DC" 137 | 138 | 139 | def test_line_continuations(): 140 | json_string = r"""'Hello \ 141 | world!'""" 142 | assert loads(json_string) == "Hello world!" 143 | 144 | 145 | @pytest.mark.parametrize("terminator", ["\r\n", "\n", "\u2028", "\u2029"]) 146 | def test_line_continuations_alternate_terminators(terminator): 147 | json_string = f"""'Hello \\{terminator}world!'""" 148 | assert loads(json_string) == "Hello world!" 
149 | 150 | 151 | def test_number_literals_inf_nan(): 152 | json_string = """{ 153 | "positiveInfinity": Infinity, 154 | "negativeInfinity": -Infinity, 155 | "notANumber": NaN,}""" 156 | assert loads(json_string) == { 157 | "positiveInfinity": math.inf, 158 | "negativeInfinity": -math.inf, 159 | "notANumber": math.nan, 160 | } 161 | 162 | 163 | def test_number_literals(): 164 | json_string = """{ 165 | "integer": 123, 166 | "withFractionPart": 123.456, 167 | "onlyFractionPart": .456, 168 | "withExponent": 123e-2}""" 169 | assert loads(json_string) == { 170 | "integer": 123, 171 | "withFractionPart": 123.456, 172 | "onlyFractionPart": 0.456, 173 | "withExponent": 123e-2, 174 | } 175 | 176 | 177 | def test_escape_sequences(): 178 | json_string = r"""{ 179 | "foo": "foo\nbar\nbaz", 180 | "bar": "foo\\bar\\baz", 181 | "baz": "foo\tbar\tbaz"}""" 182 | assert loads(json_string) == { 183 | "foo": "foo\nbar\nbaz", 184 | "bar": "foo\\bar\\baz", 185 | "baz": "foo\tbar\tbaz", 186 | } 187 | 188 | 189 | def test_empty_object(): 190 | json_string = "{}" 191 | assert loads(json_string) == {} 192 | 193 | 194 | def test_empty_array(): 195 | json_string = "[]" 196 | assert loads(json_string) == [] 197 | 198 | 199 | @pytest.mark.parametrize( 200 | "json_string", 201 | [ 202 | '{"foo": "bar", "bar" "baz"', 203 | '["foo" "bar"]', 204 | "[,]", 205 | "{,}", 206 | "!", 207 | '{"foo": "bar" "bacon": "eggs"}', 208 | ], 209 | ) 210 | def test_invalid_json(json_string): 211 | with pytest.raises(Exception): 212 | loads(json_string) 213 | 214 | 215 | def test_object_with_identifier_key(): 216 | json_string = """{unquoted: "foo"}""" 217 | assert loads(json_string) == {"unquoted": "foo"} 218 | 219 | 220 | def test_identifier_persists_load(): 221 | json_string = """{unquoted: "foo"}""" 222 | assert isinstance(list(loads(json_string).keys())[0], JsonIdentifier) 223 | 224 | 225 | def test_single_item_array(): 226 | json_string = """["foo"]""" 227 | assert loads(json_string) == ["foo"] 228 | 229 | 230 | def test_single_item_array_with_trailing_comma(): 231 | json_string = """["foo" , ]""" 232 | assert loads(json_string) == ["foo"] 233 | 234 | 235 | def test_hexadecimal_load(): 236 | json_string = """ 237 | { 238 | positiveHex: 0xdecaf, 239 | negativeHex: -0xC0FFEE ,}""" 240 | assert loads(json_string) == {"positiveHex": 0xDECAF, "negativeHex": -0xC0FFEE} 241 | 242 | 243 | def test_boolean_load_true(): 244 | json_string = """{foo: true}""" 245 | assert loads(json_string) == {'foo': True} 246 | 247 | 248 | def test_boolean_load_false(): 249 | json_string = """{foo: false}""" 250 | assert loads(json_string) == {'foo': False} 251 | 252 | 253 | def test_null_load(): 254 | json_string = """{foo: null}""" 255 | assert loads(json_string) == {'foo': None} 256 | 257 | 258 | def test_unary_plus_load(): 259 | json_string = """{foo: +12 }""" 260 | assert loads(json_string) == {'foo': 12} 261 | 262 | 263 | def test_load_from_file(): 264 | f = StringIO('{foo: 123}') 265 | assert load(f) == {'foo': 123} 266 | 267 | 268 | def test_load_empty_array_with_whitespace(): 269 | json_string = "{ }" 270 | assert loads(json_string) == {} 271 | 272 | 273 | def test_load_empty_object_wtih_whitespace(): 274 | json_string = "[ ]" 275 | assert loads(json_string) == [] 276 | 277 | 278 | def test_load_empty_object_with_comments(): 279 | json_string = "{ // foo \n}" 280 | assert loads(json_string) == {} 281 | 282 | 283 | def test_load_empty_array_with_comments(): 284 | json_string = "[ // foo \n]" 285 | assert loads(json_string) == [] 286 | 287 | 288 | def 
test_load_array_with_comment_before_additional_element(): 289 | json_string = "['foo',/* comment */ 'bar', // foo\n'baz']" 290 | assert loads(json_string) == ['foo', 'bar', 'baz'] 291 | 292 | 293 | def test_load_object_with_additional_comments(): 294 | json_string = """{ 295 | "foo": /* comment */ "bar", 296 | // another comment 297 | bacon /* breakfast */: "eggs" // better than spam 298 | } 299 | """ 300 | assert loads(json_string) == {'foo': 'bar', 'bacon': 'eggs'} 301 | 302 | 303 | def test_load_latin_escape(): 304 | json_string = r'"\x5C"' 305 | assert loads(json_string) == '\\' 306 | 307 | 308 | def test_latin_escape_backslash_is_not_real_backslash(): 309 | assert loads("""'\\x5C01'""") == "\\01" 310 | 311 | 312 | def test_escape_unicode(): 313 | json_string = """ 314 | { 315 | sig\\u03A3ma: "\\u03A3 is the sum of all things" 316 | } 317 | """ 318 | assert loads(json_string) == {"sig\u03A3ma": "\u03A3 is the sum of all things"} 319 | 320 | 321 | def test_load_identifier_with_connector_punctuation(): 322 | json_string = """{foo⁀bar: 1}""" 323 | assert loads(json_string) == {"foo⁀bar": 1} 324 | -------------------------------------------------------------------------------- /tests/test_json5_official_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from collections import namedtuple 4 | 5 | import pytest 6 | 7 | from json5 import dumps 8 | from json5 import JSON5DecodeError 9 | from json5 import load 10 | from json5 import loads 11 | from json5.dumper import ModelDumper 12 | from json5.loader import ModelLoader 13 | 14 | tests_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../json5-tests')) 15 | 16 | error_specs = [] 17 | specs = [] 18 | 19 | for root, dirs, files in os.walk(tests_path): 20 | for f in files: 21 | if f.endswith('.json5') or f.endswith('.json'): 22 | specs.append(os.path.join(root, f)) 23 | elif f.endswith('.txt') or f.endswith('.js'): 24 | error_spec = f.replace('.txt', '.errorSpec').replace('.js', '.errorSpec') 25 | error_specs.append((os.path.join(root, f), os.path.join(root, error_spec))) 26 | 27 | 28 | @pytest.mark.parametrize('fp', specs) 29 | def test_official_files(fp): 30 | if not os.path.exists(tests_path): 31 | pytest.skip("Tests repo was not present in expected location. Skipping.") 32 | return 33 | load(open(fp, encoding='utf-8')) 34 | 35 | 36 | @pytest.mark.parametrize('fp', specs) 37 | def test_official_files_rt_dumps_no_error(fp): 38 | if not os.path.exists(tests_path): 39 | pytest.skip("Tests repo was not present in expected location. Skipping.") 40 | with open(fp, encoding='utf-8') as f: 41 | json_string = f.read() 42 | dumps(loads(json_string)) 43 | 44 | 45 | @pytest.mark.parametrize('fp', specs) 46 | def test_official_files_rt_model(fp): 47 | if not os.path.exists(tests_path): 48 | pytest.skip("Tests repo was not present in expected location. Skipping.") 49 | with open(fp, encoding='utf-8') as f: 50 | json_string = f.read() 51 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 52 | 53 | 54 | @pytest.mark.parametrize(('input_file', 'expected'), error_specs) 55 | def test_official_error_specs(input_file, expected): 56 | if not os.path.exists(tests_path): 57 | pytest.skip("Tests repo was not present in expected location. 
Skipping.") 58 | return 59 | with pytest.raises(JSON5DecodeError): 60 | load(open(input_file, encoding='utf-8')) 61 | 62 | 63 | @pytest.mark.parametrize(('input_file', 'expected'), error_specs) 64 | def test_official_error_specs_positions(input_file, expected): 65 | ErrorInfo = namedtuple('ErrorInfo', field_names=['line', 'col', 'at']) 66 | if not os.path.exists(tests_path): 67 | pytest.skip("Tests repo was not present in expected location. Skipping.") 68 | return 69 | if any(name in input_file for name in ['top-level-inline-comment.txt', 'unescaped-multi-line-string.txt']): 70 | pytest.xfail("We make better error messages for these") 71 | if os.path.exists(expected): 72 | errorspec = load(open(expected, encoding='utf-8')) 73 | else: 74 | pytest.skip("No error spec") 75 | return 76 | 77 | with pytest.raises(JSON5DecodeError) as exc_info: 78 | load(open(input_file, encoding='utf-8')) 79 | 80 | at = errorspec['at'] 81 | lineno = errorspec['lineNumber'] 82 | col = errorspec['columnNumber'] 83 | # msg = errorspec['message'] 84 | exc_message = str(exc_info.value) 85 | exc_lineno_match = re.search(r'line (\d+)', exc_message) 86 | if exc_lineno_match: 87 | exc_lineno = int(exc_lineno_match.groups()[0]) 88 | else: 89 | exc_lineno = None 90 | exc_col_match = re.search(r'column (\d+)', exc_message) 91 | if exc_col_match: 92 | exc_col = int(exc_col_match.groups()[0]) 93 | else: 94 | exc_col = None 95 | exc_index_match = re.search(r'char (\d+)', exc_message) 96 | if exc_index_match: 97 | exc_index = int(exc_index_match.groups()[0]) 98 | else: 99 | exc_index = None 100 | assert ErrorInfo(exc_lineno, exc_col, exc_index) == ErrorInfo(lineno, col, at - 1), f"{input_file} {exc_message}" 101 | -------------------------------------------------------------------------------- /tests/test_json_helpers.py: -------------------------------------------------------------------------------- 1 | from json5.dumper import modelize 2 | from json5.model import Identifier 3 | 4 | 5 | def test_identifier_can_hash_like_string(): 6 | d = {Identifier('foo', raw_value='foo'): 'bar'} 7 | assert d['foo'] == 'bar' 8 | 9 | 10 | def test_identifier_equals_like_string(): 11 | assert Identifier('foo', raw_value='foo') == 'foo' 12 | 13 | 14 | def test_repr_does_not_contain_wsc(): 15 | model = modelize({'foo': 'bar'}) 16 | assert 'wsc' not in repr(model) 17 | 18 | 19 | def test_identifier_does_not_need_explicit_raw_value(): 20 | assert Identifier('foo').raw_value == 'foo' 21 | -------------------------------------------------------------------------------- /tests/test_loads_options.py: -------------------------------------------------------------------------------- 1 | import json 2 | from decimal import Decimal 3 | 4 | import json5 5 | 6 | 7 | def int_plus_one(int_string): 8 | assert isinstance(int_string, str) 9 | return int(int_string) + 1 10 | 11 | 12 | def float_to_decimal(float_string): 13 | assert isinstance(float_string, str) 14 | return Decimal(float_string) 15 | 16 | 17 | def const_to_silly(const_string): 18 | assert isinstance(const_string, str) 19 | return f'Something Silly {const_string}' 20 | 21 | 22 | def true_object_hook(d): 23 | return {k: True for k in d} 24 | 25 | 26 | def true_object_pair_hook(kvpairs): 27 | return {k: True for k, v in kvpairs} 28 | 29 | 30 | def test_parse_int(): 31 | json_string = """{"foo": 5}""" 32 | assert json5.loads(json_string, parse_int=int_plus_one) == json.loads(json_string, parse_int=int_plus_one) 33 | assert json5.loads(json_string, parse_int=int_plus_one)['foo'] == 6 34 | 35 | 36 | def 
test_parse_float(): 37 | json_string = """{"foo": 5.0}""" 38 | assert json5.loads(json_string, parse_float=float_to_decimal) == json.loads( 39 | json_string, parse_float=float_to_decimal 40 | ) 41 | 42 | 43 | def test_parse_constant_nan(): 44 | json_string = """{"foo": NaN}""" 45 | assert json5.loads(json_string, parse_constant=const_to_silly) == {'foo': 'Something Silly NaN'} 46 | assert json5.loads(json_string, parse_constant=const_to_silly) == json.loads( 47 | json_string, parse_constant=const_to_silly 48 | ) 49 | 50 | 51 | def test_parse_constant_positive_infinity(): 52 | json_string = """{"foo": Infinity}""" 53 | assert json5.loads(json_string, parse_constant=const_to_silly) == {'foo': 'Something Silly Infinity'} 54 | assert json5.loads(json_string, parse_constant=const_to_silly) == json.loads( 55 | json_string, parse_constant=const_to_silly 56 | ) 57 | 58 | 59 | def test_parse_constant_negative_infinity(): 60 | json_string = """{"foo": -Infinity}""" 61 | assert json5.loads(json_string, parse_constant=const_to_silly) == {'foo': 'Something Silly -Infinity'} 62 | assert json5.loads(json_string, parse_constant=const_to_silly) == json.loads( 63 | json_string, parse_constant=const_to_silly 64 | ) 65 | 66 | 67 | def test_object_hook(): 68 | json_string = """{"foo": "bar", "bacon": "eggs"}""" 69 | result = json5.loads(json_string, object_hook=true_object_hook) 70 | assert result == json.loads(json_string, object_hook=true_object_hook) 71 | assert all(value is True for key, value in result.items()) 72 | 73 | 74 | def test_object_pairs_hook(): 75 | json_string = """{"foo": "bar", "bacon": "eggs"}""" 76 | result = json5.loads(json_string, object_pairs_hook=true_object_pair_hook) 77 | assert result == json.loads(json_string, object_pairs_hook=true_object_pair_hook) 78 | assert all(value is True for key, value in result.items()) 79 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | import pytest 4 | 5 | import json5.loader 6 | import json5.model 7 | 8 | TEST_TEXT = '''\ 9 | { 10 | "string_on_same_line": "string on same line", 11 | "multiline_dq_string": "this line has a \ 12 | continuation", 13 | "leadingDecimalPoint": .8675309 , 14 | "andTrailing": 8675309., 15 | "trailingComma": 'in objects', 16 | "backwardsCompatible": "with JSON", 17 | } 18 | ''' 19 | 20 | model = json5.loads(TEST_TEXT, loader=json5.loader.ModelLoader()) 21 | tree = ast.parse(TEST_TEXT) 22 | ast_nodes = [ 23 | node for node in list(ast.walk(tree)) if not isinstance(node, (ast.Expr, ast.Load, ast.Module, ast.UnaryOp)) 24 | ] 25 | json5_nodes = [ 26 | node 27 | for node in list(json5.model.walk(model)) 28 | if not isinstance(node, (json5.model.TrailingComma, json5.model.JSONText)) 29 | ] 30 | 31 | assert len(ast_nodes) == len(json5_nodes) 32 | 33 | 34 | @pytest.mark.parametrize('ast_node, json5_node', list(zip(ast_nodes, json5_nodes))) 35 | @pytest.mark.parametrize( 36 | 'attr_name', 37 | [ 38 | 'col_offset', 39 | 'end_col_offset', 40 | 'lineno', 41 | 'end_lineno', 42 | ], 43 | ) 44 | def test_node_attribute_accuracy(attr_name: str, ast_node, json5_node): 45 | assert getattr(json5_node, attr_name) == getattr( 46 | ast_node, attr_name 47 | ), f'{attr_name} did not match {ast_node!r}, {json5_node!r}' 48 | -------------------------------------------------------------------------------- /tests/test_model_loader_dumper.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from json5.dumper import dumps 4 | from json5.dumper import ModelDumper 5 | from json5.loader import loads 6 | from json5.loader import ModelLoader 7 | 8 | 9 | @pytest.mark.parametrize( 10 | 'json_string', 11 | [ 12 | """{"foo":"bar"}""", 13 | """{"foo": "bar"}""", 14 | """{"foo":"bar","bacon":"eggs"}""", 15 | """{"foo": "bar", "bacon" : "eggs"}""", 16 | """["foo","bar","baz"]""", 17 | """[ "foo", "bar" , "baz" ]""", 18 | """{"foo":\n "bar"\n}""", 19 | """{"foo": {"bacon": "eggs"}}""", 20 | """ {"foo":"bar"}""", 21 | """{"foo": "bar"} """, 22 | """{'foo': 'bar'}""", 23 | """{"foo": 'bar'}""", 24 | """{"foo": "bar",}""", 25 | """["foo","bar", "baz",]""", 26 | """["foo", "bar", "baz", ]""", 27 | """["foo", "bar", "baz" ,]""", 28 | """[["foo"], ["foo","bar"], "baz"]""", 29 | """{unquoted: "foo"}""", 30 | """{unquoted: "foo"}""", 31 | """["foo"]""", 32 | """["foo" , ]""", 33 | ], 34 | ) 35 | def test_round_trip_model_load_dump(json_string): 36 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 37 | 38 | 39 | def test_object_load_with_line_comment(): 40 | json_string = """{ // line comment 41 | "foo": "bar" 42 | }""" 43 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 44 | 45 | 46 | def test_object_with_multiline_comment(): 47 | json_string = """{ /* foo bar 48 | */ "foo": "bar" // Foobar 49 | }""" 50 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 51 | 52 | 53 | def test_array_load_with_line_comment(): 54 | json_string = """[ // line comment 55 | "foo", "bar" 56 | ]""" 57 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 58 | 59 | 60 | def test_array_with_multiline_comment(): 61 | json_string = """[ /* foo bar 62 | */ "foo", "bar" 63 | ]""" 64 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 65 | 66 | 67 | def test_nested_object(): 68 | json_string = """{"foo": {"bacon": "eggs"}}""" 69 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 70 | 71 | 72 | def test_single_quote_with_escape_single_quote(): 73 | json_string = r"""{'fo\'o': 'bar'}""" 74 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 75 | 76 | 77 | def test_double_quote_with_escape_double_quote(): 78 | json_string = r"""{"fo\"o": "bar"}""" 79 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 80 | 81 | 82 | def test_escape_sequence_strings(): 83 | json_string = r"""'\A\C\/\D\C'""" 84 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 85 | 86 | 87 | def test_line_continuations(): 88 | json_string = r"""'Hello \ 89 | world!'""" 90 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 91 | 92 | 93 | @pytest.mark.parametrize("terminator", ["\r\n", "\n", "\u2028", "\u2029"]) 94 | def test_line_continuations_alternate_terminators(terminator): 95 | json_string = f"""'Hello \\{terminator}world!'""" 96 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 97 | 98 | 99 | def test_number_literals_inf_nan(): 100 | json_string = """{ 101 | "positiveInfinity": Infinity, 102 | "negativeInfinity": -Infinity, 103 | "notANumber": NaN,}""" 104 | assert dumps(loads(json_string, 
loader=ModelLoader()), dumper=ModelDumper()) == json_string 105 | 106 | 107 | def test_number_literals(): 108 | json_string = """{ 109 | "integer": 123, 110 | "withFractionPart": 123.456, 111 | "onlyFractionPart": .456, 112 | "withExponent": 123e-2}""" 113 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 114 | 115 | 116 | def test_escape_sequences(): 117 | json_string = r"""{ 118 | "foo": "foo\nbar\nbaz", 119 | "bar": "foo\\bar\\baz", 120 | "baz": "foo\tbar\tbaz"}""" 121 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 122 | 123 | 124 | def test_empty_object(): 125 | json_string = "{}" 126 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 127 | 128 | 129 | def test_empty_array(): 130 | json_string = "[]" 131 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 132 | 133 | 134 | def test_hexadecimal_load(): 135 | json_string = """ 136 | { 137 | positiveHex: 0xdecaf, 138 | negativeHex: -0xC0FFEE,}""" 139 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 140 | 141 | 142 | def test_load_empty_object_with_whitespace(): 143 | json_string = "{ }" 144 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 145 | 146 | 147 | def test_load_empty_array_with_whitespace(): 148 | json_string = "[ ]" 149 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 150 | 151 | 152 | def test_load_empty_object_with_comments(): 153 | json_string = "{ // foo \n}" 154 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 155 | 156 | 157 | def test_load_empty_array_with_comments(): 158 | json_string = "[ // foo \n]" 159 | assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string 160 | -------------------------------------------------------------------------------- /tests/test_modelizer.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import pytest 4 | 5 | from json5.dumper import dumps 6 | from json5.dumper import ModelDumper 7 | from json5.dumper import modelize 8 | from json5.loader import loads 9 | 10 | 11 | @pytest.mark.parametrize( 12 | 'obj', 13 | [ 14 | {'foo': 'bar', 'bacon': 'eggs'}, 15 | ['foo', 'bar', 'baz'], 16 | {}, 17 | [], 18 | ['foo'], 19 | {'foo': 'bar'}, 20 | "Hello world!", 21 | 123, 22 | 1.0, 23 | -1.0, 24 | -2, 25 | math.inf, 26 | -math.inf, 27 | True, 28 | False, 29 | None, 30 | ], 31 | ) 32 | def test_modelize_objects(obj): 33 | assert loads(dumps(modelize(obj), dumper=ModelDumper())) == obj 34 | 35 | 36 | def test_modelize_nan(): 37 | obj = math.nan 38 | assert loads(dumps(modelize(obj), dumper=ModelDumper())) is obj 39 | 40 | 41 | def test_modelize_double_quote_string(): 42 | s = "'" 43 | assert loads(dumps(modelize(s), dumper=ModelDumper())) == s 44 | -------------------------------------------------------------------------------- /tests/test_regressions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from json5 import JSON5DecodeError 4 | from json5 import loads 5 | 6 | 7 | # These tests used to cause the program to hang indefinitely 8 | def test_no_hang(): 9 | json_string = '{"foo": ["foo", [0o11]}, ["baz"]]' 10 | with pytest.raises(JSON5DecodeError): 11 | loads(json_string) 12 | 13 | 14 | def 
test_no_hang2(): 15 | json_string = '[{foo:]}' 16 | with pytest.raises(JSON5DecodeError): 17 | loads(json_string) 18 | 19 | 20 | def test_no_hang3(): 21 | json_string = '[true, {foo:]false}' 22 | with pytest.raises(JSON5DecodeError): 23 | loads(json_string) 24 | -------------------------------------------------------------------------------- /tests/test_roundtrip.py: -------------------------------------------------------------------------------- 1 | # from json5.loader import RoundTripLoader, loads 2 | # from json5.dumper import dumps, RoundTripDumper 3 | # 4 | # 5 | # def test_load_string(): 6 | # json_string = """{"foo":"bar"}""" 7 | # data = loads(json_string, loader=RoundTripLoader()) 8 | # assert data['foo'] == 'bar' 9 | # 10 | # 11 | # def test_load_change_dump_string(): 12 | # json_string = """{"foo": "bar"}""" 13 | # data = loads(json_string, loader=RoundTripLoader()) 14 | # data['foo'] = 'baz' 15 | # new_json_string = dumps(data, dumper=RoundTripDumper()) 16 | # assert 'baz' in new_json_string 17 | # assert new_json_string == """{"foo": "baz"}""" 18 | # 19 | # 20 | # def test_load_change_whitespace_dump_string(): 21 | # json_string = """["foo" ]""" 22 | # data = loads(json_string, loader=RoundTripLoader()) 23 | # elem = data[0] 24 | # elem.wsc_after = [] 25 | # new_json_string = dumps(data, dumper=RoundTripDumper()) 26 | # assert new_json_string == """["foo"]""" 27 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py38,py39,py310,py311 3 | 4 | [testenv] 5 | deps = -rrequirements-dev.txt 6 | passenv = 7 | CI 8 | PYTHONUNBUFFERED 9 | commands = 10 | coverage run -m pytest -s -vvv 11 | mypy --strict --disable-error-code name-defined json5 12 | --------------------------------------------------------------------------------