├── .github ├── CODEOWNERS └── workflows │ └── tests.yaml ├── .gitignore ├── CHANGES.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── pyproject.toml ├── releasing.md ├── setup.cfg ├── src └── canonicaljson │ ├── __init__.py │ └── py.typed ├── tests ├── __init__.py └── test_canonicaljson.py └── tox.ini /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Automatically request reviews from the synapse-core team when a pull request comes in. 2 | * @matrix-org/synapse-core 3 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | branches: ["main"] 5 | pull_request: 6 | 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.ref }} 9 | cancel-in-progress: true 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | toxenv: 17 | - "pep8" 18 | - "isort" 19 | - "black" 20 | - "mypy" 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | - uses: actions/setup-python@v2 25 | with: 26 | python-version: "3.7" 27 | - run: pip install tox 28 | - run: tox -e ${{ matrix.toxenv }} 29 | 30 | unittest: 31 | runs-on: ubuntu-latest 32 | strategy: 33 | matrix: 34 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.7"] 35 | 36 | steps: 37 | - uses: actions/checkout@v2 38 | - uses: actions/setup-python@v2 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | - run: pip install tox 42 | - run: tox -e py 43 | 44 | packaging: 45 | runs-on: ${{ matrix.os }} 46 | strategy: 47 | matrix: 48 | os: [ubuntu-latest, macos-latest] 49 | 50 | steps: 51 | - uses: actions/checkout@v2 52 | - uses: actions/setup-python@v2 53 | - run: python -m pip install tox 54 | - run: tox -e packaging 55 | 56 | macos-tests: 57 | runs-on: macos-latest 58 | strategy: 59 | matrix: 60 | python-version: ["3.8", "3.9", "3.10", "3.11"] 
61 | 62 | steps: 63 | - uses: actions/checkout@v2 64 | - uses: actions/setup-python@v2 65 | with: 66 | python-version: ${{ matrix.python-version }} 67 | - run: pip install tox 68 | - run: tox -e py 69 | 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.coverage 2 | /.idea/ 3 | /.tox/ 4 | __pycache__/ 5 | *.egg-info/ 6 | *.pyc 7 | /build/ 8 | /dist/ 9 | .python-version 10 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | Version 2.0.0 released 2023-03-15 2 | 3 | Additions: 4 | 5 | * Add a generic `register_preserialisation_callback` mechanism, which 6 | allows users to teach canonicaljson how to JSON-encode custom types. 7 | 8 | Breaking changes: 9 | 10 | * Remove support for serialising `frozendict` instances. Use the new 11 | `register_preserialisation_callback` mechanism to replace this 12 | functionality if needed. 13 | * Remove support for `simplejson` and for the `set_json_library` API for configuring alternative 14 | json libraries. 15 | 16 | Version 1.6.5 released 2023-02-15 17 | 18 | * Update type hints to pass under mypy 1.0. 19 | 20 | Version 1.6.4 released 2022-11-04 21 | 22 | * Remove unused `setuptools_scm` build requirement. 23 | 24 | Version 1.6.3 released 2022-09-23 25 | 26 | * Properly package the `canonicaljson` module. 27 | 28 | Version 1.6.2 released 2022-06-08 29 | 30 | * Bump version to 1.6.2 31 | 32 | The v1.6.1 tag was created without having bumped the version in 33 | `canonicaljson.py`. This means that installing from source at the `v1.6.1` tag will 34 | install the package with the wrong version in its metadata. The 1.6.1 artefacts 35 | uploaded to PyPI did have the correct version; `pip install`ing 36 | directly from the archive resulted in a consistent version number. 
37 | 38 | Version 1.6.1 released 2022-05-02 39 | 40 | * Add type annotations. 41 | 42 | Version 1.6.0 released 2022-03-04 43 | 44 | * `frozendict` is now an optional dependency; it is no longer required. 45 | 46 | Version 1.5.0 released 2021-10-20 47 | 48 | * Switch CI from Travis to GitHub Actions 49 | * Add code to handle frozendict implementations using c-extension 50 | * Add tests for Python 3.10 51 | * Remove outdated Debian packaging code 52 | 53 | Version 1.4.0 released 2020-09-03 54 | 55 | * Fix producing non-standard JSON for Infinity, -Infinity, and NaN. This could 56 | cause errors when encoding objects into canonical JSON that previously used to 57 | work, but were incompatible with JSON implementations in other languages. 58 | * Use UTF-8 to fix ASCII encoding errors when data containing Unicode was 59 | attempted to be pretty-printed. 60 | 61 | Version 1.3.0 released 2020-08-14 62 | 63 | * The minimum version of simplejson was bumped to 3.14.0. 64 | * Obsolete workaround for slow encoding of Unicode characters was removed. 65 | * New APIs were added to iteratively encode JSON. 66 | 67 | Version 1.2.0 released 2020-07-27 68 | 69 | * JSON from the standard library is used automatically on PyPy. 70 | * Support for Python versions which are end-of-lifed was dropped, Python >= 3.5 71 | is supported and tested in continuous integration. 72 | * An API to configure the underlying JSON library was added (`set_json_library`). 73 | 74 | Version 1.1.4 released 2018-05-23 75 | 76 | * Fix error when encoding non-BMP characters on UCS-2 python builds 77 | (fixes issue #12). 78 | 79 | Version 1.1.3 released 2018-04-13 80 | 81 | * Bump dependency on frozendict to >=1.0, to fix conflicts with older 82 | versions. 
83 | 84 | Version 1.1.2 released 2018-04-12 85 | 86 | * Fix escaping of control characters U+0000 to U+001F AGAIN, which was STILL 87 | broken in the previous release 88 | 89 | Version 1.1.1 released 2018-04-11 90 | 91 | * Fix escaping of control characters U+0000 to U+001F, which was broken in 92 | the previous release 93 | 94 | Version 1.1.0 released 2018-04-06 95 | 96 | * Significant performance improvements 97 | ([\#7](https://github.com/matrix-org/python-canonicaljson/pull/7), 98 | [\#8](https://github.com/matrix-org/python-canonicaljson/pull/8), 99 | [\#9](https://github.com/matrix-org/python-canonicaljson/pull/9)) 100 | 101 | Version 1.0.0 released 2015-08-21 102 | 103 | * Initial release 104 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | prune .github 2 | include *.md 3 | include tox.ini 4 | recursive-include tests *.py -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Canonical JSON 2 | ============== 3 | 4 | .. image:: https://img.shields.io/pypi/v/canonicaljson.svg 5 | :target: https://pypi.python.org/pypi/canonicaljson/ 6 | :alt: Latest Version 7 | 8 | Features 9 | -------- 10 | 11 | * Encodes objects and arrays as `RFC 7159`_ JSON. 12 | * Sorts object keys so that you get the same result each time. 13 | * Has no insignificant whitespace to make the output as small as possible. 14 | * Escapes only the characters that must be escaped, U+0000 to U+001F / U+0022 / 15 | U+005C, to keep the output as small as possible. 16 | * Uses the shortest escape sequence for each escaped character. 17 | * Encodes the JSON as UTF-8. 18 | * Can be configured to encode custom types unknown to the stdlib JSON encoder. 19 | 20 | Supports Python versions 3.7 and newer. 21 | 22 | .. _`RFC 7159`: https://tools.ietf.org/html/rfc7159 23 | 24 | Installing 25 | ---------- 26 | 27 | .. code:: bash 28 | 29 | pip install canonicaljson 30 | 31 | Using 32 | ----- 33 | 34 | To encode an object as canonical JSON: 35 | 36 | .. code:: python 37 | 38 | import canonicaljson 39 | assert canonicaljson.encode_canonical_json({}) == b'{}' 40 | 41 | There's also an iterator version: 42 | 43 | .. code:: python 44 | 45 | import canonicaljson 46 | assert b''.join(canonicaljson.iterencode_canonical_json({})) == b'{}' 47 | 48 | A preserialisation hook allows you to encode objects which aren't encodable by the 49 | standard library ``JSONEncoder``. 50 | 51 | .. 
code:: python 52 | 53 | import canonicaljson 54 | from typing import Dict 55 | 56 | class CustomType: 57 | pass 58 | 59 | def callback(c: CustomType) -> Dict[str, str]: 60 | return {"Hello": "world!"} 61 | 62 | canonicaljson.register_preserialisation_callback(CustomType, callback) 63 | assert canonicaljson.encode_canonical_json(CustomType()) == b'{"Hello":"world!"}' 64 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.mypy] 2 | show_error_codes = true 3 | strict = true 4 | 5 | files = ["."] 6 | 7 | [build-system] 8 | requires = [ 9 | "setuptools >= 35.0.2", 10 | ] 11 | build-backend = "setuptools.build_meta" 12 | -------------------------------------------------------------------------------- /releasing.md: -------------------------------------------------------------------------------- 1 | Releasing python-canonicaljson 2 | ============================== 3 | 4 | * bump version in `src/canonicaljson/__init__.py` 5 | * update changelog 6 | * Build and upload to PyPI: 7 | * `rm -r ./**/*.egg-info` 8 | * `rm -r dist` 9 | * `python -m build` 10 | * `twine upload -s dist/*` 11 | * `git tag -s vX.Y.Z` 12 | * `git push` 13 | * `git push --tags` -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = canonicaljson 3 | description = Canonical JSON 4 | long_description = file: README.rst 5 | long_description_content_type = text/x-rst 6 | version = attr: canonicaljson.__version__ 7 | keywords = json 8 | url = https://github.com/matrix-org/python-canonicaljson 9 | license = Apache License, Version 2.0 10 | author = Matrix.org Team and Contributors 11 | author_email = packages@matrix.org 12 | license_file = LICENSE 13 | classifiers = 14 | Development Status :: 5 - Production/Stable 15 | Intended 
Audience :: Developers 16 | License :: OSI Approved :: Apache Software License 17 | Programming Language :: Python :: 3 18 | 19 | 20 | [options] 21 | zip_safe = true 22 | python_requires = >=3.7 23 | 24 | package_dir = =src 25 | packages = 26 | canonicaljson 27 | 28 | [options.package_data] 29 | canonicaljson = py.typed 30 | 31 | [flake8] 32 | # see https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes 33 | # for error codes. The ones we ignore are: 34 | # E501: Line too long (black enforces this for us) 35 | ignore=E501 36 | 37 | [isort] 38 | profile=black 39 | src_paths=src,tests 40 | -------------------------------------------------------------------------------- /src/canonicaljson/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2014 OpenMarket Ltd 2 | # Copyright 2018 New Vector Ltd 3 | # Copyright 2022 The Matrix.org Foundation C.I.C. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | import functools 17 | import json 18 | from typing import Callable, Generator, Type, TypeVar 19 | 20 | __version__ = "2.0.0" 21 | 22 | 23 | @functools.singledispatch 24 | def _preprocess_for_serialisation(obj: object) -> object: # pragma: no cover 25 | """Transform an `obj` into something the JSON library knows how to encode. 26 | 27 | This is only called for types that the JSON library does not recognise. 
28 | """ 29 | raise TypeError( 30 | "Object of type %s is not JSON serializable" % obj.__class__.__name__ 31 | ) 32 | 33 | 34 | T = TypeVar("T") 35 | 36 | 37 | def register_preserialisation_callback( 38 | data_type: Type[T], callback: Callable[[T], object] 39 | ) -> None: 40 | """ 41 | Register a `callback` to preprocess `data_type` objects unknown to the JSON encoder. 42 | 43 | When canonicaljson encodes an object `x` at runtime that its JSON library does not 44 | know how to encode, it will 45 | - select a `callback`, 46 | - compute `y = callback(x)`, then 47 | - JSON-encode `y` and return the result. 48 | 49 | The `callback` should return an object that is JSON-serialisable by the stdlib 50 | json module. 51 | 52 | If this is called multiple times with the same `data_type`, the most recently 53 | registered callback is used when serialising that `data_type`. 54 | """ 55 | if data_type is object: 56 | raise ValueError("Cannot register callback for the `object` type") 57 | _preprocess_for_serialisation.register(data_type, callback) 58 | 59 | 60 | # Declare these once for re-use. 61 | _canonical_encoder = json.JSONEncoder( 62 | ensure_ascii=False, 63 | allow_nan=False, 64 | separators=(",", ":"), 65 | sort_keys=True, 66 | default=_preprocess_for_serialisation, 67 | ) 68 | _pretty_encoder = json.JSONEncoder( 69 | ensure_ascii=False, 70 | allow_nan=False, 71 | indent=4, 72 | sort_keys=True, 73 | default=_preprocess_for_serialisation, 74 | ) 75 | 76 | 77 | def encode_canonical_json(data: object) -> bytes: 78 | """Encodes the given `data` as a UTF-8 canonical JSON bytestring. 79 | 80 | This encoding is the shortest possible. Dictionary keys are 81 | lexicographically sorted by unicode code point. 82 | """ 83 | s = _canonical_encoder.encode(data) 84 | return s.encode("utf-8") 85 | 86 | 87 | def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]: 88 | """Iteratively encodes the given `data` as a UTF-8 canonical JSON bytestring. 
89 | 90 | This yields one or more bytestrings; concatenating them all together yields the 91 | full encoding of `data`. Building up the encoding gradually in this way allows us to 92 | encode large pieces of `data` without blocking other tasks. 93 | 94 | This encoding is the shortest possible. Dictionary keys are 95 | lexicographically sorted by unicode code point. 96 | """ 97 | for chunk in _canonical_encoder.iterencode(data): 98 | yield chunk.encode("utf-8") 99 | 100 | 101 | def encode_pretty_printed_json(data: object) -> bytes: 102 | """Encodes the given `data` as a UTF-8 human-readable JSON bytestring.""" 103 | 104 | return _pretty_encoder.encode(data).encode("utf-8") 105 | 106 | 107 | def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None]: 108 | """Iteratively encodes the given `data` as a UTF-8 human-readable JSON bytestring. 109 | 110 | This yields one or more bytestrings; concatenating them all together yields the 111 | full encoding of `data`. Building up the encoding gradually in this way allows us to 112 | encode large pieces of `data` without blocking other tasks. 113 | """ 114 | for chunk in _pretty_encoder.iterencode(data): 115 | yield chunk.encode("utf-8") 116 | -------------------------------------------------------------------------------- /src/canonicaljson/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/matrix-org/python-canonicaljson/f7926e82d2c65199e5c99b54544ba17507dc63b4/src/canonicaljson/py.typed -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The Matrix.org Foundation C.I.C. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/test_canonicaljson.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 OpenMarket Ltd 2 | # Copyright 2018 New Vector Ltd 3 | # Copyright 2022 The Matrix.org Foundation C.I.C. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | import unittest 17 | from math import inf, nan 18 | from unittest.mock import Mock 19 | 20 | from canonicaljson import ( 21 | encode_canonical_json, 22 | encode_pretty_printed_json, 23 | iterencode_canonical_json, 24 | iterencode_pretty_printed_json, 25 | register_preserialisation_callback, 26 | ) 27 | 28 | 29 | class TestCanonicalJson(unittest.TestCase): 30 | def test_encode_canonical(self) -> None: 31 | self.assertEqual(encode_canonical_json({}), b"{}") 32 | 33 | # ctrl-chars should be encoded. 
34 | self.assertEqual( 35 | encode_canonical_json("text\u0003\r\n"), 36 | b'"text\\u0003\\r\\n"', 37 | ) 38 | 39 | # quotes and backslashes should be escaped. 40 | self.assertEqual( 41 | encode_canonical_json(r'"\ test'), 42 | b'"\\"\\\\ test"', 43 | ) 44 | 45 | # non-ascii should come out utf8-encoded. 46 | self.assertEqual( 47 | encode_canonical_json({"la merde amusée": "💩"}), 48 | b'{"la merde amus\xc3\xa9e":"\xF0\x9F\x92\xA9"}', 49 | ) 50 | 51 | # so should U+2028 and U+2029 52 | self.assertEqual( 53 | encode_canonical_json({"spaces": "\u2028 \u2029"}), 54 | b'{"spaces":"\xe2\x80\xa8 \xe2\x80\xa9"}', 55 | ) 56 | 57 | # but we need to watch out for 'u1234' after backslash, which should 58 | # get encoded to an escaped backslash, followed by u1234 59 | self.assertEqual( 60 | encode_canonical_json("\\u1234"), 61 | b'"\\\\u1234"', 62 | ) 63 | 64 | # Iteratively encoding should work. 65 | self.assertEqual(list(iterencode_canonical_json({})), [b"{}"]) 66 | 67 | def test_ascii(self) -> None: 68 | """ 69 | Ensure the proper ASCII characters are escaped. 70 | 71 | See https://matrix.org/docs/spec/appendices#grammar. 72 | """ 73 | # Some characters go to their common shorthands. 74 | escaped = { 75 | 0x08: b'"\\b"', 76 | 0x09: b'"\\t"', 77 | 0x0A: b'"\\n"', 78 | 0x0C: b'"\\f"', 79 | 0x0D: b'"\\r"', 80 | 0x22: b'"\\""', 81 | 0x5C: b'"\\\\"', 82 | } 83 | for c, expected in escaped.items(): 84 | self.assertEqual(encode_canonical_json(chr(c)), expected) 85 | 86 | # Others go to the \uXXXX. 87 | hex_escaped = list(range(0x08)) + [0x0B] + list(range(0x0E, 0x20)) 88 | for c in hex_escaped: 89 | self.assertEqual(encode_canonical_json(chr(c)), b'"\\u00%02x"' % (c,)) 90 | 91 | # And other characters are passed unescaped. 
92 | unescaped = [0x20, 0x21] + list(range(0x23, 0x5C)) + list(range(0x5D, 0x7E)) 93 | for c in unescaped: 94 | s = chr(c) 95 | self.assertEqual(encode_canonical_json(s), b'"' + s.encode("ascii") + b'"') 96 | 97 | def test_encode_pretty_printed(self) -> None: 98 | self.assertEqual(encode_pretty_printed_json({}), b"{}") 99 | self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"]) 100 | 101 | # non-ascii should come out utf8-encoded. 102 | self.assertEqual( 103 | encode_pretty_printed_json({"la merde amusée": "💩"}), 104 | b'{\n "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}', 105 | ) 106 | 107 | def test_unknown_type(self) -> None: 108 | class Unknown(object): 109 | pass 110 | 111 | unknown_object = Unknown() 112 | with self.assertRaises(Exception): 113 | encode_canonical_json(unknown_object) 114 | 115 | with self.assertRaises(Exception): 116 | encode_pretty_printed_json(unknown_object) 117 | 118 | def test_invalid_float_values(self) -> None: 119 | """Infinity/-Infinity/NaN are not allowed in canonicaljson.""" 120 | 121 | with self.assertRaises(ValueError): 122 | encode_canonical_json(inf) 123 | 124 | with self.assertRaises(ValueError): 125 | encode_pretty_printed_json(inf) 126 | 127 | with self.assertRaises(ValueError): 128 | encode_canonical_json(-inf) 129 | 130 | with self.assertRaises(ValueError): 131 | encode_pretty_printed_json(-inf) 132 | 133 | with self.assertRaises(ValueError): 134 | encode_canonical_json(nan) 135 | 136 | with self.assertRaises(ValueError): 137 | encode_pretty_printed_json(nan) 138 | 139 | def test_encode_unknown_class_raises(self) -> None: 140 | class C: 141 | pass 142 | 143 | with self.assertRaises(Exception): 144 | encode_canonical_json(C()) 145 | 146 | def test_preserialisation_callback(self) -> None: 147 | class C: 148 | pass 149 | 150 | # Naughty: this alters the global state of the module. However this 151 | # `C` class is limited to this test only, so this shouldn't affect 152 | # other types and other tests. 
153 | register_preserialisation_callback(C, lambda c: "I am a C instance") 154 | 155 | result = encode_canonical_json(C()) 156 | self.assertEqual(result, b'"I am a C instance"') 157 | 158 | def test_cannot_register_preserialisation_callback_for_object(self) -> None: 159 | with self.assertRaises(Exception): 160 | register_preserialisation_callback( 161 | object, lambda c: "shouldn't be able to do this" 162 | ) 163 | 164 | def test_most_recent_preserialisation_callback_called(self) -> None: 165 | class C: 166 | pass 167 | 168 | callback1 = Mock(return_value="callback 1 was called") 169 | callback2 = Mock(return_value="callback 2 was called") 170 | 171 | # Naughty: this alters the global state of the module. However this 172 | # `C` class is limited to this test only, so this shouldn't affect 173 | # other types and other tests. 174 | register_preserialisation_callback(C, callback1) 175 | register_preserialisation_callback(C, callback2) 176 | 177 | encode_canonical_json(C()) 178 | 179 | callback1.assert_not_called() 180 | callback2.assert_called_once() 181 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3, mypy, isort 3 | isolated_build = True 4 | 5 | [testenv:py] 6 | deps = 7 | coverage 8 | 9 | commands = 10 | coverage run --source canonicaljson -m unittest 11 | coverage report -m --fail-under 100 12 | 13 | [testenv:packaging] 14 | deps = 15 | check-manifest 16 | commands = check-manifest 17 | 18 | [testenv:pep8] 19 | basepython = python3.7 20 | deps = 21 | flake8 22 | commands = flake8 src tests 23 | 24 | [testenv:isort] 25 | basepython = python3.7 26 | deps = 27 | isort 28 | commands = isort --check src tests 29 | 30 | [testenv:black] 31 | basepython = python3.7 32 | deps = 33 | black==23.1.0 34 | commands = python -m black --check --diff src tests 35 | 36 | 
[testenv:mypy] 37 | deps = 38 | mypy==1.0 39 | types-setuptools==57.4.14 40 | commands = mypy src tests 41 | --------------------------------------------------------------------------------