├── .codecov.yml ├── .gitattributes ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug.yml │ ├── config.yml │ ├── feature.yml │ └── question.yml ├── SECURITY.md └── workflows │ ├── ci.yml │ └── docs.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmarks ├── __init__.py ├── bench_encodings.py ├── bench_gc.py ├── bench_large_json.py ├── bench_library_size.py ├── bench_structs.py ├── bench_validation │ ├── __init__.py │ ├── __main__.py │ ├── bench_cattrs.py │ ├── bench_mashumaro.py │ ├── bench_msgspec.py │ ├── bench_pydantic.py │ └── runner.py └── generate_data.py ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ ├── anywidget.png │ ├── bench-1.png │ ├── bench-1.svg │ ├── bench-validation.svg │ ├── converters-dark.svg │ ├── converters-light.svg │ ├── custom.css │ ├── edgedb.svg │ ├── esmerald.png │ ├── litestar.png │ ├── mosec.png │ ├── msgspec-logo-dark.svg │ ├── msgspec-logo-light.svg │ ├── nautilus-trader.png │ ├── pioreactor.png │ ├── sanic.png │ └── zero.png │ ├── _templates │ └── help.html │ ├── api.rst │ ├── benchmarks.rst │ ├── changelog.rst │ ├── conf.py │ ├── constraints.rst │ ├── converters.rst │ ├── examples │ ├── asyncio-kv.rst │ ├── conda-repodata.rst │ ├── edgedb.rst │ ├── geojson.rst │ ├── index.rst │ └── pyproject-toml.rst │ ├── extending.rst │ ├── index.rst │ ├── inspect.rst │ ├── install.rst │ ├── jsonschema.rst │ ├── perf-tips.rst │ ├── schema-evolution.rst │ ├── structs.rst │ ├── supported-types.rst │ ├── usage.rst │ └── why.rst ├── examples ├── asyncio-kv │ └── kv.py ├── conda-repodata │ └── query_repodata.py ├── edgedb │ ├── dbschema │ │ ├── default.esdl │ │ └── migrations │ │ │ └── 00001.edgeql │ ├── edgedb.toml │ └── insert_data.edgeql ├── geojson │ ├── canada.json │ └── msgspec_geojson.py └── pyproject-toml │ └── pyproject.py ├── msgspec ├── __init__.py ├── __init__.pyi ├── _core.c ├── _json_schema.py ├── _utils.py ├── _version.py ├── atof.h ├── atof_consts.h ├── common.h ├── inspect.py ├── itoa.h ├── json.py ├── json.pyi ├── msgpack.py ├── msgpack.pyi ├── py.typed ├── ryu.h ├── structs.py ├── structs.pyi ├── toml.py └── yaml.py ├── pyproject.toml ├── scripts └── generate_atof_consts.py ├── setup.cfg ├── setup.py ├── tests ├── basic_typing_examples.py ├── conftest.py ├── test_JSONTestSuite.py ├── test_common.py ├── test_constraints.py ├── test_convert.py ├── test_cpylint.py ├── test_inspect.py ├── test_integration.py ├── test_json.py ├── test_msgpack.py ├── test_mypy.py ├── test_performance.py ├── test_pyright.py ├── test_raw.py ├── test_schema.py ├── test_struct.py ├── test_to_builtins.py ├── test_toml.py ├── test_utils.py ├── test_yaml.py └── utils.py └── versioneer.py /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | patch: off 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | msgspec/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making 
participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at jcristharif@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | Thank you for taking the time to contribute to `msgspec`! 4 | 5 | Here we document some contribution guidelines to help you ensure that your 6 | contribution is at its best. 7 | 8 | ## Setting up your Development Environment 9 | 10 | Before getting started, you will need to already have installed: 11 | 12 | - Python (3.8+ only), with development headers installed 13 | - A C compiler (`gcc`, `clang`, and `msvc` are all tested) 14 | - `git` 15 | 16 | Once you have those installed, you're ready to: 17 | 18 | - Clone the repository 19 | - Install all development dependencies 20 | - Build a development version of `msgspec` 21 | - Install the `pre-commit` hooks 22 | 23 | ```bash 24 | # Clone the repository 25 | git clone https://github.com/jcrist/msgspec.git 26 | 27 | # cd into the repo root directory 28 | cd msgspec/ 29 | 30 | # Build and install msgspec & all dev dependencies 31 | pip install -e ".[dev]" 32 | 33 | # Install the pre-commit hooks 34 | pre-commit install 35 | ``` 36 | 37 | ## Editing and Rebuilding 38 | 39 | You now have a "development" build of `msgspec` installed. This means that you 40 | can make changes to the `.py` files and test them without requiring a rebuild 41 | of msgspec's C extension. Edit away! 42 | 43 | If you do make changes to a `.c` file, you'll need to recompile. You can do 44 | this by running 45 | 46 | ```bash 47 | pip install -e . 48 | ``` 49 | 50 | By default `msgspec` is built in release mode, with optimizations enabled. To 51 | build a debug build instead (for use with e.g. `gdb` or `lldb`) define the 52 | `MSGSPEC_DEBUG` environment variable before building. 53 | 54 | ```bash 55 | MSGSPEC_DEBUG=1 pip install -e . 56 | ``` 57 | 58 | ## Testing 59 | 60 | Tests are located in the `tests/` directory. Any code changes should include 61 | additional tests to ensure correctness. The tests are broken into various 62 | `test_*.py` files specific to the functionality that they're testing. 63 | 64 | The tests can be run using `pytest` as follows: 65 | 66 | ```bash 67 | pytest 68 | ``` 69 | 70 | If you want to run a specific test file, you may specify that file explicitly: 71 | 72 | ```bash 73 | pytest tests/test_json.py 74 | ``` 75 | 76 | ## Linting 77 | 78 | We use `pre-commit` to automatically run a few code linters before every 79 | commit. If you followed the development setup above, you should already have 80 | `pre-commit` and all the commit hooks installed. 81 | 82 | These hooks will run whenever you try to commit changes. 83 | 84 | ```bash 85 | git commit # linters will run automatically here 86 | ``` 87 | 88 | If you wish to run the linters manually without committing, you can run: 89 | 90 | ```bash 91 | pre-commit run 92 | ``` 93 | 94 | ## Documentation 95 | 96 | The source of the documentation can be found under `docs/source/`. 
It is 97 | built using `Sphinx` and can be built locally by running the following steps: 98 | 99 | ```bash 100 | cd docs/ # Make sure we are in the docs/ folder 101 | 102 | make html # Build the html 103 | 104 | # Output can now be found under docs/build/html and can be viewed in the browser 105 | ``` 106 | 107 | ## Continuous Integration (CI) 108 | 109 | We use GitHub Actions to provide "continuous integration" testing for all Pull 110 | Requests (PRs). When submitting a PR, please check to see that all tests pass, 111 | and fix any issues that come up. 112 | 113 | ## Code of Conduct 114 | 115 | ``msgspec`` has a code of conduct that must be followed by all contributors to 116 | the project. You may read the code of conduct 117 | [here](https://github.com/jcrist/msgspec/blob/main/CODE_OF_CONDUCT.md). 118 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.yml: -------------------------------------------------------------------------------- 1 | name: 🪲 Bug Report 2 | description: Report a bug or unexpected behavior in msgspec 3 | body: 4 | - type: markdown 5 | attributes: 6 | value: Thanks for taking the time to fill out a bug report! 7 | 8 | - type: textarea 9 | id: description 10 | attributes: 11 | label: Description 12 | description: > 13 | Describe the bug. What happened? What did you expect to happen? 14 | 15 | 16 | When possible, please also include a [minimal, complete, verifiable 17 | example](https://stackoverflow.com/help/minimal-reproducible-example). 18 | Ideally this should be code that can be run without modification to 19 | demonstrate the problem. 20 | 21 | 22 | When including errors and tracebacks, please include the _full 23 | traceback_ as well as the code that generated the error (or at least 24 | the line that caused it). 25 | validations: 26 | required: true 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature.yml: -------------------------------------------------------------------------------- 1 | name: 🙌 Feature Request 2 | description: Suggest a new feature or change to msgspec 3 | body: 4 | - type: markdown 5 | attributes: 6 | value: Thanks for taking the time to fill out a feature request! 7 | 8 | - type: textarea 9 | id: description 10 | attributes: 11 | label: Description 12 | description: > 13 | Describe the feature. What problems does it solve? 14 | 15 | 16 | If the feature is related to a problem, please describe in detail 17 | your use case. What would this new feature help you do that you 18 | couldn't do before? Why is this useful? 19 | 20 | 21 | When relevant, please also include example code making use of your 22 | proposed feature. How would you use this feature? What would code using 23 | it look like? 24 | validations: 25 | required: true 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: ❓ Question 2 | description: Ask a question 3 | body: 4 | - type: markdown 5 | attributes: 6 | value: Thanks for taking the time to ask a question!
7 | 8 | - type: textarea 9 | id: description 10 | attributes: 11 | label: Question 12 | description: > 13 | Ask your question here. Please search through existing and closed 14 | issues first to ensure your question hasn't already been answered 15 | elsewhere. 16 | validations: 17 | required: true 18 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | If you believe you have found a security-related bug with `msgspec`, **do not 4 | open a public GitHub issue**. Instead, please email jcristharif@gmail.com. 5 | 6 | Please include as much detail as you would for a normal issue in your report. 7 | In particular, including a minimal reproducible example will help the 8 | maintainers diagnose and resolve the issue quickly and efficiently. 9 | 10 | After the issue is resolved, we will make a release and announce the security 11 | fix through our normal communication channels. When it makes sense we may also 12 | obtain a CVE ID. If you would like to be credited with the report, please 13 | include your name and any links in the email. 14 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | paths-ignore: 9 | - "docs/**" 10 | - "benchmarks/**" 11 | - "examples/**" 12 | - ".github/**" 13 | - "README.rst" 14 | release: 15 | types: [published] 16 | 17 | jobs: 18 | lint: 19 | name: Lint and ruff code 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Install Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: "3.11" 29 | 30 | - name: Build msgspec and install dependencies 31 | run: | 32 | pip install -e ".[dev]" 33 | 34 | - name: Run pre-commit hooks 35 | uses: pre-commit/action@v3.0.0 36 | 37 | - name: mypy 38 | run: pytest tests/test_mypy.py 39 | 40 | - name: pyright 41 | run: pytest tests/test_pyright.py 42 | 43 | - name: doctests 44 | run: pytest --doctest-modules msgspec 45 | 46 | - name: Rebuild with sanitizers & coverage 47 | env: 48 | MSGSPEC_SANITIZE: "true" 49 | MSGSPEC_COVERAGE: "true" 50 | run: | 51 | python setup.py clean --all 52 | # I know this is deprecated, but I can't find a way to keep the build 53 | # directory around anymore on new versions of setuptools 54 | python setup.py develop 55 | 56 | - name: Run tests with sanitizers 57 | env: 58 | PYTHONMALLOC: "malloc" 59 | ASAN_OPTIONS: "detect_leaks=0" 60 | run: | 61 | LD_PRELOAD=`gcc -print-file-name=libasan.so` coverage run -m pytest -s -m "not mypy and not pyright" 62 | 63 | - name: Generate coverage files 64 | run: | 65 | coverage xml 66 | gcov -abcu `find build/ -name *.o` 67 | 68 | - name: Upload Codecov 69 | uses: codecov/codecov-action@v3 70 | with: 71 | files: coverage.xml,_core.c.gcov,atof.h.gcov,ryu.h.gcov 72 | 73 | build_wheels: 74 | name: Build wheels on ${{ matrix.os }} 75 | runs-on: ${{ matrix.os }} 76 | strategy: 77 | matrix: 78 | os: [ubuntu-latest, macos-13, windows-latest] 79 | 80 | env: 81 | CIBW_TEST_EXTRAS: "test" 82 | CIBW_TEST_COMMAND: "pytest {project}/tests" 83 | CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-* cp313-*" 84 | CIBW_SKIP: "*-win32 *_i686 *_s390x *_ppc64le" 85 | CIBW_ARCHS_MACOS: "x86_64 arm64" 86 | CIBW_ARCHS_LINUX: "x86_64 
aarch64" 87 | CIBW_TEST_SKIP: "*_arm64 *-musllinux_*" 88 | CIBW_ENVIRONMENT: "CFLAGS=-g0" 89 | 90 | steps: 91 | - uses: actions/checkout@v4 92 | 93 | - name: Set up QEMU 94 | if: runner.os == 'Linux' 95 | uses: docker/setup-qemu-action@v1 96 | with: 97 | platforms: all 98 | 99 | - name: Set up Environment 100 | if: github.event_name != 'release' 101 | run: | 102 | echo "CIBW_SKIP=${CIBW_SKIP} *-musllinux_* cp39-*_aarch64 cp311-*_aarch64 cp312-*_aarch64 cp313-*_aarch64" >> $GITHUB_ENV 103 | 104 | - name: Build & Test Wheels 105 | uses: pypa/cibuildwheel@v2.22.0 106 | 107 | - name: Upload artifact 108 | uses: actions/upload-artifact@v4 109 | if: github.event_name == 'release' && github.event.action == 'published' 110 | with: 111 | name: artifact-wheels-${{ matrix.os }} 112 | path: ./wheelhouse/*.whl 113 | 114 | build_sdist: 115 | name: Build Source Distribution 116 | runs-on: ubuntu-latest 117 | if: github.event_name == 'release' && github.event.action == 'published' 118 | 119 | steps: 120 | - uses: actions/checkout@v4 121 | 122 | - name: Install Python 123 | uses: actions/setup-python@v5 124 | with: 125 | python-version: "3.11" 126 | 127 | - name: Build source distribution 128 | run: python setup.py sdist 129 | 130 | - name: Upload artifact 131 | uses: actions/upload-artifact@v4 132 | with: 133 | name: artifact-sdist 134 | path: dist/*.tar.gz 135 | 136 | upload_pypi: 137 | needs: [build_wheels, build_sdist] 138 | runs-on: ubuntu-latest 139 | permissions: 140 | id-token: write 141 | if: github.event_name == 'release' && github.event.action == 'published' 142 | steps: 143 | - uses: actions/download-artifact@v4 144 | with: 145 | merge-multiple: true 146 | path: dist 147 | pattern: artifact-* 148 | 149 | - name: Publish package distributions to PyPI 150 | uses: pypa/gh-action-pypi-publish@release/v1 151 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: null 7 | 8 | jobs: 9 | build-docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Install Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.11" 18 | 19 | - name: Install msgspec and dependencies 20 | run: | 21 | pip install -e ".[doc]" 22 | 23 | - name: Build Docs 24 | run: | 25 | pushd docs 26 | make html 27 | popd 28 | 29 | - name: Deploy 30 | uses: peaceiris/actions-gh-pages@v3 31 | if: github.ref == 'refs/heads/main' 32 | with: 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_dir: ./docs/build/html 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Editor config folders 2 | ## Vscode 3 | .settings/ 4 | .project 5 | .vscode/ 6 | .vs/ 7 | ## PyCharm/IntelliJ-generated files 8 | *.iml 9 | .idea/ 10 | 11 | # Python cached sources 12 | __pycache__/ 13 | *.pyc 14 | 15 | # Virtual environments 16 | .venv*/ 17 | venv*/ 18 | 19 | # Pytest and coverage 20 | .coverage 21 | .pytest/ 22 | .pytest_cache/ 23 | htmlcov/ 24 | 25 | # Mypy Cache 26 | .mypy_cache/ 27 | 28 | # Docs build 29 | docs/build/ 30 | 31 | # Benchmark outputs 32 | benchmarks/*.html 33 | benchmarks/*.json 34 | 35 | # Setuptools/twine-generated files, compiled sources. 
36 | build/ 37 | dist/ 38 | *.egg-info/ 39 | pip-wheel-metadata/ 40 | *.so 41 | *.o 42 | *.pyd 43 | 44 | # Misc 45 | *.pem 46 | out/ 47 | .cache/ 48 | .DS_Store 49 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.7.1 4 | hooks: 5 | - id: ruff 6 | args: [ --fix ] 7 | - id: ruff-format 8 | 9 | - repo: https://github.com/codespell-project/codespell 10 | rev: v2.2.2 11 | hooks: 12 | - id: codespell 13 | language_version: python3 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, Jim Crist-Harif 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include msgspec/*.c 2 | include msgspec/*.h 3 | include msgspec/*.py 4 | include msgspec/*.pyi 5 | include msgspec/py.typed 6 | include setup.py 7 | include versioneer.py 8 | include README.md 9 | include LICENSE 10 | include MANIFEST.in 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | <!-- msgspec logo and project badges (images) -->

27 | 28 | 29 | `msgspec` is a *fast* serialization and validation library, with builtin 30 | support for [JSON](https://json.org), [MessagePack](https://msgpack.org), 31 | [YAML](https://yaml.org), and [TOML](https://toml.io). It features: 32 | 33 | - 🚀 **High performance encoders/decoders** for common protocols. The JSON and 34 | MessagePack implementations regularly 35 | [benchmark](https://jcristharif.com/msgspec/benchmarks.html) as the fastest 36 | options for Python. 37 | 38 | - 🎉 **Support for a wide variety of Python types**. Additional types may be 39 | supported through 40 | [extensions](https://jcristharif.com/msgspec/extending.html). 41 | 42 | - 🔍 **Zero-cost schema validation** using familiar Python type annotations. In 43 | [benchmarks](https://jcristharif.com/msgspec/benchmarks.html) `msgspec` 44 | decodes *and* validates JSON faster than 45 | [orjson](https://github.com/ijl/orjson) can decode it alone. 46 | 47 | - ✨ **A speedy Struct type** for representing structured data. If you already 48 | use [dataclasses](https://docs.python.org/3/library/dataclasses.html) or 49 | [attrs](https://www.attrs.org), 50 | [structs](https://jcristharif.com/msgspec/structs.html) should feel familiar. 51 | However, they're 52 | [5-60x faster](https://jcristharif.com/msgspec/benchmarks.html#benchmark-structs) 53 | for common operations. 54 | 55 | All of this is included in a 56 | [lightweight library](https://jcristharif.com/msgspec/benchmarks.html#benchmark-library-size) 57 | with no required dependencies. 58 | 59 | --- 60 | 61 | `msgspec` may be used for serialization alone, as a faster JSON or 62 | MessagePack library. For the greatest benefit though, we recommend using 63 | `msgspec` to handle the full serialization & validation workflow: 64 | 65 | **Define** your message schemas using standard Python type annotations. 66 | 67 | ```python 68 | >>> import msgspec 69 | 70 | >>> class User(msgspec.Struct): 71 | ... """A new type describing a User""" 72 | ... name: str 73 | ... groups: set[str] = set() 74 | ... email: str | None = None 75 | ``` 76 | 77 | **Encode** messages as JSON, or one of the many other supported protocols. 78 | 79 | ```python 80 | >>> alice = User("alice", groups={"admin", "engineering"}) 81 | 82 | >>> alice 83 | User(name='alice', groups={"admin", "engineering"}, email=None) 84 | 85 | >>> msg = msgspec.json.encode(alice) 86 | 87 | >>> msg 88 | b'{"name":"alice","groups":["admin","engineering"],"email":null}' 89 | ``` 90 | 91 | **Decode** messages back into Python objects, with optional schema validation. 92 | 93 | ```python 94 | >>> msgspec.json.decode(msg, type=User) 95 | User(name='alice', groups={"admin", "engineering"}, email=None) 96 | 97 | >>> msgspec.json.decode(b'{"name":"bob","groups":[123]}', type=User) 98 | Traceback (most recent call last): 99 | File "<stdin>", line 1, in <module> 100 | msgspec.ValidationError: Expected `str`, got `int` - at `$.groups[0]` 101 | ``` 102 | 103 | `msgspec` is designed to be as performant as possible, while retaining some of 104 | the niceties of validation libraries like 105 | [pydantic](https://pydantic-docs.helpmanual.io/). For supported types, 106 | encoding/decoding a message with `msgspec` can be 107 | [~10-80x faster than alternative libraries](https://jcristharif.com/msgspec/benchmarks.html). 108 | 109 |
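**Other protocols** follow the same pattern. As a minimal sketch (reusing the `User` struct and `alice` instance defined above), the `msgspec.msgpack` module exposes the same `encode`/`decode` functions for MessagePack:

```python
>>> packed = msgspec.msgpack.encode(alice)       # encode to MessagePack bytes

>>> msgspec.msgpack.decode(packed, type=User)    # decode + validate against the schema
User(name='alice', groups={"admin", "engineering"}, email=None)
```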


114 | 115 | See [the documentation](https://jcristharif.com/msgspec/) for more information. 116 | 117 | 118 | ## LICENSE 119 | 120 | New BSD. See the 121 | [License File](https://github.com/jcrist/msgspec/blob/main/LICENSE). 122 | -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/benchmarks/__init__.py -------------------------------------------------------------------------------- /benchmarks/bench_encodings.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import sys 4 | import dataclasses 5 | import json 6 | import timeit 7 | import importlib.metadata 8 | from typing import Any, Literal, Callable 9 | 10 | from .generate_data import make_filesystem_data 11 | 12 | import msgspec 13 | 14 | 15 | class File(msgspec.Struct, kw_only=True, omit_defaults=True, tag="file"): 16 | name: str 17 | created_by: str 18 | created_at: str 19 | updated_by: str | None = None 20 | updated_at: str | None = None 21 | nbytes: int 22 | permissions: Literal["READ", "WRITE", "READ_WRITE"] 23 | 24 | 25 | class Directory(msgspec.Struct, kw_only=True, omit_defaults=True, tag="directory"): 26 | name: str 27 | created_by: str 28 | created_at: str 29 | updated_by: str | None = None 30 | updated_at: str | None = None 31 | contents: list[File | Directory] 32 | 33 | 34 | @dataclasses.dataclass 35 | class Benchmark: 36 | label: str 37 | version: str 38 | encode: Callable 39 | decode: Callable 40 | schema: Any = None 41 | 42 | def run(self, data: bytes) -> dict: 43 | if self.schema is not None: 44 | data = msgspec.convert(data, self.schema) 45 | timer = timeit.Timer("func(data)", globals={"func": self.encode, "data": data}) 46 | n, t = timer.autorange() 47 | encode_time = t / n 48 | 49 | data = self.encode(data) 50 | 51 | timer = timeit.Timer("func(data)", globals={"func": self.decode, "data": data}) 52 | n, t = timer.autorange() 53 | decode_time = t / n 54 | 55 | return { 56 | "label": self.label, 57 | "encode": encode_time, 58 | "decode": decode_time, 59 | } 60 | 61 | 62 | def json_benchmarks(): 63 | import orjson 64 | import ujson 65 | import rapidjson 66 | import simdjson 67 | 68 | simdjson_ver = importlib.metadata.version("pysimdjson") 69 | 70 | rj_dumps = rapidjson.Encoder() 71 | rj_loads = rapidjson.Decoder() 72 | 73 | def uj_dumps(obj): 74 | return ujson.dumps(obj) 75 | 76 | enc = msgspec.json.Encoder() 77 | dec = msgspec.json.Decoder(Directory) 78 | dec2 = msgspec.json.Decoder() 79 | 80 | return [ 81 | Benchmark("msgspec structs", None, enc.encode, dec.decode, Directory), 82 | Benchmark("msgspec", msgspec.__version__, enc.encode, dec2.decode), 83 | Benchmark("json", None, json.dumps, json.loads), 84 | Benchmark("orjson", orjson.__version__, orjson.dumps, orjson.loads), 85 | Benchmark("ujson", ujson.__version__, uj_dumps, ujson.loads), 86 | Benchmark("rapidjson", rapidjson.__version__, rj_dumps, rj_loads), 87 | Benchmark("simdjson", simdjson_ver, simdjson.dumps, simdjson.loads), 88 | ] 89 | 90 | 91 | def msgpack_benchmarks(): 92 | import msgpack 93 | import ormsgpack 94 | 95 | enc = msgspec.msgpack.Encoder() 96 | dec = msgspec.msgpack.Decoder(Directory) 97 | dec2 = msgspec.msgpack.Decoder() 98 | 99 | return [ 100 | Benchmark("msgspec structs", None, enc.encode, dec.decode, Directory), 101 | Benchmark("msgspec", 
msgspec.__version__, enc.encode, dec2.decode), 102 | Benchmark("msgpack", msgpack.__version__, msgpack.dumps, msgpack.loads), 103 | Benchmark( 104 | "ormsgpack", ormsgpack.__version__, ormsgpack.packb, ormsgpack.unpackb 105 | ), 106 | ] 107 | 108 | 109 | def main(): 110 | import argparse 111 | 112 | parser = argparse.ArgumentParser( 113 | description="Benchmark different python serialization libraries" 114 | ) 115 | parser.add_argument( 116 | "--versions", 117 | action="store_true", 118 | help="Output library version info, and exit immediately", 119 | ) 120 | parser.add_argument( 121 | "-n", 122 | type=int, 123 | help="The number of objects in the generated data, defaults to 1000", 124 | default=1000, 125 | ) 126 | parser.add_argument( 127 | "-p", 128 | "--protocol", 129 | choices=["json", "msgpack"], 130 | default="json", 131 | help="The protocol to benchmark, defaults to JSON", 132 | ) 133 | parser.add_argument( 134 | "--json", 135 | action="store_true", 136 | help="whether to output the results as json", 137 | ) 138 | args = parser.parse_args() 139 | 140 | benchmarks = json_benchmarks() if args.protocol == "json" else msgpack_benchmarks() 141 | 142 | if args.versions: 143 | for bench in benchmarks: 144 | if bench.version is not None: 145 | print(f"- {bench.label}: {bench.version}") 146 | sys.exit(0) 147 | 148 | data = make_filesystem_data(args.n) 149 | 150 | results = [benchmark.run(data) for benchmark in benchmarks] 151 | 152 | if args.json: 153 | for line in results: 154 | print(json.dumps(line)) 155 | else: 156 | # Compose the results table 157 | results.sort(key=lambda row: row["encode"] + row["decode"]) 158 | best_et = results[0]["encode"] 159 | best_dt = results[0]["decode"] 160 | best_tt = best_et + best_dt 161 | 162 | columns = ( 163 | "", 164 | "encode (μs)", 165 | "vs.", 166 | "decode (μs)", 167 | "vs.", 168 | "total (μs)", 169 | "vs.", 170 | ) 171 | rows = [ 172 | ( 173 | r["label"], 174 | f"{1_000_000 * r['encode']:.1f}", 175 | f"{r['encode'] / best_et:.1f}", 176 | f"{1_000_000 * r['decode']:.1f}", 177 | f"{r['decode'] / best_dt:.1f}", 178 | f"{1_000_000 * (r['encode'] + r['decode']):.1f}", 179 | f"{(r['encode'] + r['decode']) / best_tt:.1f}", 180 | ) 181 | for r in results 182 | ] 183 | widths = tuple( 184 | max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns) 185 | ) 186 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths 187 | header = row_template % tuple(columns) 188 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths) 189 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths) 190 | parts = [bar, header, bar_underline] 191 | for r in rows: 192 | parts.append(row_template % r) 193 | parts.append(bar) 194 | print("\n".join(parts)) 195 | 196 | 197 | if __name__ == "__main__": 198 | main() 199 | -------------------------------------------------------------------------------- /benchmarks/bench_gc.py: -------------------------------------------------------------------------------- 1 | """This file benchmarks GC collection time for a large number of tiny 2 | dataclass-like instances. 3 | 4 | For each type, the following is measured: 5 | 6 | - Time for a single full GC pass over all the data. 7 | - Amount of memory used to hold all the data 8 | """ 9 | 10 | import gc 11 | import sys 12 | import time 13 | 14 | import msgspec 15 | 16 | 17 | def sizeof(x, _seen=None): 18 | """Get the recursive sizeof for an object (memoized). 19 | 20 | Not generic, works on types used in this benchmark. 
21 | """ 22 | if _seen is None: 23 | _seen = set() 24 | 25 | _id = id(x) 26 | if _id in _seen: 27 | return 0 28 | 29 | _seen.add(_id) 30 | 31 | size = sys.getsizeof(x) 32 | 33 | if isinstance(x, dict): 34 | for k, v in x.items(): 35 | size += sizeof(k, _seen) 36 | size += sizeof(v, _seen) 37 | if hasattr(x, "__dict__"): 38 | size += sizeof(x.__dict__, _seen) 39 | if hasattr(x, "__slots__"): 40 | for k in x.__slots__: 41 | size += sizeof(k, _seen) 42 | size += sizeof(getattr(x, k), _seen) 43 | return size 44 | 45 | 46 | class Point(msgspec.Struct): 47 | x: int 48 | y: int 49 | z: int 50 | 51 | 52 | class PointGCFalse(msgspec.Struct, gc=False): 53 | x: int 54 | y: int 55 | z: int 56 | 57 | 58 | class PointClass: 59 | def __init__(self, x, y, z): 60 | self.x = x 61 | self.y = y 62 | self.z = z 63 | 64 | 65 | class PointClassSlots: 66 | __slots__ = ("x", "y", "z") 67 | 68 | def __init__(self, x, y, z): 69 | self.x = x 70 | self.y = y 71 | self.z = z 72 | 73 | 74 | def bench_gc(cls): 75 | # Allocate a dict of structs 76 | data = {i: cls(i, i, i) for i in range(1_000_000)} 77 | 78 | # Run a full collection 79 | start = time.perf_counter() 80 | gc.collect() 81 | stop = time.perf_counter() 82 | gc_time = (stop - start) * 1e3 83 | mibytes = sizeof(data) / (2**20) 84 | return gc_time, mibytes 85 | 86 | 87 | def format_table(results): 88 | columns = ("", "GC time (ms)", "Memory Used (MiB)") 89 | 90 | rows = [] 91 | for name, t, mem in results: 92 | rows.append((f"**{name}**", f"{t:.2f}", f"{mem:.2f}")) 93 | 94 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)) 95 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths 96 | header = row_template % tuple(columns) 97 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths) 98 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths) 99 | parts = [bar, header, bar_underline] 100 | for r in rows: 101 | parts.append(row_template % r) 102 | parts.append(bar) 103 | return "\n".join(parts) 104 | 105 | 106 | def main(): 107 | results = [] 108 | for name, cls in [ 109 | ("standard class", PointClass), 110 | ("standard class with __slots__", PointClassSlots), 111 | ("msgspec struct", Point), 112 | ("msgspec struct with gc=False", PointGCFalse), 113 | ]: 114 | print(f"Benchmarking {name}...") 115 | gc_time, mibytes = bench_gc(cls) 116 | results.append((name, gc_time, mibytes)) 117 | 118 | print(format_table(results)) 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /benchmarks/bench_large_json.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import tempfile 4 | 5 | import requests 6 | 7 | TEMPLATE = """ 8 | import resource 9 | import time 10 | 11 | with open({path!r}, "rb") as f: 12 | data = f.read() 13 | 14 | initial_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 15 | 16 | {setup} 17 | 18 | start = time.perf_counter() 19 | for _ in range(5): 20 | decode(data) 21 | stop = time.perf_counter() 22 | 23 | max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 24 | # mem_mib = (max_rss * 1024 - len(data)) / (1024 * 1024) 25 | mem_mib = (max_rss - initial_rss) / 1024 26 | time_ms = ((stop - start) / 5) * 1000 27 | print([mem_mib, time_ms]) 28 | """ 29 | 30 | JSON = """ 31 | import json 32 | decode = json.loads 33 | """ 34 | 35 | UJSON = """ 36 | import ujson 37 | decode = ujson.loads 38 | """ 39 | 40 | ORJSON = """ 41 | import orjson 
42 | decode = orjson.loads 43 | """ 44 | 45 | RAPIDJSON = """ 46 | import rapidjson 47 | decode = rapidjson.loads 48 | """ 49 | 50 | SIMDJSON = """ 51 | import simdjson 52 | decode = simdjson.loads 53 | """ 54 | 55 | MSGSPEC = """ 56 | import msgspec 57 | decode = msgspec.json.decode 58 | """ 59 | 60 | MSGSPEC_STRUCTS = """ 61 | import msgspec 62 | from typing import Union 63 | 64 | class Package(msgspec.Struct, gc=False): 65 | build: str 66 | build_number: int 67 | depends: tuple[str, ...] 68 | md5: str 69 | name: str 70 | sha256: str 71 | subdir: str 72 | version: str 73 | license: str = "" 74 | noarch: Union[str, bool, None] = None 75 | size: int = 0 76 | timestamp: int = 0 77 | 78 | class RepoData(msgspec.Struct, gc=False): 79 | repodata_version: int 80 | info: dict 81 | packages: dict[str, Package] 82 | removed: tuple[str, ...] 83 | 84 | decode = msgspec.json.Decoder(RepoData).decode 85 | """ 86 | 87 | 88 | def main(): 89 | import argparse 90 | 91 | parser = argparse.ArgumentParser( 92 | description="Benchmark decoding a large JSON message using various JSON libraries" 93 | ) 94 | parser.add_argument( 95 | "--versions", 96 | action="store_true", 97 | help="Output library version info, and exit immediately", 98 | ) 99 | args = parser.parse_args() 100 | 101 | benchmarks = [ 102 | ("json", None, JSON), 103 | ("ujson", "ujson", UJSON), 104 | ("orjson", "orjson", ORJSON), 105 | ("rapidjson", "python-rapidjson", RAPIDJSON), 106 | ("simdjson", "pysimdjson", SIMDJSON), 107 | ("msgspec", "msgspec", MSGSPEC), 108 | ("msgspec structs", None, MSGSPEC_STRUCTS), 109 | ] 110 | 111 | if args.versions: 112 | import importlib.metadata 113 | 114 | for _, lib, _ in benchmarks: 115 | if lib is not None: 116 | version = importlib.metadata.version(lib) 117 | print(f"- {lib}: {version}") 118 | sys.exit(0) 119 | 120 | with tempfile.NamedTemporaryFile() as f: 121 | # Download the repodata.json 122 | resp = requests.get( 123 | "https://conda.anaconda.org/conda-forge/noarch/repodata.json" 124 | ) 125 | resp.raise_for_status() 126 | f.write(resp.content) 127 | 128 | # Run the benchmark for each library 129 | results = {} 130 | import ast 131 | 132 | for lib, _, setup in benchmarks: 133 | script = TEMPLATE.format(path=f.name, setup=setup) 134 | # We execute each script in a subprocess to isolate their memory usage 135 | output = subprocess.check_output([sys.executable, "-c", script]) 136 | results[lib] = ast.literal_eval(output.decode()) 137 | 138 | # Compose the results table 139 | best_mem, best_time = results["msgspec structs"] 140 | columns = ( 141 | "", 142 | "memory (MiB)", 143 | "vs.", 144 | "time (ms)", 145 | "vs.", 146 | ) 147 | rows = [ 148 | ( 149 | f"**{lib}**", 150 | f"{mem:.1f}", 151 | f"{mem / best_mem:.1f}x", 152 | f"{time:.1f}", 153 | f"{time / best_time:.1f}x", 154 | ) 155 | for lib, (mem, time) in results.items() 156 | ] 157 | rows.sort(key=lambda x: float(x[1])) 158 | widths = tuple( 159 | max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns) 160 | ) 161 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths 162 | header = row_template % tuple(columns) 163 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths) 164 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths) 165 | parts = [bar, header, bar_underline] 166 | for r in rows: 167 | parts.append(row_template % r) 168 | parts.append(bar) 169 | print("\n".join(parts)) 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | 
-------------------------------------------------------------------------------- /benchmarks/bench_library_size.py: -------------------------------------------------------------------------------- 1 | """ 2 | This benchmark compares the installed library size between msgspec and pydantic 3 | in a Python 3.10 x86 environment. 4 | """ 5 | 6 | import io 7 | import zipfile 8 | 9 | import requests 10 | 11 | 12 | def get_latest_noarch_wheel_size(library): 13 | """Get the total uncompressed size of the latest noarch wheel""" 14 | resp = requests.get(f"https://pypi.org/pypi/{library}/json").json() 15 | version = resp["info"]["version"] 16 | files = {} 17 | for file_info in resp["releases"][version]: 18 | name = file_info["filename"] 19 | url = file_info["url"] 20 | if name.endswith(".whl"): 21 | files[name] = url 22 | if len(files) != 1: 23 | raise ValueError( 24 | f"Expected to find only 1 matching file for {library}, got {list(files)}" 25 | ) 26 | 27 | url = list(files.values())[0] 28 | 29 | resp = requests.get(url) 30 | fil = io.BytesIO(resp.content) 31 | zfil = zipfile.ZipFile(fil) 32 | size = sum(f.file_size for f in zfil.filelist) 33 | return version, size 34 | 35 | 36 | def get_latest_manylinux_wheel_size(library): 37 | """Get the total uncompressed size of the latest Python 3.10 manylinux 38 | x86_64 wheel for the library""" 39 | resp = requests.get(f"https://pypi.org/pypi/{library}/json").json() 40 | version = resp["info"]["version"] 41 | files = {} 42 | for file_info in resp["releases"][version]: 43 | name = file_info["filename"] 44 | url = file_info["url"] 45 | if "310" in name and "manylinux_2_17_x86_64" in name and "pp73" not in name: 46 | files[name] = url 47 | if len(files) != 1: 48 | raise ValueError( 49 | f"Expected to find only 1 matching file for {library}, got {list(files)}" 50 | ) 51 | 52 | url = list(files.values())[0] 53 | 54 | resp = requests.get(url) 55 | fil = io.BytesIO(resp.content) 56 | zfil = zipfile.ZipFile(fil) 57 | size = sum(f.file_size for f in zfil.filelist) 58 | return version, size 59 | 60 | 61 | def main(): 62 | msgspec_version, msgspec_size = get_latest_manylinux_wheel_size("msgspec") 63 | pydantic_version, pydantic_size = get_latest_noarch_wheel_size("pydantic") 64 | _, pydantic_core_size = get_latest_manylinux_wheel_size("pydantic-core") 65 | _, typing_extensions_size = get_latest_noarch_wheel_size("typing-extensions") 66 | _, annotated_types_size = get_latest_noarch_wheel_size("annotated-types") 67 | 68 | data = [ 69 | ("msgspec", msgspec_version, msgspec_size), 70 | ( 71 | "pydantic", 72 | pydantic_version, 73 | pydantic_size 74 | + pydantic_core_size 75 | + typing_extensions_size 76 | + annotated_types_size, 77 | ), 78 | ] 79 | data.sort(key=lambda x: x[2]) 80 | msgspec_size = next(s for l, _, s in data if l == "msgspec") 81 | 82 | columns = ("", "version", "size (MiB)", "vs. 
msgspec") 83 | rows = [ 84 | ( 85 | f"**{lib}**", 86 | version, 87 | f"{size / (1024 * 1024):.2f}", 88 | f"{size / msgspec_size:.2f}x", 89 | ) 90 | for lib, version, size in data 91 | ] 92 | 93 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)) 94 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths 95 | header = row_template % tuple(columns) 96 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths) 97 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths) 98 | parts = [bar, header, bar_underline] 99 | for r in rows: 100 | parts.append(row_template % r) 101 | parts.append(bar) 102 | print("\n".join(parts)) 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /benchmarks/bench_structs.py: -------------------------------------------------------------------------------- 1 | """This file benchmarks dataclass-like libraries. It measures the following 2 | operations: 3 | 4 | - Time to import a new class definition 5 | - Time to create an instance of that class 6 | - Time to compare an instance of that class with another instance. 7 | """ 8 | 9 | from time import perf_counter 10 | 11 | order_template = """ 12 | def __{method}__(self, other): 13 | if type(self) is not type(other): 14 | return NotImplemented 15 | return ( 16 | (self.a, self.b, self.c, self.d, self.e) {op} 17 | (other.a, other.b, other.c, other.d, other.e) 18 | ) 19 | """ 20 | 21 | 22 | classes_template = """ 23 | import reprlib 24 | 25 | class C{n}: 26 | def __init__(self, a, b, c, d, e): 27 | self.a = a 28 | self.b = b 29 | self.c = c 30 | self.d = d 31 | self.e = e 32 | 33 | @reprlib.recursive_repr() 34 | def __repr__(self): 35 | return ( 36 | f"{{type(self).__name__}}(a={{self.a!r}}, b={{self.b!r}}, " 37 | f"c={{self.c!r}}, d={{self.d!r}}, e={{self.e!r}})" 38 | ) 39 | 40 | def __eq__(self, other): 41 | if type(self) is not type(other): 42 | return NotImplemented 43 | return ( 44 | self.a == other.a and 45 | self.b == other.b and 46 | self.c == other.c and 47 | self.d == other.d and 48 | self.e == other.e 49 | ) 50 | """ + "".join( 51 | [ 52 | order_template.format(method="lt", op="<"), 53 | order_template.format(method="le", op="<="), 54 | order_template.format(method="gt", op=">"), 55 | order_template.format(method="ge", op=">="), 56 | ] 57 | ) 58 | 59 | attrs_template = """ 60 | from attr import define 61 | 62 | @define(order=True) 63 | class C{n}: 64 | a: int 65 | b: int 66 | c: int 67 | d: int 68 | e: int 69 | """ 70 | 71 | dataclasses_template = """ 72 | from dataclasses import dataclass 73 | 74 | @dataclass(order=True) 75 | class C{n}: 76 | a: int 77 | b: int 78 | c: int 79 | d: int 80 | e: int 81 | """ 82 | 83 | pydantic_template = """ 84 | from pydantic import BaseModel 85 | 86 | class C{n}(BaseModel): 87 | a: int 88 | b: int 89 | c: int 90 | d: int 91 | e: int 92 | """ 93 | 94 | msgspec_template = """ 95 | from msgspec import Struct 96 | 97 | class C{n}(Struct, order=True): 98 | a: int 99 | b: int 100 | c: int 101 | d: int 102 | e: int 103 | """ 104 | 105 | 106 | BENCHMARKS = [ 107 | ("msgspec", "msgspec", msgspec_template), 108 | ("standard classes", None, classes_template), 109 | ("attrs", "attrs", attrs_template), 110 | ("dataclasses", None, dataclasses_template), 111 | ("pydantic", "pydantic", pydantic_template), 112 | ] 113 | 114 | 115 | def bench(name, template): 116 | N_classes = 100 117 | 118 | source = "\n".join(template.format(n=i) for i in range(N_classes)) 119 
| code_obj = compile(source, "__main__", "exec") 120 | 121 | # Benchmark defining new types 122 | N = 200 123 | start = perf_counter() 124 | for _ in range(N): 125 | ns = {} 126 | exec(code_obj, ns) 127 | end = perf_counter() 128 | define_time = ((end - start) / (N * N_classes)) * 1e6 129 | 130 | C = ns["C0"] 131 | 132 | # Benchmark creating new instances 133 | N = 1000 134 | M = 1000 135 | start = perf_counter() 136 | for _ in range(N): 137 | [C(a=i, b=i, c=i, d=i, e=i) for i in range(M)] 138 | end = perf_counter() 139 | init_time = ((end - start) / (N * M)) * 1e6 140 | 141 | # Benchmark equality 142 | N = 1000 143 | M = 1000 144 | val = M - 1 145 | needle = C(a=val, b=val, c=val, d=val, e=val) 146 | haystack = [C(a=i, b=i, c=i, d=i, e=i) for i in range(M)] 147 | start = perf_counter() 148 | for _ in range(N): 149 | haystack.index(needle) 150 | end = perf_counter() 151 | equality_time = ((end - start) / (N * M)) * 1e6 152 | 153 | # Benchmark order 154 | try: 155 | needle < needle 156 | except TypeError: 157 | order_time = None 158 | else: 159 | start = perf_counter() 160 | for _ in range(N): 161 | for obj in haystack: 162 | if obj >= needle: 163 | break 164 | end = perf_counter() 165 | order_time = ((end - start) / (N * M)) * 1e6 166 | 167 | return (name, define_time, init_time, equality_time, order_time) 168 | 169 | 170 | def format_table(results): 171 | columns = ( 172 | "", 173 | "import (μs)", 174 | "create (μs)", 175 | "equality (μs)", 176 | "order (μs)", 177 | ) 178 | 179 | def f(n): 180 | return "N/A" if n is None else f"{n:.2f}" 181 | 182 | rows = [] 183 | for name, *times in results: 184 | rows.append((f"**{name}**", *(f(t) for t in times))) 185 | 186 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)) 187 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths 188 | header = row_template % tuple(columns) 189 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths) 190 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths) 191 | parts = [bar, header, bar_underline] 192 | for r in rows: 193 | parts.append(row_template % r) 194 | parts.append(bar) 195 | return "\n".join(parts) 196 | 197 | 198 | def main(): 199 | import argparse 200 | 201 | parser = argparse.ArgumentParser(description="Benchmark msgspec Struct operations") 202 | parser.add_argument( 203 | "--versions", 204 | action="store_true", 205 | help="Output library version info, and exit immediately", 206 | ) 207 | args = parser.parse_args() 208 | 209 | if args.versions: 210 | import sys 211 | import importlib.metadata 212 | 213 | for _, lib, _ in BENCHMARKS: 214 | if lib is not None: 215 | version = importlib.metadata.version(lib) 216 | print(f"- {lib}: {version}") 217 | sys.exit(0) 218 | 219 | results = [] 220 | for name, _, source in BENCHMARKS: 221 | results.append(bench(name, source)) 222 | 223 | print(format_table(results)) 224 | 225 | 226 | if __name__ == "__main__": 227 | main() 228 | -------------------------------------------------------------------------------- /benchmarks/bench_validation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/benchmarks/bench_validation/__init__.py -------------------------------------------------------------------------------- /benchmarks/bench_validation/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import tempfile 
4 | from ..generate_data import make_filesystem_data 5 | import sys 6 | import subprocess 7 | 8 | 9 | LIBRARIES = ["msgspec", "mashumaro", "cattrs", "pydantic"] 10 | 11 | 12 | def parse_list(value): 13 | libs = [lib.strip() for lib in value.split(",")] 14 | for lib in libs: 15 | if lib not in LIBRARIES: 16 | print(f"{lib!r} is not a supported library, choose from {LIBRARIES}") 17 | sys.exit(1) 18 | return libs 19 | 20 | 21 | parser = argparse.ArgumentParser( 22 | description="Benchmark different python validation libraries" 23 | ) 24 | parser.add_argument( 25 | "--json", 26 | action="store_true", 27 | help="Whether to output the results as json", 28 | ) 29 | parser.add_argument( 30 | "-n", 31 | type=int, 32 | help="The number of objects in the generated data, defaults to 1000", 33 | default=1000, 34 | ) 35 | parser.add_argument( 36 | "--libs", 37 | type=parse_list, 38 | help="A comma-separated list of libraries to benchmark. Defaults to all.", 39 | default=LIBRARIES, 40 | ) 41 | parser.add_argument( 42 | "--versions", 43 | action="store_true", 44 | help="Output library version info, and exit immediately", 45 | ) 46 | args = parser.parse_args() 47 | 48 | if args.versions: 49 | import importlib.metadata 50 | 51 | for lib in args.libs: 52 | version = importlib.metadata.version(lib) 53 | print(f"- {lib}: {version}") 54 | sys.exit(0) 55 | 56 | 57 | data = json.dumps(make_filesystem_data(args.n)).encode("utf-8") 58 | 59 | results = [] 60 | with tempfile.NamedTemporaryFile() as f: 61 | f.write(data) 62 | f.flush() 63 | 64 | for lib in args.libs: 65 | res = subprocess.check_output( 66 | [sys.executable, "-m", "benchmarks.bench_validation.runner", lib, f.name] 67 | ) 68 | results.append(json.loads(res)) 69 | 70 | if args.json: 71 | for line in results: 72 | print(json.dumps(line)) 73 | else: 74 | # Compose the results table 75 | results.sort(key=lambda row: row["encode"] + row["decode"]) 76 | best_et = results[0]["encode"] 77 | best_dt = results[0]["decode"] 78 | best_tt = best_et + best_dt 79 | best_mem = results[0]["memory"] 80 | 81 | columns = ( 82 | "", 83 | "encode (μs)", 84 | "vs.", 85 | "decode (μs)", 86 | "vs.", 87 | "total (μs)", 88 | "vs.", 89 | "memory (MiB)", 90 | "vs.", 91 | ) 92 | rows = [ 93 | ( 94 | r["label"], 95 | f"{1_000_000 * r['encode']:.1f}", 96 | f"{r['encode'] / best_et:.1f}", 97 | f"{1_000_000 * r['decode']:.1f}", 98 | f"{r['decode'] / best_dt:.1f}", 99 | f"{1_000_000 * (r['encode'] + r['decode']):.1f}", 100 | f"{(r['encode'] + r['decode']) / best_tt:.1f}", 101 | f"{r['memory']:.1f}", 102 | f"{r['memory'] / best_mem:.1f}", 103 | ) 104 | for r in results 105 | ] 106 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)) 107 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths 108 | header = row_template % tuple(columns) 109 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths) 110 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths) 111 | parts = [bar, header, bar_underline] 112 | for r in rows: 113 | parts.append(row_template % r) 114 | parts.append(bar) 115 | print("\n".join(parts)) 116 | -------------------------------------------------------------------------------- /benchmarks/bench_validation/bench_cattrs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import datetime 5 | from typing import Literal 6 | 7 | import attrs 8 | import cattrs.preconf.orjson 9 | 10 | 11 | class Permissions(enum.Enum): 12 | READ = "READ" 
13 | WRITE = "WRITE" 14 | READ_WRITE = "READ_WRITE" 15 | 16 | 17 | @attrs.define(kw_only=True) 18 | class File: 19 | name: str 20 | created_by: str 21 | created_at: datetime.datetime 22 | updated_by: str | None = None 23 | updated_at: datetime.datetime | None = None 24 | nbytes: int 25 | permissions: Permissions 26 | type: Literal["file"] = "file" 27 | 28 | 29 | @attrs.define(kw_only=True) 30 | class Directory: 31 | name: str 32 | created_by: str 33 | created_at: datetime.datetime 34 | updated_by: str | None = None 35 | updated_at: datetime.datetime | None = None 36 | contents: list[File | Directory] 37 | type: Literal["directory"] = "directory" 38 | 39 | 40 | converter = cattrs.preconf.orjson.make_converter(omit_if_default=True) 41 | 42 | 43 | def encode(obj): 44 | return converter.dumps(obj) 45 | 46 | 47 | def decode(msg): 48 | return converter.loads(msg, Directory) 49 | 50 | 51 | label = "cattrs" 52 | -------------------------------------------------------------------------------- /benchmarks/bench_validation/bench_mashumaro.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import dataclasses 5 | import datetime 6 | from typing import Literal 7 | 8 | from mashumaro.mixins.orjson import DataClassORJSONMixin 9 | 10 | 11 | class Permissions(enum.Enum): 12 | READ = "READ" 13 | WRITE = "WRITE" 14 | READ_WRITE = "READ_WRITE" 15 | 16 | 17 | @dataclasses.dataclass(kw_only=True) 18 | class File(DataClassORJSONMixin): 19 | name: str 20 | created_by: str 21 | created_at: datetime.datetime 22 | updated_by: str | None = None 23 | updated_at: datetime.datetime | None = None 24 | nbytes: int 25 | permissions: Permissions 26 | type: Literal["file"] = "file" 27 | 28 | class Config: 29 | omit_default = True 30 | lazy_compilation = True 31 | 32 | 33 | @dataclasses.dataclass(kw_only=True) 34 | class Directory(DataClassORJSONMixin): 35 | name: str 36 | created_by: str 37 | created_at: datetime.datetime 38 | updated_by: str | None = None 39 | updated_at: datetime.datetime | None = None 40 | contents: list[File | Directory] 41 | type: Literal["directory"] = "directory" 42 | 43 | class Config: 44 | omit_default = True 45 | lazy_compilation = True 46 | 47 | 48 | label = "mashumaro" 49 | 50 | 51 | def encode(x): 52 | return x.to_json() 53 | 54 | 55 | def decode(msg): 56 | return Directory.from_json(msg) 57 | -------------------------------------------------------------------------------- /benchmarks/bench_validation/bench_msgspec.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import datetime 5 | 6 | import msgspec 7 | 8 | 9 | class Permissions(enum.Enum): 10 | READ = "READ" 11 | WRITE = "WRITE" 12 | READ_WRITE = "READ_WRITE" 13 | 14 | 15 | class File(msgspec.Struct, kw_only=True, omit_defaults=True, tag="file"): 16 | name: str 17 | created_by: str 18 | created_at: datetime.datetime 19 | updated_by: str | None = None 20 | updated_at: datetime.datetime | None = None 21 | nbytes: int 22 | permissions: Permissions 23 | 24 | 25 | class Directory(msgspec.Struct, kw_only=True, omit_defaults=True, tag="directory"): 26 | name: str 27 | created_by: str 28 | created_at: datetime.datetime 29 | updated_by: str | None = None 30 | updated_at: datetime.datetime | None = None 31 | contents: list[File | Directory] 32 | 33 | 34 | enc = msgspec.json.Encoder() 35 | dec = msgspec.json.Decoder(Directory) 36 | 37 | label = "msgspec" 38 | 
encode = enc.encode 39 | decode = dec.decode 40 | -------------------------------------------------------------------------------- /benchmarks/bench_validation/bench_pydantic.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import datetime 5 | from typing import Literal, Annotated 6 | 7 | import pydantic 8 | 9 | 10 | class Permissions(enum.Enum): 11 | READ = "READ" 12 | WRITE = "WRITE" 13 | READ_WRITE = "READ_WRITE" 14 | 15 | 16 | class File(pydantic.BaseModel): 17 | type: Literal["file"] = "file" 18 | name: str 19 | created_by: str 20 | created_at: datetime.datetime 21 | updated_by: str | None = None 22 | updated_at: datetime.datetime | None = None 23 | nbytes: int 24 | permissions: Permissions 25 | 26 | 27 | class Directory(pydantic.BaseModel): 28 | type: Literal["directory"] = "directory" 29 | name: str 30 | created_by: str 31 | created_at: datetime.datetime 32 | updated_by: str | None = None 33 | updated_at: datetime.datetime | None = None 34 | contents: list[Annotated[File | Directory, pydantic.Field(discriminator="type")]] 35 | 36 | 37 | if pydantic.__version__.startswith("2."): 38 | label = "pydantic v2" 39 | 40 | def encode(obj): 41 | return obj.model_dump_json(exclude_defaults=True) 42 | 43 | def decode(msg): 44 | return Directory.model_validate_json(msg) 45 | 46 | else: 47 | label = "pydantic v1" 48 | 49 | def encode(obj): 50 | return obj.json(exclude_defaults=True) 51 | 52 | def decode(msg): 53 | return Directory.parse_raw(msg) 54 | -------------------------------------------------------------------------------- /benchmarks/bench_validation/runner.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import json 3 | import timeit 4 | import resource 5 | import sys 6 | import gc 7 | 8 | library, path = sys.argv[1:3] 9 | 10 | with open(path, "rb") as f: 11 | json_data = f.read() 12 | 13 | initial_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 14 | 15 | mod = importlib.import_module(f"benchmarks.bench_validation.bench_{library}") 16 | 17 | msg = mod.decode(json_data) 18 | 19 | gc.collect() 20 | timer = timeit.Timer("func(data)", setup="", globals={"func": mod.encode, "data": msg}) 21 | n, t = timer.autorange() 22 | encode_time = t / n 23 | 24 | del msg 25 | 26 | gc.collect() 27 | timer = timeit.Timer( 28 | "func(data)", setup="", globals={"func": mod.decode, "data": json_data} 29 | ) 30 | n, t = timer.autorange() 31 | decode_time = t / n 32 | 33 | max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 34 | 35 | 36 | report = json.dumps( 37 | { 38 | "label": mod.label, 39 | "encode": encode_time, 40 | "decode": decode_time, 41 | "memory": (max_rss - initial_rss) / 1024, 42 | } 43 | ) 44 | print(report) 45 | -------------------------------------------------------------------------------- /benchmarks/generate_data.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import random 3 | import string 4 | 5 | 6 | class Generator: 7 | UTC = datetime.timezone.utc 8 | DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC) 9 | DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC) 10 | PERMISSIONS = ["READ", "WRITE", "READ_WRITE"] 11 | NAMES = [ 12 | "alice", 13 | "ben", 14 | "carol", 15 | "daniel", 16 | "esther", 17 | "franklin", 18 | "genevieve", 19 | "harold", 20 | "ilana", 21 | "jerome", 22 | "katelyn", 23 | "leonard", 24 | "monique", 25 | "nathan", 26 | "ora", 27 | 
"patrick", 28 | "quinn", 29 | "ronald", 30 | "stephanie", 31 | "thomas", 32 | "uma", 33 | "vince", 34 | "wendy", 35 | "xavier", 36 | "yitzchak", 37 | "zahra", 38 | ] 39 | 40 | def __init__(self, capacity, seed=42): 41 | self.capacity = capacity 42 | self.random = random.Random(seed) 43 | 44 | def randdt(self, min, max): 45 | ts = self.random.randint(min.timestamp(), max.timestamp()) 46 | return datetime.datetime.fromtimestamp(ts).replace(tzinfo=self.UTC) 47 | 48 | def randstr(self, min=None, max=None): 49 | if max is not None: 50 | min = self.random.randint(min, max) 51 | return "".join(self.random.choices(string.ascii_letters, k=min)) 52 | 53 | def make(self, is_dir): 54 | name = self.randstr(4, 30) 55 | created_by = self.random.choice(self.NAMES) 56 | created_at = self.randdt(self.DATE_2018, self.DATE_2023) 57 | data = { 58 | "type": "directory" if is_dir else "file", 59 | "name": name, 60 | "created_by": created_by, 61 | "created_at": created_at.isoformat(), 62 | } 63 | if self.random.random() > 0.75: 64 | updated_by = self.random.choice(self.NAMES) 65 | updated_at = self.randdt(created_at, self.DATE_2023) 66 | data.update( 67 | updated_by=updated_by, 68 | updated_at=updated_at.isoformat(), 69 | ) 70 | if is_dir: 71 | n = min(self.random.randint(0, 30), self.capacity) 72 | self.capacity -= n 73 | data["contents"] = [self.make_node() for _ in range(n)] 74 | else: 75 | data["nbytes"] = self.random.randint(0, 1000000) 76 | data["permissions"] = self.random.choice(self.PERMISSIONS) 77 | return data 78 | 79 | def make_node(self): 80 | return self.make(self.random.random() > 0.8) 81 | 82 | def generate(self): 83 | self.capacity -= 1 84 | if self.capacity == 0: 85 | out = self.make(False) 86 | else: 87 | out = self.make(True) 88 | while self.capacity: 89 | self.capacity -= 1 90 | out["contents"].append(self.make_node()) 91 | return out 92 | 93 | 94 | def make_filesystem_data(n): 95 | return Generator(n).generate() 96 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -W --keep-going 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/_static/anywidget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/anywidget.png -------------------------------------------------------------------------------- /docs/source/_static/bench-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/bench-1.png -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | body[data-theme]:not([data-theme="dark"]) .highlight .gp, .highlight .gh { 2 | color: #808080; 3 | font-weight: normal; 4 | } 5 | 6 | body[data-theme]:not([data-theme="dark"]) .highlight .go { 7 | color: #203060; 8 | } 9 | 10 | body[data-theme="dark"] .highlight .gh, .highlight .gp { 11 | color: #aaaaaa; 12 | font-weight: normal; 13 | } 14 | -------------------------------------------------------------------------------- /docs/source/_static/edgedb.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/source/_static/esmerald.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/esmerald.png -------------------------------------------------------------------------------- /docs/source/_static/litestar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/litestar.png -------------------------------------------------------------------------------- /docs/source/_static/mosec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/mosec.png -------------------------------------------------------------------------------- /docs/source/_static/msgspec-logo-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 15 | 16 | msgspec -------------------------------------------------------------------------------- /docs/source/_static/msgspec-logo-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 15 | 16 | msgspec -------------------------------------------------------------------------------- /docs/source/_static/nautilus-trader.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/nautilus-trader.png -------------------------------------------------------------------------------- /docs/source/_static/pioreactor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/pioreactor.png -------------------------------------------------------------------------------- /docs/source/_static/sanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/sanic.png -------------------------------------------------------------------------------- /docs/source/_static/zero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/zero.png -------------------------------------------------------------------------------- /docs/source/_templates/help.html: -------------------------------------------------------------------------------- 1 |

Need help? 2 | 3 | 4 | Open an issue in the issue tracker. 5 |

6 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API Docs 2 | ======== 3 | 4 | Structs 5 | ------- 6 | 7 | .. currentmodule:: msgspec 8 | 9 | .. autoclass:: Struct 10 | 11 | .. autofunction:: field 12 | 13 | .. autofunction:: defstruct 14 | 15 | .. autofunction:: msgspec.structs.replace 16 | 17 | .. autofunction:: msgspec.structs.asdict 18 | 19 | .. autofunction:: msgspec.structs.astuple 20 | 21 | .. autofunction:: msgspec.structs.force_setattr 22 | 23 | .. autofunction:: msgspec.structs.fields 24 | 25 | .. autoclass:: msgspec.structs.FieldInfo 26 | 27 | .. autoclass:: msgspec.structs.StructConfig 28 | 29 | .. autodata:: NODEFAULT 30 | :no-value: 31 | 32 | Meta 33 | ---- 34 | 35 | .. autoclass:: Meta 36 | :members: 37 | 38 | 39 | Raw 40 | --- 41 | 42 | .. currentmodule:: msgspec 43 | 44 | .. autoclass:: Raw 45 | :members: 46 | 47 | Unset 48 | ----- 49 | 50 | .. autodata:: UNSET 51 | :no-value: 52 | 53 | .. autoclass:: UnsetType 54 | 55 | 56 | JSON 57 | ---- 58 | 59 | .. currentmodule:: msgspec.json 60 | 61 | .. autoclass:: Encoder 62 | :members: encode, encode_lines, encode_into 63 | 64 | .. autoclass:: Decoder 65 | :members: decode, decode_lines 66 | 67 | .. autofunction:: encode 68 | 69 | .. autofunction:: decode 70 | 71 | .. autofunction:: format 72 | 73 | 74 | MessagePack 75 | ----------- 76 | 77 | .. currentmodule:: msgspec.msgpack 78 | 79 | .. autoclass:: Encoder 80 | :members: encode, encode_into 81 | 82 | .. autoclass:: Decoder 83 | :members: decode 84 | 85 | .. autoclass:: Ext 86 | :members: 87 | 88 | .. autofunction:: encode 89 | 90 | .. autofunction:: decode 91 | 92 | 93 | YAML 94 | ---- 95 | 96 | .. currentmodule:: msgspec.yaml 97 | 98 | .. autofunction:: encode 99 | 100 | .. autofunction:: decode 101 | 102 | 103 | TOML 104 | ---- 105 | 106 | .. currentmodule:: msgspec.toml 107 | 108 | .. autofunction:: encode 109 | 110 | .. autofunction:: decode 111 | 112 | 113 | JSON Schema 114 | ----------- 115 | 116 | .. currentmodule:: msgspec.json 117 | 118 | .. autofunction:: schema 119 | 120 | .. autofunction:: schema_components 121 | 122 | 123 | .. _inspect-api: 124 | 125 | 126 | Converters 127 | ---------- 128 | 129 | .. currentmodule:: msgspec 130 | 131 | .. autofunction:: convert 132 | 133 | .. autofunction:: to_builtins 134 | 135 | 136 | Inspect 137 | ------- 138 | 139 | .. currentmodule:: msgspec.inspect 140 | 141 | .. autofunction:: type_info 142 | .. autofunction:: multi_type_info 143 | .. autoclass:: Type 144 | .. autoclass:: Metadata 145 | .. autoclass:: AnyType 146 | .. autoclass:: NoneType 147 | .. autoclass:: BoolType 148 | .. autoclass:: IntType 149 | .. autoclass:: FloatType 150 | .. autoclass:: StrType 151 | .. autoclass:: BytesType 152 | .. autoclass:: ByteArrayType 153 | .. autoclass:: MemoryViewType 154 | .. autoclass:: DateTimeType 155 | .. autoclass:: TimeType 156 | .. autoclass:: DateType 157 | .. autoclass:: TimeDeltaType 158 | .. autoclass:: UUIDType 159 | .. autoclass:: DecimalType 160 | .. autoclass:: ExtType 161 | .. autoclass:: RawType 162 | .. autoclass:: EnumType 163 | .. autoclass:: LiteralType 164 | .. autoclass:: CustomType 165 | .. autoclass:: UnionType 166 | :members: 167 | .. autoclass:: CollectionType 168 | .. autoclass:: ListType 169 | .. autoclass:: SetType 170 | .. autoclass:: FrozenSetType 171 | .. autoclass:: VarTupleType 172 | .. autoclass:: TupleType 173 | .. autoclass:: DictType 174 | .. 
autoclass:: Field 175 | .. autoclass:: TypedDictType 176 | .. autoclass:: NamedTupleType 177 | .. autoclass:: DataclassType 178 | .. autoclass:: StructType 179 | 180 | 181 | Exceptions 182 | ---------- 183 | 184 | .. currentmodule:: msgspec 185 | 186 | .. autoexception:: MsgspecError 187 | :show-inheritance: 188 | 189 | .. autoexception:: EncodeError 190 | :show-inheritance: 191 | 192 | .. autoexception:: DecodeError 193 | :show-inheritance: 194 | 195 | .. autoexception:: ValidationError 196 | :show-inheritance: 197 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # We want to document both the UNSET singleton, and the UnsetType class, but we 2 | # don't want them to have the same docstring. I couldn't find an easy way to 3 | # do this in sphinx. For now, we patch the UnsetType object when building types 4 | # to override the docstring handling. 5 | try: 6 | import msgspec 7 | 8 | class UnsetType: 9 | """The type of `UNSET`. 10 | 11 | See Also 12 | -------- 13 | UNSET 14 | """ 15 | 16 | msgspec.UnsetType = UnsetType 17 | except ImportError: 18 | pass 19 | 20 | 21 | project = "msgspec" 22 | copyright = "Jim Crist-Harif" 23 | author = "Jim Crist-Harif" 24 | 25 | GITHUB_LOGO = """ 26 | 27 | 28 | 29 | """.strip() 30 | 31 | html_theme = "furo" 32 | html_title = "" 33 | templates_path = ["_templates"] 34 | html_static_path = ["_static"] 35 | html_css_files = ["custom.css"] 36 | pygments_style = "default" 37 | 38 | _link_color_light = "#024bb0" 39 | _link_color_dark = "#5192d2" 40 | 41 | html_theme_options = { 42 | "light_logo": "msgspec-logo-light.svg", 43 | "dark_logo": "msgspec-logo-dark.svg", 44 | "light_css_variables": { 45 | "color-brand-primary": "black", 46 | "color-brand-content": _link_color_light, 47 | "color-foreground-muted": "#808080", 48 | "color-highlight-on-target": "inherit", 49 | "color-highlighted-background": "#ffffcc", 50 | "color-sidebar-link-text": "black", 51 | "color-sidebar-link-text--top-level": "black", 52 | "color-link": _link_color_light, 53 | "color-link--hover": _link_color_light, 54 | "color-link-underline": "transparent", 55 | "color-link-underline--hover": _link_color_light, 56 | }, 57 | "dark_css_variables": { 58 | "color-brand-primary": "#ffffff", 59 | "color-brand-content": _link_color_dark, 60 | "color-highlight-on-target": "inherit", 61 | "color-highlighted-background": "#333300", 62 | "color-sidebar-link-text": "#ffffffcc", 63 | "color-sidebar-link-text--top-level": "#ffffffcc", 64 | "color-link": _link_color_dark, 65 | "color-link--hover": _link_color_dark, 66 | "color-link-underline": "transparent", 67 | "color-link-underline--hover": _link_color_dark, 68 | }, 69 | "sidebar_hide_name": True, 70 | "footer_icons": [ 71 | { 72 | "name": "GitHub", 73 | "url": "https://github.com/jcrist/msgspec", 74 | "html": GITHUB_LOGO, 75 | "class": "", 76 | }, 77 | ], 78 | } 79 | 80 | extensions = [ 81 | "sphinx.ext.autodoc", 82 | "sphinx.ext.napoleon", 83 | "sphinx.ext.extlinks", 84 | "sphinx.ext.intersphinx", 85 | "sphinx_copybutton", 86 | "sphinx_design", 87 | "IPython.sphinxext.ipython_console_highlighting", 88 | ] 89 | intersphinx_mapping = { 90 | "python": ("https://docs.python.org/3", None), 91 | "attrs": ("https://www.attrs.org/en/stable/", None), 92 | } 93 | autodoc_typehints = "none" 94 | napoleon_numpy_docstring = True 95 | napoleon_google_docstring = False 96 | napoleon_use_rtype = False 97 | napoleon_custom_sections = 
[("Configuration", "params_style")] 98 | default_role = "obj" 99 | extlinks = { 100 | "issue": ("https://github.com/jcrist/msgspec/issues/%s", "Issue #%s"), 101 | "pr": ("https://github.com/jcrist/msgspec/pull/%s", "PR #%s"), 102 | } 103 | copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: " 104 | copybutton_prompt_is_regexp = True 105 | -------------------------------------------------------------------------------- /docs/source/converters.rst: -------------------------------------------------------------------------------- 1 | Converters 2 | ========== 3 | 4 | .. currentmodule:: msgspec 5 | 6 | ``msgspec`` provides builtin support for several common protocols (``json``, 7 | ``msgpack``, ``yaml``, and ``toml``). Support for additional protocols may be 8 | added by combining a serialization library with msgspec's *converter 9 | functions*: `msgspec.to_builtins` and `msgspec.convert`. 10 | 11 | - `msgspec.to_builtins`: takes an object composed of any :doc:`supported type 12 | ` and converts it into one composed of only simple builtin 13 | types typically supported by Python serialization libraries. 14 | 15 | - `msgspec.convert`: takes an object composed of any :doc:`supported type 16 | `, and converts it to match a specified schema (validating 17 | along the way). If the conversion fails due to a schema mismatch, a nice 18 | error message is raised. 19 | 20 | These functions are designed to be paired with a Python serialization library as 21 | pre/post processors for typical ``dumps`` and ``loads`` functions. 22 | 23 | .. image:: _static/converters-light.svg 24 | :align: center 25 | :class: only-light 26 | 27 | .. image:: _static/converters-dark.svg 28 | :align: center 29 | :class: only-dark 30 | 31 | For example, if ``msgspec`` didn't already provide support for ``json``, you 32 | could add support by wrapping the standard library's `json` module as follows: 33 | 34 | .. code-block:: ipython 35 | 36 | In [1]: import json 37 | ...: from typing import Any 38 | ...: 39 | ...: import msgspec 40 | 41 | In [2]: def encode(obj): 42 | ...: return json.dumps(msgspec.to_builtins(obj)) 43 | 44 | In [3]: def decode(msg, type=Any): 45 | ...: return msgspec.convert(json.loads(msg), type=type) 46 | 47 | In [4]: class Point(msgspec.Struct): 48 | ...: x: int 49 | ...: y: int 50 | 51 | In [5]: x = Point(1, 2) 52 | 53 | In [6]: msg = encode(x) # Encoding a high-level type works 54 | 55 | In [7]: msg 56 | '{"x": 1, "y": 2}' 57 | 58 | In [8]: decode(msg, type=Point) # Decoding a high-level type works 59 | Point(x=1, y=2) 60 | 61 | In [9]: decode('{"x": "oops", "y": 2}', type=Point) # Schema mismatches error 62 | --------------------------------------------------------------------------- 63 | ValidationError Traceback (most recent call last) 64 | Cell In[9], line 1 65 | ----> 1 decode('{"x": "oops", "y": 2}', type=Point) # Schema mismatches error 66 | 67 | Cell In[3], line 2, in decode(msg, type) 68 | 1 def decode(msg, type=Any): 69 | ---> 2 return msgspec.convert(json.loads(msg), type=type) 70 | 71 | ValidationError: Expected `int`, got `str` - at `$.x` 72 | 73 | 74 | Since all protocols are different, `to_builtins` and `convert` have a few 75 | configuration options: 76 | 77 | - ``builtin_types``: an iterable of additional types to treat as builtin types, 78 | beyond the standard `dict`, `list`, `tuple`, `set`, `frozenset`, `str`, 79 | `int`, `float`, `bool`, and `None`. 
80 | 81 | - ``str_keys``: whether the wrapped protocol only supports strings for object 82 | keys, rather than any hashable type. 83 | 84 | - ``strict``: `convert` only. Whether type coercion rules should be strict. 85 | Defaults is True, setting to False enables a wider set of coercion rules from 86 | string to non-string types for all values. Among other uses, this may be used 87 | to handle completely untyped protocols like URL querystrings, where only 88 | string values exist. See :ref:`strict-vs-lax` for more information. 89 | 90 | - ``from_attributes``: `convert` only. If True, input objects may be coerced 91 | to ``Struct``/``dataclass``/``attrs`` types by extracting attributes from the 92 | input matching fields in the output type. One use case is converting database 93 | query results (ORM or otherwise) to msgspec structured types. The default is 94 | False. 95 | 96 | - ``enc_hook``/``dec_hook``: the standard keyword arguments used for 97 | :doc:`extending` msgspec to support additional types. 98 | 99 | ----- 100 | 101 | Taking a look at another protocol - TOML_. This protocol 102 | 103 | - Includes native support for `datetime.datetime`, `datetime.date`, and 104 | `datetime.time` types. 105 | - Only supports strings for object keys. 106 | 107 | If ``msgspec`` didn't already provide support for ``toml``, you could add 108 | support by wrapping the standard library's `tomllib` module as follows: 109 | 110 | .. code-block:: python 111 | 112 | import datetime 113 | import tomllib 114 | from typing import Any 115 | 116 | import msgspec 117 | 118 | def decode(msg, *, type=Any, dec_hook=None): 119 | return msgspec.convert( 120 | toml.loads(msg), 121 | type, 122 | builtin_types=(datetime.datetime, datetime.date, datetime.time), 123 | str_keys=True, 124 | dec_hook=dec_hook, 125 | ) 126 | 127 | ``msgspec`` uses these APIs to implement ``toml`` and ``yaml`` support, 128 | wrapping external serialization libraries: 129 | 130 | - ``msgspec.toml`` (`code `__) 131 | 132 | - ``msgspec.yaml`` (`code `__) 133 | 134 | The implementation in ``msgspec.toml`` is *almost* identical to the one above, 135 | with some additional code for error handling. 136 | 137 | 138 | .. _TOML: https://toml.io 139 | -------------------------------------------------------------------------------- /docs/source/examples/asyncio-kv.rst: -------------------------------------------------------------------------------- 1 | Asyncio TCP Key-Value Server 2 | ============================ 3 | 4 | This example demonstrates writing a small TCP server and client using `asyncio` 5 | and ``msgspec``. 6 | 7 | The server defines a few operations: 8 | 9 | - ``get(key: str) -> str | None``: get the value for a single key from the 10 | store if it exists. 11 | - ``put(key: str, val: str) -> None``: add a new key-value pair to the store. 12 | - ``delete(key: str) -> None``: delete a key-value pair from the store if it exists. 13 | - ``list_keys() -> list[str]``: list all the keys currently set in the store. 14 | 15 | Each operation has a corresponding request type defined as a :doc:`Struct <../structs>` 16 | type. Note that these structs are :ref:`tagged ` so they 17 | can be part of a ``Union`` of all request types the server handles. 18 | 19 | `msgspec.msgpack` is used to handle the encoding/decoding of the various 20 | messages. The length of each message is prefixed to each message 21 | (`Length-prefix framing 22 | `__) 23 | to make it easier to efficiently determine message boundaries. 
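The framing itself is only a few lines. Below is a minimal sketch of the
pattern (an illustration, not the exact code from the example; it assumes a
4-byte little-endian length prefix) showing how a prefixed message might be
written to and read from an `asyncio` stream:

.. code-block:: python

    import asyncio
    import struct

    import msgspec

    encoder = msgspec.msgpack.Encoder()
    decoder = msgspec.msgpack.Decoder()

    def write_msg(writer: asyncio.StreamWriter, obj) -> None:
        # Prefix the encoded payload with its length, packed as a 4-byte integer
        buf = encoder.encode(obj)
        writer.write(struct.pack("<I", len(buf)))
        writer.write(buf)

    async def read_msg(reader: asyncio.StreamReader):
        # Read the 4-byte length prefix, then exactly that many payload bytes
        n, = struct.unpack("<I", await reader.readexactly(4))
        return decoder.decode(await reader.readexactly(n))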
24 | 25 | The full example source can be found `here 26 | `__. 27 | 28 | .. literalinclude:: ../../../examples/asyncio-kv/kv.py 29 | :language: python 30 | 31 | 32 | An example usage session: 33 | 34 | **Server** 35 | 36 | .. code-block:: shell 37 | 38 | $ python kv.py 39 | Serving on tcp://127.0.0.1:8888... 40 | Connection opened 41 | Connection closed 42 | 43 | 44 | **Client** 45 | 46 | .. code-block:: ipython3 47 | 48 | In [1]: from kv import Client 49 | 50 | In [2]: client = await Client.create() 51 | 52 | In [3]: await client.put("foo", "bar") 53 | 54 | In [4]: await client.put("fizz", "buzz") 55 | 56 | In [5]: await client.get("foo") 57 | Out[5]: 'bar' 58 | 59 | In [6]: await client.list_keys() 60 | Out[6]: ['fizz', 'foo'] 61 | 62 | In [7]: await client.delete("fizz") 63 | 64 | In [8]: await client.list_keys() 65 | Out[8]: ['foo'] 66 | -------------------------------------------------------------------------------- /docs/source/examples/conda-repodata.rst: -------------------------------------------------------------------------------- 1 | Conda Repodata 2 | ============== 3 | 4 | This example benchmarks using different JSON libraries to parse and query the 5 | `current_repodata.json`_ file from conda-forge_. This is a medium-sized (~14 6 | MiB) JSON file containing nested metadata about every package on conda-forge. 7 | 8 | The following libraries are compared: 9 | 10 | - json_ 11 | - ujson_ 12 | - orjson_ 13 | - simdjson_ 14 | - msgspec_ 15 | 16 | This benchmark measures how long it takes each library to decode the 17 | ``current_repodata.json`` file, extract the name and size of each package, and 18 | determine the top 10 packages by file size. 19 | 20 | **Results** 21 | 22 | .. raw:: html 23 | 24 |
25 | 26 | .. code-block:: text 27 | 28 | $ python query_repodata.py 29 | json: 139.14 ms 30 | ujson: 124.91 ms 31 | orjson: 91.69 ms 32 | simdjson: 66.40 ms 33 | msgspec: 25.73 ms 34 | 35 | 36 | **Commentary** 37 | 38 | - All of these are fairly quick, library choice likely doesn't matter at all 39 | for simple scripts on small- to medium-sized data. 40 | 41 | - While ``orjson`` is faster than ``json``, the difference between them is only 42 | ~30%. Creating python objects dominates the execution time of any well 43 | optimized decoding library. How fast the underlying JSON parser is matters, 44 | but JSON optimizations can only get you so far if you're still creating a new 45 | Python object for every node in the JSON object. 46 | 47 | - ``simdjson`` is much more performant. This is partly due to the SIMD 48 | optimizations it uses, but mostly it's due to not creating so many Python 49 | objects. ``simdjson`` first parses a JSON blob into a proxy object. It then 50 | lazily creates Python objects as needed as different fields are accessed. 51 | This means you only pay the cost of creating Python objects for the fields 52 | you use; a query that only accesses a few fields runs much faster since not 53 | as many Python objects are created. The downside is every attribute access 54 | results in some indirection as new objects are created 55 | 56 | - ``msgspec`` is the fastest option tested. It relies on defining a known 57 | schema beforehand. We don't define the schema for the entire structure, only 58 | for the fields we access. Only fields that are part of the schema are 59 | decoded, with a new Python object created for each. This allocates the same 60 | number of objects as ``simdjson``, but does it all at once, avoiding 61 | indirection costs later on during use. See :ref:`this performance tip 62 | ` for more information. 63 | 64 | **Source** 65 | 66 | The full example source can be found `here 67 | `__. 68 | 69 | .. literalinclude:: ../../../examples/conda-repodata/query_repodata.py 70 | :language: python 71 | 72 | .. raw:: html 73 | 74 | 75 | 76 | 77 | 109 | 110 | 111 | .. _conda-forge: https://conda-forge.org/ 112 | .. _current_repodata.json: https://conda.anaconda.org/conda-forge/noarch/current_repodata.json 113 | .. _json: https://docs.python.org/3/library/json.html 114 | .. _ujson: https://github.com/ultrajson/ultrajson 115 | .. _msgspec: https://jcristharif.com/msgspec/ 116 | .. _orjson: https://github.com/ijl/orjson 117 | .. _simdjson: https://github.com/TkTech/pysimdjson 118 | -------------------------------------------------------------------------------- /docs/source/examples/geojson.rst: -------------------------------------------------------------------------------- 1 | GeoJSON 2 | ======= 3 | 4 | `GeoJSON `__ is a popular format for encoding geographic 5 | data. Its specification_ describes nine different types a message may take 6 | (seven "geometry" types, plus two "feature" types). Here we provide one way of 7 | implementing that specification using ``msgspec`` to handle the parsing and 8 | validation. 
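The core idea is to model each GeoJSON type as a `msgspec.Struct`, using the
``"type"`` field as a tag to select the correct struct while decoding. As a
heavily abridged sketch (only two of the nine types, with simplified
coordinates; the complete implementation is included further below):

.. code-block:: python

    from typing import Union

    import msgspec

    Position = tuple[float, float]

    class Point(msgspec.Struct, tag_field="type", tag="Point"):
        coordinates: Position

    class MultiPoint(msgspec.Struct, tag_field="type", tag="MultiPoint"):
        coordinates: list[Position]

    # Decoding a union of tagged structs dispatches on the "type" field
    Geometry = Union[Point, MultiPoint]

    msgspec.json.decode(
        b'{"type": "Point", "coordinates": [1.0, 2.0]}', type=Geometry
    )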
9 | 10 | The ``loads`` and ``dumps`` methods defined below work similar to the 11 | standard library's ``json.loads``/``json.dumps``, but: 12 | 13 | - Will result in high-level `msgspec.Struct` objects representing GeoJSON types 14 | - Will error nicely if a field is missing or the wrong type 15 | - Will fill in default values for optional fields 16 | - Decodes and encodes *significantly faster* than the `json` module (as well as 17 | most other ``json`` implementations in Python). 18 | 19 | This example makes use `msgspec.Struct` types to define the different GeoJSON 20 | types, and :ref:`struct-tagged-unions` to differentiate between them. See the 21 | relevant docs for more information. 22 | 23 | The full example source can be found `here 24 | `__. 25 | 26 | .. literalinclude:: ../../../examples/geojson/msgspec_geojson.py 27 | :language: python 28 | 29 | 30 | Here we use the ``loads`` method defined above to read some `example GeoJSON`_. 31 | 32 | .. code-block:: ipython3 33 | 34 | In [1]: import msgspec_geojson 35 | 36 | In [2]: with open("canada.json", "rb") as f: 37 | ...: data = f.read() 38 | 39 | In [3]: canada = msgspec_geojson.loads(data) 40 | 41 | In [4]: type(canada) # loaded as high-level, validated object 42 | Out[4]: msgspec_geojson.FeatureCollection 43 | 44 | In [5]: canada.features[0].properties 45 | Out[5]: {'name': 'Canada'} 46 | 47 | Comparing performance to: 48 | 49 | - orjson_ 50 | - `json` 51 | - geojson_ (another validating Python implementation) 52 | 53 | .. code-block:: ipython3 54 | 55 | In [6]: %timeit msgspec_geojson.loads(data) # benchmark msgspec 56 | 6.15 ms ± 13.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 57 | 58 | In [7]: %timeit orjson.loads(data) # benchmark orjson 59 | 8.67 ms ± 20.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) 60 | 61 | In [8]: %timeit json.loads(data) # benchmark json 62 | 27.6 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) 63 | 64 | In [9]: %timeit geojson.loads(data) # benchmark geojson 65 | 93.9 ms ± 88.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) 66 | 67 | 68 | This shows that the readable ``msgspec`` implementation above is 1.4x faster 69 | than `orjson` (on this data), while also ensuring the loaded data is valid 70 | GeoJSON. Compared to geojson_ (another validating geojson library for python), 71 | loading the data using ``msgspec`` was **15.3x faster**. 72 | 73 | .. _specification: https://datatracker.ietf.org/doc/html/rfc7946 74 | .. _example GeoJSON: https://github.com/jcrist/msgspec/blob/main/examples/geojson/canada.json 75 | .. _orjson: https://github.com/ijl/orjson 76 | .. _geojson: https://github.com/jazzband/geojson 77 | -------------------------------------------------------------------------------- /docs/source/examples/index.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | Here we provide a few examples using ``msgspec`` to accomplish various tasks. 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | geojson.rst 10 | asyncio-kv.rst 11 | conda-repodata.rst 12 | pyproject-toml.rst 13 | edgedb.rst 14 | -------------------------------------------------------------------------------- /docs/source/examples/pyproject-toml.rst: -------------------------------------------------------------------------------- 1 | Parsing ``pyproject.toml`` 2 | ========================== 3 | 4 | `PEP 518`_ defined a new ``pyproject.toml`` configuration file Python projects 5 | can use for configuring: 6 | 7 | - Metadata (name, version, ...) 8 | - Dependencies 9 | - Build systems 10 | - Additional development tools (black_, mypy_, pytest_, ... all support 11 | ``pyproject.toml`` files for configuration). 12 | 13 | The format was defined in a series of Python Enhancement Proposals (PEPs), 14 | which also serve as the main documentation for the file schema. 15 | 16 | - `PEP 517`_: A build-system independent format for source trees 17 | - `PEP 518`_: Specifying minimum build system requirements for Python projects 18 | - `PEP 621`_: Storing project metadata in pyproject.toml 19 | 20 | Here we define a msgspec schema for parsing and validating a ``pyproject.toml`` 21 | file. This includes full schema definitions for all fields in the 22 | ``build-system`` and ``project`` tables, as well as an untyped table under 23 | ``tool``. 24 | 25 | The full example source can be found `here 26 | `__. 27 | 28 | .. literalinclude:: ../../../examples/pyproject-toml/pyproject.py 29 | :language: python 30 | 31 | Here we use it to load the `pyproject.toml for Starlette 32 | `__: 33 | 34 | .. code-block:: ipython3 35 | 36 | In [1]: import pyproject 37 | 38 | In [2]: import urllib.request 39 | 40 | In [3]: url = "https://raw.githubusercontent.com/encode/starlette/master/pyproject.toml" 41 | 42 | In [4]: with urllib.request.urlopen(url) as f: 43 | ...: data = f.read() 44 | 45 | In [5]: result = pyproject.decode(data) # decode the pyproject.toml 46 | 47 | In [6]: result.build_system 48 | Out[6]: BuildSystem(requires=['hatchling'], build_backend='hatchling.build', backend_path=[]) 49 | 50 | In [7]: result.project.name 51 | Out[7]: 'starlette' 52 | 53 | Note that this only validates that fields are of the proper type. It doesn't 54 | check: 55 | 56 | - Whether strings like URLs or `dependency specifiers`_ are valid. Some of 57 | these could be handled using msgspec's existing :doc:`../constraints` system, 58 | but not all of them. 59 | - Mutually exclusive field restrictions (for example, you can't set both 60 | ``project.license.file`` and ``project.license.text``). ``msgspec`` currently 61 | has no way of declaring these restrictions. 62 | 63 | Even with these caveats, the schemas here are still useful: 64 | 65 | - Since ``forbid_unknown_fields=True`` is configured, any extra fields will 66 | raise a nice error message. This is very useful for catching typos in 67 | configuration files, as the misspelled field names won't be silently ignored. 68 | - Type errors for fields will also be caught, with a nice error raised. 69 | - Any downstream consumers of ``decode`` have a nice high-level object to work 70 | with, complete with type annotations. This plays well with tab-completion and 71 | tools like mypy_ or pyright_, improving usability. 72 | 73 | For example, here's an invalid ``pyproject.toml``. 74 | 75 | .. 
code-block:: toml 76 | 77 | [build-system] 78 | requires = "hatchling" 79 | build-backend = "hatchling.build" 80 | 81 | [project] 82 | name = "myproject" 83 | version = "0.1.0" 84 | description = "a super great library" 85 | authors = [ 86 | {name = "alice shmalice", email = "alice@company.com"} 87 | ] 88 | 89 | Can you spot the error? Using the schemas defined above, ``msgpspec`` can 90 | detect schema issues like this, and raise a nice error message. In this case 91 | the issue is that ``build-system.requires`` should be an array of strings, not 92 | a single string: 93 | 94 | .. code-block:: ipython 95 | 96 | In [1]: import pyproject 97 | 98 | In [2]: with open("pyproject.toml", "rb") as f: 99 | ...: invalid = f.read() 100 | 101 | In [3]: pyproject.decode(invalid) 102 | --------------------------------------------------------------------------- 103 | ValidationError Traceback (most recent call last) 104 | Cell In [3], line 1 105 | ----> 1 pyproject.decode(invalid) 106 | ValidationError: Expected `array`, got `str` - at `$.build-system.requires` 107 | 108 | 109 | .. _PEP 517: https://peps.python.org/pep-0517/ 110 | .. _PEP 518: https://peps.python.org/pep-0518/ 111 | .. _PEP 621: https://peps.python.org/pep-0621/ 112 | .. _black: https://black.readthedocs.io 113 | .. _mypy: https://mypy.readthedocs.io 114 | .. _pyright: https://github.com/microsoft/pyright 115 | .. _pytest: https://docs.pytest.org 116 | .. _dependency specifiers: https://packaging.python.org/en/latest/specifications/dependency-specifiers/ 117 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | msgspec 2 | ======= 3 | 4 | ``msgspec`` is a *fast* serialization and validation library, with builtin 5 | support for JSON_, MessagePack_, YAML_, and TOML_. It features: 6 | 7 | - 🚀 **High performance encoders/decoders** for common protocols. The JSON and 8 | MessagePack implementations regularly :doc:`benchmark ` as the 9 | fastest options for Python. 10 | 11 | - 🎉 **Support for a wide variety of Python types**. Additional types may 12 | be supported through :doc:`extensions `. 13 | 14 | - 🔍 **Zero-cost schema validation** using familiar Python type annotations. 15 | In :doc:`benchmarks ` ``msgspec`` decodes *and* validates JSON 16 | faster than orjson_ can decode it alone. 17 | 18 | - ✨ **A speedy Struct type** for representing structured data. If you already 19 | use dataclasses_ or attrs_, :doc:`structs` should feel familiar. However, 20 | they're :ref:`5-60x ` faster for common operations. 21 | 22 | All of this is included in a :ref:`lightweight library 23 | ` with no required dependencies. 24 | 25 | ----- 26 | 27 | ``msgspec`` may be used for serialization alone, as a faster JSON or 28 | MessagePack library. For the greatest benefit though, we recommend using 29 | ``msgspec`` to handle the full serialization & validation workflow: 30 | 31 | **Define** your message schemas using standard Python type annotations. 32 | 33 | .. code-block:: python 34 | 35 | >>> import msgspec 36 | 37 | >>> class User(msgspec.Struct): 38 | ... """A new type describing a User""" 39 | ... name: str 40 | ... groups: set[str] = set() 41 | ... email: str | None = None 42 | 43 | **Encode** messages as JSON, or one of the many other supported protocols. 44 | 45 | .. 
code-block:: python 46 | 47 | >>> alice = User("alice", groups={"admin", "engineering"}) 48 | 49 | >>> alice 50 | User(name='alice', groups={"admin", "engineering"}, email=None) 51 | 52 | >>> msg = msgspec.json.encode(alice) 53 | 54 | >>> msg 55 | b'{"name":"alice","groups":["admin","engineering"],"email":null}' 56 | 57 | **Decode** messages back into Python objects, with optional schema validation. 58 | 59 | .. code-block:: python 60 | 61 | >>> msgspec.json.decode(msg, type=User) 62 | User(name='alice', groups={"admin", "engineering"}, email=None) 63 | 64 | >>> msgspec.json.decode(b'{"name":"bob","groups":[123]}', type=User) 65 | Traceback (most recent call last): 66 | File "", line 1, in 67 | msgspec.ValidationError: Expected `str`, got `int` - at `$.groups[0]` 68 | 69 | ``msgspec`` is designed to be as performant as possible, while retaining some 70 | of the nicities of validation libraries like pydantic_. For supported types, 71 | encoding/decoding a message with ``msgspec`` can be :doc:`~10-80x faster than 72 | alternative libraries `. 73 | 74 | Highlights 75 | ---------- 76 | 77 | - ``msgspec`` is **fast**. It :doc:`benchmarks ` as the fastest 78 | serialization library for Python, outperforming all other JSON/MessagePack 79 | libraries compared. 80 | 81 | - ``msgspec`` is **friendly**. Through use of Python's type annotations, 82 | messages are :ref:`validated ` during deserialization in a 83 | declarative way. ``msgspec`` also works well with other type-checking tooling 84 | like mypy_ and pyright_, providing excellent editor integration. 85 | 86 | - ``msgspec`` is **flexible**. It natively supports a :doc:`wide range of 87 | Python builtin types `. Support for additional types can 88 | also be added through :doc:`extensions `. 89 | 90 | - ``msgspec`` is **lightweight**. It has no required dependencies, and the 91 | binary size is :ref:`a fraction of that of comparable libraries 92 | `. 93 | 94 | - ``msgspec`` is **correct**. The encoders/decoders implemented are strictly 95 | compliant with their respective specifications, providing stronger guarantees 96 | of compatibility with other systems. 97 | 98 | Used By 99 | ------- 100 | 101 | ``msgspec`` is used by many organizations and `open source projects 102 | `__, here we highlight a 103 | few: 104 | 105 | .. grid:: 2 2 4 4 106 | 107 | .. grid-item-card:: NautilusTrader 108 | :link: https://nautilustrader.io/ 109 | 110 | .. image:: _static/nautilus-trader.png 111 | 112 | .. grid-item-card:: Litestar 113 | :link: https://litestar.dev/ 114 | 115 | .. image:: _static/litestar.png 116 | 117 | .. grid-item-card:: Sanic 118 | :link: https://sanic.dev/en/ 119 | 120 | .. image:: _static/sanic.png 121 | 122 | .. grid-item-card:: Mosec 123 | :link: https://mosecorg.github.io/mosec/ 124 | 125 | .. image:: _static/mosec.png 126 | 127 | .. grid-item-card:: Pioreactor 128 | :link: https://pioreactor.com/ 129 | 130 | .. image:: _static/pioreactor.png 131 | 132 | .. grid-item-card:: Zero 133 | :link: https://github.com/Ananto30/zero 134 | 135 | .. image:: _static/zero.png 136 | 137 | .. grid-item-card:: anywidget 138 | :link: https://anywidget.dev/ 139 | 140 | .. image:: _static/anywidget.png 141 | 142 | .. grid-item-card:: esmerald 143 | :link: https://esmerald.dev/ 144 | 145 | .. image:: _static/esmerald.png 146 | 147 | 148 | .. _type annotations: https://docs.python.org/3/library/typing.html 149 | .. _JSON: https://json.org 150 | .. _MessagePack: https://msgpack.org 151 | .. _YAML: https://yaml.org 152 | .. _TOML: https://toml.io 153 | .. 
_attrs: https://www.attrs.org 154 | .. _dataclasses: https://docs.python.org/3/library/dataclasses.html 155 | .. _orjson: https://github.com/ijl/orjson 156 | .. _pydantic: https://pydantic-docs.helpmanual.io/ 157 | .. _mypy: https://mypy.readthedocs.io 158 | .. _pyright: https://github.com/microsoft/pyright 159 | 160 | .. toctree:: 161 | :hidden: 162 | :maxdepth: 2 163 | :caption: Overview 164 | 165 | why.rst 166 | install.rst 167 | benchmarks.rst 168 | 169 | .. toctree:: 170 | :hidden: 171 | :maxdepth: 2 172 | :caption: User Guide 173 | 174 | usage.rst 175 | supported-types.rst 176 | structs.rst 177 | constraints.rst 178 | converters.rst 179 | jsonschema.rst 180 | schema-evolution.rst 181 | 182 | .. toctree:: 183 | :hidden: 184 | :maxdepth: 2 185 | :caption: Advanced 186 | 187 | extending.rst 188 | inspect.rst 189 | perf-tips.rst 190 | 191 | .. toctree:: 192 | :hidden: 193 | :maxdepth: 2 194 | :caption: Reference 195 | 196 | api.rst 197 | examples/index.rst 198 | changelog.rst 199 | -------------------------------------------------------------------------------- /docs/source/inspect.rst: -------------------------------------------------------------------------------- 1 | Inspecting Types 2 | ---------------- 3 | 4 | .. currentmodule:: msgspec.inspect 5 | 6 | .. warning:: 7 | 8 | This module is experimental. While we don't expect any breaking changes, we 9 | also don't promise not to break things between releases while this interface 10 | stabilizes. 11 | 12 | ``msgspec`` provides type-introspection support, which can be used to build 13 | tooling on top of msgspec-compatible types. Possible use cases include: 14 | 15 | - Generating OpenAPI_ specifications from msgspec-compatible types (note that 16 | the builtin :doc:`jsonschema` support may be a better starting point for 17 | this). 18 | - Generating example instances of types for testing or documentation purposes 19 | - Integration with hypothesis_ for testing 20 | 21 | The main function here is `msgspec.inspect.type_info` for converting a type 22 | annotation into a corresponding `msgspec.inspect.Type` object. There's also 23 | `msgspec.inspect.multi_type_info` which converts an iterable of annotations; 24 | this function is more efficient than calling `type_info` in a loop. 25 | 26 | .. code-block:: python 27 | 28 | >>> import msgspec 29 | 30 | >>> msgspec.inspect.type_info(bool) 31 | BoolType() 32 | 33 | >>> msgspec.inspect.type_info(int) 34 | IntType(gt=None, ge=None, lt=None, le=None, multiple_of=None) 35 | 36 | >>> msgspec.inspect.type_info(list[int]) # nested types are traversed 37 | ListType( 38 | item_type=IntType(gt=None, ge=None, lt=None, le=None, multiple_of=None), 39 | min_length=None, 40 | max_length=None 41 | ) 42 | 43 | >>> msgspec.inspect.multi_type_info([bool, int]) # inspect multiple types 44 | (BoolType(), IntType(gt=None, ge=None, lt=None, le=None, multiple_of=None)) 45 | 46 | 47 | Types with :doc:`constraints` will include the constraint information as well: 48 | 49 | .. code-block:: python 50 | 51 | >>> from typing import Annotated 52 | 53 | >>> from msgspec import Meta 54 | 55 | >>> PositiveInt = Annotated[int, Meta(gt=0)] 56 | 57 | >>> msgspec.inspect.type_info(PositiveInt) 58 | IntType(gt=0, ge=None, lt=None, le=None, multiple_of=None) 59 | 60 | Compound types like :doc:`structs` are also supported: 61 | 62 | .. code-block:: python 63 | 64 | >>> class User(msgspec.Struct): 65 | ... name: str 66 | ... groups: list[str] = [] 67 | ... 
email: str | None = None 68 | 69 | >>> msgspec.inspect.type_info(User) 70 | StructType( 71 | cls=User, 72 | fields=( 73 | Field( 74 | name='name', 75 | encode_name='name', 76 | type=StrType(min_length=None, max_length=None, pattern=None), 77 | required=True, 78 | default=UNSET, 79 | default_factory=UNSET 80 | ), 81 | Field( 82 | name='groups', 83 | encode_name='groups', 84 | type=ListType( 85 | item_type=StrType(min_length=None, max_length=None, pattern=None), 86 | min_length=None, 87 | max_length=None 88 | ), 89 | required=False, 90 | default=[], 91 | default_factory=UNSET 92 | ), 93 | Field( 94 | name='email', 95 | encode_name='email', 96 | type=UnionType( 97 | types=( 98 | StrType(min_length=None, max_length=None, pattern=None), 99 | NoneType() 100 | ) 101 | ), 102 | required=False, 103 | default=None, 104 | default_factory=UNSET 105 | ) 106 | ), 107 | tag_field=None, 108 | tag=None, 109 | array_like=False, 110 | forbid_unknown_fields=False 111 | ) 112 | 113 | Types with additional metadata like ``extra_json_schema`` or ``title`` will be 114 | wrapped in a `msgspec.inspect.Metadata` object. Note that all JSON schema 115 | specific fields are merged into a single ``extra_json_schema`` dict. 116 | 117 | .. code-block:: python 118 | 119 | >>> UnixName = Annotated[ 120 | ... str, 121 | ... Meta( 122 | ... min_length=1, 123 | ... max_length=32, 124 | ... pattern="^[a-z_][a-z0-9_-]*$", 125 | ... description="A valid UNIX username" 126 | ... ) 127 | ... ] 128 | 129 | >>> msgspec.inspect.type_info(UnixName) 130 | Metadata( 131 | type=StrType( 132 | min_length=1, 133 | max_length=32, 134 | pattern='^[a-z_][a-z0-9_-]*$' 135 | ), 136 | extra_json_schema={'description': 'A valid UNIX username'} 137 | ) 138 | 139 | Every type supported by ``msgspec`` has a corresponding `msgspec.inspect.Type` 140 | subclass. See the :ref:`API docs ` for a complete list of types. 141 | 142 | For an example of using these functions, you might find our builtin 143 | :doc:`jsonschema` generator implementation useful - the code for this can be 144 | found `here 145 | `__. In 146 | particular, take a look at the large if-else statement in ``_to_schema``. 147 | 148 | 149 | .. _OpenAPI: https://www.openapis.org/ 150 | .. _hypothesis: https://hypothesis.readthedocs.io/en/latest/ 151 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | ``msgspec`` may be installed via ``pip`` or ``conda``. Note that Python >= 3.8 5 | is required. The basic install has no required dependencies. 6 | 7 | **pip** 8 | 9 | .. code-block:: shell 10 | 11 | pip install msgspec 12 | 13 | **conda** 14 | 15 | .. code-block:: shell 16 | 17 | conda install msgspec -c conda-forge 18 | 19 | 20 | Optional Dependencies 21 | --------------------- 22 | 23 | Depending on your platform, the base install of ``msgspec`` may not support 24 | TOML_ or YAML_ without additional dependencies. 25 | 26 | TOML 27 | ~~~~ 28 | 29 | The TOML_ protocol requires: 30 | 31 | - Python < 3.11: `tomli`_ and `tomli_w`_ for reading and writing TOML. 32 | 33 | - Python >= 3.11: `tomli_w`_ for writing TOML. Reading TOML is done using 34 | the standard library's `tomllib` and requires no additional dependencies. 35 | 36 | You may either install these dependencies manually, or depend on the ``toml`` 37 | extra: 38 | 39 | **pip** 40 | 41 | .. 
code-block:: shell 42 | 43 | pip install "msgspec[toml]" 44 | 45 | **conda** 46 | 47 | .. code-block:: shell 48 | 49 | conda install msgspec-toml -c conda-forge 50 | 51 | YAML 52 | ~~~~ 53 | 54 | The YAML_ protocol requires PyYAML_ on all platforms. You may either install 55 | this dependency manually, or depend on the ``yaml`` extra: 56 | 57 | **pip** 58 | 59 | .. code-block:: shell 60 | 61 | pip install "msgspec[yaml]" 62 | 63 | **conda** 64 | 65 | .. code-block:: shell 66 | 67 | conda install msgspec-yaml -c conda-forge 68 | 69 | 70 | Installing from GitHub 71 | ---------------------- 72 | 73 | If you want wish to use a feature that hasn't been released yet, you may 74 | install from the `development branch on GitHub 75 | `__ using ``pip``: 76 | 77 | .. code-block:: shell 78 | 79 | pip install git+https://github.com/jcrist/msgspec.git 80 | 81 | 82 | .. _YAML: https://yaml.org 83 | .. _TOML: https://toml.io 84 | .. _PyYAML: https://pyyaml.org/ 85 | .. _tomli: https://github.com/hukkin/tomli 86 | .. _tomli_w: https://github.com/hukkin/tomli-w 87 | -------------------------------------------------------------------------------- /docs/source/jsonschema.rst: -------------------------------------------------------------------------------- 1 | JSON Schema 2 | =========== 3 | 4 | ``msgspec`` provides a few utilities for generating `JSON Schema`_ 5 | specifications from msgspec-compatible :doc:`types ` and 6 | :doc:`constraints `. 7 | 8 | - `msgspec.json.schema`: generates a complete JSON Schema for a single type. 9 | - `msgspec.json.schema_components`: generates JSON schemas for multiple types, 10 | along with a corresponding ``components`` mapping. This is mainly useful when 11 | generating multiple schemas to include in a larger specification like OpenAPI_. 12 | 13 | 14 | The generated schemas are compatible with `JSON Schema`_ 2020-12 and OpenAPI_ 15 | 3.1. 16 | 17 | 18 | Example 19 | ------- 20 | 21 | 22 | .. code-block:: python 23 | 24 | import msgspec 25 | from msgspec import Struct, Meta 26 | from typing import Annotated, Optional 27 | 28 | 29 | # A float constrained to values > 0 30 | PositiveFloat = Annotated[float, Meta(gt=0)] 31 | 32 | 33 | class Dimensions(Struct): 34 | """Dimensions for a product, all measurements in centimeters""" 35 | length: PositiveFloat 36 | width: PositiveFloat 37 | height: PositiveFloat 38 | 39 | 40 | class Product(Struct): 41 | """A product in a catalog""" 42 | id: int 43 | name: str 44 | price: PositiveFloat 45 | tags: set[str] = set() 46 | dimensions: Optional[Dimensions] = None 47 | 48 | 49 | # Generate a schema for a list of products 50 | schema = msgspec.json.schema(list[Product]) 51 | 52 | # Print out that schema as JSON 53 | print(msgspec.json.encode(schema)) 54 | 55 | 56 | .. 
code-block:: json 57 | 58 | { 59 | "type": "array", 60 | "items": {"$ref": "#/$defs/Product"}, 61 | "$defs": { 62 | "Dimensions": { 63 | "title": "Dimensions", 64 | "description": "Dimensions for a product, all measurements in centimeters", 65 | "type": "object", 66 | "properties": { 67 | "length": {"type": "number", "exclusiveMinimum": 0}, 68 | "width": {"type": "number", "exclusiveMinimum": 0}, 69 | "height": {"type": "number", "exclusiveMinimum": 0} 70 | }, 71 | "required": ["length", "width", "height"] 72 | }, 73 | "Product": { 74 | "title": "Product", 75 | "description": "A product in a catalog", 76 | "type": "object", 77 | "properties": { 78 | "id": {"type": "integer"}, 79 | "name": {"type": "string"}, 80 | "price": {"type": "number", "exclusiveMinimum": 0}, 81 | "tags": { 82 | "type": "array", 83 | "items": {"type": "string"}, 84 | "default": [], 85 | }, 86 | "dimensions": { 87 | "anyOf": [{"type": "null"}, {"$ref": "#/$defs/Dimensions"}], 88 | "default": null, 89 | } 90 | }, 91 | "required": ["id", "name", "price"] 92 | } 93 | } 94 | } 95 | 96 | 97 | .. _JSON Schema: https://json-schema.org/ 98 | .. _OpenAPI: https://www.openapis.org/ 99 | -------------------------------------------------------------------------------- /docs/source/schema-evolution.rst: -------------------------------------------------------------------------------- 1 | Schema Evolution 2 | ================ 3 | 4 | ``msgspec`` includes support for "schema evolution", meaning that: 5 | 6 | - Messages serialized with an older version of a schema will be deserializable 7 | using a newer version of the schema. 8 | - Messages serialized with a newer version of the schema will be deserializable 9 | using an older version of the schema. 10 | 11 | This can be useful if, for example, you have clients and servers with 12 | mismatched versions. 13 | 14 | For schema evolution to work smoothly, you need to follow a few guidelines: 15 | 16 | 1. Any new fields on a `msgspec.Struct` must specify default values. 17 | 2. Structs with ``array_like=True`` must not reorder fields, and any new fields 18 | must be appended to the end (and have defaults). 19 | 3. Don't change the type annotations for existing messages or fields. 20 | 4. Don't change the type codes or implementations for any defined 21 | :ref:`extensions ` (MessagePack only). 22 | 23 | For example, suppose we had a `msgspec.Struct` type representing a user: 24 | 25 | .. code-block:: python 26 | 27 | >>> import msgspec 28 | 29 | >>> from typing import Set, Optional 30 | 31 | >>> class User(msgspec.Struct): 32 | ... """A struct representing a user""" 33 | ... name: str 34 | ... groups: Set[str] = set() 35 | ... email: Optional[str] = None 36 | 37 | Then suppose we wanted to add a new ``phone`` field to this struct in a way 38 | that wouldn't break clients/servers still using the prior definition. To 39 | accomplish this, we add ``phone`` as an _optional_ field (defaulting to 40 | ``None``), at the end of the struct. 41 | 42 | .. code-block:: python 43 | 44 | >>> class User2(msgspec.Struct): 45 | ... """An updated version of the User struct, now with a phone number""" 46 | ... name: str 47 | ... groups: Set[str] = set() 48 | ... email: Optional[str] = None 49 | ... phone : Optional[str] = None 50 | 51 | Messages serialized using both the old and new schemas can still be exchanged 52 | without error. If an old message is deserialized using the new schema, the 53 | missing fields all have default values that will be used. 
Likewise, if a new 54 | message is deserialized with the old schema, the unknown new fields will be 55 | efficiently skipped without decoding. 56 | 57 | .. code-block:: python 58 | 59 | >>> old_dec = msgspec.json.Decoder(User) 60 | 61 | >>> new_dec = msgspec.json.Decoder(User2) 62 | 63 | >>> new_msg = msgspec.json.encode( 64 | ... User2("bob", groups={"finance"}, phone="512-867-5309") 65 | ... ) 66 | 67 | >>> old_dec.decode(new_msg) # deserializing a new msg with an older decoder 68 | User(name='bob', groups={'finance'}, email=None) 69 | 70 | >>> old_msg = msgspec.json.encode( 71 | ... User("alice", groups={"admin", "engineering"}) 72 | ... ) 73 | 74 | >>> new_dec.decode(old_msg) # deserializing an old msg with a new decoder 75 | User2(name='alice', groups={'admin', 'engineering'}, email=None, phone=None) 76 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | ``msgspec`` supports multiple serialization protocols, accessed through 5 | separate submodules: 6 | 7 | - ``msgspec.json`` (JSON_) 8 | - ``msgspec.msgpack`` (MessagePack_) 9 | - ``msgspec.yaml`` (YAML_) 10 | - ``msgspec.toml`` (TOML_) 11 | 12 | Each supports a consistent interface, making it simple to switch between 13 | protocols as needed. 14 | 15 | Encoding 16 | -------- 17 | 18 | Each submodule has an ``encode`` method for encoding Python objects using the 19 | respective protocol. 20 | 21 | .. code-block:: python 22 | 23 | >>> import msgspec 24 | 25 | >>> # Encode as JSON 26 | ... msgspec.json.encode({"hello": "world"}) 27 | b'{"hello":"world"}' 28 | 29 | >>> # Encode as msgpack 30 | ... msgspec.msgpack.encode({"hello": "world"}) 31 | b'\x81\xa5hello\xa5world' 32 | 33 | Note that if you're making multiple calls to ``encode``, it's more efficient to 34 | create an ``Encoder`` once and use the ``Encoder.encode`` method instead. 35 | 36 | .. code-block:: python 37 | 38 | >>> import msgspec 39 | 40 | >>> # Create a JSON encoder 41 | ... encoder = msgspec.json.Encoder() 42 | 43 | >>> # Encode as JSON using the encoder 44 | ... encoder.encode({"hello": "world"}) 45 | b'{"hello":"world"}' 46 | 47 | Decoding 48 | -------- 49 | 50 | Each submodule has a ``decode`` method for decoding messages using the respective 51 | protocol. 52 | 53 | .. code-block:: python 54 | 55 | >>> import msgspec 56 | 57 | >>> # Decode JSON 58 | ... msgspec.json.decode(b'{"hello":"world"}') 59 | {'hello': 'world'} 60 | 61 | >>> # Decode msgpack 62 | ... msgspec.msgpack.decode(b'\x81\xa5hello\xa5world') 63 | {'hello': 'world'} 64 | 65 | Note that if you're making multiple calls to ``decode``, it's more efficient to 66 | create a ``Decoder`` once and use the ``Decoder.decode`` method instead. 67 | 68 | .. code-block:: python 69 | 70 | >>> import msgspec 71 | 72 | >>> # Create a JSON decoder 73 | ... decoder = msgspec.json.Decoder() 74 | 75 | >>> # Decode JSON using the decoder 76 | ... decoder.decode(b'{"hello":"world"}') 77 | {'hello': 'world'} 78 | 79 | 80 | .. _typed-decoding: 81 | 82 | Typed Decoding 83 | -------------- 84 | 85 | ``msgspec`` optionally supports specifying the expected output types during 86 | decoding. This serves a few purposes: 87 | 88 | - Often serialized data has a fixed schema (e.g. a request handler in a REST 89 | API expects a certain JSON structure). Specifying the expected types allows 90 | ``msgspec`` to perform validation during decoding, with *no* added runtime 91 | cost.
92 | 93 | - Python has a much richer type system than serialization protocols like JSON_ 94 | or MessagePack_. Specifying the output types lets ``msgspec`` decode messages 95 | into types other than the defaults described above (e.g. decoding JSON 96 | objects into a :doc:`Struct <structs>` instead of the default `dict`). 97 | 98 | - The `type annotations`_ used to describe the expected types are compatible 99 | with tools like mypy_ or pyright_, providing excellent editor integration. 100 | 101 | ``msgspec`` uses Python `type annotations`_ to describe the expected types. A 102 | :doc:`wide variety of builtin types are supported <supported-types>`. 103 | 104 | Here we define a user schema as a :doc:`Struct <structs>` type. We then pass 105 | the type to ``decode`` via the ``type`` keyword argument: 106 | 107 | .. code-block:: python 108 | 109 | >>> import msgspec 110 | 111 | >>> class User(msgspec.Struct): 112 | ... name: str 113 | ... groups: set[str] = set() 114 | ... email: str | None = None 115 | 116 | >>> msgspec.json.decode( 117 | ... b'{"name": "alice", "groups": ["admin", "engineering"]}', 118 | ... type=User 119 | ... ) 120 | User(name='alice', groups={'admin', 'engineering'}, email=None) 121 | 122 | If a message doesn't match the expected type, an error is raised. 123 | 124 | .. code-block:: python 125 | 126 | >>> msgspec.json.decode( 127 | ... b'{"name": "bill", "groups": ["devops", 123]}', 128 | ... type=User 129 | ... ) 130 | Traceback (most recent call last): 131 | File "<stdin>", line 1, in <module> 132 | msgspec.ValidationError: Expected `str`, got `int` - at `$.groups[1]` 133 | 134 | .. _strict-vs-lax: 135 | 136 | "Strict" vs "Lax" Mode 137 | ~~~~~~~~~~~~~~~~~~~~~~ 138 | 139 | Unlike some other libraries (e.g. pydantic_), ``msgspec`` won't perform any 140 | unsafe implicit conversion by default ("strict" mode). For example, if an 141 | integer is specified and a string is provided instead, an error is raised 142 | rather than attempting to cast the string to an int. 143 | 144 | .. code-block:: python 145 | 146 | >>> msgspec.json.decode(b'[1, 2, "3"]', type=list[int]) 147 | Traceback (most recent call last): 148 | File "<stdin>", line 1, in <module> 149 | msgspec.ValidationError: Expected `int`, got `str` - at `$[2]` 150 | 151 | For cases where you'd like a more lax set of conversion rules, you can pass 152 | ``strict=False`` to any ``decode`` function or ``Decoder`` class ("lax" mode). 153 | See :doc:`supported-types` for information on how this affects individual 154 | types. 155 | 156 | .. code-block:: python 157 | 158 | >>> msgspec.json.decode(b'[1, 2, "3"]', type=list[int], strict=False) 159 | [1, 2, 3] 160 | 161 | 162 | .. _JSON: https://json.org 163 | .. _MessagePack: https://msgpack.org 164 | .. _YAML: https://yaml.org 165 | .. _TOML: https://toml.io 166 | .. _type annotations: https://docs.python.org/3/library/typing.html 167 | .. _pydantic: https://pydantic-docs.helpmanual.io/ 168 | .. _mypy: https://mypy.readthedocs.io 169 | .. _pyright: https://github.com/microsoft/pyright 170 | -------------------------------------------------------------------------------- /docs/source/why.rst: -------------------------------------------------------------------------------- 1 | Why msgspec? 2 | ------------ 3 | 4 | If you're writing a networked application, you'll need some agreed-upon 5 | protocol that your clients and servers can use to communicate. JSON is a decent 6 | choice here (though there are many other options).
It's ubiquitous, and Python 7 | has many libraries for parsing it into builtin types (``json``, ``ujson``, 8 | ``orjson``, ...). 9 | 10 | *However, servers don't just parse JSON, they also need to do something with 11 | it*. 12 | 13 | ``msgspec`` goes above and beyond other Python JSON libraries to help with the 14 | following: 15 | 16 | - **Validation** 17 | 18 | If a field is missing from a request or has the wrong type, you probably want 19 | to raise a nice error message rather than just throwing a 500 error. 20 | 21 | ``msgspec`` lets you describe your schema via type annotations, and will 22 | efficiently :ref:`validate <typed-decoding>` messages against this 23 | schema while decoding. 24 | 25 | It also integrates well with static analysis tools like mypy_ and pyright_, 26 | helping you avoid whole classes of runtime errors. 27 | 28 | - **Application Logic** 29 | 30 | What your application actually does! While builtin types like dicts are 31 | fine for writing application logic, they aren't as ergonomic as custom 32 | classes (no attribute access, poor type checking, ...). 33 | 34 | ``msgspec`` supports a :doc:`wide variety of types <supported-types>`, 35 | letting you decouple the objects your application logic uses from those that 36 | JSON natively supports. 37 | 38 | - **Future Flexibility** 39 | 40 | Application needs change; you'll want to make sure your clients/servers won't 41 | break if the JSON schema evolves over time. 42 | 43 | To handle this, ``msgspec`` supports :doc:`"schema evolution" 44 | <schema-evolution>`. Messages can be sent between clients with different 45 | schemas without error, allowing systems to evolve over time. 46 | 47 | While there are other tools in this space, ``msgspec`` should be an :doc:`order 48 | of magnitude faster <benchmarks>` than other options. We also hope that it's 49 | quick to learn and friendly to use, letting you focus less on serialization and 50 | more on your application code. 51 | 52 | 53 | .. _mypy: https://mypy.readthedocs.io 54 | .. _pyright: https://github.com/microsoft/pyright 55 | -------------------------------------------------------------------------------- /examples/asyncio-kv/kv.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import msgspec 5 | from typing import Any 6 | 7 | 8 | # Some utilities for writing and reading length-prefix framed messages. Using 9 | # length-prefixed framing makes it easier for the reader to determine the 10 | # boundaries of each message before passing it to msgspec to be decoded. 11 | async def prefixed_send(stream: asyncio.StreamWriter, buffer: bytes) -> None: 12 | """Write a length-prefixed buffer to the stream""" 13 | # Encode the message length as a 4 byte big-endian integer. 14 | prefix = len(buffer).to_bytes(4, "big") 15 | 16 | # Write the prefix and buffer to the stream. 17 | stream.write(prefix) 18 | stream.write(buffer) 19 | await stream.drain() 20 | 21 | 22 | async def prefixed_recv(stream: asyncio.StreamReader) -> bytes: 23 | """Read a length-prefixed buffer from the stream""" 24 | # Read the next 4 byte prefix 25 | prefix = await stream.readexactly(4) 26 | 27 | # Convert the prefix back into an integer for the next message length 28 | n = int.from_bytes(prefix, "big") 29 | 30 | # Read in the full message buffer 31 | return await stream.readexactly(n) 32 | 33 | 34 | # Define some request types. We set `tag=True` on each type so they can be used 35 | # in a "tagged-union" defining the request types.
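# With `tag=True`, msgspec includes a tag field (named "type" by default) whose
# value defaults to the class name, so an encoded `Put("x", "1")` carries roughly
# `{"type": "Put", "key": "x", "val": "1"}` (shown here as JSON; this example uses
# msgpack, but the structure is the same). The typed decoder below uses that tag
# to pick the matching struct out of the `Request` union.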
36 | class Get(msgspec.Struct, tag=True): 37 | key: str 38 | 39 | 40 | class Put(msgspec.Struct, tag=True): 41 | key: str 42 | val: str 43 | 44 | 45 | class Del(msgspec.Struct, tag=True): 46 | key: str 47 | 48 | 49 | class ListKeys(msgspec.Struct, tag=True): 50 | pass 51 | 52 | 53 | # A union of all valid request types 54 | Request = Get | Put | Del | ListKeys 55 | 56 | 57 | class Server: 58 | """An example TCP key-value server using asyncio and msgspec""" 59 | 60 | def __init__(self, host: str = "127.0.0.1", port: int = 8888): 61 | self.host = host 62 | self.port = port 63 | self.kv: dict[str, str] = {} 64 | # A msgpack encoder for encoding responses 65 | self.encoder = msgspec.msgpack.Encoder() 66 | # A *typed* msgpack decoder for decoding requests. If a request doesn't 67 | # match the specified types, a nice error will be raised. 68 | self.decoder = msgspec.msgpack.Decoder(Request) 69 | 70 | async def handle_connection( 71 | self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter 72 | ): 73 | """Handle the full lifetime of a single connection""" 74 | print("Connection opened") 75 | while True: 76 | try: 77 | # Receive and decode a request 78 | buffer = await prefixed_recv(reader) 79 | req = self.decoder.decode(buffer) 80 | 81 | # Process the request 82 | resp = await self.handle_request(req) 83 | 84 | # Encode and write the response 85 | buffer = self.encoder.encode(resp) 86 | await prefixed_send(writer, buffer) 87 | except EOFError: 88 | print("Connection closed") 89 | return 90 | 91 | async def handle_request(self, req: Request) -> Any: 92 | """Handle a single request and return the result (if any)""" 93 | # We use pattern matching here to branch on the different message types. 94 | # You could just as well use an if-else statement, but pattern matching 95 | # works pretty well here. 
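# Note: structural pattern matching requires Python 3.10+. Positional patterns
# like `Get(key)` work because `msgspec.Struct` defines `__match_args__` from the
# declared field order, so `key`, `val`, etc. bind to the struct's fields.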
96 | match req: 97 | case Get(key): 98 | # Return the value for a key, or None if missing 99 | return self.kv.get(key) 100 | case Put(key, val): 101 | # Add a new key-value pair 102 | self.kv[key] = val 103 | return None 104 | case Del(key): 105 | # Remove a key-value pair if it exists 106 | self.kv.pop(key, None) 107 | return None 108 | case ListKeys(): 109 | # Return a list of all keys in the store 110 | return sorted(self.kv) 111 | 112 | async def serve_forever(self) -> None: 113 | server = await asyncio.start_server( 114 | self.handle_connection, self.host, self.port 115 | ) 116 | print(f"Serving on tcp://{self.host}:{self.port}...") 117 | async with server: 118 | await server.serve_forever() 119 | 120 | def run(self) -> None: 121 | """Run the server until ctrl-C""" 122 | asyncio.run(self.serve_forever()) 123 | 124 | 125 | class Client: 126 | """An example TCP key-value client using asyncio and msgspec.""" 127 | 128 | def __init__(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter): 129 | self.reader = reader 130 | self.writer = writer 131 | 132 | @classmethod 133 | async def create(cls, host: str = "127.0.0.1", port: int = 8888): 134 | """Create a new client""" 135 | reader, writer = await asyncio.open_connection(host, port) 136 | return cls(reader, writer) 137 | 138 | async def close(self) -> None: 139 | """Close the client.""" 140 | self.writer.close() 141 | await self.writer.wait_closed() 142 | 143 | async def request(self, req): 144 | """Send a request and await the response""" 145 | # Encode and send the request 146 | buffer = msgspec.msgpack.encode(req) 147 | await prefixed_send(self.writer, buffer) 148 | 149 | # Receive and decode the response 150 | buffer = await prefixed_recv(self.reader) 151 | return msgspec.msgpack.decode(buffer) 152 | 153 | async def get(self, key: str) -> str | None: 154 | """Get a key from the KV store, returning None if not present""" 155 | return await self.request(Get(key)) 156 | 157 | async def put(self, key: str, val: str) -> None: 158 | """Put a key-val pair in the KV store""" 159 | return await self.request(Put(key, val)) 160 | 161 | async def delete(self, key: str) -> None: 162 | """Delete a key-val pair from the KV store""" 163 | return await self.request(Del(key)) 164 | 165 | async def list_keys(self) -> list[str]: 166 | """List all keys in the KV store""" 167 | return await self.request(ListKeys()) 168 | 169 | 170 | if __name__ == "__main__": 171 | Server().run() 172 | -------------------------------------------------------------------------------- /examples/conda-repodata/query_repodata.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | import orjson 5 | import requests 6 | import simdjson 7 | import ujson 8 | 9 | import msgspec 10 | 11 | 12 | def query_msgspec(data: bytes) -> list[tuple[int, str]]: 13 | # Use Struct types to define the JSON schema. For efficiency we only define 14 | # the fields we actually need. 
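# Fields that aren't listed on these structs are simply skipped during decoding
# rather than loaded into Python objects, which is what makes a partial schema
# like this both valid and fast. (Setting `forbid_unknown_fields=True` on a
# struct would instead raise an error for unknown fields.)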
15 | class Package(msgspec.Struct): 16 | name: str 17 | size: int 18 | 19 | class RepoData(msgspec.Struct): 20 | packages: dict[str, Package] 21 | 22 | # Decode the data as a `RepoData` type 23 | repo_data = msgspec.json.decode(data, type=RepoData) 24 | 25 | # Sort packages by `size`, and return the top 10 26 | return sorted( 27 | ((p.size, p.name) for p in repo_data.packages.values()), reverse=True 28 | )[:10] 29 | 30 | 31 | def query_orjson(data: bytes) -> list[tuple[int, str]]: 32 | repo_data = orjson.loads(data) 33 | return sorted( 34 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True 35 | )[:10] 36 | 37 | 38 | def query_json(data: bytes) -> list[tuple[int, str]]: 39 | repo_data = json.loads(data) 40 | return sorted( 41 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True 42 | )[:10] 43 | 44 | 45 | def query_ujson(data: bytes) -> list[tuple[int, str]]: 46 | repo_data = ujson.loads(data) 47 | return sorted( 48 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True 49 | )[:10] 50 | 51 | 52 | def query_simdjson(data: bytes) -> list[tuple[int, str]]: 53 | repo_data = simdjson.Parser().parse(data) 54 | return sorted( 55 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True 56 | )[:10] 57 | 58 | 59 | # Download the current_repodata.json file 60 | resp = requests.get( 61 | "https://conda.anaconda.org/conda-forge/noarch/current_repodata.json" 62 | ) 63 | resp.raise_for_status() 64 | data = resp.content 65 | 66 | libraries = [ 67 | ("json", query_json), 68 | ("ujson", query_ujson), 69 | ("orjson", query_orjson), 70 | ("simdjson", query_simdjson), 71 | ("msgspec", query_msgspec), 72 | ] 73 | 74 | # Run the query with each JSON library, timing the execution 75 | for lib, func in libraries: 76 | start = time.perf_counter() 77 | func(data) 78 | stop = time.perf_counter() 79 | print(f"{lib}: {(stop - start) * 1000:.2f} ms") 80 | -------------------------------------------------------------------------------- /examples/edgedb/dbschema/default.esdl: -------------------------------------------------------------------------------- 1 | module default { 2 | type Person { 3 | required name: str; 4 | } 5 | 6 | type Movie { 7 | required title: str; 8 | multi actors: Person; 9 | } 10 | }; 11 | -------------------------------------------------------------------------------- /examples/edgedb/dbschema/migrations/00001.edgeql: -------------------------------------------------------------------------------- 1 | CREATE MIGRATION m1vegpxb3odf7j6rsioor2j5zcassvioypuixdcfujquycuufa3k2a 2 | ONTO initial 3 | { 4 | CREATE TYPE default::Person { 5 | CREATE REQUIRED PROPERTY name: std::str; 6 | }; 7 | CREATE TYPE default::Movie { 8 | CREATE MULTI LINK actors: default::Person; 9 | CREATE REQUIRED PROPERTY title: std::str; 10 | }; 11 | }; 12 | -------------------------------------------------------------------------------- /examples/edgedb/edgedb.toml: -------------------------------------------------------------------------------- 1 | [edgedb] 2 | server-version = "3.2" 3 | -------------------------------------------------------------------------------- /examples/edgedb/insert_data.edgeql: -------------------------------------------------------------------------------- 1 | INSERT Movie { 2 | title := "Dune", 3 | actors := { 4 | (INSERT Person { name := "Timothée Chalamet" }), 5 | (INSERT Person { name := "Zendaya" }) 6 | } 7 | }; 8 | -------------------------------------------------------------------------------- 
/examples/geojson/msgspec_geojson.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import msgspec 4 | 5 | Position = tuple[float, float] 6 | 7 | 8 | # Define the 7 standard Geometry types. 9 | # All types set `tag=True`, meaning that they'll make use of a `type` field to 10 | # disambiguate between types when decoding. 11 | class Point(msgspec.Struct, tag=True): 12 | coordinates: Position 13 | 14 | 15 | class MultiPoint(msgspec.Struct, tag=True): 16 | coordinates: list[Position] 17 | 18 | 19 | class LineString(msgspec.Struct, tag=True): 20 | coordinates: list[Position] 21 | 22 | 23 | class MultiLineString(msgspec.Struct, tag=True): 24 | coordinates: list[list[Position]] 25 | 26 | 27 | class Polygon(msgspec.Struct, tag=True): 28 | coordinates: list[list[Position]] 29 | 30 | 31 | class MultiPolygon(msgspec.Struct, tag=True): 32 | coordinates: list[list[list[Position]]] 33 | 34 | 35 | class GeometryCollection(msgspec.Struct, tag=True): 36 | geometries: list[Geometry] 37 | 38 | 39 | Geometry = ( 40 | Point 41 | | MultiPoint 42 | | LineString 43 | | MultiLineString 44 | | Polygon 45 | | MultiPolygon 46 | | GeometryCollection 47 | ) 48 | 49 | 50 | # Define the two Feature types 51 | class Feature(msgspec.Struct, tag=True): 52 | geometry: Geometry | None = None 53 | properties: dict | None = None 54 | id: str | int | None = None 55 | 56 | 57 | class FeatureCollection(msgspec.Struct, tag=True): 58 | features: list[Feature] 59 | 60 | 61 | # A union of all 9 GeoJSON types 62 | GeoJSON = Geometry | Feature | FeatureCollection 63 | 64 | 65 | # Create a decoder and an encoder to use for decoding & encoding GeoJSON types 66 | loads = msgspec.json.Decoder(GeoJSON).decode 67 | dumps = msgspec.json.Encoder().encode 68 | -------------------------------------------------------------------------------- /examples/pyproject-toml/pyproject.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import msgspec 4 | 5 | 6 | class Base( 7 | msgspec.Struct, 8 | omit_defaults=True, 9 | forbid_unknown_fields=True, 10 | rename="kebab", 11 | ): 12 | """A base class holding some common settings. 13 | 14 | - We set ``omit_defaults = True`` to omit any fields containing only their 15 | default value from the output when encoding. 16 | - We set ``forbid_unknown_fields = True`` to error nicely if an unknown 17 | field is present in the input TOML. This helps catch typo errors early, 18 | and is also required per PEP 621. 19 | - We set ``rename = "kebab"`` to rename all fields to use kebab case when 20 | encoding/decoding, as this is the convention used in pyproject.toml. For 21 | example, this will rename ``requires_python`` to ``requires-python``. 
22 | """ 23 | 24 | pass 25 | 26 | 27 | class BuildSystem(Base): 28 | requires: list[str] = [] 29 | build_backend: str | None = None 30 | backend_path: list[str] = [] 31 | 32 | 33 | class Readme(Base): 34 | file: str | None = None 35 | text: str | None = None 36 | content_type: str | None = None 37 | 38 | 39 | class License(Base): 40 | file: str | None = None 41 | text: str | None = None 42 | 43 | 44 | class Contributor(Base): 45 | name: str | None = None 46 | email: str | None = None 47 | 48 | 49 | class Project(Base): 50 | name: str | None = None 51 | version: str | None = None 52 | description: str | None = None 53 | readme: str | Readme | None = None 54 | license: str | License | None = None 55 | authors: list[Contributor] = [] 56 | maintainers: list[Contributor] = [] 57 | keywords: list[str] = [] 58 | classifiers: list[str] = [] 59 | urls: dict[str, str] = {} 60 | requires_python: str | None = None 61 | dependencies: list[str] = [] 62 | optional_dependencies: dict[str, list[str]] = {} 63 | scripts: dict[str, str] = {} 64 | gui_scripts: dict[str, str] = {} 65 | entry_points: dict[str, dict[str, str]] = {} 66 | dynamic: list[str] = [] 67 | 68 | 69 | class PyProject(Base): 70 | build_system: BuildSystem | None = None 71 | project: Project | None = None 72 | tool: dict[str, dict[str, Any]] = {} 73 | 74 | 75 | def decode(data: bytes | str) -> PyProject: 76 | """Decode a ``pyproject.toml`` file from TOML""" 77 | return msgspec.toml.decode(data, type=PyProject) 78 | 79 | 80 | def encode(msg: PyProject) -> bytes: 81 | """Encode a ``PyProject`` object to TOML""" 82 | return msgspec.toml.encode(msg) 83 | -------------------------------------------------------------------------------- /msgspec/__init__.py: -------------------------------------------------------------------------------- 1 | from ._core import ( 2 | DecodeError, 3 | EncodeError, 4 | Field as _Field, 5 | Meta, 6 | MsgspecError, 7 | Raw, 8 | Struct, 9 | UnsetType, 10 | UNSET, 11 | NODEFAULT, 12 | ValidationError, 13 | defstruct, 14 | convert, 15 | to_builtins, 16 | ) 17 | 18 | 19 | def field(*, default=NODEFAULT, default_factory=NODEFAULT, name=None): 20 | return _Field(default=default, default_factory=default_factory, name=name) 21 | 22 | 23 | field.__doc__ = _Field.__doc__ 24 | 25 | 26 | from . import msgpack 27 | from . import json 28 | from . import yaml 29 | from . import toml 30 | from . import inspect 31 | from . import structs 32 | from ._version import get_versions 33 | 34 | __version__ = get_versions()["version"] 35 | del get_versions 36 | -------------------------------------------------------------------------------- /msgspec/__init__.pyi: -------------------------------------------------------------------------------- 1 | import enum 2 | from typing import ( 3 | Any, 4 | Callable, 5 | ClassVar, 6 | Dict, 7 | Final, 8 | Iterable, 9 | Literal, 10 | Mapping, 11 | Optional, 12 | Tuple, 13 | Type, 14 | TypeVar, 15 | Union, 16 | overload, 17 | ) 18 | 19 | from typing_extensions import dataclass_transform, Buffer 20 | 21 | from . import inspect, json, msgpack, structs, toml, yaml 22 | 23 | T = TypeVar("T") 24 | 25 | class UnsetType(enum.Enum): 26 | UNSET = "UNSET" 27 | 28 | UNSET = UnsetType.UNSET 29 | 30 | class _NoDefault(enum.Enum): 31 | NODEFAULT = "NODEFAULT" 32 | 33 | NODEFAULT = _NoDefault.NODEFAULT 34 | 35 | @overload 36 | def field(*, default: T, name: Optional[str] = None) -> T: ... 37 | @overload 38 | def field(*, default_factory: Callable[[], T], name: Optional[str] = None) -> T: ... 
39 | @overload 40 | def field(*, name: Optional[str] = None) -> Any: ... 41 | @dataclass_transform(field_specifiers=(field,)) 42 | class Struct: 43 | __struct_fields__: ClassVar[Tuple[str, ...]] 44 | __struct_config__: ClassVar[structs.StructConfig] 45 | __match_args__: ClassVar[Tuple[str, ...]] 46 | # A default __init__ so that Structs with unknown field types (say 47 | # constructed by `defstruct`) won't error on every call to `__init__` 48 | def __init__(self, *args: Any, **kwargs: Any) -> None: ... 49 | def __init_subclass__( 50 | cls, 51 | tag: Union[None, bool, str, int, Callable[[str], Union[str, int]]] = None, 52 | tag_field: Union[None, str] = None, 53 | rename: Union[ 54 | None, 55 | Literal["lower", "upper", "camel", "pascal", "kebab"], 56 | Callable[[str], Optional[str]], 57 | Mapping[str, str], 58 | ] = None, 59 | omit_defaults: bool = False, 60 | forbid_unknown_fields: bool = False, 61 | frozen: bool = False, 62 | eq: bool = True, 63 | order: bool = False, 64 | kw_only: bool = False, 65 | repr_omit_defaults: bool = False, 66 | array_like: bool = False, 67 | gc: bool = True, 68 | weakref: bool = False, 69 | dict: bool = False, 70 | cache_hash: bool = False, 71 | ) -> None: ... 72 | def __rich_repr__( 73 | self, 74 | ) -> Iterable[Union[Any, Tuple[Any], Tuple[str, Any], Tuple[str, Any, Any]]]: ... 75 | 76 | def defstruct( 77 | name: str, 78 | fields: Iterable[Union[str, Tuple[str, type], Tuple[str, type, Any]]], 79 | *, 80 | bases: Optional[Tuple[Type[Struct], ...]] = None, 81 | module: Optional[str] = None, 82 | namespace: Optional[Dict[str, Any]] = None, 83 | tag: Union[None, bool, str, int, Callable[[str], Union[str, int]]] = None, 84 | tag_field: Union[None, str] = None, 85 | rename: Union[ 86 | None, 87 | Literal["lower", "upper", "camel", "pascal", "kebab"], 88 | Callable[[str], Optional[str]], 89 | Mapping[str, str], 90 | ] = None, 91 | omit_defaults: bool = False, 92 | forbid_unknown_fields: bool = False, 93 | frozen: bool = False, 94 | eq: bool = True, 95 | order: bool = False, 96 | kw_only: bool = False, 97 | repr_omit_defaults: bool = False, 98 | array_like: bool = False, 99 | gc: bool = True, 100 | weakref: bool = False, 101 | dict: bool = False, 102 | cache_hash: bool = False, 103 | ) -> Type[Struct]: ... 104 | 105 | # Lie and say `Raw` is a subclass of `bytes`, so mypy will accept it in most 106 | # places where an object that implements the buffer protocol is valid 107 | class Raw(bytes): 108 | @overload 109 | def __new__(cls) -> "Raw": ... 110 | @overload 111 | def __new__(cls, msg: Union[Buffer, str]) -> "Raw": ... 112 | def copy(self) -> "Raw": ... 113 | 114 | class Meta: 115 | def __init__( 116 | self, 117 | *, 118 | gt: Union[int, float, None] = None, 119 | ge: Union[int, float, None] = None, 120 | lt: Union[int, float, None] = None, 121 | le: Union[int, float, None] = None, 122 | multiple_of: Union[int, float, None] = None, 123 | pattern: Union[str, None] = None, 124 | min_length: Union[int, None] = None, 125 | max_length: Union[int, None] = None, 126 | tz: Union[bool, None] = None, 127 | title: Union[str, None] = None, 128 | description: Union[str, None] = None, 129 | examples: Union[list, None] = None, 130 | extra_json_schema: Union[dict, None] = None, 131 | extra: Union[dict, None] = None, 132 | ): ... 
133 | gt: Final[Union[int, float, None]] 134 | ge: Final[Union[int, float, None]] 135 | lt: Final[Union[int, float, None]] 136 | le: Final[Union[int, float, None]] 137 | multiple_of: Final[Union[int, float, None]] 138 | pattern: Final[Union[str, None]] 139 | min_length: Final[Union[int, None]] 140 | max_length: Final[Union[int, None]] 141 | tz: Final[Union[int, None]] 142 | title: Final[Union[str, None]] 143 | description: Final[Union[str, None]] 144 | examples: Final[Union[list, None]] 145 | extra_json_schema: Final[Union[dict, None]] 146 | extra: Final[Union[dict, None]] 147 | def __rich_repr__(self) -> Iterable[Tuple[str, Any]]: ... 148 | 149 | def to_builtins( 150 | obj: Any, 151 | *, 152 | str_keys: bool = False, 153 | builtin_types: Union[Iterable[type], None] = None, 154 | enc_hook: Optional[Callable[[Any], Any]] = None, 155 | order: Literal[None, "deterministic", "sorted"] = None, 156 | ) -> Any: ... 157 | @overload 158 | def convert( 159 | obj: Any, 160 | type: Type[T], 161 | *, 162 | strict: bool = True, 163 | from_attributes: bool = False, 164 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 165 | builtin_types: Union[Iterable[type], None] = None, 166 | str_keys: bool = False, 167 | ) -> T: ... 168 | @overload 169 | def convert( 170 | obj: Any, 171 | type: Any, 172 | *, 173 | strict: bool = True, 174 | from_attributes: bool = False, 175 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 176 | builtin_types: Union[Iterable[type], None] = None, 177 | str_keys: bool = False, 178 | ) -> Any: ... 179 | 180 | class MsgspecError(Exception): ... 181 | class EncodeError(MsgspecError): ... 182 | class DecodeError(MsgspecError): ... 183 | class ValidationError(DecodeError): ... 184 | 185 | __version__: str 186 | -------------------------------------------------------------------------------- /msgspec/common.h: -------------------------------------------------------------------------------- 1 | #ifndef MS_COMMON_H 2 | #define MS_COMMON_H 3 | 4 | #ifdef __GNUC__ 5 | #define MS_LIKELY(pred) __builtin_expect(!!(pred), 1) 6 | #define MS_UNLIKELY(pred) __builtin_expect(!!(pred), 0) 7 | #else 8 | #define MS_LIKELY(pred) (pred) 9 | #define MS_UNLIKELY(pred) (pred) 10 | #endif 11 | 12 | #ifdef __GNUC__ 13 | #define MS_INLINE __attribute__((always_inline)) inline 14 | #define MS_NOINLINE __attribute__((noinline)) 15 | #elif defined(_MSC_VER) 16 | #define MS_INLINE __forceinline 17 | #define MS_NOINLINE __declspec(noinline) 18 | #else 19 | #define MS_INLINE inline 20 | #define MS_NOINLINE 21 | #endif 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /msgspec/json.py: -------------------------------------------------------------------------------- 1 | from ._core import ( 2 | JSONDecoder as Decoder, 3 | JSONEncoder as Encoder, 4 | json_decode as decode, 5 | json_encode as encode, 6 | json_format as format, 7 | ) 8 | from ._json_schema import schema, schema_components 9 | -------------------------------------------------------------------------------- /msgspec/json.pyi: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from typing import ( 3 | Any, 4 | Callable, 5 | Dict, 6 | Generic, 7 | Iterable, 8 | Literal, 9 | Optional, 10 | Tuple, 11 | Type, 12 | TypeVar, 13 | Union, 14 | overload, 15 | ) 16 | 17 | from typing_extensions import Buffer 18 | 19 | T = TypeVar("T") 20 | 21 | enc_hook_sig = Optional[Callable[[Any], Any]] 22 | dec_hook_sig = 
Optional[Callable[[type, Any], Any]] 23 | float_hook_sig = Optional[Callable[[str], Any]] 24 | schema_hook_sig = Optional[Callable[[type], dict[str, Any]]] 25 | 26 | class Encoder: 27 | enc_hook: enc_hook_sig 28 | decimal_format: Literal["string", "number"] 29 | uuid_format: Literal["canonical", "hex"] 30 | order: Literal[None, "deterministic", "sorted"] 31 | 32 | def __init__( 33 | self, 34 | *, 35 | enc_hook: enc_hook_sig = None, 36 | decimal_format: Literal["string", "number"] = "string", 37 | uuid_format: Literal["canonical", "hex"] = "canonical", 38 | order: Literal[None, "deterministic", "sorted"] = None, 39 | ): ... 40 | def encode(self, obj: Any, /) -> bytes: ... 41 | def encode_lines(self, items: Iterable, /) -> bytes: ... 42 | def encode_into( 43 | self, obj: Any, buffer: bytearray, offset: Optional[int] = 0, / 44 | ) -> None: ... 45 | 46 | class Decoder(Generic[T]): 47 | type: Type[T] 48 | strict: bool 49 | dec_hook: dec_hook_sig 50 | float_hook: float_hook_sig 51 | 52 | @overload 53 | def __init__( 54 | self: Decoder[Any], 55 | *, 56 | strict: bool = True, 57 | dec_hook: dec_hook_sig = None, 58 | float_hook: float_hook_sig = None, 59 | ) -> None: ... 60 | @overload 61 | def __init__( 62 | self: Decoder[T], 63 | type: Type[T] = ..., 64 | *, 65 | strict: bool = True, 66 | dec_hook: dec_hook_sig = None, 67 | float_hook: float_hook_sig = None, 68 | ) -> None: ... 69 | @overload 70 | def __init__( 71 | self: Decoder[Any], 72 | type: Any = ..., 73 | *, 74 | strict: bool = True, 75 | dec_hook: dec_hook_sig = None, 76 | float_hook: float_hook_sig = None, 77 | ) -> None: ... 78 | def decode(self, buf: Union[Buffer, str], /) -> T: ... 79 | def decode_lines(self, buf: Union[Buffer, str], /) -> list[T]: ... 80 | 81 | @overload 82 | def decode( 83 | buf: Union[Buffer, str], 84 | /, 85 | *, 86 | strict: bool = True, 87 | dec_hook: dec_hook_sig = None, 88 | ) -> Any: ... 89 | @overload 90 | def decode( 91 | buf: Union[Buffer, str], 92 | /, 93 | *, 94 | type: Type[T] = ..., 95 | strict: bool = True, 96 | dec_hook: dec_hook_sig = None, 97 | ) -> T: ... 98 | @overload 99 | def decode( 100 | buf: Union[Buffer, str], 101 | /, 102 | *, 103 | type: Any = ..., 104 | strict: bool = True, 105 | dec_hook: dec_hook_sig = None, 106 | ) -> Any: ... 107 | def encode(obj: Any, /, *, enc_hook: enc_hook_sig = None, order: Literal[None, "deterministic", "sorted"] = None) -> bytes: ... 108 | def schema(type: Any, *, schema_hook: schema_hook_sig = None) -> Dict[str, Any]: ... 109 | def schema_components( 110 | types: Iterable[Any], 111 | *, 112 | schema_hook: schema_hook_sig = None, 113 | ref_template: str = "#/$defs/{name}" 114 | ) -> Tuple[Tuple[Dict[str, Any], ...], Dict[str, Any]]: ... 115 | @overload 116 | def format(buf: str, /, *, indent: int = 2) -> str: ... 117 | @overload 118 | def format(buf: Buffer, /, *, indent: int = 2) -> bytes: ... 
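# A rough usage sketch for the line-delimited helpers above (assuming the runtime
# matches these stubs): `Encoder().encode_lines([1, 2])` produces newline-delimited
# JSON (`b'1\n2\n'`), and `Decoder(int).decode_lines(b'1\n2\n')` returns `[1, 2]`.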
119 | -------------------------------------------------------------------------------- /msgspec/msgpack.py: -------------------------------------------------------------------------------- 1 | from ._core import ( 2 | Ext, 3 | MsgpackDecoder as Decoder, 4 | MsgpackEncoder as Encoder, 5 | msgpack_decode as decode, 6 | msgpack_encode as encode, 7 | ) 8 | -------------------------------------------------------------------------------- /msgspec/msgpack.pyi: -------------------------------------------------------------------------------- 1 | from typing import ( 2 | Any, 3 | Callable, 4 | Generic, 5 | Literal, 6 | Optional, 7 | Type, 8 | TypeVar, 9 | Union, 10 | overload, 11 | ) 12 | 13 | from typing_extensions import Buffer 14 | 15 | 16 | T = TypeVar("T") 17 | 18 | enc_hook_sig = Optional[Callable[[Any], Any]] 19 | ext_hook_sig = Optional[Callable[[int, memoryview], Any]] 20 | dec_hook_sig = Optional[Callable[[type, Any], Any]] 21 | 22 | class Ext: 23 | code: int 24 | data: Union[bytes, bytearray, memoryview] 25 | def __init__( 26 | self, code: int, data: Union[bytes, bytearray, memoryview] 27 | ) -> None: ... 28 | 29 | class Decoder(Generic[T]): 30 | type: Type[T] 31 | strict: bool 32 | dec_hook: dec_hook_sig 33 | ext_hook: ext_hook_sig 34 | @overload 35 | def __init__( 36 | self: Decoder[Any], 37 | *, 38 | strict: bool = True, 39 | dec_hook: dec_hook_sig = None, 40 | ext_hook: ext_hook_sig = None, 41 | ) -> None: ... 42 | @overload 43 | def __init__( 44 | self: Decoder[T], 45 | type: Type[T] = ..., 46 | *, 47 | strict: bool = True, 48 | dec_hook: dec_hook_sig = None, 49 | ext_hook: ext_hook_sig = None, 50 | ) -> None: ... 51 | @overload 52 | def __init__( 53 | self: Decoder[Any], 54 | type: Any = ..., 55 | *, 56 | strict: bool = True, 57 | dec_hook: dec_hook_sig = None, 58 | ext_hook: ext_hook_sig = None, 59 | ) -> None: ... 60 | def decode(self, buf: Buffer, /) -> T: ... 61 | 62 | class Encoder: 63 | enc_hook: enc_hook_sig 64 | decimal_format: Literal["string", "number"] 65 | uuid_format: Literal["canonical", "hex", "bytes"] 66 | order: Literal[None, "deterministic", "sorted"] 67 | def __init__( 68 | self, 69 | *, 70 | enc_hook: enc_hook_sig = None, 71 | decimal_format: Literal["string", "number"] = "string", 72 | uuid_format: Literal["canonical", "hex", "bytes"] = "canonical", 73 | order: Literal[None, "deterministic", "sorted"] = None, 74 | ): ... 75 | def encode(self, obj: Any, /) -> bytes: ... 76 | def encode_into( 77 | self, obj: Any, buffer: bytearray, offset: Optional[int] = 0, / 78 | ) -> None: ... 79 | 80 | @overload 81 | def decode( 82 | buf: Buffer, 83 | /, 84 | *, 85 | strict: bool = True, 86 | dec_hook: dec_hook_sig = None, 87 | ext_hook: ext_hook_sig = None, 88 | ) -> Any: ... 89 | @overload 90 | def decode( 91 | buf: Buffer, 92 | /, 93 | *, 94 | type: Type[T] = ..., 95 | strict: bool = True, 96 | dec_hook: dec_hook_sig = None, 97 | ext_hook: ext_hook_sig = None, 98 | ) -> T: ... 99 | @overload 100 | def decode( 101 | buf: Buffer, 102 | /, 103 | *, 104 | type: Any = ..., 105 | strict: bool = True, 106 | dec_hook: dec_hook_sig = None, 107 | ext_hook: ext_hook_sig = None, 108 | ) -> Any: ... 109 | def encode(obj: Any, /, *, enc_hook: enc_hook_sig = None, order: Literal[None, "deterministic", "sorted"] = None) -> bytes: ... 
110 | -------------------------------------------------------------------------------- /msgspec/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/msgspec/py.typed -------------------------------------------------------------------------------- /msgspec/structs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | 5 | from . import NODEFAULT, Struct, field 6 | from ._core import ( # noqa 7 | Factory as _Factory, 8 | StructConfig, 9 | asdict, 10 | astuple, 11 | replace, 12 | force_setattr, 13 | ) 14 | from ._utils import get_class_annotations as _get_class_annotations 15 | 16 | __all__ = ( 17 | "FieldInfo", 18 | "StructConfig", 19 | "asdict", 20 | "astuple", 21 | "fields", 22 | "force_setattr", 23 | "replace", 24 | ) 25 | 26 | 27 | def __dir__(): 28 | return __all__ 29 | 30 | 31 | class FieldInfo(Struct): 32 | """A record describing a field in a struct type. 33 | 34 | Parameters 35 | ---------- 36 | name: str 37 | The field name as seen by Python code (e.g. ``field_one``). 38 | encode_name: str 39 | The name used when encoding/decoding the field. This may differ if 40 | the field is renamed (e.g. ``fieldOne``). 41 | type: Any 42 | The full field type annotation. 43 | default: Any, optional 44 | A default value for the field. Will be `NODEFAULT` if no default value 45 | is set. 46 | default_factory: Any, optional 47 | A callable that creates a default value for the field. Will be 48 | `NODEFAULT` if no ``default_factory`` is set. 49 | """ 50 | 51 | name: str 52 | encode_name: str 53 | type: Any 54 | default: Any = field(default_factory=lambda: NODEFAULT) 55 | default_factory: Any = field(default_factory=lambda: NODEFAULT) 56 | 57 | @property 58 | def required(self) -> bool: 59 | """A helper for checking whether a field is required""" 60 | return self.default is NODEFAULT and self.default_factory is NODEFAULT 61 | 62 | 63 | def fields(type_or_instance: Struct | type[Struct]) -> tuple[FieldInfo]: 64 | """Get information about the fields in a Struct. 65 | 66 | Parameters 67 | ---------- 68 | type_or_instance: 69 | A struct type or instance. 
70 | 71 | Returns 72 | ------- 73 | tuple[FieldInfo] 74 | """ 75 | if isinstance(type_or_instance, Struct): 76 | annotated_cls = cls = type(type_or_instance) 77 | else: 78 | annotated_cls = type_or_instance 79 | cls = getattr(type_or_instance, "__origin__", type_or_instance) 80 | if not (isinstance(cls, type) and issubclass(cls, Struct)): 81 | raise TypeError("Must be called with a struct type or instance") 82 | 83 | hints = _get_class_annotations(annotated_cls) 84 | npos = len(cls.__struct_fields__) - len(cls.__struct_defaults__) 85 | fields = [] 86 | for name, encode_name, default_obj in zip( 87 | cls.__struct_fields__, 88 | cls.__struct_encode_fields__, 89 | (NODEFAULT,) * npos + cls.__struct_defaults__, 90 | ): 91 | default = default_factory = NODEFAULT 92 | if isinstance(default_obj, _Factory): 93 | default_factory = default_obj.factory 94 | elif default_obj is not NODEFAULT: 95 | default = default_obj 96 | 97 | field = FieldInfo( 98 | name=name, 99 | encode_name=encode_name, 100 | type=hints[name], 101 | default=default, 102 | default_factory=default_factory, 103 | ) 104 | fields.append(field) 105 | 106 | return tuple(fields) 107 | -------------------------------------------------------------------------------- /msgspec/structs.pyi: -------------------------------------------------------------------------------- 1 | from typing import Any, TypeVar, Union 2 | 3 | from . import NODEFAULT, Struct 4 | 5 | S = TypeVar("S", bound=Struct) 6 | 7 | def replace(struct: S, /, **changes: Any) -> S: ... 8 | def asdict(struct: Struct) -> dict[str, Any]: ... 9 | def astuple(struct: Struct) -> tuple[Any, ...]: ... 10 | def force_setattr(struct: Struct, name: str, value: Any) -> None: ... 11 | 12 | class StructConfig: 13 | frozen: bool 14 | eq: bool 15 | order: bool 16 | array_like: bool 17 | gc: bool 18 | repr_omit_defaults: bool 19 | omit_defaults: bool 20 | forbid_unknown_fields: bool 21 | weakref: bool 22 | dict: bool 23 | cache_hash: bool 24 | tag: Union[str, int, None] 25 | tag_field: Union[str, None] 26 | 27 | class FieldInfo(Struct): 28 | name: str 29 | encode_name: str 30 | type: Any 31 | default: Any = NODEFAULT 32 | default_factory: Any = NODEFAULT 33 | 34 | @property 35 | def required(self) -> bool: ... 36 | 37 | def fields(type_or_instance: Struct | type[Struct]) -> tuple[FieldInfo]: ... 38 | -------------------------------------------------------------------------------- /msgspec/toml.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime as _datetime 4 | from typing import TYPE_CHECKING, overload, TypeVar, Any 5 | 6 | from . 
import ( 7 | DecodeError as _DecodeError, 8 | convert as _convert, 9 | to_builtins as _to_builtins, 10 | ) 11 | 12 | if TYPE_CHECKING: 13 | from typing import Callable, Optional, Type, Union, Literal 14 | from typing_extensions import Buffer 15 | 16 | 17 | __all__ = ("encode", "decode") 18 | 19 | 20 | def __dir__(): 21 | return __all__ 22 | 23 | 24 | def _import_tomllib(): 25 | try: 26 | import tomllib # type: ignore 27 | 28 | return tomllib 29 | except ImportError: 30 | pass 31 | 32 | try: 33 | import tomli # type: ignore 34 | 35 | return tomli 36 | except ImportError: 37 | raise ImportError( 38 | "`msgspec.toml.decode` requires `tomli` be installed.\n\n" 39 | "Please either `pip` or `conda` install it as follows:\n\n" 40 | " $ python -m pip install tomli # using pip\n" 41 | " $ conda install tomli # or using conda" 42 | ) from None 43 | 44 | 45 | def _import_tomli_w(): 46 | try: 47 | import tomli_w # type: ignore 48 | 49 | return tomli_w 50 | except ImportError: 51 | raise ImportError( 52 | "`msgspec.toml.encode` requires `tomli_w` be installed.\n\n" 53 | "Please either `pip` or `conda` install it as follows:\n\n" 54 | " $ python -m pip install tomli_w # using pip\n" 55 | " $ conda install tomli_w # or using conda" 56 | ) from None 57 | 58 | 59 | def encode( 60 | obj: Any, 61 | *, 62 | enc_hook: Optional[Callable[[Any], Any]] = None, 63 | order: Literal[None, "deterministic", "sorted"] = None, 64 | ) -> bytes: 65 | """Serialize an object as TOML. 66 | 67 | Parameters 68 | ---------- 69 | obj : Any 70 | The object to serialize. 71 | enc_hook : callable, optional 72 | A callable to call for objects that aren't supported msgspec types. 73 | Takes the unsupported object and should return a supported object, or 74 | raise a ``NotImplementedError`` if unsupported. 75 | order : {None, 'deterministic', 'sorted'}, optional 76 | The ordering to use when encoding unordered compound types. 77 | 78 | - ``None``: All objects are encoded in the most efficient manner 79 | matching their in-memory representations. The default. 80 | - `'deterministic'`: Unordered collections (sets, dicts) are sorted to 81 | ensure a consistent output between runs. Useful when 82 | comparison/hashing of the encoded binary output is necessary. 83 | - `'sorted'`: Like `'deterministic'`, but *all* object-like types 84 | (structs, dataclasses, ...) are also sorted by field name before 85 | encoding. This is slower than `'deterministic'`, but may produce more 86 | human-readable output. 87 | 88 | Returns 89 | ------- 90 | data : bytes 91 | The serialized object. 
92 | 93 | See Also 94 | -------- 95 | decode 96 | """ 97 | toml = _import_tomli_w() 98 | msg = _to_builtins( 99 | obj, 100 | builtin_types=(_datetime.datetime, _datetime.date, _datetime.time), 101 | str_keys=True, 102 | enc_hook=enc_hook, 103 | order=order, 104 | ) 105 | return toml.dumps(msg).encode("utf-8") 106 | 107 | 108 | T = TypeVar("T") 109 | 110 | 111 | @overload 112 | def decode( 113 | buf: Union[Buffer, str], 114 | *, 115 | strict: bool = True, 116 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 117 | ) -> Any: 118 | pass 119 | 120 | 121 | @overload 122 | def decode( 123 | buf: Union[Buffer, str], 124 | *, 125 | type: Type[T] = ..., 126 | strict: bool = True, 127 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 128 | ) -> T: 129 | pass 130 | 131 | 132 | @overload 133 | def decode( 134 | buf: Union[Buffer, str], 135 | *, 136 | type: Any = ..., 137 | strict: bool = True, 138 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 139 | ) -> Any: 140 | pass 141 | 142 | 143 | def decode(buf, *, type=Any, strict=True, dec_hook=None): 144 | """Deserialize an object from TOML. 145 | 146 | Parameters 147 | ---------- 148 | buf : bytes-like or str 149 | The message to decode. 150 | type : type, optional 151 | A Python type (in type annotation form) to decode the object as. If 152 | provided, the message will be type checked and decoded as the specified 153 | type. Defaults to `Any`, in which case the message will be decoded 154 | using the default TOML types. 155 | strict : bool, optional 156 | Whether type coercion rules should be strict. Setting to False enables 157 | a wider set of coercion rules from string to non-string types for all 158 | values. Default is True. 159 | dec_hook : callable, optional 160 | An optional callback for handling decoding custom types. Should have 161 | the signature ``dec_hook(type: Type, obj: Any) -> Any``, where ``type`` 162 | is the expected message type, and ``obj`` is the decoded representation 163 | composed of only basic TOML types. This hook should transform ``obj`` 164 | into type ``type``, or raise a ``NotImplementedError`` if unsupported. 165 | 166 | Returns 167 | ------- 168 | obj : Any 169 | The deserialized object. 170 | 171 | See Also 172 | -------- 173 | encode 174 | """ 175 | toml = _import_tomllib() 176 | if isinstance(buf, str): 177 | str_buf = buf 178 | elif isinstance(buf, (bytes, bytearray)): 179 | str_buf = buf.decode("utf-8") 180 | else: 181 | # call `memoryview` first, since `bytes(1)` is actually valid 182 | str_buf = bytes(memoryview(buf)).decode("utf-8") 183 | try: 184 | obj = toml.loads(str_buf) 185 | except toml.TOMLDecodeError as exc: 186 | raise _DecodeError(str(exc)) from None 187 | 188 | if type is Any: 189 | return obj 190 | return _convert( 191 | obj, 192 | type, 193 | builtin_types=(_datetime.datetime, _datetime.date, _datetime.time), 194 | str_keys=True, 195 | strict=strict, 196 | dec_hook=dec_hook, 197 | ) 198 | -------------------------------------------------------------------------------- /msgspec/yaml.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime as _datetime 4 | from typing import TYPE_CHECKING, overload, TypeVar, Any 5 | 6 | from . 
import ( 7 | DecodeError as _DecodeError, 8 | convert as _convert, 9 | to_builtins as _to_builtins, 10 | ) 11 | 12 | if TYPE_CHECKING: 13 | from typing import Callable, Optional, Type, Union, Literal 14 | from typing_extensions import Buffer 15 | 16 | 17 | __all__ = ("encode", "decode") 18 | 19 | 20 | def __dir__(): 21 | return __all__ 22 | 23 | 24 | def _import_pyyaml(name): 25 | try: 26 | import yaml # type: ignore 27 | except ImportError: 28 | raise ImportError( 29 | f"`msgspec.yaml.{name}` requires PyYAML be installed.\n\n" 30 | "Please either `pip` or `conda` install it as follows:\n\n" 31 | " $ python -m pip install pyyaml # using pip\n" 32 | " $ conda install pyyaml # or using conda" 33 | ) from None 34 | else: 35 | return yaml 36 | 37 | 38 | def encode( 39 | obj: Any, 40 | *, 41 | enc_hook: Optional[Callable[[Any], Any]] = None, 42 | order: Literal[None, "deterministic", "sorted"] = None, 43 | ) -> bytes: 44 | """Serialize an object as YAML. 45 | 46 | Parameters 47 | ---------- 48 | obj : Any 49 | The object to serialize. 50 | enc_hook : callable, optional 51 | A callable to call for objects that aren't supported msgspec types. 52 | Takes the unsupported object and should return a supported object, or 53 | raise a ``NotImplementedError`` if unsupported. 54 | order : {None, 'deterministic', 'sorted'}, optional 55 | The ordering to use when encoding unordered compound types. 56 | 57 | - ``None``: All objects are encoded in the most efficient manner 58 | matching their in-memory representations. The default. 59 | - `'deterministic'`: Unordered collections (sets, dicts) are sorted to 60 | ensure a consistent output between runs. Useful when 61 | comparison/hashing of the encoded binary output is necessary. 62 | - `'sorted'`: Like `'deterministic'`, but *all* object-like types 63 | (structs, dataclasses, ...) are also sorted by field name before 64 | encoding. This is slower than `'deterministic'`, but may produce more 65 | human-readable output. 66 | 67 | Returns 68 | ------- 69 | data : bytes 70 | The serialized object. 71 | 72 | Notes 73 | ----- 74 | This function requires that the third-party `PyYAML library 75 | `_ is installed. 76 | 77 | See Also 78 | -------- 79 | decode 80 | """ 81 | yaml = _import_pyyaml("encode") 82 | # Use the C extension if available 83 | Dumper = getattr(yaml, "CSafeDumper", yaml.SafeDumper) 84 | 85 | return yaml.dump_all( 86 | [ 87 | _to_builtins( 88 | obj, 89 | builtin_types=(_datetime.datetime, _datetime.date), 90 | enc_hook=enc_hook, 91 | order=order, 92 | ) 93 | ], 94 | encoding="utf-8", 95 | Dumper=Dumper, 96 | allow_unicode=True, 97 | sort_keys=False, 98 | ) 99 | 100 | 101 | T = TypeVar("T") 102 | 103 | 104 | @overload 105 | def decode( 106 | buf: Union[Buffer, str], 107 | *, 108 | strict: bool = True, 109 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 110 | ) -> Any: 111 | pass 112 | 113 | 114 | @overload 115 | def decode( 116 | buf: Union[bytes, str], 117 | *, 118 | type: Type[T] = ..., 119 | strict: bool = True, 120 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 121 | ) -> T: 122 | pass 123 | 124 | 125 | @overload 126 | def decode( 127 | buf: Union[bytes, str], 128 | *, 129 | type: Any = ..., 130 | strict: bool = True, 131 | dec_hook: Optional[Callable[[type, Any], Any]] = None, 132 | ) -> Any: 133 | pass 134 | 135 | 136 | def decode(buf, *, type=Any, strict=True, dec_hook=None): 137 | """Deserialize an object from YAML. 138 | 139 | Parameters 140 | ---------- 141 | buf : bytes-like or str 142 | The message to decode. 
143 | type : type, optional 144 | A Python type (in type annotation form) to decode the object as. If 145 | provided, the message will be type checked and decoded as the specified 146 | type. Defaults to `Any`, in which case the message will be decoded 147 | using the default YAML types. 148 | strict : bool, optional 149 | Whether type coercion rules should be strict. Setting to False enables 150 | a wider set of coercion rules from string to non-string types for all 151 | values. Default is True. 152 | dec_hook : callable, optional 153 | An optional callback for handling decoding custom types. Should have 154 | the signature ``dec_hook(type: Type, obj: Any) -> Any``, where ``type`` 155 | is the expected message type, and ``obj`` is the decoded representation 156 | composed of only basic YAML types. This hook should transform ``obj`` 157 | into type ``type``, or raise a ``NotImplementedError`` if unsupported. 158 | 159 | Returns 160 | ------- 161 | obj : Any 162 | The deserialized object. 163 | 164 | Notes 165 | ----- 166 | This function requires that the third-party `PyYAML library 167 | `_ is installed. 168 | 169 | See Also 170 | -------- 171 | encode 172 | """ 173 | yaml = _import_pyyaml("decode") 174 | # Use the C extension if available 175 | Loader = getattr(yaml, "CSafeLoader", yaml.SafeLoader) 176 | if not isinstance(buf, (str, bytes)): 177 | # call `memoryview` first, since `bytes(1)` is actually valid 178 | buf = bytes(memoryview(buf)) 179 | try: 180 | obj = yaml.load(buf, Loader) 181 | except yaml.YAMLError as exc: 182 | raise _DecodeError(str(exc)) from None 183 | 184 | if type is Any: 185 | return obj 186 | return _convert( 187 | obj, 188 | type, 189 | builtin_types=(_datetime.datetime, _datetime.date), 190 | strict=strict, 191 | dec_hook=dec_hook, 192 | ) 193 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | exclude = [ 3 | "*.pyi", 4 | "__init__.py", 5 | "_version.py", 6 | "versioneer.py", 7 | "basic_typing_examples.py", 8 | "json.py", 9 | "msgpack.py", 10 | "test_JSONTestSuite.py", 11 | "conf.py", 12 | ] 13 | line-length = 88 14 | 15 | [tool.ruff.lint] 16 | ignore = [ 17 | "E721", # Comparing types instead of isinstance 18 | "E741", # Ambiguous variable names 19 | "E501", # Conflicts with ruff format 20 | "W191", # Conflicts with ruff format 21 | ] 22 | select = [ 23 | "E", # PEP8 Errors 24 | "F", # Pyflakes 25 | "W", # PEP8 Warnings 26 | ] 27 | 28 | [tool.ruff.lint.isort] 29 | combine-as-imports = true 30 | -------------------------------------------------------------------------------- /scripts/generate_atof_consts.py: -------------------------------------------------------------------------------- 1 | """This script generates msgspec/atof_consts.h""" 2 | 3 | import math 4 | import os 5 | import textwrap 6 | 7 | 8 | def gen_hpd_tables(): 9 | log2log10 = math.log(2) / math.log(10) 10 | shifts = ["0x0000"] 11 | powers = [] 12 | for i in range(1, 61): 13 | offset = len(powers) 14 | assert offset <= 0x07FF 15 | num_new_digits = int(log2log10 * float(i)) + 1 16 | assert num_new_digits <= 31 17 | code = (num_new_digits << 11) | offset 18 | p = str(5**i) 19 | powers.extend(p) 20 | shifts.append("0x%04X" % code) 21 | 22 | for i in range(61, 65): 23 | shifts.append("0x%04X" % len(powers)) 24 | 25 | n_shifts = len(shifts) 26 | n_powers = len(powers) 27 | assert n_powers <= 0x07FF 28 | 29 | shifts_str = "\n".join(textwrap.wrap(", 
".join(shifts), width=78)) 30 | powers_str = "\n".join(textwrap.wrap(", ".join(powers), width=78)) 31 | 32 | return n_shifts, shifts_str, n_powers, powers_str 33 | 34 | 35 | def gen_row(e): 36 | z = 1 << 2048 37 | if e >= 0: 38 | exp = 10**e 39 | z = z * exp 40 | else: 41 | exp = 10 ** (-e) 42 | z = z // exp 43 | 44 | n = -2048 45 | 46 | while z >= (1 << 128): 47 | z = z >> 1 48 | n += 1 49 | 50 | h = hex(z)[2:] 51 | assert len(h) == 32 52 | 53 | approx_n = ((217706 * e) >> 16) + 1087 54 | biased_n = 1214 + n 55 | 56 | assert approx_n == biased_n 57 | 58 | return "{0x%s, 0x%s}, // 1e%-04d" % (h[16:], h[:16], e) 59 | 60 | 61 | table_rows = [gen_row(e) for e in range(-307, 289)] 62 | 63 | f64_powers = [f"1e{i}" for i in range(23)] 64 | 65 | n_shifts, shifts, n_powers, powers = gen_hpd_tables() 66 | 67 | text = """\ 68 | /* DO NOT EDIT - generated by scripts/generate_atof_consts.py */ 69 | 70 | #ifndef MSGSPEC_ATOF_CONSTS_H 71 | #define MSGSPEC_ATOF_CONSTS_H 72 | 73 | static const uint64_t ms_atof_powers_of_10[%d][2] = { 74 | %s 75 | }; 76 | 77 | static const double ms_atof_f64_powers_of_10[%d] = { 78 | %s 79 | }; 80 | 81 | static const uint16_t ms_atof_left_shift[%d] = { 82 | %s 83 | }; 84 | 85 | static const uint8_t ms_atof_powers_of_5[%d] = { 86 | %s 87 | }; 88 | 89 | #endif 90 | """ % ( 91 | len(table_rows), 92 | "\n".join(table_rows), 93 | len(f64_powers), 94 | "\n".join(textwrap.wrap(", ".join(f64_powers), width=78)), 95 | n_shifts, 96 | shifts, 97 | n_powers, 98 | powers, 99 | ) 100 | 101 | 102 | if __name__ == "__main__": 103 | repo = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 104 | path = os.path.join(repo, "msgspec", "atof_consts.h") 105 | with open(path, "wb") as f: 106 | f.write(text.encode("utf-8")) 107 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [codespell] 2 | skip=*.py,*.c,*.h 3 | 4 | [coverage:run] 5 | omit = 6 | msgspec/_version.py 7 | tests/basic_typing_examples.py 8 | tests/test_mypy.py 9 | tests/test_pyright.py 10 | 11 | [tool:pytest] 12 | markers = 13 | mypy 14 | pyright 15 | filterwarnings = 16 | error 17 | 18 | [versioneer] 19 | VCS = git 20 | style = pep440 21 | versionfile_source = msgspec/_version.py 22 | versionfile_build = msgspec/_version.py 23 | tag_prefix = 24 | parentdir_prefix = msgspec- 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | from setuptools import setup 5 | from setuptools.extension import Extension 6 | 7 | import versioneer 8 | 9 | # Check for 32-bit windows builds, which currently aren't supported. We can't 10 | # rely on `platform.architecture` here since users can still run 32-bit python 11 | # builds on 64 bit architectures. 12 | if sys.platform == "win32" and sys.maxsize == (2**31 - 1): 13 | import textwrap 14 | 15 | error = """ 16 | ==================================================================== 17 | `msgspec` currently doesn't support 32-bit Python windows builds. 
If 18 | this is important for your use case, please open an issue on GitHub: 19 | 20 | https://github.com/jcrist/msgspec/issues 21 | ==================================================================== 22 | """ 23 | print(textwrap.dedent(error)) 24 | exit(1) 25 | 26 | 27 | SANITIZE = os.environ.get("MSGSPEC_SANITIZE", False) 28 | COVERAGE = os.environ.get("MSGSPEC_COVERAGE", False) 29 | DEBUG = os.environ.get("MSGSPEC_DEBUG", SANITIZE or COVERAGE) 30 | 31 | extra_compile_args = [] 32 | extra_link_args = [] 33 | if SANITIZE: 34 | extra_compile_args.extend(["-fsanitize=address", "-fsanitize=undefined"]) 35 | extra_link_args.extend(["-lasan", "-lubsan"]) 36 | if COVERAGE: 37 | extra_compile_args.append("--coverage") 38 | extra_link_args.append("-lgcov") 39 | if DEBUG: 40 | extra_compile_args.extend(["-O0", "-g", "-UNDEBUG"]) 41 | 42 | ext_modules = [ 43 | Extension( 44 | "msgspec._core", 45 | [os.path.join("msgspec", "_core.c")], 46 | extra_compile_args=extra_compile_args, 47 | extra_link_args=extra_link_args, 48 | ) 49 | ] 50 | 51 | yaml_deps = ["pyyaml"] 52 | toml_deps = ['tomli ; python_version < "3.11"', "tomli_w"] 53 | doc_deps = ["sphinx", "furo", "sphinx-copybutton", "sphinx-design", "ipython"] 54 | test_deps = [ 55 | "pytest", 56 | "msgpack", 57 | "attrs", 58 | 'eval-type-backport ; python_version < "3.10"', 59 | *yaml_deps, 60 | *toml_deps, 61 | ] 62 | dev_deps = ["pre-commit", "coverage", "mypy", "pyright", *doc_deps, *test_deps] 63 | 64 | extras_require = { 65 | "yaml": yaml_deps, 66 | "toml": toml_deps, 67 | "doc": doc_deps, 68 | "test": test_deps, 69 | "dev": dev_deps, 70 | } 71 | 72 | setup( 73 | name="msgspec", 74 | version=versioneer.get_version(), 75 | cmdclass=versioneer.get_cmdclass(), 76 | maintainer="Jim Crist-Harif", 77 | maintainer_email="jcristharif@gmail.com", 78 | url="https://jcristharif.com/msgspec/", 79 | project_urls={ 80 | "Documentation": "https://jcristharif.com/msgspec/", 81 | "Source": "https://github.com/jcrist/msgspec/", 82 | "Issue Tracker": "https://github.com/jcrist/msgspec/issues", 83 | }, 84 | description=( 85 | "A fast serialization and validation library, with builtin support for " 86 | "JSON, MessagePack, YAML, and TOML." 
87 | ), 88 | keywords="JSON msgpack MessagePack TOML YAML serialization validation schema", 89 | classifiers=[ 90 | "License :: OSI Approved :: BSD License", 91 | "Development Status :: 4 - Beta", 92 | "Programming Language :: Python :: 3.9", 93 | "Programming Language :: Python :: 3.10", 94 | "Programming Language :: Python :: 3.11", 95 | "Programming Language :: Python :: 3.12", 96 | "Programming Language :: Python :: 3.13", 97 | ], 98 | extras_require=extras_require, 99 | license="BSD", 100 | packages=["msgspec"], 101 | package_data={"msgspec": ["py.typed", "*.pyi"]}, 102 | ext_modules=ext_modules, 103 | long_description=( 104 | open("README.md", encoding="utf-8").read() 105 | if os.path.exists("README.md") 106 | else "" 107 | ), 108 | long_description_content_type="text/markdown", 109 | python_requires=">=3.9", 110 | zip_safe=False, 111 | ) 112 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import string 4 | import struct 5 | 6 | import pytest 7 | 8 | 9 | class Rand: 10 | """Random source, pulled out into fixture with repr so the seed is 11 | displayed on failing tests""" 12 | 13 | def __init__(self, seed=0): 14 | self.seed = seed or random.randint(0, 2**32 - 1) 15 | self.rand = random.Random(self.seed) 16 | 17 | def __repr__(self): 18 | return f"Rand({self.seed})" 19 | 20 | def str(self, n, m=0): 21 | """ 22 | str(n) -> random string of length `n`. 23 | str(n, m) -> random string between lengths `n` & `m` 24 | """ 25 | if m: 26 | n = self.rand.randint(n, m) 27 | return "".join(self.rand.choices(string.ascii_letters, k=n)) 28 | 29 | def bytes(self, n): 30 | """random bytes of length `n`""" 31 | return self.rand.getrandbits(8 * n).to_bytes(n, "little") 32 | 33 | def float(self): 34 | """random finite float""" 35 | while True: 36 | dbytes = self.rand.getrandbits(64).to_bytes(8, "big") 37 | x = struct.unpack("!d", dbytes)[0] 38 | if math.isfinite(x): 39 | return x 40 | 41 | def shuffle(self, obj): 42 | """random shuffle""" 43 | self.rand.shuffle(obj) 44 | 45 | 46 | @pytest.fixture 47 | def rand(): 48 | yield Rand() 49 | -------------------------------------------------------------------------------- /tests/test_cpylint.py: -------------------------------------------------------------------------------- 1 | """This file contains some simple linters for catching some common but easy to 2 | catch cpython capi bugs. 
These are naive string-munging checks, if you write 3 | some code that _is_ correct but is failing, add `/* cpylint-ignore */` on the 4 | failing source line and it will be ignored.""" 5 | 6 | import os 7 | 8 | import pytest 9 | 10 | MSGSPEC_CORE_PATH = os.path.join( 11 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "msgspec", "_core.c" 12 | ) 13 | 14 | 15 | @pytest.fixture 16 | def source(): 17 | with open(MSGSPEC_CORE_PATH, "r") as f: 18 | return f.read().splitlines() 19 | 20 | 21 | def test_recursive_call_blocks(source): 22 | """Ensure all code that calls `Py_EnterRecursiveCall` doesn't return 23 | without calling `Py_LeaveRecursiveCall`""" 24 | 25 | in_block = False 26 | for lineno, line in enumerate(source, 1): 27 | if "cpylint-ignore" in line: 28 | continue 29 | 30 | if "Py_EnterRecursiveCall" in line: 31 | in_block = True 32 | elif "return " in line and in_block: 33 | raise ValueError( 34 | f"return without calling Py_LeaveRecursiveCall on line {lineno}" 35 | ) 36 | elif "Py_LeaveRecursiveCall" in line: 37 | in_block = False 38 | 39 | 40 | def test_recursive_repr_blocks(source): 41 | """Ensure all code that calls `Py_ReprEnter` doesn't return without 42 | calling `Py_ReprLeave`""" 43 | in_block = False 44 | for lineno, line in enumerate(source, 1): 45 | if "cpylint-ignore" in line: 46 | continue 47 | 48 | if "Py_ReprEnter" in line: 49 | in_block = True 50 | elif "return " in line and in_block: 51 | raise ValueError(f"return without calling Py_ReprLeave on line {lineno}") 52 | elif "Py_ReprLeave" in line: 53 | in_block = False 54 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys 3 | 4 | import pytest 5 | 6 | import msgspec 7 | 8 | 9 | @pytest.fixture(params=["json", "msgpack"]) 10 | def proto(request): 11 | if request.param == "json": 12 | return msgspec.json 13 | elif request.param == "msgpack": 14 | return msgspec.msgpack 15 | 16 | 17 | def test_decode_naive_datetime(proto): 18 | """See https://github.com/jcrist/msgspec/issues/408""" 19 | dt = datetime.datetime(2001, 2, 3, 4, 5, 6, 7) 20 | msg = proto.encode(dt) 21 | 22 | start = sys.getrefcount(None) 23 | for _ in range(1000): 24 | proto.decode(msg, type=datetime.datetime) 25 | end = sys.getrefcount(None) 26 | assert start == end 27 | 28 | 29 | def test_decode_naive_time(proto): 30 | """See https://github.com/jcrist/msgspec/issues/408""" 31 | dt = datetime.time(12, 20) 32 | msg = proto.encode(dt) 33 | 34 | start = sys.getrefcount(None) 35 | for _ in range(1000): 36 | proto.decode(msg, type=datetime.time) 37 | end = sys.getrefcount(None) 38 | assert start == end 39 | -------------------------------------------------------------------------------- /tests/test_mypy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import pytest 5 | 6 | pytestmark = pytest.mark.mypy 7 | 8 | api = pytest.importorskip("mypy.api") 9 | 10 | PATH = os.path.join(os.path.dirname(__file__), "basic_typing_examples.py") 11 | 12 | 13 | def get_lineno_type(line): 14 | assert "revealed type" in line.lower() 15 | _, lineno, msg = line.split(":", 2) 16 | lineno = int(lineno) 17 | pat = re.search("[\"'](.*)[\"']", msg) 18 | typ = pat.groups()[0] 19 | return lineno, typ 20 | 21 | 22 | def test_mypy(): 23 | with open(PATH, "r") as fil: 24 | ex_lines = fil.readlines() 25 | 26 | stdout, stderr, code = 
api.run([PATH]) 27 | lines = stdout.splitlines() 28 | for line in lines: 29 | if "revealed type" in line.lower(): 30 | lineno, typ = get_lineno_type(line) 31 | check = ex_lines[lineno - 1].split("#")[1].strip() 32 | try: 33 | exec(check, {"typ": typ}) 34 | except Exception: 35 | assert ( 36 | False 37 | ), f"Failed check at {PATH}:{lineno}: {check!r}, where 'typ' is {typ!r}" 38 | elif "success" not in line.lower(): 39 | assert False, line 40 | -------------------------------------------------------------------------------- /tests/test_performance.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | import msgspec 4 | 5 | from utils import temp_module 6 | 7 | 8 | def test_process_large_recursive_union(): 9 | """ 10 | A recursive schema processing perf test from 11 | https://github.com/pydantic/pydantic/issues/8499 12 | 13 | This test is mostly to ensure that processing deeply recursive schemas with 14 | unions succeeds. 15 | """ 16 | 17 | def gen_code(): 18 | yield "from __future__ import annotations" 19 | yield "from msgspec import Struct" 20 | yield "from typing import Union" 21 | 22 | for i in range(50): 23 | yield textwrap.dedent( 24 | f""" 25 | class Node{i}(Struct, tag='node{i}'): 26 | data: Union[Node, None] 27 | """ 28 | ) 29 | yield "Node = Union[" 30 | for i in range(50): 31 | yield f" Node{i}," 32 | yield "]" 33 | 34 | code = "\n".join(gen_code()) 35 | 36 | with temp_module(code) as mod: 37 | dec = msgspec.json.Decoder(mod.Node) 38 | 39 | msg = b""" 40 | { 41 | "type": "node25", 42 | "data": { 43 | "type": "node13", 44 | "data": null 45 | } 46 | } 47 | """ 48 | 49 | sol = mod.Node25(mod.Node13(None)) 50 | 51 | assert dec.decode(msg) == sol 52 | -------------------------------------------------------------------------------- /tests/test_pyright.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import subprocess 4 | 5 | import pytest 6 | 7 | pytestmark = pytest.mark.pyright 8 | 9 | pyright = pytest.importorskip("pyright") 10 | 11 | PATH = os.path.join(os.path.dirname(__file__), "basic_typing_examples.py") 12 | 13 | 14 | def test_pyright(): 15 | with open(PATH, "r") as fil: 16 | ex_lines = fil.readlines() 17 | 18 | result = pyright.run(PATH, stdout=subprocess.PIPE) 19 | if result.returncode != 0: 20 | assert False, f"Unexpected pyright error:\n{result.stdout}" 21 | for line in result.stdout.decode().splitlines(): 22 | try: 23 | _, lineno, _, msg = line.split(":", 3) 24 | except ValueError: 25 | continue 26 | lineno = int(lineno) 27 | pat = re.search("[\"'](.*)[\"']", msg) 28 | typ = pat.groups()[0] 29 | check = ex_lines[lineno - 1].split("#")[1].strip() 30 | try: 31 | exec(check, {"typ": typ}) 32 | except Exception: 33 | assert ( 34 | False 35 | ), f"Failed check at {PATH}:{lineno}: {check!r}, where 'typ' is {typ!r}" 36 | -------------------------------------------------------------------------------- /tests/test_raw.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import subprocess 3 | import sys 4 | import textwrap 5 | import weakref 6 | 7 | import pytest 8 | 9 | import msgspec 10 | 11 | 12 | def test_raw_noargs(): 13 | r = msgspec.Raw() 14 | assert bytes(r) == b"" 15 | assert len(r) == 0 16 | assert not r 17 | 18 | 19 | @pytest.mark.parametrize("type", [bytes, bytearray, memoryview, str]) 20 | def test_raw_constructor(type): 21 | msg = "test" if type is str else type(b"test") 22 | r = 
msgspec.Raw(msg) 23 | assert bytes(r) == b"test" 24 | assert len(r) == 4 25 | assert r 26 | 27 | 28 | def test_raw_constructor_errors(): 29 | with pytest.raises(TypeError): 30 | msgspec.Raw(1) 31 | 32 | with pytest.raises(TypeError): 33 | msgspec.Raw(msg=b"test") 34 | 35 | with pytest.raises(TypeError): 36 | msgspec.Raw(b"test", b"extra") 37 | 38 | 39 | def test_raw_from_view(): 40 | r = msgspec.Raw(memoryview(b"123456")[:3]) 41 | assert bytes(r) == b"123" 42 | assert len(r) == 3 43 | assert r 44 | 45 | 46 | def test_raw_copy(): 47 | r = msgspec.Raw(b"test") 48 | c1 = sys.getrefcount(r) 49 | r2 = r.copy() 50 | c2 = sys.getrefcount(r) 51 | assert c1 + 1 == c2 52 | assert r2 is r 53 | 54 | r = msgspec.Raw() 55 | assert r.copy() is r 56 | 57 | m = memoryview(b"test") 58 | ref = weakref.ref(m) 59 | r = msgspec.Raw(m) 60 | del m 61 | # Raw holds a ref 62 | assert ref() is not None 63 | r2 = r.copy() 64 | # Actually copied 65 | assert r2 is not r 66 | assert bytes(r2) == b"test" 67 | # Copy doesn't accidentally release buffer 68 | assert ref() is not None 69 | del r 70 | # Copy doesn't hold a reference to original view 71 | assert ref() is None 72 | 73 | 74 | def test_raw_copy_doesnt_leak(): 75 | """See https://github.com/jcrist/msgspec/pull/709""" 76 | script = textwrap.dedent( 77 | """ 78 | import msgspec 79 | import tracemalloc 80 | 81 | tracemalloc.start() 82 | 83 | raw = msgspec.Raw(bytearray(1000)) 84 | for _ in range(10000): 85 | raw.copy() 86 | 87 | _, peak = tracemalloc.get_traced_memory() 88 | print(peak) 89 | """ 90 | ) 91 | 92 | output = subprocess.check_output([sys.executable, "-c", script]) 93 | peak = int(output.decode().strip()) 94 | assert peak < 10_000 # should really be ~2000 95 | 96 | 97 | def test_raw_pickle_bytes(): 98 | orig_buffer = b"test" 99 | r = msgspec.Raw(orig_buffer) 100 | o = r.__reduce__() 101 | assert o == (msgspec.Raw, (b"test",)) 102 | assert o[1][0] is orig_buffer 103 | 104 | 105 | def test_raw_pickle_str(): 106 | orig_buffer = "test" 107 | r = msgspec.Raw(orig_buffer) 108 | o = r.__reduce__() 109 | assert o == (msgspec.Raw, ("test",)) 110 | assert o[1][0] is orig_buffer 111 | 112 | 113 | def test_raw_pickle_view(): 114 | r = msgspec.Raw(memoryview(b"test")[:3]) 115 | o = r.__reduce__() 116 | assert o == (msgspec.Raw, (b"tes",)) 117 | 118 | 119 | def test_raw_comparison(): 120 | r = msgspec.Raw() 121 | assert r == r 122 | assert not r != r 123 | assert msgspec.Raw() == msgspec.Raw() 124 | assert msgspec.Raw(b"") == msgspec.Raw() 125 | assert not msgspec.Raw(b"") == msgspec.Raw(b"other") 126 | assert msgspec.Raw(b"test") == msgspec.Raw(memoryview(b"testy")[:4]) 127 | assert msgspec.Raw(b"test") != msgspec.Raw(b"tesp") 128 | assert msgspec.Raw(b"test") != msgspec.Raw(b"") 129 | assert msgspec.Raw(b"") != msgspec.Raw(b"test") 130 | assert msgspec.Raw() != 1 131 | assert 1 != msgspec.Raw() 132 | 133 | for op in [operator.lt, operator.gt, operator.le, operator.ge]: 134 | with pytest.raises(TypeError): 135 | op(msgspec.Raw(), msgspec.Raw()) 136 | -------------------------------------------------------------------------------- /tests/test_toml.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import datetime 3 | import enum 4 | import sys 5 | import uuid 6 | from decimal import Decimal 7 | from typing import Dict, FrozenSet, List, Set, Tuple 8 | 9 | import pytest 10 | 11 | import msgspec 12 | 13 | try: 14 | import tomllib 15 | except ImportError: 16 | try: 17 | import tomli as tomllib 18 | except 
ImportError: 19 | tomllib = None 20 | 21 | try: 22 | import tomli_w 23 | except ImportError: 24 | tomli_w = None 25 | 26 | 27 | needs_decode = pytest.mark.skipif( 28 | tomllib is None, reason="Neither tomllib or tomli are installed" 29 | ) 30 | needs_encode = pytest.mark.skipif(tomli_w is None, reason="tomli_w is not installed") 31 | 32 | PY311 = sys.version_info[:2] >= (3, 11) 33 | 34 | UTC = datetime.timezone.utc 35 | 36 | 37 | class ExStruct(msgspec.Struct): 38 | x: int 39 | y: str 40 | 41 | 42 | @dataclasses.dataclass 43 | class ExDataclass: 44 | x: int 45 | y: str 46 | 47 | 48 | class ExEnum(enum.Enum): 49 | one = "one" 50 | two = "two" 51 | 52 | 53 | class ExIntEnum(enum.IntEnum): 54 | one = 1 55 | two = 2 56 | 57 | 58 | def test_module_dir(): 59 | assert set(dir(msgspec.toml)) == {"encode", "decode"} 60 | 61 | 62 | @pytest.mark.skipif(PY311, reason="tomllib is builtin in 3.11+") 63 | def test_tomli_not_installed_error(monkeypatch): 64 | monkeypatch.setitem(sys.modules, "tomli", None) 65 | 66 | with pytest.raises(ImportError, match="conda install"): 67 | msgspec.toml.decode("a = 1", type=int) 68 | 69 | 70 | def test_tomli_w_not_installed_error(monkeypatch): 71 | monkeypatch.setitem(sys.modules, "tomli_w", None) 72 | 73 | with pytest.raises(ImportError, match="conda install"): 74 | msgspec.toml.encode({"a": 1}) 75 | 76 | 77 | @pytest.mark.parametrize( 78 | "val", 79 | [ 80 | True, 81 | False, 82 | 1, 83 | 1.5, 84 | "fizz", 85 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6), 86 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC), 87 | datetime.date(2022, 1, 2), 88 | datetime.time(12, 34), 89 | [1, 2], 90 | {"one": 2}, 91 | ], 92 | ) 93 | @needs_encode 94 | @needs_decode 95 | def test_roundtrip_any(val): 96 | msg = msgspec.toml.encode({"x": val}) 97 | res = msgspec.toml.decode(msg)["x"] 98 | assert res == val 99 | 100 | 101 | @pytest.mark.parametrize( 102 | "val, type", 103 | [ 104 | (True, bool), 105 | (False, bool), 106 | (1, int), 107 | (1.5, float), 108 | ("fizz", str), 109 | (b"fizz", bytes), 110 | (b"fizz", bytearray), 111 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6), datetime.datetime), 112 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC), datetime.datetime), 113 | (datetime.date(2022, 1, 2), datetime.date), 114 | (datetime.time(12, 34), datetime.time), 115 | (uuid.uuid4(), uuid.UUID), 116 | (ExEnum.one, ExEnum), 117 | (ExIntEnum.one, ExIntEnum), 118 | ([1, 2], List[int]), 119 | ((1, 2), Tuple[int, ...]), 120 | ({1, 2}, Set[int]), 121 | (frozenset({1, 2}), FrozenSet[int]), 122 | (("one", 2), Tuple[str, int]), 123 | ({"one": 2}, Dict[str, int]), 124 | ({1: "two"}, Dict[int, str]), 125 | (ExStruct(1, "two"), ExStruct), 126 | (ExDataclass(1, "two"), ExDataclass), 127 | ], 128 | ) 129 | @needs_encode 130 | @needs_decode 131 | def test_roundtrip_typed(val, type): 132 | msg = msgspec.toml.encode({"x": val}) 133 | res = msgspec.toml.decode(msg, type=Dict[str, type])["x"] 134 | assert res == val 135 | 136 | 137 | @needs_encode 138 | def test_encode_output_type(): 139 | msg = msgspec.toml.encode({"x": 1}) 140 | assert isinstance(msg, bytes) 141 | 142 | 143 | @needs_encode 144 | def test_encode_error(): 145 | class Oops: 146 | pass 147 | 148 | with pytest.raises(TypeError, match="Encoding objects of type Oops is unsupported"): 149 | msgspec.toml.encode({"x": Oops()}) 150 | 151 | 152 | @needs_encode 153 | @needs_decode 154 | def test_encode_enc_hook(): 155 | msg = msgspec.toml.encode({"x": Decimal(1.5)}, enc_hook=str) 156 | assert msgspec.toml.decode(msg) == {"x": "1.5"} 157 | 158 | 159 | 
@needs_encode 160 | @pytest.mark.parametrize("order", [None, "deterministic"]) 161 | def test_encode_order(order): 162 | msg = {"y": 1, "x": ({"n": 1, "m": 2},), "z": [{"b": 1, "a": 2}]} 163 | res = msgspec.toml.encode(msg, order=order) 164 | if order: 165 | sol_msg = {"x": ({"m": 2, "n": 1},), "y": 1, "z": [{"a": 2, "b": 1}]} 166 | else: 167 | sol_msg = msg 168 | sol = tomli_w.dumps(sol_msg).encode("utf-8") 169 | assert res == sol 170 | 171 | 172 | @needs_decode 173 | def test_decode_str_or_bytes_like(): 174 | assert msgspec.toml.decode("a = 1") == {"a": 1} 175 | assert msgspec.toml.decode(b"a = 1") == {"a": 1} 176 | assert msgspec.toml.decode(bytearray(b"a = 1")) == {"a": 1} 177 | assert msgspec.toml.decode(memoryview(b"a = 1")) == {"a": 1} 178 | with pytest.raises(TypeError): 179 | msgspec.toml.decode(1) 180 | 181 | 182 | @needs_decode 183 | @pytest.mark.parametrize("msg", [b"{{", b"!!binary 123"]) 184 | def test_decode_parse_error(msg): 185 | with pytest.raises(msgspec.DecodeError): 186 | msgspec.toml.decode(msg) 187 | 188 | 189 | @needs_decode 190 | def test_decode_validation_error(): 191 | with pytest.raises(msgspec.ValidationError, match="Expected `str`"): 192 | msgspec.toml.decode(b"a = [1, 2, 3]", type=Dict[str, List[str]]) 193 | 194 | 195 | @needs_decode 196 | @pytest.mark.parametrize("strict", [True, False]) 197 | def test_decode_strict_or_lax(strict): 198 | msg = b"a = ['1', '2']" 199 | typ = Dict[str, List[int]] 200 | 201 | if strict: 202 | with pytest.raises(msgspec.ValidationError, match="Expected `int`"): 203 | msgspec.toml.decode(msg, type=typ, strict=strict) 204 | else: 205 | res = msgspec.toml.decode(msg, type=typ, strict=strict) 206 | assert res == {"a": [1, 2]} 207 | 208 | 209 | @needs_decode 210 | def test_decode_dec_hook(): 211 | def dec_hook(typ, val): 212 | if typ is Decimal: 213 | return Decimal(val) 214 | raise TypeError 215 | 216 | res = msgspec.toml.decode("a = '1.5'", type=Dict[str, Decimal], dec_hook=dec_hook) 217 | assert res == {"a": Decimal("1.5")} 218 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import sys 4 | from typing import Generic, List, Optional, Set, TypeVar 5 | 6 | import pytest 7 | from utils import temp_module, package_not_installed 8 | 9 | from msgspec._utils import get_class_annotations 10 | 11 | PY310 = sys.version_info[:2] >= (3, 10) 12 | 13 | T = TypeVar("T") 14 | S = TypeVar("S") 15 | U = TypeVar("U") 16 | 17 | 18 | class Base(Generic[T]): 19 | x: T 20 | 21 | 22 | class Base2(Generic[T, S]): 23 | a: T 24 | b: S 25 | 26 | 27 | class TestGetClassAnnotations: 28 | @pytest.mark.parametrize("future_annotations", [False, True]) 29 | def test_eval_scopes(self, future_annotations): 30 | header = "from __future__ import annotations" if future_annotations else "" 31 | source = f""" 32 | {header} 33 | STR = str 34 | 35 | class Ex: 36 | LOCAL = float 37 | x: int 38 | y: LOCAL 39 | z: STR 40 | """ 41 | with temp_module(source) as mod: 42 | assert get_class_annotations(mod.Ex) == {"x": int, "y": float, "z": str} 43 | 44 | def test_none_to_nonetype(self): 45 | class Ex: 46 | x: None 47 | 48 | assert get_class_annotations(Ex) == {"x": type(None)} 49 | 50 | def test_subclass(self): 51 | class Base: 52 | x: int 53 | y: str 54 | 55 | class Sub(Base): 56 | x: float 57 | z: list 58 | 59 | class Base2: 60 | a: int 61 | 62 | class Sub2(Sub, Base2): 63 | b: float 64 | y: list 
65 | 66 | assert get_class_annotations(Base) == {"x": int, "y": str} 67 | assert get_class_annotations(Sub) == {"x": float, "y": str, "z": list} 68 | assert get_class_annotations(Sub2) == { 69 | "x": float, 70 | "y": list, 71 | "z": list, 72 | "a": int, 73 | "b": float, 74 | } 75 | 76 | def test_simple_generic(self): 77 | class Test(Generic[T]): 78 | x: T 79 | y: List[T] 80 | z: int 81 | 82 | assert get_class_annotations(Test) == {"x": T, "y": List[T], "z": int} 83 | assert get_class_annotations(Test[int]) == {"x": int, "y": List[int], "z": int} 84 | assert get_class_annotations(Test[Set[T]]) == { 85 | "x": Set[T], 86 | "y": List[Set[T]], 87 | "z": int, 88 | } 89 | 90 | def test_generic_sub1(self): 91 | class Sub(Base): 92 | y: int 93 | 94 | assert get_class_annotations(Sub) == {"x": T, "y": int} 95 | 96 | def test_generic_sub2(self): 97 | class Sub(Base, Generic[T]): 98 | y: List[T] 99 | 100 | assert get_class_annotations(Sub) == {"x": T, "y": List[T]} 101 | assert get_class_annotations(Sub[int]) == {"x": T, "y": List[int]} 102 | 103 | def test_generic_sub3(self): 104 | class Sub(Base[int], Generic[T]): 105 | y: List[T] 106 | 107 | assert get_class_annotations(Sub) == {"x": int, "y": List[T]} 108 | assert get_class_annotations(Sub[float]) == {"x": int, "y": List[float]} 109 | 110 | def test_generic_sub4(self): 111 | class Sub(Base[T]): 112 | y: List[T] 113 | 114 | assert get_class_annotations(Sub) == {"x": T, "y": List[T]} 115 | assert get_class_annotations(Sub[int]) == {"x": int, "y": List[int]} 116 | 117 | def test_generic_sub5(self): 118 | class Sub(Base[T], Generic[T]): 119 | y: List[T] 120 | 121 | assert get_class_annotations(Sub) == {"x": T, "y": List[T]} 122 | assert get_class_annotations(Sub[int]) == {"x": int, "y": List[int]} 123 | 124 | def test_generic_sub6(self): 125 | class Sub(Base[S]): 126 | y: List[S] 127 | 128 | assert get_class_annotations(Sub) == {"x": S, "y": List[S]} 129 | assert get_class_annotations(Sub[int]) == {"x": int, "y": List[int]} 130 | 131 | def test_generic_sub7(self): 132 | class Sub(Base[List[T]]): 133 | y: Set[T] 134 | 135 | assert get_class_annotations(Sub) == {"x": List[T], "y": Set[T]} 136 | assert get_class_annotations(Sub[int]) == {"x": List[int], "y": Set[int]} 137 | 138 | def test_generic_sub8(self): 139 | class Sub(Base[int], Base2[float, str]): 140 | pass 141 | 142 | assert get_class_annotations(Sub) == {"x": int, "a": float, "b": str} 143 | 144 | def test_generic_sub9(self): 145 | class Sub(Base[U], Base2[List[U], U]): 146 | y: str 147 | 148 | assert get_class_annotations(Sub) == {"y": str, "x": U, "a": List[U], "b": U} 149 | assert get_class_annotations(Sub[int]) == { 150 | "y": str, 151 | "x": int, 152 | "a": List[int], 153 | "b": int, 154 | } 155 | 156 | class Sub2(Sub[int]): 157 | x: list 158 | 159 | assert get_class_annotations(Sub2) == { 160 | "x": list, 161 | "y": str, 162 | "a": List[int], 163 | "b": int, 164 | } 165 | 166 | def test_generic_sub10(self): 167 | class Sub(Base[U], Base2[List[U], U]): 168 | y: str 169 | 170 | class Sub3(Sub[List[T]]): 171 | c: T 172 | 173 | assert get_class_annotations(Sub3) == { 174 | "c": T, 175 | "y": str, 176 | "x": List[T], 177 | "a": List[List[T]], 178 | "b": List[T], 179 | } 180 | assert get_class_annotations(Sub3[int]) == { 181 | "c": int, 182 | "y": str, 183 | "x": List[int], 184 | "a": List[List[int]], 185 | "b": List[int], 186 | } 187 | 188 | def test_generic_sub11(self): 189 | class Sub(Base[int]): 190 | y: float 191 | 192 | class Sub2(Sub, Base[int]): 193 | z: str 194 | 195 | assert 
get_class_annotations(Sub2) == {"x": int, "y": float, "z": str} 196 | 197 | def test_generic_invalid_parameters(self): 198 | class Invalid: 199 | @property 200 | def __parameters__(self): 201 | pass 202 | 203 | class Sub(Base[Invalid]): 204 | pass 205 | 206 | assert get_class_annotations(Sub) == {"x": Invalid} 207 | 208 | @pytest.mark.skipif(PY310, reason="<3.10 only") 209 | def test_union_backport_not_installed(self): 210 | class Ex: 211 | x: int | None = None 212 | 213 | with package_not_installed("eval_type_backport"): 214 | with pytest.raises( 215 | TypeError, match=r"or install the `eval_type_backport` package." 216 | ): 217 | get_class_annotations(Ex) 218 | 219 | @pytest.mark.skipif(PY310, reason="<3.10 only") 220 | def test_union_backport_installed(self): 221 | class Ex: 222 | x: int | None = None 223 | 224 | pytest.importorskip("eval_type_backport") 225 | 226 | assert get_class_annotations(Ex) == {"x": Optional[int]} 227 | -------------------------------------------------------------------------------- /tests/test_yaml.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import datetime 3 | import enum 4 | import sys 5 | import uuid 6 | from decimal import Decimal 7 | from typing import Dict, FrozenSet, List, Set, Tuple 8 | 9 | import pytest 10 | 11 | import msgspec 12 | 13 | try: 14 | import yaml # noqa 15 | except ImportError: 16 | pytestmark = pytest.mark.skip(reason="PyYAML is not installed") 17 | 18 | 19 | UTC = datetime.timezone.utc 20 | 21 | 22 | class ExStruct(msgspec.Struct): 23 | x: int 24 | y: str 25 | 26 | 27 | @dataclasses.dataclass 28 | class ExDataclass: 29 | x: int 30 | y: str 31 | 32 | 33 | class ExEnum(enum.Enum): 34 | one = "one" 35 | two = "two" 36 | 37 | 38 | class ExIntEnum(enum.IntEnum): 39 | one = 1 40 | two = 2 41 | 42 | 43 | def test_module_dir(): 44 | assert set(dir(msgspec.yaml)) == {"encode", "decode"} 45 | 46 | 47 | def test_pyyaml_not_installed_error(monkeypatch): 48 | monkeypatch.setitem(sys.modules, "yaml", None) 49 | 50 | with pytest.raises(ImportError, match="PyYAML"): 51 | msgspec.yaml.encode(1) 52 | 53 | with pytest.raises(ImportError, match="PyYAML"): 54 | msgspec.yaml.decode("1", type=int) 55 | 56 | 57 | @pytest.mark.parametrize( 58 | "val", 59 | [ 60 | None, 61 | True, 62 | False, 63 | 1, 64 | 1.5, 65 | "fizz", 66 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6), 67 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC), 68 | datetime.date(2022, 1, 2), 69 | [1, 2], 70 | {"one": 2}, 71 | {1: "two"}, 72 | ], 73 | ) 74 | def test_roundtrip_any(val): 75 | msg = msgspec.yaml.encode(val) 76 | res = msgspec.yaml.decode(msg) 77 | assert res == val 78 | 79 | 80 | @pytest.mark.parametrize( 81 | "val, type", 82 | [ 83 | (None, None), 84 | (True, bool), 85 | (False, bool), 86 | (1, int), 87 | (1.5, float), 88 | ("fizz", str), 89 | (b"fizz", bytes), 90 | (b"fizz", bytearray), 91 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6), datetime.datetime), 92 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC), datetime.datetime), 93 | (datetime.date(2022, 1, 2), datetime.date), 94 | (datetime.time(12, 34), datetime.time), 95 | (uuid.uuid4(), uuid.UUID), 96 | (ExEnum.one, ExEnum), 97 | (ExIntEnum.one, ExIntEnum), 98 | ([1, 2], List[int]), 99 | ((1, 2), Tuple[int, ...]), 100 | ({1, 2}, Set[int]), 101 | (frozenset({1, 2}), FrozenSet[int]), 102 | (("one", 2), Tuple[str, int]), 103 | ({"one": 2}, Dict[str, int]), 104 | ({1: "two"}, Dict[int, str]), 105 | (ExStruct(1, "two"), ExStruct), 106 | (ExDataclass(1, "two"), 
ExDataclass), 107 | ], 108 | ) 109 | def test_roundtrip_typed(val, type): 110 | msg = msgspec.yaml.encode(val) 111 | res = msgspec.yaml.decode(msg, type=type) 112 | assert res == val 113 | 114 | 115 | def test_encode_error(): 116 | class Oops: 117 | pass 118 | 119 | with pytest.raises(TypeError, match="Encoding objects of type Oops is unsupported"): 120 | msgspec.yaml.encode(Oops()) 121 | 122 | 123 | def test_encode_enc_hook(): 124 | msg = msgspec.yaml.encode(Decimal(1.5), enc_hook=str) 125 | assert msgspec.yaml.decode(msg) == "1.5" 126 | 127 | 128 | @pytest.mark.parametrize("order", [None, "deterministic"]) 129 | def test_encode_order(order): 130 | msg = {"y": 1, "x": 2, "z": 3} 131 | res = msgspec.yaml.encode(msg, order=order) 132 | sol = yaml.safe_dump(msg, sort_keys=bool(order)).encode("utf-8") 133 | assert res == sol 134 | 135 | 136 | def test_decode_str_or_bytes_like(): 137 | assert msgspec.yaml.decode("[1, 2]") == [1, 2] 138 | assert msgspec.yaml.decode(b"[1, 2]") == [1, 2] 139 | assert msgspec.yaml.decode(bytearray(b"[1, 2]")) == [1, 2] 140 | assert msgspec.yaml.decode(memoryview(b"[1, 2]")) == [1, 2] 141 | with pytest.raises(TypeError): 142 | msgspec.yaml.decode(1) 143 | 144 | 145 | @pytest.mark.parametrize("msg", [b"{{", b"!!binary 123"]) 146 | def test_decode_parse_error(msg): 147 | with pytest.raises(msgspec.DecodeError): 148 | msgspec.yaml.decode(msg) 149 | 150 | 151 | def test_decode_validation_error(): 152 | with pytest.raises(msgspec.ValidationError, match="Expected `str`"): 153 | msgspec.yaml.decode(b"[1, 2, 3]", type=List[str]) 154 | 155 | 156 | @pytest.mark.parametrize("strict", [True, False]) 157 | def test_decode_strict_or_lax(strict): 158 | msg = b"a: ['1', '2']" 159 | typ = Dict[str, List[int]] 160 | 161 | if strict: 162 | with pytest.raises(msgspec.ValidationError, match="Expected `int`"): 163 | msgspec.yaml.decode(msg, type=typ, strict=strict) 164 | else: 165 | res = msgspec.yaml.decode(msg, type=typ, strict=strict) 166 | assert res == {"a": [1, 2]} 167 | 168 | 169 | def test_decode_dec_hook(): 170 | def dec_hook(typ, val): 171 | if typ is Decimal: 172 | return Decimal(val) 173 | raise TypeError 174 | 175 | res = msgspec.yaml.decode("'1.5'", type=Decimal, dec_hook=dec_hook) 176 | assert res == Decimal("1.5") 177 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import inspect 3 | import textwrap 4 | import types 5 | import uuid 6 | from contextlib import contextmanager 7 | 8 | 9 | @contextmanager 10 | def temp_module(code): 11 | """Mutually recursive struct types defined inside functions don't work (and 12 | probably never will). To avoid populating a bunch of test structs in the 13 | top level of this module, we instead create a temporary module per test to 14 | exec whatever is needed for that test""" 15 | code = textwrap.dedent(code) 16 | name = f"temp_{uuid.uuid4().hex}" 17 | mod = types.ModuleType(name) 18 | sys.modules[name] = mod 19 | try: 20 | exec(code, mod.__dict__) 21 | yield mod 22 | finally: 23 | sys.modules.pop(name, None) 24 | 25 | 26 | @contextmanager 27 | def max_call_depth(n): 28 | cur_depth = len(inspect.stack(0)) 29 | orig = sys.getrecursionlimit() 30 | try: 31 | # Our measure of the current stack depth can be off by a bit. Trying to 32 | # set a recursionlimit < the current depth will raise a RecursionError. 
33 | # We just try again with a slightly higher limit, bailing after an 34 | # unreasonable amount of adjustments. 35 | for i in range(64): 36 | try: 37 | sys.setrecursionlimit(cur_depth + i + n) 38 | break 39 | except RecursionError: 40 | pass 41 | else: 42 | raise ValueError( 43 | "Failed to set low recursion limit, something is wrong here" 44 | ) 45 | yield 46 | finally: 47 | sys.setrecursionlimit(orig) 48 | 49 | 50 | @contextmanager 51 | def package_not_installed(name): 52 | try: 53 | orig = sys.modules.get(name) 54 | sys.modules[name] = None 55 | yield 56 | finally: 57 | if orig is not None: 58 | sys.modules[name] = orig 59 | else: 60 | del sys.modules[name] 61 | --------------------------------------------------------------------------------
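A minimal usage sketch (not part of the repository) of the `temp_module` helper defined in tests/utils.py above, following the same pattern already used in tests/test_performance.py and tests/test_utils.py: module source is exec'd into a throwaway module registered in sys.modules, so module-level msgspec Structs can be defined per test and cleaned up afterwards. The `Point` struct and the import path below are illustrative assumptions (the tests rely on tests/ being on sys.path).

import msgspec
from utils import temp_module  # the tests/utils.py helper shown above

code = """
from msgspec import Struct

class Point(Struct):
    x: int
    y: int
"""

with temp_module(code) as mod:
    # The temporary module behaves like a normally imported module.
    msg = msgspec.json.encode(mod.Point(1, 2))
    assert msgspec.json.decode(msg, type=mod.Point) == mod.Point(1, 2)
# On exit, temp_module pops the module from sys.modules again.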