├── .codecov.yml
├── .gitattributes
├── .github
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE
│   │   ├── bug.yml
│   │   ├── config.yml
│   │   ├── feature.yml
│   │   └── question.yml
│   ├── SECURITY.md
│   └── workflows
│       ├── ci.yml
│       └── docs.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── MANIFEST.in
├── README.md
├── benchmarks
│   ├── __init__.py
│   ├── bench_encodings.py
│   ├── bench_gc.py
│   ├── bench_large_json.py
│   ├── bench_library_size.py
│   ├── bench_structs.py
│   ├── bench_validation
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── bench_cattrs.py
│   │   ├── bench_mashumaro.py
│   │   ├── bench_msgspec.py
│   │   ├── bench_pydantic.py
│   │   └── runner.py
│   └── generate_data.py
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── _static
│       │   ├── anywidget.png
│       │   ├── bench-1.png
│       │   ├── bench-1.svg
│       │   ├── bench-validation.svg
│       │   ├── converters-dark.svg
│       │   ├── converters-light.svg
│       │   ├── custom.css
│       │   ├── edgedb.svg
│       │   ├── esmerald.png
│       │   ├── litestar.png
│       │   ├── mosec.png
│       │   ├── msgspec-logo-dark.svg
│       │   ├── msgspec-logo-light.svg
│       │   ├── nautilus-trader.png
│       │   ├── pioreactor.png
│       │   ├── sanic.png
│       │   └── zero.png
│       ├── _templates
│       │   └── help.html
│       ├── api.rst
│       ├── benchmarks.rst
│       ├── changelog.rst
│       ├── conf.py
│       ├── constraints.rst
│       ├── converters.rst
│       ├── examples
│       │   ├── asyncio-kv.rst
│       │   ├── conda-repodata.rst
│       │   ├── edgedb.rst
│       │   ├── geojson.rst
│       │   ├── index.rst
│       │   └── pyproject-toml.rst
│       ├── extending.rst
│       ├── index.rst
│       ├── inspect.rst
│       ├── install.rst
│       ├── jsonschema.rst
│       ├── perf-tips.rst
│       ├── schema-evolution.rst
│       ├── structs.rst
│       ├── supported-types.rst
│       ├── usage.rst
│       └── why.rst
├── examples
│   ├── asyncio-kv
│   │   └── kv.py
│   ├── conda-repodata
│   │   └── query_repodata.py
│   ├── edgedb
│   │   ├── dbschema
│   │   │   ├── default.esdl
│   │   │   └── migrations
│   │   │       └── 00001.edgeql
│   │   ├── edgedb.toml
│   │   └── insert_data.edgeql
│   ├── geojson
│   │   ├── canada.json
│   │   └── msgspec_geojson.py
│   └── pyproject-toml
│       └── pyproject.py
├── msgspec
│   ├── __init__.py
│   ├── __init__.pyi
│   ├── _core.c
│   ├── _json_schema.py
│   ├── _utils.py
│   ├── _version.py
│   ├── atof.h
│   ├── atof_consts.h
│   ├── common.h
│   ├── inspect.py
│   ├── itoa.h
│   ├── json.py
│   ├── json.pyi
│   ├── msgpack.py
│   ├── msgpack.pyi
│   ├── py.typed
│   ├── ryu.h
│   ├── structs.py
│   ├── structs.pyi
│   ├── toml.py
│   └── yaml.py
├── pyproject.toml
├── scripts
│   └── generate_atof_consts.py
├── setup.cfg
├── setup.py
├── tests
│   ├── basic_typing_examples.py
│   ├── conftest.py
│   ├── test_JSONTestSuite.py
│   ├── test_common.py
│   ├── test_constraints.py
│   ├── test_convert.py
│   ├── test_cpylint.py
│   ├── test_inspect.py
│   ├── test_integration.py
│   ├── test_json.py
│   ├── test_msgpack.py
│   ├── test_mypy.py
│   ├── test_performance.py
│   ├── test_pyright.py
│   ├── test_raw.py
│   ├── test_schema.py
│   ├── test_struct.py
│   ├── test_to_builtins.py
│   ├── test_toml.py
│   ├── test_utils.py
│   ├── test_yaml.py
│   └── utils.py
└── versioneer.py
/.codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | patch: off
10 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | msgspec/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, gender identity and expression, level of experience,
9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at jcristharif@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 |
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | Thank you for taking the time to contribute to `msgspec`!
4 |
5 | Here we document some contribution guidelines to help you ensure that your
6 | contribution is at its best.
7 |
8 | ## Setting up your Development Environment
9 |
10 | Before getting started, you will need to already have installed:
11 |
12 | - Python (3.8+ only), with development headers installed
13 | - A C compiler (`gcc`, `clang`, and `msvc` are all tested)
14 | - `git`
15 |
16 | Once you have those installed, you're ready to:
17 |
18 | - Clone the repository
19 | - Install all development dependencies
20 | - Build a development version of `msgspec`
21 | - Install the `pre-commit` hooks
22 |
23 | ```bash
24 | # Clone the repository
25 | git clone https://github.com/jcrist/msgspec.git
26 |
27 | # cd into the repo root directory
28 | cd msgspec/
29 |
30 | # Build and install msgspec & all dev dependencies
31 | pip install -e ".[dev]"
32 |
33 | # Install the pre-commit hooks
34 | pre-commit install
35 | ```
36 |
37 | ## Editing and Rebuilding
38 |
39 | You now have a "development" build of `msgspec` installed. This means that you
40 | can make changes to the `.py` files and test them without requiring a rebuild
41 | of msgspec's C extension. Edit away!
42 |
43 | If you do make changes to a `.c` file, you'll need to recompile. You can do
44 | this by running
45 |
46 | ```bash
47 | pip install -e .
48 | ```
49 |
50 | By default `msgspec` is built in release mode, with optimizations enabled. To
51 | build a debug build instead (for use with e.g. `gdb` or `lldb`) define the
52 | `MSGSPEC_DEBUG` environment variable before building.
53 |
54 | ```bash
55 | MSGSPEC_DEBUG=1 pip install -e .
56 | ```
57 |
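For example, once a debug build is installed, a crash in the C extension can be inspected by running the test suite under `gdb` (a minimal sketch; the chosen test file and flags are arbitrary):

```bash
# Launch pytest under gdb; -x stops at the first failure
gdb --args python -m pytest tests/test_json.py -x

# At the gdb prompt:
#   run    # run until the crash is hit
#   bt     # print a C-level backtrace into msgspec/_core.c
```
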
58 | ## Testing
59 |
60 | Tests are located in the `tests/` directory. Any code changes should include
61 | additional tests to ensure correctness. The tests are broken into various
62 | `test_*.py` files specific to the functionality that they're testing.
63 |
64 | The tests can be run using `pytest` as follows:
65 |
66 | ```bash
67 | pytest
68 | ```
69 |
70 | If you want to run a specific test file, you may specify that file explicitly:
71 |
72 | ```bash
73 | pytest tests/test_json.py
74 | ```
75 |
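Standard `pytest` selection flags work as usual; for example, to run only tests matching a keyword with verbose output (the keyword below is just an illustration):

```bash
# -k filters tests by a name expression, -v prints each test as it runs
pytest tests/test_json.py -k "datetime" -v
```
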
76 | ## Linting
77 |
78 | We use `pre-commit` to automatically run a few code linters before every
79 | commit. If you followed the development setup above, you should already have
80 | `pre-commit` and all the commit hooks installed.
81 |
82 | These hooks will run whenever you try to commit changes.
83 |
84 | ```bash
85 | git commit # linters will run automatically here
86 | ```
87 |
88 | If you wish to run the linters manually without committing, you can run:
89 |
90 | ```bash
91 | pre-commit run
92 | ```
93 |
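By default `pre-commit run` only checks the files currently staged for commit. To lint the entire repository, pass `--all-files`:

```bash
# Run all configured hooks (ruff, ruff-format, codespell) against every tracked file
pre-commit run --all-files
```
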
94 | ## Documentation
95 |
96 | The documentation sources can be found under `docs/source/`. They are built
97 | using `Sphinx`; to build them locally, run the following steps:
98 |
99 | ```bash
100 | cd docs/ # Make sure we are in the docs/ folder
101 |
102 | make html # Build the html
103 |
104 | # Output can now be found under docs/build/html and can be viewed in the browser
105 | ```
106 |
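One convenient way to view the rendered pages is Python's builtin HTTP server, run from the `docs/` directory (just a convenience; opening `build/html/index.html` in a browser works too):

```bash
# Serve the built docs at http://localhost:8000
python -m http.server --directory build/html
```
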
107 | ## Continuous Integration (CI)
108 |
109 | We use GitHub Actions to provide "continuous integration" testing for all Pull
110 | Requests (PRs). When submitting a PR, please check to see that all tests pass,
111 | and fix any issues that come up.
112 |
113 | ## Code of Conduct
114 |
115 | ``msgspec`` has a code of conduct that must be followed by all contributors to
116 | the project. You may read the code of conduct
117 | [here](https://github.com/jcrist/msgspec/blob/main/CODE_OF_CONDUCT.md).
118 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.yml:
--------------------------------------------------------------------------------
1 | name: 🪲 Bug Report
2 | description: Report a bug or unexpected behavior in msgspec
3 | body:
4 | - type: markdown
5 | attributes:
6 | value: Thanks for taking the time to fill out a bug report!
7 |
8 | - type: textarea
9 | id: description
10 | attributes:
11 | label: Description
12 | description: >
13 | Describe the bug. What happened? What did you expect to happen?
14 |
15 |
16 | When possible, please also include a [minimal, complete, verifiable
17 | example](https://stackoverflow.com/help/minimal-reproducible-example).
18 | Ideally this should be code that can be run without modification to
19 | demonstrate the problem.
20 |
21 |
22 | When including errors and tracebacks, please include the _full
23 | traceback_ as well as the code that generated the error (or at least
24 | the line that caused it).
25 | validations:
26 | required: true
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature.yml:
--------------------------------------------------------------------------------
1 | name: 🙌 Feature Request
2 | description: Suggest a new feature or change to msgspec
3 | body:
4 | - type: markdown
5 | attributes:
6 | value: Thanks for taking the time to fill out a feature request!
7 |
8 | - type: textarea
9 | id: description
10 | attributes:
11 | label: Description
12 | description: >
13 | Describe the feature. What problems does it solve?
14 |
15 |
16 | If the feature is related to a problem, please describe in detail
17 | your use case. What would this new feature help you do that you
18 | couldn't do before? Why is this useful?
19 |
20 |
21 | When relevant, please also include example code making use of your
22 | proposed feature. How would you use this feature? What would code using
23 | it look like?
24 | validations:
25 | required: true
26 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.yml:
--------------------------------------------------------------------------------
1 | name: ❓ Question
2 | description: Ask a question
3 | body:
4 | - type: markdown
5 | attributes:
6 | value: Thanks for taking the time to ask a question!
7 |
8 | - type: textarea
9 | id: description
10 | attributes:
11 | label: Question
12 | description: >
13 | Ask your question here. Please search through existing and closed
14 | issues first to ensure your question hasn't already been answered
15 | elsewhere.
16 | validations:
17 | required: true
18 |
--------------------------------------------------------------------------------
/.github/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | If you believe you have found a security-related bug with `msgspec`, **do not
4 | open a public GitHub issue**. Instead, please email jcristharif@gmail.com.
5 |
6 | Please include as much detail as you would for a normal issue in your report.
7 | In particular, including a minimal reproducible example will help the
8 | maintainers diagnose and resolve the issue quickly and efficiently.
9 |
10 | After the issue is resolved, we will make a release and announce the security
11 | fix through our normal communication channels. When it makes sense we may also
12 | obtain a CVE ID. If you would like to be credited with the report, please
13 | include your name and any links in the email.
14 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: Build and Test
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 | branches: [main]
8 | paths-ignore:
9 | - "docs/**"
10 | - "benchmarks/**"
11 | - "examples/**"
12 | - ".github/**"
13 | - "README.rst"
14 | release:
15 | types: [published]
16 |
17 | jobs:
18 | lint:
19 | name: Lint and ruff code
20 | runs-on: ubuntu-latest
21 |
22 | steps:
23 | - uses: actions/checkout@v4
24 |
25 | - name: Install Python
26 | uses: actions/setup-python@v5
27 | with:
28 | python-version: "3.11"
29 |
30 | - name: Build msgspec and install dependencies
31 | run: |
32 | pip install -e ".[dev]"
33 |
34 | - name: Run pre-commit hooks
35 | uses: pre-commit/action@v3.0.0
36 |
37 | - name: mypy
38 | run: pytest tests/test_mypy.py
39 |
40 | - name: pyright
41 | run: pytest tests/test_pyright.py
42 |
43 | - name: doctests
44 | run: pytest --doctest-modules msgspec
45 |
46 | - name: Rebuild with sanitizers & coverage
47 | env:
48 | MSGSPEC_SANITIZE: "true"
49 | MSGSPEC_COVERAGE: "true"
50 | run: |
51 | python setup.py clean --all
52 | # I know this is deprecated, but I can't find a way to keep the build
53 | # directory around anymore on new versions of setuptools
54 | python setup.py develop
55 |
56 | - name: Run tests with sanitizers
57 | env:
58 | PYTHONMALLOC: "malloc"
59 | ASAN_OPTIONS: "detect_leaks=0"
60 | run: |
61 | LD_PRELOAD=`gcc -print-file-name=libasan.so` coverage run -m pytest -s -m "not mypy and not pyright"
62 |
63 | - name: Generate coverage files
64 | run: |
65 | coverage xml
66 | gcov -abcu `find build/ -name *.o`
67 |
68 | - name: Upload Codecov
69 | uses: codecov/codecov-action@v3
70 | with:
71 | files: coverage.xml,_core.c.gcov,atof.h.gcov,ryu.h.gcov
72 |
73 | build_wheels:
74 | name: Build wheels on ${{ matrix.os }}
75 | runs-on: ${{ matrix.os }}
76 | strategy:
77 | matrix:
78 | os: [ubuntu-latest, macos-13, windows-latest]
79 |
80 | env:
81 | CIBW_TEST_EXTRAS: "test"
82 | CIBW_TEST_COMMAND: "pytest {project}/tests"
83 | CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-* cp313-*"
84 | CIBW_SKIP: "*-win32 *_i686 *_s390x *_ppc64le"
85 | CIBW_ARCHS_MACOS: "x86_64 arm64"
86 | CIBW_ARCHS_LINUX: "x86_64 aarch64"
87 | CIBW_TEST_SKIP: "*_arm64 *-musllinux_*"
88 | CIBW_ENVIRONMENT: "CFLAGS=-g0"
89 |
90 | steps:
91 | - uses: actions/checkout@v4
92 |
93 | - name: Set up QEMU
94 | if: runner.os == 'Linux'
95 | uses: docker/setup-qemu-action@v1
96 | with:
97 | platforms: all
98 |
99 | - name: Set up Environment
100 | if: github.event_name != 'release'
101 | run: |
102 | echo "CIBW_SKIP=${CIBW_SKIP} *-musllinux_* cp39-*_aarch64 cp311-*_aarch64 cp312-*_aarch64 cp313-*_aarch64" >> $GITHUB_ENV
103 |
104 | - name: Build & Test Wheels
105 | uses: pypa/cibuildwheel@v2.22.0
106 |
107 | - name: Upload artifact
108 | uses: actions/upload-artifact@v4
109 | if: github.event_name == 'release' && github.event.action == 'published'
110 | with:
111 | name: artifact-wheels-${{ matrix.os }}
112 | path: ./wheelhouse/*.whl
113 |
114 | build_sdist:
115 | name: Build Source Distribution
116 | runs-on: ubuntu-latest
117 | if: github.event_name == 'release' && github.event.action == 'published'
118 |
119 | steps:
120 | - uses: actions/checkout@v4
121 |
122 | - name: Install Python
123 | uses: actions/setup-python@v5
124 | with:
125 | python-version: "3.11"
126 |
127 | - name: Build source distribution
128 | run: python setup.py sdist
129 |
130 | - name: Upload artifact
131 | uses: actions/upload-artifact@v4
132 | with:
133 | name: artifact-sdist
134 | path: dist/*.tar.gz
135 |
136 | upload_pypi:
137 | needs: [build_wheels, build_sdist]
138 | runs-on: ubuntu-latest
139 | permissions:
140 | id-token: write
141 | if: github.event_name == 'release' && github.event.action == 'published'
142 | steps:
143 | - uses: actions/download-artifact@v4
144 | with:
145 | merge-multiple: true
146 | path: dist
147 | pattern: artifact-*
148 |
149 | - name: Publish package distributions to PyPI
150 | uses: pypa/gh-action-pypi-publish@release/v1
151 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: documentation
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request: null
7 |
8 | jobs:
9 | build-docs:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 |
14 | - name: Install Python
15 | uses: actions/setup-python@v5
16 | with:
17 | python-version: "3.11"
18 |
19 | - name: Install msgspec and dependencies
20 | run: |
21 | pip install -e ".[doc]"
22 |
23 | - name: Build Docs
24 | run: |
25 | pushd docs
26 | make html
27 | popd
28 |
29 | - name: Deploy
30 | uses: peaceiris/actions-gh-pages@v3
31 | if: github.ref == 'refs/heads/main'
32 | with:
33 | github_token: ${{ secrets.GITHUB_TOKEN }}
34 | publish_dir: ./docs/build/html
35 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Editor config folders
2 | ## Vscode
3 | .settings/
4 | .project
5 | .vscode/
6 | .vs/
7 | ## PyCharm/IntelliJ-generated files
8 | *.iml
9 | .idea/
10 |
11 | # Python cached sources
12 | __pycache__/
13 | *.pyc
14 |
15 | # Virtual environments
16 | .venv*/
17 | venv*/
18 |
19 | # Pytest and coverage
20 | .coverage
21 | .pytest/
22 | .pytest_cache/
23 | htmlcov/
24 |
25 | # Mypy Cache
26 | .mypy_cache/
27 |
28 | # Docs build
29 | docs/build/
30 |
31 | # Benchmark outputs
32 | benchmarks/*.html
33 | benchmarks/*.json
34 |
35 | # Setuptools/twine-generated files, compiled sources.
36 | build/
37 | dist/
38 | *.egg-info/
39 | pip-wheel-metadata/
40 | *.so
41 | *.o
42 | *.pyd
43 |
44 | # Misc
45 | *.pem
46 | out/
47 | .cache/
48 | .DS_Store
49 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 | rev: v0.7.1
4 | hooks:
5 | - id: ruff
6 | args: [ --fix ]
7 | - id: ruff-format
8 |
9 | - repo: https://github.com/codespell-project/codespell
10 | rev: v2.2.2
11 | hooks:
12 | - id: codespell
13 | language_version: python3
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, Jim Crist-Harif
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | 3. Neither the name of the copyright holder nor the names of its contributors
15 | may be used to endorse or promote products derived from this software
16 | without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include msgspec/*.c
2 | include msgspec/*.h
3 | include msgspec/*.py
4 | include msgspec/*.pyi
5 | include msgspec/py.typed
6 | include setup.py
7 | include versioneer.py
8 | include README.md
9 | include LICENSE
10 | include MANIFEST.in
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
29 | `msgspec` is a *fast* serialization and validation library, with builtin
30 | support for [JSON](https://json.org), [MessagePack](https://msgpack.org),
31 | [YAML](https://yaml.org), and [TOML](https://toml.io). It features:
32 |
33 | - 🚀 **High performance encoders/decoders** for common protocols. The JSON and
34 | MessagePack implementations regularly
35 | [benchmark](https://jcristharif.com/msgspec/benchmarks.html) as the fastest
36 | options for Python.
37 |
38 | - 🎉 **Support for a wide variety of Python types**. Additional types may be
39 | supported through
40 | [extensions](https://jcristharif.com/msgspec/extending.html).
41 |
42 | - 🔍 **Zero-cost schema validation** using familiar Python type annotations. In
43 | [benchmarks](https://jcristharif.com/msgspec/benchmarks.html) `msgspec`
44 | decodes *and* validates JSON faster than
45 | [orjson](https://github.com/ijl/orjson) can decode it alone.
46 |
47 | - ✨ **A speedy Struct type** for representing structured data. If you already
48 | use [dataclasses](https://docs.python.org/3/library/dataclasses.html) or
49 | [attrs](https://www.attrs.org),
50 | [structs](https://jcristharif.com/msgspec/structs.html) should feel familiar.
51 | However, they're
52 | [5-60x faster](https://jcristharif.com/msgspec/benchmarks.html#benchmark-structs)
53 | for common operations.
54 |
55 | All of this is included in a
56 | [lightweight library](https://jcristharif.com/msgspec/benchmarks.html#benchmark-library-size)
57 | with no required dependencies.
58 |
59 | ---
60 |
61 | `msgspec` may be used for serialization alone, as a faster JSON or
62 | MessagePack library. For the greatest benefit though, we recommend using
63 | `msgspec` to handle the full serialization & validation workflow:
64 |
65 | **Define** your message schemas using standard Python type annotations.
66 |
67 | ```python
68 | >>> import msgspec
69 |
70 | >>> class User(msgspec.Struct):
71 | ... """A new type describing a User"""
72 | ... name: str
73 | ... groups: set[str] = set()
74 | ... email: str | None = None
75 | ```
76 |
77 | **Encode** messages as JSON, or one of the many other supported protocols.
78 |
79 | ```python
80 | >>> alice = User("alice", groups={"admin", "engineering"})
81 |
82 | >>> alice
83 | User(name='alice', groups={"admin", "engineering"}, email=None)
84 |
85 | >>> msg = msgspec.json.encode(alice)
86 |
87 | >>> msg
88 | b'{"name":"alice","groups":["admin","engineering"],"email":null}'
89 | ```
90 |
91 | **Decode** messages back into Python objects, with optional schema validation.
92 |
93 | ```python
94 | >>> msgspec.json.decode(msg, type=User)
95 | User(name='alice', groups={"admin", "engineering"}, email=None)
96 |
97 | >>> msgspec.json.decode(b'{"name":"bob","groups":[123]}', type=User)
98 | Traceback (most recent call last):
99 |   File "<stdin>", line 1, in <module>
100 | msgspec.ValidationError: Expected `str`, got `int` - at `$.groups[0]`
101 | ```
102 |
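The same schema-driven workflow works with the other builtin protocols. A minimal sketch using MessagePack instead of JSON, reusing the `User` schema and `alice` instance from above:

```python
>>> msg = msgspec.msgpack.encode(alice)      # compact binary MessagePack payload

>>> msgspec.msgpack.decode(msg, type=User)   # decode *and* validate in one step
User(name='alice', groups={"admin", "engineering"}, email=None)
```
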
103 | `msgspec` is designed to be as performant as possible, while retaining some of
104 | the niceties of validation libraries like
105 | [pydantic](https://pydantic-docs.helpmanual.io/). For supported types,
106 | encoding/decoding a message with `msgspec` can be
107 | [~10-80x faster than alternative libraries](https://jcristharif.com/msgspec/benchmarks.html).
108 |
115 | See [the documentation](https://jcristharif.com/msgspec/) for more information.
116 |
117 |
118 | ## LICENSE
119 |
120 | New BSD. See the
121 | [License File](https://github.com/jcrist/msgspec/blob/main/LICENSE).
122 |
--------------------------------------------------------------------------------
/benchmarks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/benchmarks/__init__.py
--------------------------------------------------------------------------------
/benchmarks/bench_encodings.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | import dataclasses
5 | import json
6 | import timeit
7 | import importlib.metadata
8 | from typing import Any, Literal, Callable
9 |
10 | from .generate_data import make_filesystem_data
11 |
12 | import msgspec
13 |
14 |
15 | class File(msgspec.Struct, kw_only=True, omit_defaults=True, tag="file"):
16 | name: str
17 | created_by: str
18 | created_at: str
19 | updated_by: str | None = None
20 | updated_at: str | None = None
21 | nbytes: int
22 | permissions: Literal["READ", "WRITE", "READ_WRITE"]
23 |
24 |
25 | class Directory(msgspec.Struct, kw_only=True, omit_defaults=True, tag="directory"):
26 | name: str
27 | created_by: str
28 | created_at: str
29 | updated_by: str | None = None
30 | updated_at: str | None = None
31 | contents: list[File | Directory]
32 |
33 |
34 | @dataclasses.dataclass
35 | class Benchmark:
36 | label: str
37 | version: str
38 | encode: Callable
39 | decode: Callable
40 | schema: Any = None
41 |
42 | def run(self, data: bytes) -> dict:
43 | if self.schema is not None:
44 | data = msgspec.convert(data, self.schema)
45 | timer = timeit.Timer("func(data)", globals={"func": self.encode, "data": data})
46 | n, t = timer.autorange()
47 | encode_time = t / n
48 |
49 | data = self.encode(data)
50 |
51 | timer = timeit.Timer("func(data)", globals={"func": self.decode, "data": data})
52 | n, t = timer.autorange()
53 | decode_time = t / n
54 |
55 | return {
56 | "label": self.label,
57 | "encode": encode_time,
58 | "decode": decode_time,
59 | }
60 |
61 |
62 | def json_benchmarks():
63 | import orjson
64 | import ujson
65 | import rapidjson
66 | import simdjson
67 |
68 | simdjson_ver = importlib.metadata.version("pysimdjson")
69 |
70 | rj_dumps = rapidjson.Encoder()
71 | rj_loads = rapidjson.Decoder()
72 |
73 | def uj_dumps(obj):
74 | return ujson.dumps(obj)
75 |
76 | enc = msgspec.json.Encoder()
77 | dec = msgspec.json.Decoder(Directory)
78 | dec2 = msgspec.json.Decoder()
79 |
80 | return [
81 | Benchmark("msgspec structs", None, enc.encode, dec.decode, Directory),
82 | Benchmark("msgspec", msgspec.__version__, enc.encode, dec2.decode),
83 | Benchmark("json", None, json.dumps, json.loads),
84 | Benchmark("orjson", orjson.__version__, orjson.dumps, orjson.loads),
85 | Benchmark("ujson", ujson.__version__, uj_dumps, ujson.loads),
86 | Benchmark("rapidjson", rapidjson.__version__, rj_dumps, rj_loads),
87 | Benchmark("simdjson", simdjson_ver, simdjson.dumps, simdjson.loads),
88 | ]
89 |
90 |
91 | def msgpack_benchmarks():
92 | import msgpack
93 | import ormsgpack
94 |
95 | enc = msgspec.msgpack.Encoder()
96 | dec = msgspec.msgpack.Decoder(Directory)
97 | dec2 = msgspec.msgpack.Decoder()
98 |
99 | return [
100 | Benchmark("msgspec structs", None, enc.encode, dec.decode, Directory),
101 | Benchmark("msgspec", msgspec.__version__, enc.encode, dec2.decode),
102 | Benchmark("msgpack", msgpack.__version__, msgpack.dumps, msgpack.loads),
103 | Benchmark(
104 | "ormsgpack", ormsgpack.__version__, ormsgpack.packb, ormsgpack.unpackb
105 | ),
106 | ]
107 |
108 |
109 | def main():
110 | import argparse
111 |
112 | parser = argparse.ArgumentParser(
113 | description="Benchmark different python serialization libraries"
114 | )
115 | parser.add_argument(
116 | "--versions",
117 | action="store_true",
118 | help="Output library version info, and exit immediately",
119 | )
120 | parser.add_argument(
121 | "-n",
122 | type=int,
123 | help="The number of objects in the generated data, defaults to 1000",
124 | default=1000,
125 | )
126 | parser.add_argument(
127 | "-p",
128 | "--protocol",
129 | choices=["json", "msgpack"],
130 | default="json",
131 | help="The protocol to benchmark, defaults to JSON",
132 | )
133 | parser.add_argument(
134 | "--json",
135 | action="store_true",
136 | help="whether to output the results as json",
137 | )
138 | args = parser.parse_args()
139 |
140 | benchmarks = json_benchmarks() if args.protocol == "json" else msgpack_benchmarks()
141 |
142 | if args.versions:
143 | for bench in benchmarks:
144 | if bench.version is not None:
145 | print(f"- {bench.label}: {bench.version}")
146 | sys.exit(0)
147 |
148 | data = make_filesystem_data(args.n)
149 |
150 | results = [benchmark.run(data) for benchmark in benchmarks]
151 |
152 | if args.json:
153 | for line in results:
154 | print(json.dumps(line))
155 | else:
156 | # Compose the results table
157 | results.sort(key=lambda row: row["encode"] + row["decode"])
158 | best_et = results[0]["encode"]
159 | best_dt = results[0]["decode"]
160 | best_tt = best_et + best_dt
161 |
162 | columns = (
163 | "",
164 | "encode (μs)",
165 | "vs.",
166 | "decode (μs)",
167 | "vs.",
168 | "total (μs)",
169 | "vs.",
170 | )
171 | rows = [
172 | (
173 | r["label"],
174 | f"{1_000_000 * r['encode']:.1f}",
175 | f"{r['encode'] / best_et:.1f}",
176 | f"{1_000_000 * r['decode']:.1f}",
177 | f"{r['decode'] / best_dt:.1f}",
178 | f"{1_000_000 * (r['encode'] + r['decode']):.1f}",
179 | f"{(r['encode'] + r['decode']) / best_tt:.1f}",
180 | )
181 | for r in results
182 | ]
183 | widths = tuple(
184 | max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)
185 | )
186 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
187 | header = row_template % tuple(columns)
188 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
189 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
190 | parts = [bar, header, bar_underline]
191 | for r in rows:
192 | parts.append(row_template % r)
193 | parts.append(bar)
194 | print("\n".join(parts))
195 |
196 |
197 | if __name__ == "__main__":
198 | main()
199 |
--------------------------------------------------------------------------------
/benchmarks/bench_gc.py:
--------------------------------------------------------------------------------
1 | """This file benchmarks GC collection time for a large number of tiny
2 | dataclass-like instances.
3 |
4 | For each type, the following is measured:
5 |
6 | - Time for a single full GC pass over all the data.
7 | - Amount of memory used to hold all the data
8 | """
9 |
10 | import gc
11 | import sys
12 | import time
13 |
14 | import msgspec
15 |
16 |
17 | def sizeof(x, _seen=None):
18 | """Get the recursive sizeof for an object (memoized).
19 |
20 | Not generic, works on types used in this benchmark.
21 | """
22 | if _seen is None:
23 | _seen = set()
24 |
25 | _id = id(x)
26 | if _id in _seen:
27 | return 0
28 |
29 | _seen.add(_id)
30 |
31 | size = sys.getsizeof(x)
32 |
33 | if isinstance(x, dict):
34 | for k, v in x.items():
35 | size += sizeof(k, _seen)
36 | size += sizeof(v, _seen)
37 | if hasattr(x, "__dict__"):
38 | size += sizeof(x.__dict__, _seen)
39 | if hasattr(x, "__slots__"):
40 | for k in x.__slots__:
41 | size += sizeof(k, _seen)
42 | size += sizeof(getattr(x, k), _seen)
43 | return size
44 |
45 |
46 | class Point(msgspec.Struct):
47 | x: int
48 | y: int
49 | z: int
50 |
51 |
52 | class PointGCFalse(msgspec.Struct, gc=False):
53 | x: int
54 | y: int
55 | z: int
56 |
57 |
58 | class PointClass:
59 | def __init__(self, x, y, z):
60 | self.x = x
61 | self.y = y
62 | self.z = z
63 |
64 |
65 | class PointClassSlots:
66 | __slots__ = ("x", "y", "z")
67 |
68 | def __init__(self, x, y, z):
69 | self.x = x
70 | self.y = y
71 | self.z = z
72 |
73 |
74 | def bench_gc(cls):
75 | # Allocate a dict of structs
76 | data = {i: cls(i, i, i) for i in range(1_000_000)}
77 |
78 | # Run a full collection
79 | start = time.perf_counter()
80 | gc.collect()
81 | stop = time.perf_counter()
82 | gc_time = (stop - start) * 1e3
83 | mibytes = sizeof(data) / (2**20)
84 | return gc_time, mibytes
85 |
86 |
87 | def format_table(results):
88 | columns = ("", "GC time (ms)", "Memory Used (MiB)")
89 |
90 | rows = []
91 | for name, t, mem in results:
92 | rows.append((f"**{name}**", f"{t:.2f}", f"{mem:.2f}"))
93 |
94 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns))
95 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
96 | header = row_template % tuple(columns)
97 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
98 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
99 | parts = [bar, header, bar_underline]
100 | for r in rows:
101 | parts.append(row_template % r)
102 | parts.append(bar)
103 | return "\n".join(parts)
104 |
105 |
106 | def main():
107 | results = []
108 | for name, cls in [
109 | ("standard class", PointClass),
110 | ("standard class with __slots__", PointClassSlots),
111 | ("msgspec struct", Point),
112 | ("msgspec struct with gc=False", PointGCFalse),
113 | ]:
114 | print(f"Benchmarking {name}...")
115 | gc_time, mibytes = bench_gc(cls)
116 | results.append((name, gc_time, mibytes))
117 |
118 | print(format_table(results))
119 |
120 |
121 | if __name__ == "__main__":
122 | main()
123 |
--------------------------------------------------------------------------------
/benchmarks/bench_large_json.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import sys
3 | import tempfile
4 |
5 | import requests
6 |
7 | TEMPLATE = """
8 | import resource
9 | import time
10 |
11 | with open({path!r}, "rb") as f:
12 | data = f.read()
13 |
14 | initial_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
15 |
16 | {setup}
17 |
18 | start = time.perf_counter()
19 | for _ in range(5):
20 | decode(data)
21 | stop = time.perf_counter()
22 |
23 | max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
24 | # mem_mib = (max_rss * 1024 - len(data)) / (1024 * 1024)
25 | mem_mib = (max_rss - initial_rss) / 1024
26 | time_ms = ((stop - start) / 5) * 1000
27 | print([mem_mib, time_ms])
28 | """
29 |
30 | JSON = """
31 | import json
32 | decode = json.loads
33 | """
34 |
35 | UJSON = """
36 | import ujson
37 | decode = ujson.loads
38 | """
39 |
40 | ORJSON = """
41 | import orjson
42 | decode = orjson.loads
43 | """
44 |
45 | RAPIDJSON = """
46 | import rapidjson
47 | decode = rapidjson.loads
48 | """
49 |
50 | SIMDJSON = """
51 | import simdjson
52 | decode = simdjson.loads
53 | """
54 |
55 | MSGSPEC = """
56 | import msgspec
57 | decode = msgspec.json.decode
58 | """
59 |
60 | MSGSPEC_STRUCTS = """
61 | import msgspec
62 | from typing import Union
63 |
64 | class Package(msgspec.Struct, gc=False):
65 | build: str
66 | build_number: int
67 | depends: tuple[str, ...]
68 | md5: str
69 | name: str
70 | sha256: str
71 | subdir: str
72 | version: str
73 | license: str = ""
74 | noarch: Union[str, bool, None] = None
75 | size: int = 0
76 | timestamp: int = 0
77 |
78 | class RepoData(msgspec.Struct, gc=False):
79 | repodata_version: int
80 | info: dict
81 | packages: dict[str, Package]
82 | removed: tuple[str, ...]
83 |
84 | decode = msgspec.json.Decoder(RepoData).decode
85 | """
86 |
87 |
88 | def main():
89 | import argparse
90 |
91 | parser = argparse.ArgumentParser(
92 | description="Benchmark decoding a large JSON message using various JSON libraries"
93 | )
94 | parser.add_argument(
95 | "--versions",
96 | action="store_true",
97 | help="Output library version info, and exit immediately",
98 | )
99 | args = parser.parse_args()
100 |
101 | benchmarks = [
102 | ("json", None, JSON),
103 | ("ujson", "ujson", UJSON),
104 | ("orjson", "orjson", ORJSON),
105 | ("rapidjson", "python-rapidjson", RAPIDJSON),
106 | ("simdjson", "pysimdjson", SIMDJSON),
107 | ("msgspec", "msgspec", MSGSPEC),
108 | ("msgspec structs", None, MSGSPEC_STRUCTS),
109 | ]
110 |
111 | if args.versions:
112 | import importlib.metadata
113 |
114 | for _, lib, _ in benchmarks:
115 | if lib is not None:
116 | version = importlib.metadata.version(lib)
117 | print(f"- {lib}: {version}")
118 | sys.exit(0)
119 |
120 | with tempfile.NamedTemporaryFile() as f:
121 | # Download the repodata.json
122 | resp = requests.get(
123 | "https://conda.anaconda.org/conda-forge/noarch/repodata.json"
124 | )
125 | resp.raise_for_status()
126 | f.write(resp.content)
127 |
128 | # Run the benchmark for each library
129 | results = {}
130 | import ast
131 |
132 | for lib, _, setup in benchmarks:
133 | script = TEMPLATE.format(path=f.name, setup=setup)
134 | # We execute each script in a subprocess to isolate their memory usage
135 | output = subprocess.check_output([sys.executable, "-c", script])
136 | results[lib] = ast.literal_eval(output.decode())
137 |
138 | # Compose the results table
139 | best_mem, best_time = results["msgspec structs"]
140 | columns = (
141 | "",
142 | "memory (MiB)",
143 | "vs.",
144 | "time (ms)",
145 | "vs.",
146 | )
147 | rows = [
148 | (
149 | f"**{lib}**",
150 | f"{mem:.1f}",
151 | f"{mem / best_mem:.1f}x",
152 | f"{time:.1f}",
153 | f"{time / best_time:.1f}x",
154 | )
155 | for lib, (mem, time) in results.items()
156 | ]
157 | rows.sort(key=lambda x: float(x[1]))
158 | widths = tuple(
159 | max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns)
160 | )
161 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
162 | header = row_template % tuple(columns)
163 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
164 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
165 | parts = [bar, header, bar_underline]
166 | for r in rows:
167 | parts.append(row_template % r)
168 | parts.append(bar)
169 | print("\n".join(parts))
170 |
171 |
172 | if __name__ == "__main__":
173 | main()
174 |
--------------------------------------------------------------------------------
/benchmarks/bench_library_size.py:
--------------------------------------------------------------------------------
1 | """
2 | This benchmark compares the installed library size between msgspec and pydantic
3 | in a Python 3.10 x86 environment.
4 | """
5 |
6 | import io
7 | import zipfile
8 |
9 | import requests
10 |
11 |
12 | def get_latest_noarch_wheel_size(library):
13 | """Get the total uncompressed size of the latest noarch wheel"""
14 | resp = requests.get(f"https://pypi.org/pypi/{library}/json").json()
15 | version = resp["info"]["version"]
16 | files = {}
17 | for file_info in resp["releases"][version]:
18 | name = file_info["filename"]
19 | url = file_info["url"]
20 | if name.endswith(".whl"):
21 | files[name] = url
22 | if len(files) != 1:
23 | raise ValueError(
24 | f"Expected to find only 1 matching file for {library}, got {list(files)}"
25 | )
26 |
27 | url = list(files.values())[0]
28 |
29 | resp = requests.get(url)
30 | fil = io.BytesIO(resp.content)
31 | zfil = zipfile.ZipFile(fil)
32 | size = sum(f.file_size for f in zfil.filelist)
33 | return version, size
34 |
35 |
36 | def get_latest_manylinux_wheel_size(library):
37 | """Get the total uncompressed size of the latest Python 3.10 manylinux
38 | x86_64 wheel for the library"""
39 | resp = requests.get(f"https://pypi.org/pypi/{library}/json").json()
40 | version = resp["info"]["version"]
41 | files = {}
42 | for file_info in resp["releases"][version]:
43 | name = file_info["filename"]
44 | url = file_info["url"]
45 | if "310" in name and "manylinux_2_17_x86_64" in name and "pp73" not in name:
46 | files[name] = url
47 | if len(files) != 1:
48 | raise ValueError(
49 | f"Expected to find only 1 matching file for {library}, got {list(files)}"
50 | )
51 |
52 | url = list(files.values())[0]
53 |
54 | resp = requests.get(url)
55 | fil = io.BytesIO(resp.content)
56 | zfil = zipfile.ZipFile(fil)
57 | size = sum(f.file_size for f in zfil.filelist)
58 | return version, size
59 |
60 |
61 | def main():
62 | msgspec_version, msgspec_size = get_latest_manylinux_wheel_size("msgspec")
63 | pydantic_version, pydantic_size = get_latest_noarch_wheel_size("pydantic")
64 | _, pydantic_core_size = get_latest_manylinux_wheel_size("pydantic-core")
65 | _, typing_extensions_size = get_latest_noarch_wheel_size("typing-extensions")
66 | _, annotated_types_size = get_latest_noarch_wheel_size("annotated-types")
67 |
68 | data = [
69 | ("msgspec", msgspec_version, msgspec_size),
70 | (
71 | "pydantic",
72 | pydantic_version,
73 | pydantic_size
74 | + pydantic_core_size
75 | + typing_extensions_size
76 | + annotated_types_size,
77 | ),
78 | ]
79 | data.sort(key=lambda x: x[2])
80 | msgspec_size = next(s for l, _, s in data if l == "msgspec")
81 |
82 | columns = ("", "version", "size (MiB)", "vs. msgspec")
83 | rows = [
84 | (
85 | f"**{lib}**",
86 | version,
87 | f"{size / (1024 * 1024):.2f}",
88 | f"{size / msgspec_size:.2f}x",
89 | )
90 | for lib, version, size in data
91 | ]
92 |
93 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns))
94 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
95 | header = row_template % tuple(columns)
96 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
97 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
98 | parts = [bar, header, bar_underline]
99 | for r in rows:
100 | parts.append(row_template % r)
101 | parts.append(bar)
102 | print("\n".join(parts))
103 |
104 |
105 | if __name__ == "__main__":
106 | main()
107 |
--------------------------------------------------------------------------------
/benchmarks/bench_structs.py:
--------------------------------------------------------------------------------
1 | """This file benchmarks dataclass-like libraries. It measures the following
2 | operations:
3 |
4 | - Time to import a new class definition
5 | - Time to create an instance of that class
6 | - Time to compare an instance of that class with another instance.
7 | """
8 |
9 | from time import perf_counter
10 |
11 | order_template = """
12 | def __{method}__(self, other):
13 | if type(self) is not type(other):
14 | return NotImplemented
15 | return (
16 | (self.a, self.b, self.c, self.d, self.e) {op}
17 | (other.a, other.b, other.c, other.d, other.e)
18 | )
19 | """
20 |
21 |
22 | classes_template = """
23 | import reprlib
24 |
25 | class C{n}:
26 | def __init__(self, a, b, c, d, e):
27 | self.a = a
28 | self.b = b
29 | self.c = c
30 | self.d = d
31 | self.e = e
32 |
33 | @reprlib.recursive_repr()
34 | def __repr__(self):
35 | return (
36 | f"{{type(self).__name__}}(a={{self.a!r}}, b={{self.b!r}}, "
37 | f"c={{self.c!r}}, d={{self.d!r}}, e={{self.e!r}})"
38 | )
39 |
40 | def __eq__(self, other):
41 | if type(self) is not type(other):
42 | return NotImplemented
43 | return (
44 | self.a == other.a and
45 | self.b == other.b and
46 | self.c == other.c and
47 | self.d == other.d and
48 | self.e == other.e
49 | )
50 | """ + "".join(
51 | [
52 | order_template.format(method="lt", op="<"),
53 | order_template.format(method="le", op="<="),
54 | order_template.format(method="gt", op=">"),
55 | order_template.format(method="ge", op=">="),
56 | ]
57 | )
58 |
59 | attrs_template = """
60 | from attr import define
61 |
62 | @define(order=True)
63 | class C{n}:
64 | a: int
65 | b: int
66 | c: int
67 | d: int
68 | e: int
69 | """
70 |
71 | dataclasses_template = """
72 | from dataclasses import dataclass
73 |
74 | @dataclass(order=True)
75 | class C{n}:
76 | a: int
77 | b: int
78 | c: int
79 | d: int
80 | e: int
81 | """
82 |
83 | pydantic_template = """
84 | from pydantic import BaseModel
85 |
86 | class C{n}(BaseModel):
87 | a: int
88 | b: int
89 | c: int
90 | d: int
91 | e: int
92 | """
93 |
94 | msgspec_template = """
95 | from msgspec import Struct
96 |
97 | class C{n}(Struct, order=True):
98 | a: int
99 | b: int
100 | c: int
101 | d: int
102 | e: int
103 | """
104 |
105 |
106 | BENCHMARKS = [
107 | ("msgspec", "msgspec", msgspec_template),
108 | ("standard classes", None, classes_template),
109 | ("attrs", "attrs", attrs_template),
110 | ("dataclasses", None, dataclasses_template),
111 | ("pydantic", "pydantic", pydantic_template),
112 | ]
113 |
114 |
115 | def bench(name, template):
116 | N_classes = 100
117 |
118 | source = "\n".join(template.format(n=i) for i in range(N_classes))
119 | code_obj = compile(source, "__main__", "exec")
120 |
121 | # Benchmark defining new types
122 | N = 200
123 | start = perf_counter()
124 | for _ in range(N):
125 | ns = {}
126 | exec(code_obj, ns)
127 | end = perf_counter()
128 | define_time = ((end - start) / (N * N_classes)) * 1e6
129 |
130 | C = ns["C0"]
131 |
132 | # Benchmark creating new instances
133 | N = 1000
134 | M = 1000
135 | start = perf_counter()
136 | for _ in range(N):
137 | [C(a=i, b=i, c=i, d=i, e=i) for i in range(M)]
138 | end = perf_counter()
139 | init_time = ((end - start) / (N * M)) * 1e6
140 |
141 | # Benchmark equality
142 | N = 1000
143 | M = 1000
144 | val = M - 1
145 | needle = C(a=val, b=val, c=val, d=val, e=val)
146 | haystack = [C(a=i, b=i, c=i, d=i, e=i) for i in range(M)]
147 | start = perf_counter()
148 | for _ in range(N):
149 | haystack.index(needle)
150 | end = perf_counter()
151 | equality_time = ((end - start) / (N * M)) * 1e6
152 |
153 | # Benchmark order
154 | try:
155 | needle < needle
156 | except TypeError:
157 | order_time = None
158 | else:
159 | start = perf_counter()
160 | for _ in range(N):
161 | for obj in haystack:
162 | if obj >= needle:
163 | break
164 | end = perf_counter()
165 | order_time = ((end - start) / (N * M)) * 1e6
166 |
167 | return (name, define_time, init_time, equality_time, order_time)
168 |
169 |
170 | def format_table(results):
171 | columns = (
172 | "",
173 | "import (μs)",
174 | "create (μs)",
175 | "equality (μs)",
176 | "order (μs)",
177 | )
178 |
179 | def f(n):
180 | return "N/A" if n is None else f"{n:.2f}"
181 |
182 | rows = []
183 | for name, *times in results:
184 | rows.append((f"**{name}**", *(f(t) for t in times)))
185 |
186 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns))
187 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
188 | header = row_template % tuple(columns)
189 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
190 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
191 | parts = [bar, header, bar_underline]
192 | for r in rows:
193 | parts.append(row_template % r)
194 | parts.append(bar)
195 | return "\n".join(parts)
196 |
197 |
198 | def main():
199 | import argparse
200 |
201 | parser = argparse.ArgumentParser(description="Benchmark msgspec Struct operations")
202 | parser.add_argument(
203 | "--versions",
204 | action="store_true",
205 | help="Output library version info, and exit immediately",
206 | )
207 | args = parser.parse_args()
208 |
209 | if args.versions:
210 | import sys
211 | import importlib.metadata
212 |
213 | for _, lib, _ in BENCHMARKS:
214 | if lib is not None:
215 | version = importlib.metadata.version(lib)
216 | print(f"- {lib}: {version}")
217 | sys.exit(0)
218 |
219 | results = []
220 | for name, _, source in BENCHMARKS:
221 | results.append(bench(name, source))
222 |
223 | print(format_table(results))
224 |
225 |
226 | if __name__ == "__main__":
227 | main()
228 |
--------------------------------------------------------------------------------
/benchmarks/bench_validation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/benchmarks/bench_validation/__init__.py
--------------------------------------------------------------------------------
/benchmarks/bench_validation/__main__.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import tempfile
4 | from ..generate_data import make_filesystem_data
5 | import sys
6 | import subprocess
7 |
8 |
9 | LIBRARIES = ["msgspec", "mashumaro", "cattrs", "pydantic"]
10 |
11 |
12 | def parse_list(value):
13 | libs = [lib.strip() for lib in value.split(",")]
14 | for lib in libs:
15 | if lib not in LIBRARIES:
16 | print(f"{lib!r} is not a supported library, choose from {LIBRARIES}")
17 | sys.exit(1)
18 | return libs
19 |
20 |
21 | parser = argparse.ArgumentParser(
22 | description="Benchmark different python validation libraries"
23 | )
24 | parser.add_argument(
25 | "--json",
26 | action="store_true",
27 | help="Whether to output the results as json",
28 | )
29 | parser.add_argument(
30 | "-n",
31 | type=int,
32 | help="The number of objects in the generated data, defaults to 1000",
33 | default=1000,
34 | )
35 | parser.add_argument(
36 | "--libs",
37 | type=parse_list,
38 | help="A comma-separated list of libraries to benchmark. Defaults to all.",
39 | default=LIBRARIES,
40 | )
41 | parser.add_argument(
42 | "--versions",
43 | action="store_true",
44 | help="Output library version info, and exit immediately",
45 | )
46 | args = parser.parse_args()
47 |
48 | if args.versions:
49 | import importlib.metadata
50 |
51 | for lib in args.libs:
52 | version = importlib.metadata.version(lib)
53 | print(f"- {lib}: {version}")
54 | sys.exit(0)
55 |
56 |
57 | data = json.dumps(make_filesystem_data(args.n)).encode("utf-8")
58 |
59 | results = []
60 | with tempfile.NamedTemporaryFile() as f:
61 | f.write(data)
62 | f.flush()
63 |
64 | for lib in args.libs:
65 | res = subprocess.check_output(
66 | [sys.executable, "-m", "benchmarks.bench_validation.runner", lib, f.name]
67 | )
68 | results.append(json.loads(res))
69 |
70 | if args.json:
71 | for line in results:
72 | print(json.dumps(line))
73 | else:
74 | # Compose the results table
75 | results.sort(key=lambda row: row["encode"] + row["decode"])
76 | best_et = results[0]["encode"]
77 | best_dt = results[0]["decode"]
78 | best_tt = best_et + best_dt
79 | best_mem = results[0]["memory"]
80 |
81 | columns = (
82 | "",
83 | "encode (μs)",
84 | "vs.",
85 | "decode (μs)",
86 | "vs.",
87 | "total (μs)",
88 | "vs.",
89 | "memory (MiB)",
90 | "vs.",
91 | )
92 | rows = [
93 | (
94 | r["label"],
95 | f"{1_000_000 * r['encode']:.1f}",
96 | f"{r['encode'] / best_et:.1f}",
97 | f"{1_000_000 * r['decode']:.1f}",
98 | f"{r['decode'] / best_dt:.1f}",
99 | f"{1_000_000 * (r['encode'] + r['decode']):.1f}",
100 | f"{(r['encode'] + r['decode']) / best_tt:.1f}",
101 | f"{r['memory']:.1f}",
102 | f"{r['memory'] / best_mem:.1f}",
103 | )
104 | for r in results
105 | ]
106 | widths = tuple(max(max(map(len, x)), len(c)) for x, c in zip(zip(*rows), columns))
107 | row_template = ("|" + (" %%-%ds |" * len(columns))) % widths
108 | header = row_template % tuple(columns)
109 | bar_underline = "+%s+" % "+".join("=" * (w + 2) for w in widths)
110 | bar = "+%s+" % "+".join("-" * (w + 2) for w in widths)
111 | parts = [bar, header, bar_underline]
112 | for r in rows:
113 | parts.append(row_template % r)
114 | parts.append(bar)
115 | print("\n".join(parts))
116 |
--------------------------------------------------------------------------------
/benchmarks/bench_validation/bench_cattrs.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import enum
4 | import datetime
5 | from typing import Literal
6 |
7 | import attrs
8 | import cattrs.preconf.orjson
9 |
10 |
11 | class Permissions(enum.Enum):
12 | READ = "READ"
13 | WRITE = "WRITE"
14 | READ_WRITE = "READ_WRITE"
15 |
16 |
17 | @attrs.define(kw_only=True)
18 | class File:
19 | name: str
20 | created_by: str
21 | created_at: datetime.datetime
22 | updated_by: str | None = None
23 | updated_at: datetime.datetime | None = None
24 | nbytes: int
25 | permissions: Permissions
26 | type: Literal["file"] = "file"
27 |
28 |
29 | @attrs.define(kw_only=True)
30 | class Directory:
31 | name: str
32 | created_by: str
33 | created_at: datetime.datetime
34 | updated_by: str | None = None
35 | updated_at: datetime.datetime | None = None
36 | contents: list[File | Directory]
37 | type: Literal["directory"] = "directory"
38 |
39 |
40 | converter = cattrs.preconf.orjson.make_converter(omit_if_default=True)
41 |
42 |
43 | def encode(obj):
44 | return converter.dumps(obj)
45 |
46 |
47 | def decode(msg):
48 | return converter.loads(msg, Directory)
49 |
50 |
51 | label = "cattrs"
52 |
--------------------------------------------------------------------------------
/benchmarks/bench_validation/bench_mashumaro.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import enum
4 | import dataclasses
5 | import datetime
6 | from typing import Literal
7 |
8 | from mashumaro.mixins.orjson import DataClassORJSONMixin
9 |
10 |
11 | class Permissions(enum.Enum):
12 | READ = "READ"
13 | WRITE = "WRITE"
14 | READ_WRITE = "READ_WRITE"
15 |
16 |
17 | @dataclasses.dataclass(kw_only=True)
18 | class File(DataClassORJSONMixin):
19 | name: str
20 | created_by: str
21 | created_at: datetime.datetime
22 | updated_by: str | None = None
23 | updated_at: datetime.datetime | None = None
24 | nbytes: int
25 | permissions: Permissions
26 | type: Literal["file"] = "file"
27 |
28 | class Config:
29 | omit_default = True
30 | lazy_compilation = True
31 |
32 |
33 | @dataclasses.dataclass(kw_only=True)
34 | class Directory(DataClassORJSONMixin):
35 | name: str
36 | created_by: str
37 | created_at: datetime.datetime
38 | updated_by: str | None = None
39 | updated_at: datetime.datetime | None = None
40 | contents: list[File | Directory]
41 | type: Literal["directory"] = "directory"
42 |
43 | class Config:
44 | omit_default = True
45 | lazy_compilation = True
46 |
47 |
48 | label = "mashumaro"
49 |
50 |
51 | def encode(x):
52 | return x.to_json()
53 |
54 |
55 | def decode(msg):
56 | return Directory.from_json(msg)
57 |
--------------------------------------------------------------------------------
/benchmarks/bench_validation/bench_msgspec.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import enum
4 | import datetime
5 |
6 | import msgspec
7 |
8 |
9 | class Permissions(enum.Enum):
10 | READ = "READ"
11 | WRITE = "WRITE"
12 | READ_WRITE = "READ_WRITE"
13 |
14 |
15 | class File(msgspec.Struct, kw_only=True, omit_defaults=True, tag="file"):
16 | name: str
17 | created_by: str
18 | created_at: datetime.datetime
19 | updated_by: str | None = None
20 | updated_at: datetime.datetime | None = None
21 | nbytes: int
22 | permissions: Permissions
23 |
24 |
25 | class Directory(msgspec.Struct, kw_only=True, omit_defaults=True, tag="directory"):
26 | name: str
27 | created_by: str
28 | created_at: datetime.datetime
29 | updated_by: str | None = None
30 | updated_at: datetime.datetime | None = None
31 | contents: list[File | Directory]
32 |
33 |
34 | enc = msgspec.json.Encoder()
35 | dec = msgspec.json.Decoder(Directory)
36 |
37 | label = "msgspec"
38 | encode = enc.encode
39 | decode = dec.decode
40 |
--------------------------------------------------------------------------------
/benchmarks/bench_validation/bench_pydantic.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import enum
4 | import datetime
5 | from typing import Literal, Annotated
6 |
7 | import pydantic
8 |
9 |
10 | class Permissions(enum.Enum):
11 | READ = "READ"
12 | WRITE = "WRITE"
13 | READ_WRITE = "READ_WRITE"
14 |
15 |
16 | class File(pydantic.BaseModel):
17 | type: Literal["file"] = "file"
18 | name: str
19 | created_by: str
20 | created_at: datetime.datetime
21 | updated_by: str | None = None
22 | updated_at: datetime.datetime | None = None
23 | nbytes: int
24 | permissions: Permissions
25 |
26 |
27 | class Directory(pydantic.BaseModel):
28 | type: Literal["directory"] = "directory"
29 | name: str
30 | created_by: str
31 | created_at: datetime.datetime
32 | updated_by: str | None = None
33 | updated_at: datetime.datetime | None = None
34 | contents: list[Annotated[File | Directory, pydantic.Field(discriminator="type")]]
35 |
36 |
37 | if pydantic.__version__.startswith("2."):
38 | label = "pydantic v2"
39 |
40 | def encode(obj):
41 | return obj.model_dump_json(exclude_defaults=True)
42 |
43 | def decode(msg):
44 | return Directory.model_validate_json(msg)
45 |
46 | else:
47 | label = "pydantic v1"
48 |
49 | def encode(obj):
50 | return obj.json(exclude_defaults=True)
51 |
52 | def decode(msg):
53 | return Directory.parse_raw(msg)
54 |
--------------------------------------------------------------------------------
/benchmarks/bench_validation/runner.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import json
3 | import timeit
4 | import resource
5 | import sys
6 | import gc
7 |
8 | library, path = sys.argv[1:3]
9 |
10 | with open(path, "rb") as f:
11 | json_data = f.read()
12 |
13 | initial_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
14 |
15 | mod = importlib.import_module(f"benchmarks.bench_validation.bench_{library}")
16 |
17 | msg = mod.decode(json_data)
18 |
19 | gc.collect()
20 | timer = timeit.Timer("func(data)", setup="", globals={"func": mod.encode, "data": msg})
21 | n, t = timer.autorange()
22 | encode_time = t / n
23 |
24 | del msg
25 |
26 | gc.collect()
27 | timer = timeit.Timer(
28 | "func(data)", setup="", globals={"func": mod.decode, "data": json_data}
29 | )
30 | n, t = timer.autorange()
31 | decode_time = t / n
32 |
33 | max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
34 |
35 |
36 | report = json.dumps(
37 | {
38 | "label": mod.label,
39 | "encode": encode_time,
40 | "decode": decode_time,
41 | "memory": (max_rss - initial_rss) / 1024,
42 | }
43 | )
44 | print(report)
45 |
--------------------------------------------------------------------------------
/benchmarks/generate_data.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import random
3 | import string
4 |
5 |
6 | class Generator:
7 | UTC = datetime.timezone.utc
8 | DATE_2018 = datetime.datetime(2018, 1, 1, tzinfo=UTC)
9 | DATE_2023 = datetime.datetime(2023, 1, 1, tzinfo=UTC)
10 | PERMISSIONS = ["READ", "WRITE", "READ_WRITE"]
11 | NAMES = [
12 | "alice",
13 | "ben",
14 | "carol",
15 | "daniel",
16 | "esther",
17 | "franklin",
18 | "genevieve",
19 | "harold",
20 | "ilana",
21 | "jerome",
22 | "katelyn",
23 | "leonard",
24 | "monique",
25 | "nathan",
26 | "ora",
27 | "patrick",
28 | "quinn",
29 | "ronald",
30 | "stephanie",
31 | "thomas",
32 | "uma",
33 | "vince",
34 | "wendy",
35 | "xavier",
36 | "yitzchak",
37 | "zahra",
38 | ]
39 |
40 | def __init__(self, capacity, seed=42):
41 | self.capacity = capacity
42 | self.random = random.Random(seed)
43 |
44 | def randdt(self, min, max):
45 | ts = self.random.randint(min.timestamp(), max.timestamp())
46 | return datetime.datetime.fromtimestamp(ts).replace(tzinfo=self.UTC)
47 |
48 | def randstr(self, min=None, max=None):
49 | if max is not None:
50 | min = self.random.randint(min, max)
51 | return "".join(self.random.choices(string.ascii_letters, k=min))
52 |
53 | def make(self, is_dir):
54 | name = self.randstr(4, 30)
55 | created_by = self.random.choice(self.NAMES)
56 | created_at = self.randdt(self.DATE_2018, self.DATE_2023)
57 | data = {
58 | "type": "directory" if is_dir else "file",
59 | "name": name,
60 | "created_by": created_by,
61 | "created_at": created_at.isoformat(),
62 | }
63 | if self.random.random() > 0.75:
64 | updated_by = self.random.choice(self.NAMES)
65 | updated_at = self.randdt(created_at, self.DATE_2023)
66 | data.update(
67 | updated_by=updated_by,
68 | updated_at=updated_at.isoformat(),
69 | )
70 | if is_dir:
71 | n = min(self.random.randint(0, 30), self.capacity)
72 | self.capacity -= n
73 | data["contents"] = [self.make_node() for _ in range(n)]
74 | else:
75 | data["nbytes"] = self.random.randint(0, 1000000)
76 | data["permissions"] = self.random.choice(self.PERMISSIONS)
77 | return data
78 |
79 | def make_node(self):
80 | return self.make(self.random.random() > 0.8)
81 |
82 | def generate(self):
83 | self.capacity -= 1
84 | if self.capacity == 0:
85 | out = self.make(False)
86 | else:
87 | out = self.make(True)
88 | while self.capacity:
89 | self.capacity -= 1
90 | out["contents"].append(self.make_node())
91 | return out
92 |
93 |
94 | def make_filesystem_data(n):
95 | return Generator(n).generate()
96 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?= -W --keep-going
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/_static/anywidget.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/anywidget.png
--------------------------------------------------------------------------------
/docs/source/_static/bench-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/bench-1.png
--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | body[data-theme]:not([data-theme="dark"]) .highlight .gp, .highlight .gh {
2 | color: #808080;
3 | font-weight: normal;
4 | }
5 |
6 | body[data-theme]:not([data-theme="dark"]) .highlight .go {
7 | color: #203060;
8 | }
9 |
10 | body[data-theme="dark"] .highlight .gh, .highlight .gp {
11 | color: #aaaaaa;
12 | font-weight: normal;
13 | }
14 |
--------------------------------------------------------------------------------
/docs/source/_static/edgedb.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/docs/source/_static/esmerald.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/esmerald.png
--------------------------------------------------------------------------------
/docs/source/_static/litestar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/litestar.png
--------------------------------------------------------------------------------
/docs/source/_static/mosec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/mosec.png
--------------------------------------------------------------------------------
/docs/source/_static/msgspec-logo-dark.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
15 |
16 | msgspec
--------------------------------------------------------------------------------
/docs/source/_static/msgspec-logo-light.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
15 |
16 | msgspec
--------------------------------------------------------------------------------
/docs/source/_static/nautilus-trader.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/nautilus-trader.png
--------------------------------------------------------------------------------
/docs/source/_static/pioreactor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/pioreactor.png
--------------------------------------------------------------------------------
/docs/source/_static/sanic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/sanic.png
--------------------------------------------------------------------------------
/docs/source/_static/zero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/docs/source/_static/zero.png
--------------------------------------------------------------------------------
/docs/source/_templates/help.html:
--------------------------------------------------------------------------------
1 | Need help?
2 |
3 |
4 | Open an issue in the issue tracker.
5 |
6 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | API Docs
2 | ========
3 |
4 | Structs
5 | -------
6 |
7 | .. currentmodule:: msgspec
8 |
9 | .. autoclass:: Struct
10 |
11 | .. autofunction:: field
12 |
13 | .. autofunction:: defstruct
14 |
15 | .. autofunction:: msgspec.structs.replace
16 |
17 | .. autofunction:: msgspec.structs.asdict
18 |
19 | .. autofunction:: msgspec.structs.astuple
20 |
21 | .. autofunction:: msgspec.structs.force_setattr
22 |
23 | .. autofunction:: msgspec.structs.fields
24 |
25 | .. autoclass:: msgspec.structs.FieldInfo
26 |
27 | .. autoclass:: msgspec.structs.StructConfig
28 |
29 | .. autodata:: NODEFAULT
30 | :no-value:
31 |
32 | Meta
33 | ----
34 |
35 | .. autoclass:: Meta
36 | :members:
37 |
38 |
39 | Raw
40 | ---
41 |
42 | .. currentmodule:: msgspec
43 |
44 | .. autoclass:: Raw
45 | :members:
46 |
47 | Unset
48 | -----
49 |
50 | .. autodata:: UNSET
51 | :no-value:
52 |
53 | .. autoclass:: UnsetType
54 |
55 |
56 | JSON
57 | ----
58 |
59 | .. currentmodule:: msgspec.json
60 |
61 | .. autoclass:: Encoder
62 | :members: encode, encode_lines, encode_into
63 |
64 | .. autoclass:: Decoder
65 | :members: decode, decode_lines
66 |
67 | .. autofunction:: encode
68 |
69 | .. autofunction:: decode
70 |
71 | .. autofunction:: format
72 |
73 |
74 | MessagePack
75 | -----------
76 |
77 | .. currentmodule:: msgspec.msgpack
78 |
79 | .. autoclass:: Encoder
80 | :members: encode, encode_into
81 |
82 | .. autoclass:: Decoder
83 | :members: decode
84 |
85 | .. autoclass:: Ext
86 | :members:
87 |
88 | .. autofunction:: encode
89 |
90 | .. autofunction:: decode
91 |
92 |
93 | YAML
94 | ----
95 |
96 | .. currentmodule:: msgspec.yaml
97 |
98 | .. autofunction:: encode
99 |
100 | .. autofunction:: decode
101 |
102 |
103 | TOML
104 | ----
105 |
106 | .. currentmodule:: msgspec.toml
107 |
108 | .. autofunction:: encode
109 |
110 | .. autofunction:: decode
111 |
112 |
113 | JSON Schema
114 | -----------
115 |
116 | .. currentmodule:: msgspec.json
117 |
118 | .. autofunction:: schema
119 |
120 | .. autofunction:: schema_components
121 |
122 |
123 | .. _inspect-api:
124 |
125 |
126 | Converters
127 | ----------
128 |
129 | .. currentmodule:: msgspec
130 |
131 | .. autofunction:: convert
132 |
133 | .. autofunction:: to_builtins
134 |
135 |
136 | Inspect
137 | -------
138 |
139 | .. currentmodule:: msgspec.inspect
140 |
141 | .. autofunction:: type_info
142 | .. autofunction:: multi_type_info
143 | .. autoclass:: Type
144 | .. autoclass:: Metadata
145 | .. autoclass:: AnyType
146 | .. autoclass:: NoneType
147 | .. autoclass:: BoolType
148 | .. autoclass:: IntType
149 | .. autoclass:: FloatType
150 | .. autoclass:: StrType
151 | .. autoclass:: BytesType
152 | .. autoclass:: ByteArrayType
153 | .. autoclass:: MemoryViewType
154 | .. autoclass:: DateTimeType
155 | .. autoclass:: TimeType
156 | .. autoclass:: DateType
157 | .. autoclass:: TimeDeltaType
158 | .. autoclass:: UUIDType
159 | .. autoclass:: DecimalType
160 | .. autoclass:: ExtType
161 | .. autoclass:: RawType
162 | .. autoclass:: EnumType
163 | .. autoclass:: LiteralType
164 | .. autoclass:: CustomType
165 | .. autoclass:: UnionType
166 | :members:
167 | .. autoclass:: CollectionType
168 | .. autoclass:: ListType
169 | .. autoclass:: SetType
170 | .. autoclass:: FrozenSetType
171 | .. autoclass:: VarTupleType
172 | .. autoclass:: TupleType
173 | .. autoclass:: DictType
174 | .. autoclass:: Field
175 | .. autoclass:: TypedDictType
176 | .. autoclass:: NamedTupleType
177 | .. autoclass:: DataclassType
178 | .. autoclass:: StructType
179 |
180 |
181 | Exceptions
182 | ----------
183 |
184 | .. currentmodule:: msgspec
185 |
186 | .. autoexception:: MsgspecError
187 | :show-inheritance:
188 |
189 | .. autoexception:: EncodeError
190 | :show-inheritance:
191 |
192 | .. autoexception:: DecodeError
193 | :show-inheritance:
194 |
195 | .. autoexception:: ValidationError
196 | :show-inheritance:
197 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # We want to document both the UNSET singleton, and the UnsetType class, but we
2 | # don't want them to have the same docstring. I couldn't find an easy way to
3 | # do this in sphinx. For now, we patch the UnsetType object when building types
4 | # to override the docstring handling.
5 | try:
6 | import msgspec
7 |
8 | class UnsetType:
9 | """The type of `UNSET`.
10 |
11 | See Also
12 | --------
13 | UNSET
14 | """
15 |
16 | msgspec.UnsetType = UnsetType
17 | except ImportError:
18 | pass
19 |
20 |
21 | project = "msgspec"
22 | copyright = "Jim Crist-Harif"
23 | author = "Jim Crist-Harif"
24 |
25 | GITHUB_LOGO = """
26 |
27 |
28 |
29 | """.strip()
30 |
31 | html_theme = "furo"
32 | html_title = ""
33 | templates_path = ["_templates"]
34 | html_static_path = ["_static"]
35 | html_css_files = ["custom.css"]
36 | pygments_style = "default"
37 |
38 | _link_color_light = "#024bb0"
39 | _link_color_dark = "#5192d2"
40 |
41 | html_theme_options = {
42 | "light_logo": "msgspec-logo-light.svg",
43 | "dark_logo": "msgspec-logo-dark.svg",
44 | "light_css_variables": {
45 | "color-brand-primary": "black",
46 | "color-brand-content": _link_color_light,
47 | "color-foreground-muted": "#808080",
48 | "color-highlight-on-target": "inherit",
49 | "color-highlighted-background": "#ffffcc",
50 | "color-sidebar-link-text": "black",
51 | "color-sidebar-link-text--top-level": "black",
52 | "color-link": _link_color_light,
53 | "color-link--hover": _link_color_light,
54 | "color-link-underline": "transparent",
55 | "color-link-underline--hover": _link_color_light,
56 | },
57 | "dark_css_variables": {
58 | "color-brand-primary": "#ffffff",
59 | "color-brand-content": _link_color_dark,
60 | "color-highlight-on-target": "inherit",
61 | "color-highlighted-background": "#333300",
62 | "color-sidebar-link-text": "#ffffffcc",
63 | "color-sidebar-link-text--top-level": "#ffffffcc",
64 | "color-link": _link_color_dark,
65 | "color-link--hover": _link_color_dark,
66 | "color-link-underline": "transparent",
67 | "color-link-underline--hover": _link_color_dark,
68 | },
69 | "sidebar_hide_name": True,
70 | "footer_icons": [
71 | {
72 | "name": "GitHub",
73 | "url": "https://github.com/jcrist/msgspec",
74 | "html": GITHUB_LOGO,
75 | "class": "",
76 | },
77 | ],
78 | }
79 |
80 | extensions = [
81 | "sphinx.ext.autodoc",
82 | "sphinx.ext.napoleon",
83 | "sphinx.ext.extlinks",
84 | "sphinx.ext.intersphinx",
85 | "sphinx_copybutton",
86 | "sphinx_design",
87 | "IPython.sphinxext.ipython_console_highlighting",
88 | ]
89 | intersphinx_mapping = {
90 | "python": ("https://docs.python.org/3", None),
91 | "attrs": ("https://www.attrs.org/en/stable/", None),
92 | }
93 | autodoc_typehints = "none"
94 | napoleon_numpy_docstring = True
95 | napoleon_google_docstring = False
96 | napoleon_use_rtype = False
97 | napoleon_custom_sections = [("Configuration", "params_style")]
98 | default_role = "obj"
99 | extlinks = {
100 | "issue": ("https://github.com/jcrist/msgspec/issues/%s", "Issue #%s"),
101 | "pr": ("https://github.com/jcrist/msgspec/pull/%s", "PR #%s"),
102 | }
103 | copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: "
104 | copybutton_prompt_is_regexp = True
105 |
--------------------------------------------------------------------------------
/docs/source/converters.rst:
--------------------------------------------------------------------------------
1 | Converters
2 | ==========
3 |
4 | .. currentmodule:: msgspec
5 |
6 | ``msgspec`` provides builtin support for several common protocols (``json``,
7 | ``msgpack``, ``yaml``, and ``toml``). Support for additional protocols may be
8 | added by combining a serialization library with msgspec's *converter
9 | functions*: `msgspec.to_builtins` and `msgspec.convert`.
10 |
11 | - `msgspec.to_builtins`: takes an object composed of any :doc:`supported type
12 | ` and converts it into one composed of only simple builtin
13 | types typically supported by Python serialization libraries.
14 |
15 | - `msgspec.convert`: takes an object composed of any :doc:`supported type
16 | `, and converts it to match a specified schema (validating
17 | along the way). If the conversion fails due to a schema mismatch, a nice
18 | error message is raised.
19 |
20 | These functions are designed to be paired with a Python serialization library as
21 | pre/post processors for typical ``dumps`` and ``loads`` functions.
22 |
23 | .. image:: _static/converters-light.svg
24 | :align: center
25 | :class: only-light
26 |
27 | .. image:: _static/converters-dark.svg
28 | :align: center
29 | :class: only-dark
30 |
31 | For example, if ``msgspec`` didn't already provide support for ``json``, you
32 | could add support by wrapping the standard library's `json` module as follows:
33 |
34 | .. code-block:: ipython
35 |
36 | In [1]: import json
37 | ...: from typing import Any
38 | ...:
39 | ...: import msgspec
40 |
41 | In [2]: def encode(obj):
42 | ...: return json.dumps(msgspec.to_builtins(obj))
43 |
44 | In [3]: def decode(msg, type=Any):
45 | ...: return msgspec.convert(json.loads(msg), type=type)
46 |
47 | In [4]: class Point(msgspec.Struct):
48 | ...: x: int
49 | ...: y: int
50 |
51 | In [5]: x = Point(1, 2)
52 |
53 | In [6]: msg = encode(x) # Encoding a high-level type works
54 |
55 | In [7]: msg
56 | '{"x": 1, "y": 2}'
57 |
58 | In [8]: decode(msg, type=Point) # Decoding a high-level type works
59 | Point(x=1, y=2)
60 |
61 | In [9]: decode('{"x": "oops", "y": 2}', type=Point) # Schema mismatches error
62 | ---------------------------------------------------------------------------
63 | ValidationError Traceback (most recent call last)
64 | Cell In[9], line 1
65 | ----> 1 decode('{"x": "oops", "y": 2}', type=Point) # Schema mismatches error
66 |
67 | Cell In[3], line 2, in decode(msg, type)
68 | 1 def decode(msg, type=Any):
69 | ---> 2 return msgspec.convert(json.loads(msg), type=type)
70 |
71 | ValidationError: Expected `int`, got `str` - at `$.x`
72 |
73 |
74 | Since all protocols are different, `to_builtins` and `convert` have a few
75 | configuration options (a short combined example follows this list):
76 |
77 | - ``builtin_types``: an iterable of additional types to treat as builtin types,
78 | beyond the standard `dict`, `list`, `tuple`, `set`, `frozenset`, `str`,
79 | `int`, `float`, `bool`, and `None`.
80 |
81 | - ``str_keys``: whether the wrapped protocol only supports strings for object
82 | keys, rather than any hashable type.
83 |
84 | - ``strict``: `convert` only. Whether type coercion rules should be strict.
85 | Defaults to True; setting it to False enables a wider set of coercion rules from
86 | string to non-string types for all values. Among other uses, this may be used
87 | to handle completely untyped protocols like URL querystrings, where only
88 | string values exist. See :ref:`strict-vs-lax` for more information.
89 |
90 | - ``from_attributes``: `convert` only. If True, input objects may be coerced
91 | to ``Struct``/``dataclass``/``attrs`` types by extracting attributes from the
92 | input matching fields in the output type. One use case is converting database
93 | query results (ORM or otherwise) to msgspec structured types. The default is
94 | False.
95 |
96 | - ``enc_hook``/``dec_hook``: the standard keyword arguments used for
97 | :doc:`extending` msgspec to support additional types.
98 |
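For a rough feel of how these options combine, here's a minimal sketch calling
`convert` directly (the ``User`` and ``Row`` classes and values are made up for
illustration):

.. code-block:: python

    import msgspec

    class User(msgspec.Struct):
        id: int
        name: str

    # strict=False enables lax coercions (e.g. str -> int), useful for
    # untyped sources like URL querystrings
    msgspec.convert({"id": "123", "name": "alice"}, User, strict=False)
    # User(id=123, name='alice')

    class Row:
        # hypothetical stand-in for an ORM query result
        id = 1
        name = "bob"

    # from_attributes=True reads attributes off arbitrary objects
    # instead of requiring a mapping
    msgspec.convert(Row(), User, from_attributes=True)
    # User(id=1, name='bob')
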
99 | -----
100 |
101 | Let's take a look at another protocol, TOML_. This protocol:
102 |
103 | - Includes native support for `datetime.datetime`, `datetime.date`, and
104 | `datetime.time` types.
105 | - Only supports strings for object keys.
106 |
107 | If ``msgspec`` didn't already provide support for ``toml``, you could add
108 | support by wrapping the standard library's `tomllib` module as follows:
109 |
110 | .. code-block:: python
111 |
112 | import datetime
113 | import tomllib
114 | from typing import Any
115 |
116 | import msgspec
117 |
118 | def decode(msg, *, type=Any, dec_hook=None):
119 | return msgspec.convert(
120 | tomllib.loads(msg),
121 | type,
122 | builtin_types=(datetime.datetime, datetime.date, datetime.time),
123 | str_keys=True,
124 | dec_hook=dec_hook,
125 | )
126 |
127 | ``msgspec`` uses these APIs to implement ``toml`` and ``yaml`` support,
128 | wrapping external serialization libraries:
129 |
130 | - ``msgspec.toml`` (`code `__)
131 |
132 | - ``msgspec.yaml`` (`code `__)
133 |
134 | The implementation in ``msgspec.toml`` is *almost* identical to the one above,
135 | with some additional code for error handling.
136 |
137 |
138 | .. _TOML: https://toml.io
139 |
--------------------------------------------------------------------------------
/docs/source/examples/asyncio-kv.rst:
--------------------------------------------------------------------------------
1 | Asyncio TCP Key-Value Server
2 | ============================
3 |
4 | This example demonstrates writing a small TCP server and client using `asyncio`
5 | and ``msgspec``.
6 |
7 | The server defines a few operations:
8 |
9 | - ``get(key: str) -> str | None``: get the value for a single key from the
10 | store if it exists.
11 | - ``put(key: str, val: str) -> None``: add a new key-value pair to the store.
12 | - ``delete(key: str) -> None``: delete a key-value pair from the store if it exists.
13 | - ``list_keys() -> list[str]``: list all the keys currently set in the store.
14 |
15 | Each operation has a corresponding request type defined as a :doc:`Struct <../structs>`
16 | type. Note that these structs are :ref:`tagged ` so they
17 | can be part of a ``Union`` of all request types the server handles.
18 |
19 | `msgspec.msgpack` is used to handle the encoding/decoding of the various
20 | messages. Each message is prefixed with its length
21 | (`Length-prefix framing
22 | `__)
23 | to make it easier to efficiently determine message boundaries.
24 |
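As a rough sketch of what length-prefix framing can look like with
`msgspec.msgpack` (these helpers are illustrative, not the exact ones used in
the example below):

.. code-block:: python

    import struct
    import msgspec

    encoder = msgspec.msgpack.Encoder()

    def frame(obj) -> bytes:
        """Prefix the encoded message with its 4-byte big-endian length."""
        payload = encoder.encode(obj)
        return struct.pack(">I", len(payload)) + payload

    async def read_frame(reader):
        """Read one length-prefixed message from an asyncio StreamReader."""
        prefix = await reader.readexactly(4)
        (length,) = struct.unpack(">I", prefix)
        return await reader.readexactly(length)
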
25 | The full example source can be found `here
26 | `__.
27 |
28 | .. literalinclude:: ../../../examples/asyncio-kv/kv.py
29 | :language: python
30 |
31 |
32 | An example usage session:
33 |
34 | **Server**
35 |
36 | .. code-block:: shell
37 |
38 | $ python kv.py
39 | Serving on tcp://127.0.0.1:8888...
40 | Connection opened
41 | Connection closed
42 |
43 |
44 | **Client**
45 |
46 | .. code-block:: ipython3
47 |
48 | In [1]: from kv import Client
49 |
50 | In [2]: client = await Client.create()
51 |
52 | In [3]: await client.put("foo", "bar")
53 |
54 | In [4]: await client.put("fizz", "buzz")
55 |
56 | In [5]: await client.get("foo")
57 | Out[5]: 'bar'
58 |
59 | In [6]: await client.list_keys()
60 | Out[6]: ['fizz', 'foo']
61 |
62 | In [7]: await client.delete("fizz")
63 |
64 | In [8]: await client.list_keys()
65 | Out[8]: ['foo']
66 |
--------------------------------------------------------------------------------
/docs/source/examples/conda-repodata.rst:
--------------------------------------------------------------------------------
1 | Conda Repodata
2 | ==============
3 |
4 | This example benchmarks using different JSON libraries to parse and query the
5 | `current_repodata.json`_ file from conda-forge_. This is a medium-sized (~14
6 | MiB) JSON file containing nested metadata about every package on conda-forge.
7 |
8 | The following libraries are compared:
9 |
10 | - json_
11 | - ujson_
12 | - orjson_
13 | - simdjson_
14 | - msgspec_
15 |
16 | This benchmark measures how long it takes each library to decode the
17 | ``current_repodata.json`` file, extract the name and size of each package, and
18 | determine the top 10 packages by file size.
19 |
20 | **Results**
21 |
22 | .. raw:: html
23 |
24 |
25 |
26 | .. code-block:: text
27 |
28 | $ python query_repodata.py
29 | json: 139.14 ms
30 | ujson: 124.91 ms
31 | orjson: 91.69 ms
32 | simdjson: 66.40 ms
33 | msgspec: 25.73 ms
34 |
35 |
36 | **Commentary**
37 |
38 | - All of these are fairly quick; library choice likely doesn't matter at all
39 | for simple scripts on small- to medium-sized data.
40 |
41 | - While ``orjson`` is faster than ``json``, the difference between them is only
42 | ~30%. Creating Python objects dominates the execution time of any well
43 | optimized decoding library. How fast the underlying JSON parser is matters,
44 | but JSON optimizations can only get you so far if you're still creating a new
45 | Python object for every node in the JSON object.
46 |
47 | - ``simdjson`` is much more performant. This is partly due to the SIMD
48 | optimizations it uses, but mostly it's due to not creating so many Python
49 | objects. ``simdjson`` first parses a JSON blob into a proxy object. It then
50 | lazily creates Python objects as needed as different fields are accessed.
51 | This means you only pay the cost of creating Python objects for the fields
52 | you use; a query that only accesses a few fields runs much faster since not
53 | as many Python objects are created. The downside is every attribute access
54 | results in some indirection as new objects are created
55 |
56 | - ``msgspec`` is the fastest option tested. It relies on defining a known
57 | schema beforehand. We don't define the schema for the entire structure, only
58 | for the fields we access. Only fields that are part of the schema are
59 | decoded, with a new Python object created for each. This allocates the same
60 | number of objects as ``simdjson``, but does it all at once, avoiding
61 | indirection costs later on during use. See :ref:`this performance tip
62 | ` for more information. A rough sketch of this approach is shown below.
63 |
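As a sketch of the partial-schema approach described above (the field and key
names here are based on the repodata layout, not copied from the example
source below):

.. code-block:: python

    import msgspec

    class Package(msgspec.Struct):
        # only the fields the query touches; all other keys are skipped
        name: str
        size: int

    class RepoData(msgspec.Struct):
        packages: dict[str, Package]

    decoder = msgspec.json.Decoder(RepoData)

    def top_10_by_size(raw: bytes) -> list[Package]:
        repodata = decoder.decode(raw)
        return sorted(
            repodata.packages.values(), key=lambda p: p.size, reverse=True
        )[:10]
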
64 | **Source**
65 |
66 | The full example source can be found `here
67 | `__.
68 |
69 | .. literalinclude:: ../../../examples/conda-repodata/query_repodata.py
70 | :language: python
71 |
72 | .. raw:: html
73 |
74 |
75 |
76 |
77 |
109 |
110 |
111 | .. _conda-forge: https://conda-forge.org/
112 | .. _current_repodata.json: https://conda.anaconda.org/conda-forge/noarch/current_repodata.json
113 | .. _json: https://docs.python.org/3/library/json.html
114 | .. _ujson: https://github.com/ultrajson/ultrajson
115 | .. _msgspec: https://jcristharif.com/msgspec/
116 | .. _orjson: https://github.com/ijl/orjson
117 | .. _simdjson: https://github.com/TkTech/pysimdjson
118 |
--------------------------------------------------------------------------------
/docs/source/examples/geojson.rst:
--------------------------------------------------------------------------------
1 | GeoJSON
2 | =======
3 |
4 | `GeoJSON `__ is a popular format for encoding geographic
5 | data. Its specification_ describes nine different types a message may take
6 | (seven "geometry" types, plus two "feature" types). Here we provide one way of
7 | implementing that specification using ``msgspec`` to handle the parsing and
8 | validation.
9 |
10 | The ``loads`` and ``dumps`` methods defined below work similarly to the
11 | standard library's ``json.loads``/``json.dumps``, but:
12 |
13 | - Will result in high-level `msgspec.Struct` objects representing GeoJSON types
14 | - Will error nicely if a field is missing or the wrong type
15 | - Will fill in default values for optional fields
16 | - Decodes and encodes *significantly faster* than the `json` module (as well as
17 | most other ``json`` implementations in Python).
18 |
19 | This example makes use of `msgspec.Struct` types to define the different GeoJSON
20 | types, and :ref:`struct-tagged-unions` to differentiate between them. See the
21 | relevant docs for more information.
22 |
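A minimal sketch of the tagged-union pattern (simplified to two geometry
types; the real example defines all nine):

.. code-block:: python

    from typing import Union

    import msgspec

    class Point(msgspec.Struct, tag="Point"):
        coordinates: tuple[float, float]

    class LineString(msgspec.Struct, tag="LineString"):
        coordinates: list[tuple[float, float]]

    # The default tag field is "type", matching GeoJSON's "type" key, so the
    # decoder picks the right struct based on that field.
    Geometry = Union[Point, LineString]

    msgspec.json.decode(
        b'{"type": "Point", "coordinates": [1.0, 2.0]}', type=Geometry
    )
    # Point(coordinates=(1.0, 2.0))
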
23 | The full example source can be found `here
24 | `__.
25 |
26 | .. literalinclude:: ../../../examples/geojson/msgspec_geojson.py
27 | :language: python
28 |
29 |
30 | Here we use the ``loads`` method defined above to read some `example GeoJSON`_.
31 |
32 | .. code-block:: ipython3
33 |
34 | In [1]: import msgspec_geojson
35 |
36 | In [2]: with open("canada.json", "rb") as f:
37 | ...: data = f.read()
38 |
39 | In [3]: canada = msgspec_geojson.loads(data)
40 |
41 | In [4]: type(canada) # loaded as high-level, validated object
42 | Out[4]: msgspec_geojson.FeatureCollection
43 |
44 | In [5]: canada.features[0].properties
45 | Out[5]: {'name': 'Canada'}
46 |
47 | Comparing performance to:
48 |
49 | - orjson_
50 | - `json`
51 | - geojson_ (another validating Python implementation)
52 |
53 | .. code-block:: ipython3
54 |
55 | In [6]: %timeit msgspec_geojson.loads(data) # benchmark msgspec
56 | 6.15 ms ± 13.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
57 |
58 | In [7]: %timeit orjson.loads(data) # benchmark orjson
59 | 8.67 ms ± 20.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
60 |
61 | In [8]: %timeit json.loads(data) # benchmark json
62 | 27.6 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
63 |
64 | In [9]: %timeit geojson.loads(data) # benchmark geojson
65 | 93.9 ms ± 88.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
66 |
67 |
68 | This shows that the readable ``msgspec`` implementation above is 1.4x faster
69 | than `orjson` (on this data), while also ensuring the loaded data is valid
70 | GeoJSON. Compared to geojson_ (another validating geojson library for python),
71 | loading the data using ``msgspec`` was **15.3x faster**.
72 |
73 | .. _specification: https://datatracker.ietf.org/doc/html/rfc7946
74 | .. _example GeoJSON: https://github.com/jcrist/msgspec/blob/main/examples/geojson/canada.json
75 | .. _orjson: https://github.com/ijl/orjson
76 | .. _geojson: https://github.com/jazzband/geojson
77 |
--------------------------------------------------------------------------------
/docs/source/examples/index.rst:
--------------------------------------------------------------------------------
1 | Examples
2 | ========
3 |
4 | Here we provide a few examples using ``msgspec`` to accomplish various tasks.
5 |
6 | .. toctree::
7 | :maxdepth: 1
8 |
9 | geojson.rst
10 | asyncio-kv.rst
11 | conda-repodata.rst
12 | pyproject-toml.rst
13 | edgedb.rst
14 |
--------------------------------------------------------------------------------
/docs/source/examples/pyproject-toml.rst:
--------------------------------------------------------------------------------
1 | Parsing ``pyproject.toml``
2 | ==========================
3 |
4 | `PEP 518`_ defined a new ``pyproject.toml`` configuration file that Python projects
5 | can use for configuring:
6 |
7 | - Metadata (name, version, ...)
8 | - Dependencies
9 | - Build systems
10 | - Additional development tools (black_, mypy_, pytest_, ... all support
11 | ``pyproject.toml`` files for configuration).
12 |
13 | The format was defined in a series of Python Enhancement Proposals (PEPs),
14 | which also serve as the main documentation for the file schema.
15 |
16 | - `PEP 517`_: A build-system independent format for source trees
17 | - `PEP 518`_: Specifying minimum build system requirements for Python projects
18 | - `PEP 621`_: Storing project metadata in pyproject.toml
19 |
20 | Here we define a msgspec schema for parsing and validating a ``pyproject.toml``
21 | file. This includes full schema definitions for all fields in the
22 | ``build-system`` and ``project`` tables, as well as an untyped table under
23 | ``tool``.
24 |
25 | The full example source can be found `here
26 | `__.
27 |
28 | .. literalinclude:: ../../../examples/pyproject-toml/pyproject.py
29 | :language: python
30 |
31 | Here we use it to load the `pyproject.toml for Starlette
32 | `__:
33 |
34 | .. code-block:: ipython3
35 |
36 | In [1]: import pyproject
37 |
38 | In [2]: import urllib.request
39 |
40 | In [3]: url = "https://raw.githubusercontent.com/encode/starlette/master/pyproject.toml"
41 |
42 | In [4]: with urllib.request.urlopen(url) as f:
43 | ...: data = f.read()
44 |
45 | In [5]: result = pyproject.decode(data) # decode the pyproject.toml
46 |
47 | In [6]: result.build_system
48 | Out[6]: BuildSystem(requires=['hatchling'], build_backend='hatchling.build', backend_path=[])
49 |
50 | In [7]: result.project.name
51 | Out[7]: 'starlette'
52 |
53 | Note that this only validates that fields are of the proper type. It doesn't
54 | check:
55 |
56 | - Whether strings like URLs or `dependency specifiers`_ are valid. Some of
57 | these could be handled using msgspec's existing :doc:`../constraints` system,
58 | but not all of them.
59 | - Mutually exclusive field restrictions (for example, you can't set both
60 | ``project.license.file`` and ``project.license.text``). ``msgspec`` currently
61 | has no way of declaring these restrictions.
62 |
63 | Even with these caveats, the schemas here are still useful:
64 |
65 | - Since ``forbid_unknown_fields=True`` is configured, any extra fields will
66 | raise a nice error message. This is very useful for catching typos in
67 | configuration files, as the misspelled field names won't be silently ignored.
68 | - Type errors for fields will also be caught, with a nice error raised.
69 | - Any downstream consumers of ``decode`` have a nice high-level object to work
70 | with, complete with type annotations. This plays well with tab-completion and
71 | tools like mypy_ or pyright_, improving usability.
72 |
73 | For example, here's an invalid ``pyproject.toml``.
74 |
75 | .. code-block:: toml
76 |
77 | [build-system]
78 | requires = "hatchling"
79 | build-backend = "hatchling.build"
80 |
81 | [project]
82 | name = "myproject"
83 | version = "0.1.0"
84 | description = "a super great library"
85 | authors = [
86 | {name = "alice shmalice", email = "alice@company.com"}
87 | ]
88 |
89 | Can you spot the error? Using the schemas defined above, ``msgspec`` can
90 | detect schema issues like this, and raise a nice error message. In this case
91 | the issue is that ``build-system.requires`` should be an array of strings, not
92 | a single string:
93 |
94 | .. code-block:: ipython
95 |
96 | In [1]: import pyproject
97 |
98 | In [2]: with open("pyproject.toml", "rb") as f:
99 | ...: invalid = f.read()
100 |
101 | In [3]: pyproject.decode(invalid)
102 | ---------------------------------------------------------------------------
103 | ValidationError Traceback (most recent call last)
104 | Cell In [3], line 1
105 | ----> 1 pyproject.decode(invalid)
106 | ValidationError: Expected `array`, got `str` - at `$.build-system.requires`
107 |
108 |
109 | .. _PEP 517: https://peps.python.org/pep-0517/
110 | .. _PEP 518: https://peps.python.org/pep-0518/
111 | .. _PEP 621: https://peps.python.org/pep-0621/
112 | .. _black: https://black.readthedocs.io
113 | .. _mypy: https://mypy.readthedocs.io
114 | .. _pyright: https://github.com/microsoft/pyright
115 | .. _pytest: https://docs.pytest.org
116 | .. _dependency specifiers: https://packaging.python.org/en/latest/specifications/dependency-specifiers/
117 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | msgspec
2 | =======
3 |
4 | ``msgspec`` is a *fast* serialization and validation library, with builtin
5 | support for JSON_, MessagePack_, YAML_, and TOML_. It features:
6 |
7 | - 🚀 **High performance encoders/decoders** for common protocols. The JSON and
8 | MessagePack implementations regularly :doc:`benchmark ` as the
9 | fastest options for Python.
10 |
11 | - 🎉 **Support for a wide variety of Python types**. Additional types may
12 | be supported through :doc:`extensions `.
13 |
14 | - 🔍 **Zero-cost schema validation** using familiar Python type annotations.
15 | In :doc:`benchmarks ` ``msgspec`` decodes *and* validates JSON
16 | faster than orjson_ can decode it alone.
17 |
18 | - ✨ **A speedy Struct type** for representing structured data. If you already
19 | use dataclasses_ or attrs_, :doc:`structs` should feel familiar. However,
20 | they're :ref:`5-60x ` faster for common operations.
21 |
22 | All of this is included in a :ref:`lightweight library
23 | ` with no required dependencies.
24 |
25 | -----
26 |
27 | ``msgspec`` may be used for serialization alone, as a faster JSON or
28 | MessagePack library. For the greatest benefit though, we recommend using
29 | ``msgspec`` to handle the full serialization & validation workflow:
30 |
31 | **Define** your message schemas using standard Python type annotations.
32 |
33 | .. code-block:: python
34 |
35 | >>> import msgspec
36 |
37 | >>> class User(msgspec.Struct):
38 | ... """A new type describing a User"""
39 | ... name: str
40 | ... groups: set[str] = set()
41 | ... email: str | None = None
42 |
43 | **Encode** messages as JSON, or one of the many other supported protocols.
44 |
45 | .. code-block:: python
46 |
47 | >>> alice = User("alice", groups={"admin", "engineering"})
48 |
49 | >>> alice
50 | User(name='alice', groups={'admin', 'engineering'}, email=None)
51 |
52 | >>> msg = msgspec.json.encode(alice)
53 |
54 | >>> msg
55 | b'{"name":"alice","groups":["admin","engineering"],"email":null}'
56 |
57 | **Decode** messages back into Python objects, with optional schema validation.
58 |
59 | .. code-block:: python
60 |
61 | >>> msgspec.json.decode(msg, type=User)
62 | User(name='alice', groups={'admin', 'engineering'}, email=None)
63 |
64 | >>> msgspec.json.decode(b'{"name":"bob","groups":[123]}', type=User)
65 | Traceback (most recent call last):
66 | File "", line 1, in
67 | msgspec.ValidationError: Expected `str`, got `int` - at `$.groups[0]`
68 |
69 | ``msgspec`` is designed to be as performant as possible, while retaining some
70 | of the niceties of validation libraries like pydantic_. For supported types,
71 | encoding/decoding a message with ``msgspec`` can be :doc:`~10-80x faster than
72 | alternative libraries `.
73 |
74 | Highlights
75 | ----------
76 |
77 | - ``msgspec`` is **fast**. It :doc:`benchmarks ` as the fastest
78 | serialization library for Python, outperforming all other JSON/MessagePack
79 | libraries compared.
80 |
81 | - ``msgspec`` is **friendly**. Through use of Python's type annotations,
82 | messages are :ref:`validated ` during deserialization in a
83 | declarative way. ``msgspec`` also works well with other type-checking tooling
84 | like mypy_ and pyright_, providing excellent editor integration.
85 |
86 | - ``msgspec`` is **flexible**. It natively supports a :doc:`wide range of
87 | Python builtin types `. Support for additional types can
88 | also be added through :doc:`extensions `.
89 |
90 | - ``msgspec`` is **lightweight**. It has no required dependencies, and the
91 | binary size is :ref:`a fraction of that of comparable libraries
92 | `.
93 |
94 | - ``msgspec`` is **correct**. The encoders/decoders implemented are strictly
95 | compliant with their respective specifications, providing stronger guarantees
96 | of compatibility with other systems.
97 |
98 | Used By
99 | -------
100 |
101 | ``msgspec`` is used by many organizations and `open source projects
102 | `__; here we highlight a
103 | few:
104 |
105 | .. grid:: 2 2 4 4
106 |
107 | .. grid-item-card:: NautilusTrader
108 | :link: https://nautilustrader.io/
109 |
110 | .. image:: _static/nautilus-trader.png
111 |
112 | .. grid-item-card:: Litestar
113 | :link: https://litestar.dev/
114 |
115 | .. image:: _static/litestar.png
116 |
117 | .. grid-item-card:: Sanic
118 | :link: https://sanic.dev/en/
119 |
120 | .. image:: _static/sanic.png
121 |
122 | .. grid-item-card:: Mosec
123 | :link: https://mosecorg.github.io/mosec/
124 |
125 | .. image:: _static/mosec.png
126 |
127 | .. grid-item-card:: Pioreactor
128 | :link: https://pioreactor.com/
129 |
130 | .. image:: _static/pioreactor.png
131 |
132 | .. grid-item-card:: Zero
133 | :link: https://github.com/Ananto30/zero
134 |
135 | .. image:: _static/zero.png
136 |
137 | .. grid-item-card:: anywidget
138 | :link: https://anywidget.dev/
139 |
140 | .. image:: _static/anywidget.png
141 |
142 | .. grid-item-card:: esmerald
143 | :link: https://esmerald.dev/
144 |
145 | .. image:: _static/esmerald.png
146 |
147 |
148 | .. _type annotations: https://docs.python.org/3/library/typing.html
149 | .. _JSON: https://json.org
150 | .. _MessagePack: https://msgpack.org
151 | .. _YAML: https://yaml.org
152 | .. _TOML: https://toml.io
153 | .. _attrs: https://www.attrs.org
154 | .. _dataclasses: https://docs.python.org/3/library/dataclasses.html
155 | .. _orjson: https://github.com/ijl/orjson
156 | .. _pydantic: https://pydantic-docs.helpmanual.io/
157 | .. _mypy: https://mypy.readthedocs.io
158 | .. _pyright: https://github.com/microsoft/pyright
159 |
160 | .. toctree::
161 | :hidden:
162 | :maxdepth: 2
163 | :caption: Overview
164 |
165 | why.rst
166 | install.rst
167 | benchmarks.rst
168 |
169 | .. toctree::
170 | :hidden:
171 | :maxdepth: 2
172 | :caption: User Guide
173 |
174 | usage.rst
175 | supported-types.rst
176 | structs.rst
177 | constraints.rst
178 | converters.rst
179 | jsonschema.rst
180 | schema-evolution.rst
181 |
182 | .. toctree::
183 | :hidden:
184 | :maxdepth: 2
185 | :caption: Advanced
186 |
187 | extending.rst
188 | inspect.rst
189 | perf-tips.rst
190 |
191 | .. toctree::
192 | :hidden:
193 | :maxdepth: 2
194 | :caption: Reference
195 |
196 | api.rst
197 | examples/index.rst
198 | changelog.rst
199 |
--------------------------------------------------------------------------------
/docs/source/inspect.rst:
--------------------------------------------------------------------------------
1 | Inspecting Types
2 | ----------------
3 |
4 | .. currentmodule:: msgspec.inspect
5 |
6 | .. warning::
7 |
8 | This module is experimental. While we don't expect any breaking changes, we
9 | also don't promise not to break things between releases while this interface
10 | stabilizes.
11 |
12 | ``msgspec`` provides type-introspection support, which can be used to build
13 | tooling on top of msgspec-compatible types. Possible use cases include:
14 |
15 | - Generating OpenAPI_ specifications from msgspec-compatible types (note that
16 | the builtin :doc:`jsonschema` support may be a better starting point for
17 | this).
18 | - Generating example instances of types for testing or documentation purposes
19 | - Integration with hypothesis_ for testing
20 |
21 | The main function here is `msgspec.inspect.type_info` for converting a type
22 | annotation into a corresponding `msgspec.inspect.Type` object. There's also
23 | `msgspec.inspect.multi_type_info` which converts an iterable of annotations;
24 | this function is more efficient than calling `type_info` in a loop.
25 |
26 | .. code-block:: python
27 |
28 | >>> import msgspec
29 |
30 | >>> msgspec.inspect.type_info(bool)
31 | BoolType()
32 |
33 | >>> msgspec.inspect.type_info(int)
34 | IntType(gt=None, ge=None, lt=None, le=None, multiple_of=None)
35 |
36 | >>> msgspec.inspect.type_info(list[int]) # nested types are traversed
37 | ListType(
38 | item_type=IntType(gt=None, ge=None, lt=None, le=None, multiple_of=None),
39 | min_length=None,
40 | max_length=None
41 | )
42 |
43 | >>> msgspec.inspect.multi_type_info([bool, int]) # inspect multiple types
44 | (BoolType(), IntType(gt=None, ge=None, lt=None, le=None, multiple_of=None))
45 |
46 |
47 | Types with :doc:`constraints` will include the constraint information as well:
48 |
49 | .. code-block:: python
50 |
51 | >>> from typing import Annotated
52 |
53 | >>> from msgspec import Meta
54 |
55 | >>> PositiveInt = Annotated[int, Meta(gt=0)]
56 |
57 | >>> msgspec.inspect.type_info(PositiveInt)
58 | IntType(gt=0, ge=None, lt=None, le=None, multiple_of=None)
59 |
60 | Compound types like :doc:`structs` are also supported:
61 |
62 | .. code-block:: python
63 |
64 | >>> class User(msgspec.Struct):
65 | ... name: str
66 | ... groups: list[str] = []
67 | ... email: str | None = None
68 |
69 | >>> msgspec.inspect.type_info(User)
70 | StructType(
71 | cls=User,
72 | fields=(
73 | Field(
74 | name='name',
75 | encode_name='name',
76 | type=StrType(min_length=None, max_length=None, pattern=None),
77 | required=True,
78 | default=UNSET,
79 | default_factory=UNSET
80 | ),
81 | Field(
82 | name='groups',
83 | encode_name='groups',
84 | type=ListType(
85 | item_type=StrType(min_length=None, max_length=None, pattern=None),
86 | min_length=None,
87 | max_length=None
88 | ),
89 | required=False,
90 | default=[],
91 | default_factory=UNSET
92 | ),
93 | Field(
94 | name='email',
95 | encode_name='email',
96 | type=UnionType(
97 | types=(
98 | StrType(min_length=None, max_length=None, pattern=None),
99 | NoneType()
100 | )
101 | ),
102 | required=False,
103 | default=None,
104 | default_factory=UNSET
105 | )
106 | ),
107 | tag_field=None,
108 | tag=None,
109 | array_like=False,
110 | forbid_unknown_fields=False
111 | )
112 |
113 | Types with additional metadata like ``extra_json_schema`` or ``title`` will be
114 | wrapped in a `msgspec.inspect.Metadata` object. Note that all JSON schema
115 | specific fields are merged into a single ``extra_json_schema`` dict.
116 |
117 | .. code-block:: python
118 |
119 | >>> UnixName = Annotated[
120 | ... str,
121 | ... Meta(
122 | ... min_length=1,
123 | ... max_length=32,
124 | ... pattern="^[a-z_][a-z0-9_-]*$",
125 | ... description="A valid UNIX username"
126 | ... )
127 | ... ]
128 |
129 | >>> msgspec.inspect.type_info(UnixName)
130 | Metadata(
131 | type=StrType(
132 | min_length=1,
133 | max_length=32,
134 | pattern='^[a-z_][a-z0-9_-]*$'
135 | ),
136 | extra_json_schema={'description': 'A valid UNIX username'}
137 | )
138 |
139 | Every type supported by ``msgspec`` has a corresponding `msgspec.inspect.Type`
140 | subclass. See the :ref:`API docs ` for a complete list of types.
141 |
142 | For an example of using these functions, you might find our builtin
143 | :doc:`jsonschema` generator implementation useful - the code for this can be
144 | found `here
145 | `__. In
146 | particular, take a look at the large if-else statement in ``_to_schema``.
147 |
148 |
149 | .. _OpenAPI: https://www.openapis.org/
150 | .. _hypothesis: https://hypothesis.readthedocs.io/en/latest/
151 |
--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | ``msgspec`` may be installed via ``pip`` or ``conda``. Note that Python >= 3.8
5 | is required. The basic install has no required dependencies.
6 |
7 | **pip**
8 |
9 | .. code-block:: shell
10 |
11 | pip install msgspec
12 |
13 | **conda**
14 |
15 | .. code-block:: shell
16 |
17 | conda install msgspec -c conda-forge
18 |
19 |
20 | Optional Dependencies
21 | ---------------------
22 |
23 | Depending on your platform, the base install of ``msgspec`` may not support
24 | TOML_ or YAML_ without additional dependencies.
25 |
26 | TOML
27 | ~~~~
28 |
29 | The TOML_ protocol requires:
30 |
31 | - Python < 3.11: `tomli`_ and `tomli_w`_ for reading and writing TOML.
32 |
33 | - Python >= 3.11: `tomli_w`_ for writing TOML. Reading TOML is done using
34 | the standard library's `tomllib` and requires no additional dependencies.
35 |
36 | You may either install these dependencies manually, or depend on the ``toml``
37 | extra:
38 |
39 | **pip**
40 |
41 | .. code-block:: shell
42 |
43 | pip install "msgspec[toml]"
44 |
45 | **conda**
46 |
47 | .. code-block:: shell
48 |
49 | conda install msgspec-toml -c conda-forge
50 |
51 | YAML
52 | ~~~~
53 |
54 | The YAML_ protocol requires PyYAML_ on all platforms. You may either install
55 | this dependency manually, or depend on the ``yaml`` extra:
56 |
57 | **pip**
58 |
59 | .. code-block:: shell
60 |
61 | pip install "msgspec[yaml]"
62 |
63 | **conda**
64 |
65 | .. code-block:: shell
66 |
67 | conda install msgspec-yaml -c conda-forge
68 |
69 |
70 | Installing from GitHub
71 | ----------------------
72 |
73 | If you wish to use a feature that hasn't been released yet, you may
74 | install from the `development branch on GitHub
75 | `__ using ``pip``:
76 |
77 | .. code-block:: shell
78 |
79 | pip install git+https://github.com/jcrist/msgspec.git
80 |
81 |
82 | .. _YAML: https://yaml.org
83 | .. _TOML: https://toml.io
84 | .. _PyYAML: https://pyyaml.org/
85 | .. _tomli: https://github.com/hukkin/tomli
86 | .. _tomli_w: https://github.com/hukkin/tomli-w
87 |
--------------------------------------------------------------------------------
/docs/source/jsonschema.rst:
--------------------------------------------------------------------------------
1 | JSON Schema
2 | ===========
3 |
4 | ``msgspec`` provides a few utilities for generating `JSON Schema`_
5 | specifications from msgspec-compatible :doc:`types ` and
6 | :doc:`constraints `.
7 |
8 | - `msgspec.json.schema`: generates a complete JSON Schema for a single type.
9 | - `msgspec.json.schema_components`: generates JSON schemas for multiple types,
10 | along with a corresponding ``components`` mapping. This is mainly useful when
11 | generating multiple schemas to include in a larger specification like OpenAPI_
12 | (a short usage sketch follows the example below).
12 |
13 |
14 | The generated schemas are compatible with `JSON Schema`_ 2020-12 and OpenAPI_
15 | 3.1.
16 |
17 |
18 | Example
19 | -------
20 |
21 |
22 | .. code-block:: python
23 |
24 | import msgspec
25 | from msgspec import Struct, Meta
26 | from typing import Annotated, Optional
27 |
28 |
29 | # A float constrained to values > 0
30 | PositiveFloat = Annotated[float, Meta(gt=0)]
31 |
32 |
33 | class Dimensions(Struct):
34 | """Dimensions for a product, all measurements in centimeters"""
35 | length: PositiveFloat
36 | width: PositiveFloat
37 | height: PositiveFloat
38 |
39 |
40 | class Product(Struct):
41 | """A product in a catalog"""
42 | id: int
43 | name: str
44 | price: PositiveFloat
45 | tags: set[str] = set()
46 | dimensions: Optional[Dimensions] = None
47 |
48 |
49 | # Generate a schema for a list of products
50 | schema = msgspec.json.schema(list[Product])
51 |
52 | # Print out that schema as JSON
53 | print(msgspec.json.encode(schema))
54 |
55 |
56 | .. code-block:: json
57 |
58 | {
59 | "type": "array",
60 | "items": {"$ref": "#/$defs/Product"},
61 | "$defs": {
62 | "Dimensions": {
63 | "title": "Dimensions",
64 | "description": "Dimensions for a product, all measurements in centimeters",
65 | "type": "object",
66 | "properties": {
67 | "length": {"type": "number", "exclusiveMinimum": 0},
68 | "width": {"type": "number", "exclusiveMinimum": 0},
69 | "height": {"type": "number", "exclusiveMinimum": 0}
70 | },
71 | "required": ["length", "width", "height"]
72 | },
73 | "Product": {
74 | "title": "Product",
75 | "description": "A product in a catalog",
76 | "type": "object",
77 | "properties": {
78 | "id": {"type": "integer"},
79 | "name": {"type": "string"},
80 | "price": {"type": "number", "exclusiveMinimum": 0},
81 | "tags": {
82 | "type": "array",
83 | "items": {"type": "string"},
84 | "default": [],
85 | },
86 | "dimensions": {
87 | "anyOf": [{"type": "null"}, {"$ref": "#/$defs/Dimensions"}],
88 | "default": null,
89 | }
90 | },
91 | "required": ["id", "name", "price"]
92 | }
93 | }
94 | }
95 |
96 |
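`msgspec.json.schema_components` works similarly, but returns the per-type
schemas and the shared component definitions separately so they can be
embedded in a larger document. A small sketch reusing the types above (the
OpenAPI-style ``ref_template`` shown here is just one possible choice):

.. code-block:: python

    schemas, components = msgspec.json.schema_components(
        [Product, Dimensions],
        ref_template="#/components/schemas/{name}",
    )
    # ``schemas`` holds one schema (typically a "$ref") per input type;
    # ``components`` maps each referenced name to its full definition.
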
97 | .. _JSON Schema: https://json-schema.org/
98 | .. _OpenAPI: https://www.openapis.org/
99 |
--------------------------------------------------------------------------------
/docs/source/schema-evolution.rst:
--------------------------------------------------------------------------------
1 | Schema Evolution
2 | ================
3 |
4 | ``msgspec`` includes support for "schema evolution", meaning that:
5 |
6 | - Messages serialized with an older version of a schema will be deserializable
7 | using a newer version of the schema.
8 | - Messages serialized with a newer version of the schema will be deserializable
9 | using an older version of the schema.
10 |
11 | This can be useful if, for example, you have clients and servers with
12 | mismatched versions.
13 |
14 | For schema evolution to work smoothly, you need to follow a few guidelines:
15 |
16 | 1. Any new fields on a `msgspec.Struct` must specify default values.
17 | 2. Structs with ``array_like=True`` must not reorder fields, and any new fields
18 | must be appended to the end (and have defaults).
19 | 3. Don't change the type annotations for existing messages or fields.
20 | 4. Don't change the type codes or implementations for any defined
21 | :ref:`extensions ` (MessagePack only).
22 |
23 | For example, suppose we had a `msgspec.Struct` type representing a user:
24 |
25 | .. code-block:: python
26 |
27 | >>> import msgspec
28 |
29 | >>> from typing import Set, Optional
30 |
31 | >>> class User(msgspec.Struct):
32 | ... """A struct representing a user"""
33 | ... name: str
34 | ... groups: Set[str] = set()
35 | ... email: Optional[str] = None
36 |
37 | Then suppose we wanted to add a new ``phone`` field to this struct in a way
38 | that wouldn't break clients/servers still using the prior definition. To
39 | accomplish this, we add ``phone`` as an *optional* field (defaulting to
40 | ``None``), at the end of the struct.
41 |
42 | .. code-block:: python
43 |
44 | >>> class User2(msgspec.Struct):
45 | ... """An updated version of the User struct, now with a phone number"""
46 | ... name: str
47 | ... groups: Set[str] = set()
48 | ... email: Optional[str] = None
49 | ... phone: Optional[str] = None
50 |
51 | Messages serialized using both the old and new schemas can still be exchanged
52 | without error. If an old message is deserialized using the new schema, the
53 | missing fields all have default values that will be used. Likewise, if a new
54 | message is deserialized with the old schema, the unknown new fields will be
55 | efficiently skipped without decoding.
56 |
57 | .. code-block:: python
58 |
59 | >>> old_dec = msgspec.json.Decoder(User)
60 |
61 | >>> new_dec = msgspec.json.Decoder(User2)
62 |
63 | >>> new_msg = msgspec.json.encode(
64 | ... User2("bob", groups={"finance"}, phone="512-867-5309")
65 | ... )
66 |
67 | >>> old_dec.decode(new_msg) # deserializing a new msg with an older decoder
68 | User(name='bob', groups={'finance'}, email=None)
69 |
70 | >>> old_msg = msgspec.json.encode(
71 | ... User("alice", groups={"admin", "engineering"})
72 | ... )
73 |
74 | >>> new_dec.decode(old_msg) # deserializing an old msg with a new decoder
75 | User2(name='alice', groups={'admin', 'engineering'}, email=None, phone=None)
76 |
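77 | The same guidelines apply to structs configured with ``array_like=True``
78 | (guideline 2 above), with the added restriction that field order is
79 | significant: existing fields must keep their positions, and any new fields
80 | must be appended at the end with defaults. A minimal sketch of a compatible
81 | change:
82 |
83 | .. code-block:: python
84 |
85 | >>> class Point(msgspec.Struct, array_like=True):
86 | ...     x: int
87 | ...     y: int
88 |
89 | >>> class Point2(msgspec.Struct, array_like=True):
90 | ...     x: int
91 | ...     y: int
92 | ...     z: int = 0  # new field appended at the end, with a default
93 |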
--------------------------------------------------------------------------------
/docs/source/usage.rst:
--------------------------------------------------------------------------------
1 | Usage
2 | =====
3 |
4 | ``msgspec`` supports multiple serialization protocols, accessed through
5 | separate submodules:
6 |
7 | - ``msgspec.json`` (JSON_)
8 | - ``msgspec.msgpack`` (MessagePack_)
9 | - ``msgspec.yaml`` (YAML_)
10 | - ``msgspec.toml`` (TOML_)
11 |
12 | Each supports a consistent interface, making it simple to switch between
13 | protocols as needed.
14 |
15 | Encoding
16 | --------
17 |
18 | Each submodule has an ``encode`` method for encoding Python objects using the
19 | respective protocol.
20 |
21 | .. code-block:: python
22 |
23 | >>> import msgspec
24 |
25 | >>> # Encode as JSON
26 | ... msgspec.json.encode({"hello": "world"})
27 | b'{"hello":"world"}'
28 |
29 | >>> # Encode as msgpack
30 | ... msgspec.msgpack.encode({"hello": "world"})
31 | b'\x81\xa5hello\xa5world'
32 |
33 | Note that if you're making multiple calls to ``encode``, it's more efficient to
34 | create an ``Encoder`` once and use the ``Encoder.encode`` method instead.
35 |
36 | .. code-block:: python
37 |
38 | >>> import msgspec
39 |
40 | >>> # Create a JSON encoder
41 | ... encoder = msgspec.json.Encoder()
42 |
43 | >>> # Encode as JSON using the encoder
44 | ... encoder.encode({"hello": "world"})
45 | b'{"hello":"world"}'
46 |
47 | Decoding
48 | --------
49 |
50 | Each submodule has a ``decode`` method for decoding messages using the respective
51 | protocol.
52 |
53 | .. code-block:: python
54 |
55 | >>> import msgspec
56 |
57 | >>> # Decode JSON
58 | ... msgspec.json.decode(b'{"hello":"world"}')
59 | {'hello': 'world'}
60 |
61 | >>> # Decode msgpack
62 | ... msgspec.msgpack.decode(b'\x81\xa5hello\xa5world')
63 | {'hello': 'world'}
64 |
65 | Note that if you're making multiple calls to ``decode``, it's more efficient to
66 | create a ``Decoder`` once and use the ``Decoder.decode`` method instead.
67 |
68 | .. code-block:: python
69 |
70 | >>> import msgspec
71 |
72 | >>> # Create a JSON decoder
73 | ... decoder = msgspec.json.Decoder()
74 |
75 | >>> # Decode JSON using the decoder
76 | ... decoder.decode(b'{"hello":"world"}')
77 | {'hello': 'world'}
78 |
79 |
80 | .. _typed-decoding:
81 |
82 | Typed Decoding
83 | --------------
84 |
85 | ``msgspec`` optionally supports specifying the expected output types during
86 | decoding. This serves a few purposes:
87 |
88 | - Often serialized data has a fixed schema (e.g. a request handler in a REST
89 | API expects a certain JSON structure). Specifying the expected types allows
90 | ``msgspec`` to perform validation during decoding, with *no* added runtime
91 | cost.
92 |
93 | - Python has a much richer type system than serialization protocols like JSON_
94 | or MessagePack_. Specifying the output types lets ``msgspec`` decode messages
95 | into types other than the defaults described above (e.g. decoding JSON
96 | objects into a :doc:`Struct <structs>` instead of the default `dict`).
97 |
98 | - The `type annotations`_ used to describe the expected types are compatible
99 | with tools like mypy_ or pyright_, providing excellent editor integration.
100 |
101 | ``msgspec`` uses Python `type annotations`_ to describe the expected types. A
102 | :doc:`wide variety of builtin types are supported <supported-types>`.
103 |
104 | Here we define a user schema as a :doc:`Struct <structs>` type. We then pass
105 | the type to ``decode`` via the ``type`` keyword argument:
106 |
107 | .. code-block:: python
108 |
109 | >>> import msgspec
110 |
111 | >>> class User(msgspec.Struct):
112 | ... name: str
113 | ... groups: set[str] = set()
114 | ... email: str | None = None
115 |
116 | >>> msgspec.json.decode(
117 | ... b'{"name": "alice", "groups": ["admin", "engineering"]}',
118 | ... type=User
119 | ... )
120 | User(name='alice', groups={'admin', 'engineering'}, email=None)
121 |
122 | If a message doesn't match the expected type, an error is raised.
123 |
124 | .. code-block:: python
125 |
126 | >>> msgspec.json.decode(
127 | ... b'{"name": "bill", "groups": ["devops", 123]}',
128 | ... type=User
129 | ... )
130 | Traceback (most recent call last):
131 | File "<stdin>", line 1, in <module>
132 | msgspec.ValidationError: Expected `str`, got `int` - at `$.groups[1]`
133 |
134 | .. _strict-vs-lax:
135 |
136 | "Strict" vs "Lax" Mode
137 | ~~~~~~~~~~~~~~~~~~~~~~
138 |
139 | Unlike some other libraries (e.g. pydantic_), ``msgspec`` won't perform any
140 | unsafe implicit conversion by default ("strict" mode). For example, if an
141 | integer is specified and a string is provided instead, an error is raised
142 | rather than attempting to cast the string to an int.
143 |
144 | .. code-block:: python
145 |
146 | >>> msgspec.json.decode(b'[1, 2, "3"]', type=list[int])
147 | Traceback (most recent call last):
148 | File "<stdin>", line 1, in <module>
149 | msgspec.ValidationError: Expected `int`, got `str` - at `$[2]`
150 |
151 | For cases where you'd like a more lax set of conversion rules, you can pass
152 | ``strict=False`` to any ``decode`` function or ``Decoder`` class ("lax" mode).
153 | See :doc:`supported-types` for information on how this affects individual
154 | types.
155 |
156 | .. code-block:: python
157 |
158 | >>> msgspec.json.decode(b'[1, 2, "3"]', type=list[int], strict=False)
159 | [1, 2, 3]
160 |
161 |
162 | .. _JSON: https://json.org
163 | .. _MessagePack: https://msgpack.org
164 | .. _YAML: https://yaml.org
165 | .. _TOML: https://toml.io
166 | .. _type annotations: https://docs.python.org/3/library/typing.html
167 | .. _pydantic: https://pydantic-docs.helpmanual.io/
168 | .. _mypy: https://mypy.readthedocs.io
169 | .. _pyright: https://github.com/microsoft/pyright
170 |
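171 | As with untyped decoding, a typed ``Decoder`` can be created once and reused;
172 | ``strict`` may likewise be set on the decoder itself. A minimal sketch
173 | combining both with the example above:
174 |
175 | .. code-block:: python
176 |
177 | >>> decoder = msgspec.json.Decoder(list[int], strict=False)
178 |
179 | >>> decoder.decode(b'[1, 2, "3"]')
180 | [1, 2, 3]
181 |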
--------------------------------------------------------------------------------
/docs/source/why.rst:
--------------------------------------------------------------------------------
1 | Why msgspec?
2 | ------------
3 |
4 | If you're writing a networked application, you'll need some agreed-upon
5 | protocol that your clients and servers can use to communicate. JSON is a decent
6 | choice here (though there are many other options). It's ubiquitous, and Python
7 | has many libraries for parsing it into builtin types (``json``, ``ujson``,
8 | ``orjson``, ...).
9 |
10 | *However, servers don't just parse JSON; they also need to do something with
11 | it*.
12 |
13 | ``msgspec`` goes above and beyond other Python JSON libraries to help with the
14 | following:
15 |
16 | - **Validation**
17 |
18 | If a field is missing from a request or has the wrong type, you probably want
19 | to raise a nice error message rather than just throwing a 500 error.
20 |
21 | ``msgspec`` lets you describe your schema via type annotations, and will
22 | efficiently :ref:`validate <typed-decoding>` messages against this
23 | schema while decoding.
24 |
25 | It also integrates well with static analysis tools like mypy_ and pyright_,
26 | helping you avoid whole classes of runtime errors.
27 |
28 | - **Application Logic**
29 |
30 | What your application actually does! While builtin types like dicts are
31 | fine for writing application logic, they aren't as ergonomic as custom
32 | classes (no attribute access, poor type checking, ...).
33 |
34 | ``msgspec`` supports a :doc:`wide variety of types <supported-types>`,
35 | letting you decouple the objects your application logic uses from those that
36 | JSON natively supports.
37 |
38 | - **Future Flexibility**
39 |
40 | Application needs change; you'll want to make sure your clients/servers won't
41 | break if the JSON schema evolves over time.
42 |
43 | To handle this, ``msgspec`` supports :doc:`"schema evolution"
44 | <schema-evolution>`. Messages can be sent between clients with different
45 | schemas without error, allowing systems to evolve over time.
46 |
47 | While there are other tools in this space, ``msgspec`` should be an :doc:`order
48 | of magnitude faster <benchmarks>` than other options. We also hope that it's
49 | quick to learn and friendly to use, letting you focus less on serialization and
50 | more on your application code.
51 |
52 |
53 | .. _mypy: https://mypy.readthedocs.io
54 | .. _pyright: https://github.com/microsoft/pyright
55 |
--------------------------------------------------------------------------------
/examples/asyncio-kv/kv.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import asyncio
4 | import msgspec
5 | from typing import Any
6 |
7 |
8 | # Some utilities for writing and reading length-prefix framed messages. Using
9 | # length-prefixed framing makes it easier for the reader to determine the
10 | # boundaries of each message before passing it to msgspec to be decoded.
11 | async def prefixed_send(stream: asyncio.StreamWriter, buffer: bytes) -> None:
12 | """Write a length-prefixed buffer to the stream"""
13 | # Encode the message length as a 4 byte big-endian integer.
14 | prefix = len(buffer).to_bytes(4, "big")
15 |
16 | # Write the prefix and buffer to the stream.
17 | stream.write(prefix)
18 | stream.write(buffer)
19 | await stream.drain()
20 |
21 |
22 | async def prefixed_recv(stream: asyncio.StreamReader) -> bytes:
23 | """Read a length-prefixed buffer from the stream"""
24 | # Read the next 4 byte prefix
25 | prefix = await stream.readexactly(4)
26 |
27 | # Convert the prefix back into an integer for the next message length
28 | n = int.from_bytes(prefix, "big")
29 |
30 | # Read in the full message buffer
31 | return await stream.readexactly(n)
32 |
33 |
34 | # Define some request types. We set `tag=True` on each type so they can be used
35 | # in a tagged union that distinguishes the different request types.
36 | class Get(msgspec.Struct, tag=True):
37 | key: str
38 |
39 |
40 | class Put(msgspec.Struct, tag=True):
41 | key: str
42 | val: str
43 |
44 |
45 | class Del(msgspec.Struct, tag=True):
46 | key: str
47 |
48 |
49 | class ListKeys(msgspec.Struct, tag=True):
50 | pass
51 |
52 |
53 | # A union of all valid request types
54 | Request = Get | Put | Del | ListKeys
55 |
56 |
57 | class Server:
58 | """An example TCP key-value server using asyncio and msgspec"""
59 |
60 | def __init__(self, host: str = "127.0.0.1", port: int = 8888):
61 | self.host = host
62 | self.port = port
63 | self.kv: dict[str, str] = {}
64 | # A msgpack encoder for encoding responses
65 | self.encoder = msgspec.msgpack.Encoder()
66 | # A *typed* msgpack decoder for decoding requests. If a request doesn't
67 | # match the specified types, a nice error will be raised.
68 | self.decoder = msgspec.msgpack.Decoder(Request)
69 |
70 | async def handle_connection(
71 | self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter
72 | ):
73 | """Handle the full lifetime of a single connection"""
74 | print("Connection opened")
75 | while True:
76 | try:
77 | # Receive and decode a request
78 | buffer = await prefixed_recv(reader)
79 | req = self.decoder.decode(buffer)
80 |
81 | # Process the request
82 | resp = await self.handle_request(req)
83 |
84 | # Encode and write the response
85 | buffer = self.encoder.encode(resp)
86 | await prefixed_send(writer, buffer)
87 | except EOFError:
88 | print("Connection closed")
89 | return
90 |
91 | async def handle_request(self, req: Request) -> Any:
92 | """Handle a single request and return the result (if any)"""
93 | # We use pattern matching here to branch on the different message types.
94 | # You could just as well use an if-else statement, but pattern matching
95 | # works pretty well here.
96 | match req:
97 | case Get(key):
98 | # Return the value for a key, or None if missing
99 | return self.kv.get(key)
100 | case Put(key, val):
101 | # Add a new key-value pair
102 | self.kv[key] = val
103 | return None
104 | case Del(key):
105 | # Remove a key-value pair if it exists
106 | self.kv.pop(key, None)
107 | return None
108 | case ListKeys():
109 | # Return a list of all keys in the store
110 | return sorted(self.kv)
111 |
112 | async def serve_forever(self) -> None:
113 | server = await asyncio.start_server(
114 | self.handle_connection, self.host, self.port
115 | )
116 | print(f"Serving on tcp://{self.host}:{self.port}...")
117 | async with server:
118 | await server.serve_forever()
119 |
120 | def run(self) -> None:
121 | """Run the server until ctrl-C"""
122 | asyncio.run(self.serve_forever())
123 |
124 |
125 | class Client:
126 | """An example TCP key-value client using asyncio and msgspec."""
127 |
128 | def __init__(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter):
129 | self.reader = reader
130 | self.writer = writer
131 |
132 | @classmethod
133 | async def create(cls, host: str = "127.0.0.1", port: int = 8888):
134 | """Create a new client"""
135 | reader, writer = await asyncio.open_connection(host, port)
136 | return cls(reader, writer)
137 |
138 | async def close(self) -> None:
139 | """Close the client."""
140 | self.writer.close()
141 | await self.writer.wait_closed()
142 |
143 | async def request(self, req):
144 | """Send a request and await the response"""
145 | # Encode and send the request
146 | buffer = msgspec.msgpack.encode(req)
147 | await prefixed_send(self.writer, buffer)
148 |
149 | # Receive and decode the response
150 | buffer = await prefixed_recv(self.reader)
151 | return msgspec.msgpack.decode(buffer)
152 |
153 | async def get(self, key: str) -> str | None:
154 | """Get a key from the KV store, returning None if not present"""
155 | return await self.request(Get(key))
156 |
157 | async def put(self, key: str, val: str) -> None:
158 | """Put a key-val pair in the KV store"""
159 | return await self.request(Put(key, val))
160 |
161 | async def delete(self, key: str) -> None:
162 | """Delete a key-val pair from the KV store"""
163 | return await self.request(Del(key))
164 |
165 | async def list_keys(self) -> list[str]:
166 | """List all keys in the KV store"""
167 | return await self.request(ListKeys())
168 |
169 |
170 | if __name__ == "__main__":
171 | Server().run()
172 |
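173 |
174 | # Example client usage (assumes the server above is already running; run this
175 | # from a separate interpreter):
176 | #
177 | #   async def main():
178 | #       client = await Client.create()
179 | #       await client.put("foo", "bar")
180 | #       print(await client.get("foo"))    # -> 'bar'
181 | #       print(await client.list_keys())   # -> ['foo']
182 | #       await client.close()
183 | #
184 | #   asyncio.run(main())
185 |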
--------------------------------------------------------------------------------
/examples/conda-repodata/query_repodata.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 |
4 | import orjson
5 | import requests
6 | import simdjson
7 | import ujson
8 |
9 | import msgspec
10 |
11 |
12 | def query_msgspec(data: bytes) -> list[tuple[int, str]]:
13 | # Use Struct types to define the JSON schema. For efficiency we only define
14 | # the fields we actually need.
15 | class Package(msgspec.Struct):
16 | name: str
17 | size: int
18 |
19 | class RepoData(msgspec.Struct):
20 | packages: dict[str, Package]
21 |
22 | # Decode the data as a `RepoData` type
23 | repo_data = msgspec.json.decode(data, type=RepoData)
24 |
25 | # Sort packages by `size`, and return the top 10
26 | return sorted(
27 | ((p.size, p.name) for p in repo_data.packages.values()), reverse=True
28 | )[:10]
29 |
30 |
31 | def query_orjson(data: bytes) -> list[tuple[int, str]]:
32 | repo_data = orjson.loads(data)
33 | return sorted(
34 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True
35 | )[:10]
36 |
37 |
38 | def query_json(data: bytes) -> list[tuple[int, str]]:
39 | repo_data = json.loads(data)
40 | return sorted(
41 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True
42 | )[:10]
43 |
44 |
45 | def query_ujson(data: bytes) -> list[tuple[int, str]]:
46 | repo_data = ujson.loads(data)
47 | return sorted(
48 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True
49 | )[:10]
50 |
51 |
52 | def query_simdjson(data: bytes) -> list[tuple[int, str]]:
53 | repo_data = simdjson.Parser().parse(data)
54 | return sorted(
55 | ((p["size"], p["name"]) for p in repo_data["packages"].values()), reverse=True
56 | )[:10]
57 |
58 |
59 | # Download the current_repodata.json file
60 | resp = requests.get(
61 | "https://conda.anaconda.org/conda-forge/noarch/current_repodata.json"
62 | )
63 | resp.raise_for_status()
64 | data = resp.content
65 |
66 | libraries = [
67 | ("json", query_json),
68 | ("ujson", query_ujson),
69 | ("orjson", query_orjson),
70 | ("simdjson", query_simdjson),
71 | ("msgspec", query_msgspec),
72 | ]
73 |
74 | # Run the query with each JSON library, timing the execution
75 | for lib, func in libraries:
76 | start = time.perf_counter()
77 | func(data)
78 | stop = time.perf_counter()
79 | print(f"{lib}: {(stop - start) * 1000:.2f} ms")
80 |
--------------------------------------------------------------------------------
/examples/edgedb/dbschema/default.esdl:
--------------------------------------------------------------------------------
1 | module default {
2 | type Person {
3 | required name: str;
4 | }
5 |
6 | type Movie {
7 | required title: str;
8 | multi actors: Person;
9 | }
10 | };
11 |
--------------------------------------------------------------------------------
/examples/edgedb/dbschema/migrations/00001.edgeql:
--------------------------------------------------------------------------------
1 | CREATE MIGRATION m1vegpxb3odf7j6rsioor2j5zcassvioypuixdcfujquycuufa3k2a
2 | ONTO initial
3 | {
4 | CREATE TYPE default::Person {
5 | CREATE REQUIRED PROPERTY name: std::str;
6 | };
7 | CREATE TYPE default::Movie {
8 | CREATE MULTI LINK actors: default::Person;
9 | CREATE REQUIRED PROPERTY title: std::str;
10 | };
11 | };
12 |
--------------------------------------------------------------------------------
/examples/edgedb/edgedb.toml:
--------------------------------------------------------------------------------
1 | [edgedb]
2 | server-version = "3.2"
3 |
--------------------------------------------------------------------------------
/examples/edgedb/insert_data.edgeql:
--------------------------------------------------------------------------------
1 | INSERT Movie {
2 | title := "Dune",
3 | actors := {
4 | (INSERT Person { name := "Timothée Chalamet" }),
5 | (INSERT Person { name := "Zendaya" })
6 | }
7 | };
8 |
--------------------------------------------------------------------------------
/examples/geojson/msgspec_geojson.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import msgspec
4 |
5 | Position = tuple[float, float]
6 |
7 |
8 | # Define the 7 standard Geometry types.
9 | # All types set `tag=True`, meaning that they'll make use of a `type` field to
10 | # disambiguate between types when decoding.
11 | class Point(msgspec.Struct, tag=True):
12 | coordinates: Position
13 |
14 |
15 | class MultiPoint(msgspec.Struct, tag=True):
16 | coordinates: list[Position]
17 |
18 |
19 | class LineString(msgspec.Struct, tag=True):
20 | coordinates: list[Position]
21 |
22 |
23 | class MultiLineString(msgspec.Struct, tag=True):
24 | coordinates: list[list[Position]]
25 |
26 |
27 | class Polygon(msgspec.Struct, tag=True):
28 | coordinates: list[list[Position]]
29 |
30 |
31 | class MultiPolygon(msgspec.Struct, tag=True):
32 | coordinates: list[list[list[Position]]]
33 |
34 |
35 | class GeometryCollection(msgspec.Struct, tag=True):
36 | geometries: list[Geometry]
37 |
38 |
39 | Geometry = (
40 | Point
41 | | MultiPoint
42 | | LineString
43 | | MultiLineString
44 | | Polygon
45 | | MultiPolygon
46 | | GeometryCollection
47 | )
48 |
49 |
50 | # Define the two Feature types
51 | class Feature(msgspec.Struct, tag=True):
52 | geometry: Geometry | None = None
53 | properties: dict | None = None
54 | id: str | int | None = None
55 |
56 |
57 | class FeatureCollection(msgspec.Struct, tag=True):
58 | features: list[Feature]
59 |
60 |
61 | # A union of all 9 GeoJSON types
62 | GeoJSON = Geometry | Feature | FeatureCollection
63 |
64 |
65 | # Create a decoder and an encoder to use for decoding & encoding GeoJSON types
66 | loads = msgspec.json.Decoder(GeoJSON).decode
67 | dumps = msgspec.json.Encoder().encode
68 |
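69 | # Example round-trip (hypothetical input; `loads`/`dumps` are defined above):
70 | #
71 | #   point = loads(b'{"type": "Point", "coordinates": [1.0, 2.0]}')
72 | #   assert point == Point(coordinates=(1.0, 2.0))
73 | #   dumps(point)  # re-encodes the object back to JSON bytes
74 |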
--------------------------------------------------------------------------------
/examples/pyproject-toml/pyproject.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import msgspec
4 |
5 |
6 | class Base(
7 | msgspec.Struct,
8 | omit_defaults=True,
9 | forbid_unknown_fields=True,
10 | rename="kebab",
11 | ):
12 | """A base class holding some common settings.
13 |
14 | - We set ``omit_defaults = True`` to omit any fields containing only their
15 | default value from the output when encoding.
16 | - We set ``forbid_unknown_fields = True`` to error nicely if an unknown
17 | field is present in the input TOML. This helps catch typo errors early,
18 | and is also required per PEP 621.
19 | - We set ``rename = "kebab"`` to rename all fields to use kebab case when
20 | encoding/decoding, as this is the convention used in pyproject.toml. For
21 | example, this will rename ``requires_python`` to ``requires-python``.
22 | """
23 |
24 | pass
25 |
26 |
27 | class BuildSystem(Base):
28 | requires: list[str] = []
29 | build_backend: str | None = None
30 | backend_path: list[str] = []
31 |
32 |
33 | class Readme(Base):
34 | file: str | None = None
35 | text: str | None = None
36 | content_type: str | None = None
37 |
38 |
39 | class License(Base):
40 | file: str | None = None
41 | text: str | None = None
42 |
43 |
44 | class Contributor(Base):
45 | name: str | None = None
46 | email: str | None = None
47 |
48 |
49 | class Project(Base):
50 | name: str | None = None
51 | version: str | None = None
52 | description: str | None = None
53 | readme: str | Readme | None = None
54 | license: str | License | None = None
55 | authors: list[Contributor] = []
56 | maintainers: list[Contributor] = []
57 | keywords: list[str] = []
58 | classifiers: list[str] = []
59 | urls: dict[str, str] = {}
60 | requires_python: str | None = None
61 | dependencies: list[str] = []
62 | optional_dependencies: dict[str, list[str]] = {}
63 | scripts: dict[str, str] = {}
64 | gui_scripts: dict[str, str] = {}
65 | entry_points: dict[str, dict[str, str]] = {}
66 | dynamic: list[str] = []
67 |
68 |
69 | class PyProject(Base):
70 | build_system: BuildSystem | None = None
71 | project: Project | None = None
72 | tool: dict[str, dict[str, Any]] = {}
73 |
74 |
75 | def decode(data: bytes | str) -> PyProject:
76 | """Decode a ``pyproject.toml`` file from TOML"""
77 | return msgspec.toml.decode(data, type=PyProject)
78 |
79 |
80 | def encode(msg: PyProject) -> bytes:
81 | """Encode a ``PyProject`` object to TOML"""
82 | return msgspec.toml.encode(msg)
83 |
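84 | # Example usage (hypothetical file path):
85 | #
86 | #   with open("pyproject.toml", "rb") as f:
87 | #       config = decode(f.read())
88 | #   if config.project is not None:
89 | #       print(config.project.name)
90 |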
--------------------------------------------------------------------------------
/msgspec/__init__.py:
--------------------------------------------------------------------------------
1 | from ._core import (
2 | DecodeError,
3 | EncodeError,
4 | Field as _Field,
5 | Meta,
6 | MsgspecError,
7 | Raw,
8 | Struct,
9 | UnsetType,
10 | UNSET,
11 | NODEFAULT,
12 | ValidationError,
13 | defstruct,
14 | convert,
15 | to_builtins,
16 | )
17 |
18 |
19 | def field(*, default=NODEFAULT, default_factory=NODEFAULT, name=None):
20 | return _Field(default=default, default_factory=default_factory, name=name)
21 |
22 |
23 | field.__doc__ = _Field.__doc__
24 |
25 |
26 | from . import msgpack
27 | from . import json
28 | from . import yaml
29 | from . import toml
30 | from . import inspect
31 | from . import structs
32 | from ._version import get_versions
33 |
34 | __version__ = get_versions()["version"]
35 | del get_versions
36 |
--------------------------------------------------------------------------------
/msgspec/__init__.pyi:
--------------------------------------------------------------------------------
1 | import enum
2 | from typing import (
3 | Any,
4 | Callable,
5 | ClassVar,
6 | Dict,
7 | Final,
8 | Iterable,
9 | Literal,
10 | Mapping,
11 | Optional,
12 | Tuple,
13 | Type,
14 | TypeVar,
15 | Union,
16 | overload,
17 | )
18 |
19 | from typing_extensions import dataclass_transform, Buffer
20 |
21 | from . import inspect, json, msgpack, structs, toml, yaml
22 |
23 | T = TypeVar("T")
24 |
25 | class UnsetType(enum.Enum):
26 | UNSET = "UNSET"
27 |
28 | UNSET = UnsetType.UNSET
29 |
30 | class _NoDefault(enum.Enum):
31 | NODEFAULT = "NODEFAULT"
32 |
33 | NODEFAULT = _NoDefault.NODEFAULT
34 |
35 | @overload
36 | def field(*, default: T, name: Optional[str] = None) -> T: ...
37 | @overload
38 | def field(*, default_factory: Callable[[], T], name: Optional[str] = None) -> T: ...
39 | @overload
40 | def field(*, name: Optional[str] = None) -> Any: ...
41 | @dataclass_transform(field_specifiers=(field,))
42 | class Struct:
43 | __struct_fields__: ClassVar[Tuple[str, ...]]
44 | __struct_config__: ClassVar[structs.StructConfig]
45 | __match_args__: ClassVar[Tuple[str, ...]]
46 | # A default __init__ so that Structs with unknown field types (say
47 | # constructed by `defstruct`) won't error on every call to `__init__`
48 | def __init__(self, *args: Any, **kwargs: Any) -> None: ...
49 | def __init_subclass__(
50 | cls,
51 | tag: Union[None, bool, str, int, Callable[[str], Union[str, int]]] = None,
52 | tag_field: Union[None, str] = None,
53 | rename: Union[
54 | None,
55 | Literal["lower", "upper", "camel", "pascal", "kebab"],
56 | Callable[[str], Optional[str]],
57 | Mapping[str, str],
58 | ] = None,
59 | omit_defaults: bool = False,
60 | forbid_unknown_fields: bool = False,
61 | frozen: bool = False,
62 | eq: bool = True,
63 | order: bool = False,
64 | kw_only: bool = False,
65 | repr_omit_defaults: bool = False,
66 | array_like: bool = False,
67 | gc: bool = True,
68 | weakref: bool = False,
69 | dict: bool = False,
70 | cache_hash: bool = False,
71 | ) -> None: ...
72 | def __rich_repr__(
73 | self,
74 | ) -> Iterable[Union[Any, Tuple[Any], Tuple[str, Any], Tuple[str, Any, Any]]]: ...
75 |
76 | def defstruct(
77 | name: str,
78 | fields: Iterable[Union[str, Tuple[str, type], Tuple[str, type, Any]]],
79 | *,
80 | bases: Optional[Tuple[Type[Struct], ...]] = None,
81 | module: Optional[str] = None,
82 | namespace: Optional[Dict[str, Any]] = None,
83 | tag: Union[None, bool, str, int, Callable[[str], Union[str, int]]] = None,
84 | tag_field: Union[None, str] = None,
85 | rename: Union[
86 | None,
87 | Literal["lower", "upper", "camel", "pascal", "kebab"],
88 | Callable[[str], Optional[str]],
89 | Mapping[str, str],
90 | ] = None,
91 | omit_defaults: bool = False,
92 | forbid_unknown_fields: bool = False,
93 | frozen: bool = False,
94 | eq: bool = True,
95 | order: bool = False,
96 | kw_only: bool = False,
97 | repr_omit_defaults: bool = False,
98 | array_like: bool = False,
99 | gc: bool = True,
100 | weakref: bool = False,
101 | dict: bool = False,
102 | cache_hash: bool = False,
103 | ) -> Type[Struct]: ...
104 |
105 | # Lie and say `Raw` is a subclass of `bytes`, so mypy will accept it in most
106 | # places where an object that implements the buffer protocol is valid
107 | class Raw(bytes):
108 | @overload
109 | def __new__(cls) -> "Raw": ...
110 | @overload
111 | def __new__(cls, msg: Union[Buffer, str]) -> "Raw": ...
112 | def copy(self) -> "Raw": ...
113 |
114 | class Meta:
115 | def __init__(
116 | self,
117 | *,
118 | gt: Union[int, float, None] = None,
119 | ge: Union[int, float, None] = None,
120 | lt: Union[int, float, None] = None,
121 | le: Union[int, float, None] = None,
122 | multiple_of: Union[int, float, None] = None,
123 | pattern: Union[str, None] = None,
124 | min_length: Union[int, None] = None,
125 | max_length: Union[int, None] = None,
126 | tz: Union[bool, None] = None,
127 | title: Union[str, None] = None,
128 | description: Union[str, None] = None,
129 | examples: Union[list, None] = None,
130 | extra_json_schema: Union[dict, None] = None,
131 | extra: Union[dict, None] = None,
132 | ): ...
133 | gt: Final[Union[int, float, None]]
134 | ge: Final[Union[int, float, None]]
135 | lt: Final[Union[int, float, None]]
136 | le: Final[Union[int, float, None]]
137 | multiple_of: Final[Union[int, float, None]]
138 | pattern: Final[Union[str, None]]
139 | min_length: Final[Union[int, None]]
140 | max_length: Final[Union[int, None]]
141 | tz: Final[Union[bool, None]]
142 | title: Final[Union[str, None]]
143 | description: Final[Union[str, None]]
144 | examples: Final[Union[list, None]]
145 | extra_json_schema: Final[Union[dict, None]]
146 | extra: Final[Union[dict, None]]
147 | def __rich_repr__(self) -> Iterable[Tuple[str, Any]]: ...
148 |
149 | def to_builtins(
150 | obj: Any,
151 | *,
152 | str_keys: bool = False,
153 | builtin_types: Union[Iterable[type], None] = None,
154 | enc_hook: Optional[Callable[[Any], Any]] = None,
155 | order: Literal[None, "deterministic", "sorted"] = None,
156 | ) -> Any: ...
157 | @overload
158 | def convert(
159 | obj: Any,
160 | type: Type[T],
161 | *,
162 | strict: bool = True,
163 | from_attributes: bool = False,
164 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
165 | builtin_types: Union[Iterable[type], None] = None,
166 | str_keys: bool = False,
167 | ) -> T: ...
168 | @overload
169 | def convert(
170 | obj: Any,
171 | type: Any,
172 | *,
173 | strict: bool = True,
174 | from_attributes: bool = False,
175 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
176 | builtin_types: Union[Iterable[type], None] = None,
177 | str_keys: bool = False,
178 | ) -> Any: ...
179 |
180 | class MsgspecError(Exception): ...
181 | class EncodeError(MsgspecError): ...
182 | class DecodeError(MsgspecError): ...
183 | class ValidationError(DecodeError): ...
184 |
185 | __version__: str
186 |
--------------------------------------------------------------------------------
/msgspec/common.h:
--------------------------------------------------------------------------------
1 | #ifndef MS_COMMON_H
2 | #define MS_COMMON_H
3 |
4 | #ifdef __GNUC__
5 | #define MS_LIKELY(pred) __builtin_expect(!!(pred), 1)
6 | #define MS_UNLIKELY(pred) __builtin_expect(!!(pred), 0)
7 | #else
8 | #define MS_LIKELY(pred) (pred)
9 | #define MS_UNLIKELY(pred) (pred)
10 | #endif
11 |
12 | #ifdef __GNUC__
13 | #define MS_INLINE __attribute__((always_inline)) inline
14 | #define MS_NOINLINE __attribute__((noinline))
15 | #elif defined(_MSC_VER)
16 | #define MS_INLINE __forceinline
17 | #define MS_NOINLINE __declspec(noinline)
18 | #else
19 | #define MS_INLINE inline
20 | #define MS_NOINLINE
21 | #endif
22 |
23 | #endif
24 |
--------------------------------------------------------------------------------
/msgspec/json.py:
--------------------------------------------------------------------------------
1 | from ._core import (
2 | JSONDecoder as Decoder,
3 | JSONEncoder as Encoder,
4 | json_decode as decode,
5 | json_encode as encode,
6 | json_format as format,
7 | )
8 | from ._json_schema import schema, schema_components
9 |
--------------------------------------------------------------------------------
/msgspec/json.pyi:
--------------------------------------------------------------------------------
1 | from collections.abc import Iterable
2 | from typing import (
3 | Any,
4 | Callable,
5 | Dict,
6 | Generic,
7 | Iterable,
8 | Literal,
9 | Optional,
10 | Tuple,
11 | Type,
12 | TypeVar,
13 | Union,
14 | overload,
15 | )
16 |
17 | from typing_extensions import Buffer
18 |
19 | T = TypeVar("T")
20 |
21 | enc_hook_sig = Optional[Callable[[Any], Any]]
22 | dec_hook_sig = Optional[Callable[[type, Any], Any]]
23 | float_hook_sig = Optional[Callable[[str], Any]]
24 | schema_hook_sig = Optional[Callable[[type], dict[str, Any]]]
25 |
26 | class Encoder:
27 | enc_hook: enc_hook_sig
28 | decimal_format: Literal["string", "number"]
29 | uuid_format: Literal["canonical", "hex"]
30 | order: Literal[None, "deterministic", "sorted"]
31 |
32 | def __init__(
33 | self,
34 | *,
35 | enc_hook: enc_hook_sig = None,
36 | decimal_format: Literal["string", "number"] = "string",
37 | uuid_format: Literal["canonical", "hex"] = "canonical",
38 | order: Literal[None, "deterministic", "sorted"] = None,
39 | ): ...
40 | def encode(self, obj: Any, /) -> bytes: ...
41 | def encode_lines(self, items: Iterable, /) -> bytes: ...
42 | def encode_into(
43 | self, obj: Any, buffer: bytearray, offset: Optional[int] = 0, /
44 | ) -> None: ...
45 |
46 | class Decoder(Generic[T]):
47 | type: Type[T]
48 | strict: bool
49 | dec_hook: dec_hook_sig
50 | float_hook: float_hook_sig
51 |
52 | @overload
53 | def __init__(
54 | self: Decoder[Any],
55 | *,
56 | strict: bool = True,
57 | dec_hook: dec_hook_sig = None,
58 | float_hook: float_hook_sig = None,
59 | ) -> None: ...
60 | @overload
61 | def __init__(
62 | self: Decoder[T],
63 | type: Type[T] = ...,
64 | *,
65 | strict: bool = True,
66 | dec_hook: dec_hook_sig = None,
67 | float_hook: float_hook_sig = None,
68 | ) -> None: ...
69 | @overload
70 | def __init__(
71 | self: Decoder[Any],
72 | type: Any = ...,
73 | *,
74 | strict: bool = True,
75 | dec_hook: dec_hook_sig = None,
76 | float_hook: float_hook_sig = None,
77 | ) -> None: ...
78 | def decode(self, buf: Union[Buffer, str], /) -> T: ...
79 | def decode_lines(self, buf: Union[Buffer, str], /) -> list[T]: ...
80 |
81 | @overload
82 | def decode(
83 | buf: Union[Buffer, str],
84 | /,
85 | *,
86 | strict: bool = True,
87 | dec_hook: dec_hook_sig = None,
88 | ) -> Any: ...
89 | @overload
90 | def decode(
91 | buf: Union[Buffer, str],
92 | /,
93 | *,
94 | type: Type[T] = ...,
95 | strict: bool = True,
96 | dec_hook: dec_hook_sig = None,
97 | ) -> T: ...
98 | @overload
99 | def decode(
100 | buf: Union[Buffer, str],
101 | /,
102 | *,
103 | type: Any = ...,
104 | strict: bool = True,
105 | dec_hook: dec_hook_sig = None,
106 | ) -> Any: ...
107 | def encode(obj: Any, /, *, enc_hook: enc_hook_sig = None, order: Literal[None, "deterministic", "sorted"] = None) -> bytes: ...
108 | def schema(type: Any, *, schema_hook: schema_hook_sig = None) -> Dict[str, Any]: ...
109 | def schema_components(
110 | types: Iterable[Any],
111 | *,
112 | schema_hook: schema_hook_sig = None,
113 | ref_template: str = "#/$defs/{name}"
114 | ) -> Tuple[Tuple[Dict[str, Any], ...], Dict[str, Any]]: ...
115 | @overload
116 | def format(buf: str, /, *, indent: int = 2) -> str: ...
117 | @overload
118 | def format(buf: Buffer, /, *, indent: int = 2) -> bytes: ...
119 |
--------------------------------------------------------------------------------
/msgspec/msgpack.py:
--------------------------------------------------------------------------------
1 | from ._core import (
2 | Ext,
3 | MsgpackDecoder as Decoder,
4 | MsgpackEncoder as Encoder,
5 | msgpack_decode as decode,
6 | msgpack_encode as encode,
7 | )
8 |
--------------------------------------------------------------------------------
/msgspec/msgpack.pyi:
--------------------------------------------------------------------------------
1 | from typing import (
2 | Any,
3 | Callable,
4 | Generic,
5 | Literal,
6 | Optional,
7 | Type,
8 | TypeVar,
9 | Union,
10 | overload,
11 | )
12 |
13 | from typing_extensions import Buffer
14 |
15 |
16 | T = TypeVar("T")
17 |
18 | enc_hook_sig = Optional[Callable[[Any], Any]]
19 | ext_hook_sig = Optional[Callable[[int, memoryview], Any]]
20 | dec_hook_sig = Optional[Callable[[type, Any], Any]]
21 |
22 | class Ext:
23 | code: int
24 | data: Union[bytes, bytearray, memoryview]
25 | def __init__(
26 | self, code: int, data: Union[bytes, bytearray, memoryview]
27 | ) -> None: ...
28 |
29 | class Decoder(Generic[T]):
30 | type: Type[T]
31 | strict: bool
32 | dec_hook: dec_hook_sig
33 | ext_hook: ext_hook_sig
34 | @overload
35 | def __init__(
36 | self: Decoder[Any],
37 | *,
38 | strict: bool = True,
39 | dec_hook: dec_hook_sig = None,
40 | ext_hook: ext_hook_sig = None,
41 | ) -> None: ...
42 | @overload
43 | def __init__(
44 | self: Decoder[T],
45 | type: Type[T] = ...,
46 | *,
47 | strict: bool = True,
48 | dec_hook: dec_hook_sig = None,
49 | ext_hook: ext_hook_sig = None,
50 | ) -> None: ...
51 | @overload
52 | def __init__(
53 | self: Decoder[Any],
54 | type: Any = ...,
55 | *,
56 | strict: bool = True,
57 | dec_hook: dec_hook_sig = None,
58 | ext_hook: ext_hook_sig = None,
59 | ) -> None: ...
60 | def decode(self, buf: Buffer, /) -> T: ...
61 |
62 | class Encoder:
63 | enc_hook: enc_hook_sig
64 | decimal_format: Literal["string", "number"]
65 | uuid_format: Literal["canonical", "hex", "bytes"]
66 | order: Literal[None, "deterministic", "sorted"]
67 | def __init__(
68 | self,
69 | *,
70 | enc_hook: enc_hook_sig = None,
71 | decimal_format: Literal["string", "number"] = "string",
72 | uuid_format: Literal["canonical", "hex", "bytes"] = "canonical",
73 | order: Literal[None, "deterministic", "sorted"] = None,
74 | ): ...
75 | def encode(self, obj: Any, /) -> bytes: ...
76 | def encode_into(
77 | self, obj: Any, buffer: bytearray, offset: Optional[int] = 0, /
78 | ) -> None: ...
79 |
80 | @overload
81 | def decode(
82 | buf: Buffer,
83 | /,
84 | *,
85 | strict: bool = True,
86 | dec_hook: dec_hook_sig = None,
87 | ext_hook: ext_hook_sig = None,
88 | ) -> Any: ...
89 | @overload
90 | def decode(
91 | buf: Buffer,
92 | /,
93 | *,
94 | type: Type[T] = ...,
95 | strict: bool = True,
96 | dec_hook: dec_hook_sig = None,
97 | ext_hook: ext_hook_sig = None,
98 | ) -> T: ...
99 | @overload
100 | def decode(
101 | buf: Buffer,
102 | /,
103 | *,
104 | type: Any = ...,
105 | strict: bool = True,
106 | dec_hook: dec_hook_sig = None,
107 | ext_hook: ext_hook_sig = None,
108 | ) -> Any: ...
109 | def encode(obj: Any, /, *, enc_hook: enc_hook_sig = None, order: Literal[None, "deterministic", "sorted"] = None) -> bytes: ...
110 |
--------------------------------------------------------------------------------
/msgspec/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jcrist/msgspec/bc60e96772c5e8a3babff967d86a9e7dfcdbfb1b/msgspec/py.typed
--------------------------------------------------------------------------------
/msgspec/structs.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Any
4 |
5 | from . import NODEFAULT, Struct, field
6 | from ._core import ( # noqa
7 | Factory as _Factory,
8 | StructConfig,
9 | asdict,
10 | astuple,
11 | replace,
12 | force_setattr,
13 | )
14 | from ._utils import get_class_annotations as _get_class_annotations
15 |
16 | __all__ = (
17 | "FieldInfo",
18 | "StructConfig",
19 | "asdict",
20 | "astuple",
21 | "fields",
22 | "force_setattr",
23 | "replace",
24 | )
25 |
26 |
27 | def __dir__():
28 | return __all__
29 |
30 |
31 | class FieldInfo(Struct):
32 | """A record describing a field in a struct type.
33 |
34 | Parameters
35 | ----------
36 | name: str
37 | The field name as seen by Python code (e.g. ``field_one``).
38 | encode_name: str
39 | The name used when encoding/decoding the field. This may differ if
40 | the field is renamed (e.g. ``fieldOne``).
41 | type: Any
42 | The full field type annotation.
43 | default: Any, optional
44 | A default value for the field. Will be `NODEFAULT` if no default value
45 | is set.
46 | default_factory: Any, optional
47 | A callable that creates a default value for the field. Will be
48 | `NODEFAULT` if no ``default_factory`` is set.
49 | """
50 |
51 | name: str
52 | encode_name: str
53 | type: Any
54 | default: Any = field(default_factory=lambda: NODEFAULT)
55 | default_factory: Any = field(default_factory=lambda: NODEFAULT)
56 |
57 | @property
58 | def required(self) -> bool:
59 | """A helper for checking whether a field is required"""
60 | return self.default is NODEFAULT and self.default_factory is NODEFAULT
61 |
62 |
63 | def fields(type_or_instance: Struct | type[Struct]) -> tuple[FieldInfo]:
64 | """Get information about the fields in a Struct.
65 |
66 | Parameters
67 | ----------
68 | type_or_instance:
69 | A struct type or instance.
70 |
71 | Returns
72 | -------
73 | tuple[FieldInfo]
74 | """
75 | if isinstance(type_or_instance, Struct):
76 | annotated_cls = cls = type(type_or_instance)
77 | else:
78 | annotated_cls = type_or_instance
79 | cls = getattr(type_or_instance, "__origin__", type_or_instance)
80 | if not (isinstance(cls, type) and issubclass(cls, Struct)):
81 | raise TypeError("Must be called with a struct type or instance")
82 |
83 | hints = _get_class_annotations(annotated_cls)
84 | npos = len(cls.__struct_fields__) - len(cls.__struct_defaults__)
85 | fields = []
86 | for name, encode_name, default_obj in zip(
87 | cls.__struct_fields__,
88 | cls.__struct_encode_fields__,
89 | (NODEFAULT,) * npos + cls.__struct_defaults__,
90 | ):
91 | default = default_factory = NODEFAULT
92 | if isinstance(default_obj, _Factory):
93 | default_factory = default_obj.factory
94 | elif default_obj is not NODEFAULT:
95 | default = default_obj
96 |
97 | field = FieldInfo(
98 | name=name,
99 | encode_name=encode_name,
100 | type=hints[name],
101 | default=default,
102 | default_factory=default_factory,
103 | )
104 | fields.append(field)
105 |
106 | return tuple(fields)
107 |
--------------------------------------------------------------------------------
/msgspec/structs.pyi:
--------------------------------------------------------------------------------
1 | from typing import Any, TypeVar, Union
2 |
3 | from . import NODEFAULT, Struct
4 |
5 | S = TypeVar("S", bound=Struct)
6 |
7 | def replace(struct: S, /, **changes: Any) -> S: ...
8 | def asdict(struct: Struct) -> dict[str, Any]: ...
9 | def astuple(struct: Struct) -> tuple[Any, ...]: ...
10 | def force_setattr(struct: Struct, name: str, value: Any) -> None: ...
11 |
12 | class StructConfig:
13 | frozen: bool
14 | eq: bool
15 | order: bool
16 | array_like: bool
17 | gc: bool
18 | repr_omit_defaults: bool
19 | omit_defaults: bool
20 | forbid_unknown_fields: bool
21 | weakref: bool
22 | dict: bool
23 | cache_hash: bool
24 | tag: Union[str, int, None]
25 | tag_field: Union[str, None]
26 |
27 | class FieldInfo(Struct):
28 | name: str
29 | encode_name: str
30 | type: Any
31 | default: Any = NODEFAULT
32 | default_factory: Any = NODEFAULT
33 |
34 | @property
35 | def required(self) -> bool: ...
36 |
37 | def fields(type_or_instance: Struct | type[Struct]) -> tuple[FieldInfo]: ...
38 |
--------------------------------------------------------------------------------
/msgspec/toml.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import datetime as _datetime
4 | from typing import TYPE_CHECKING, overload, TypeVar, Any
5 |
6 | from . import (
7 | DecodeError as _DecodeError,
8 | convert as _convert,
9 | to_builtins as _to_builtins,
10 | )
11 |
12 | if TYPE_CHECKING:
13 | from typing import Callable, Optional, Type, Union, Literal
14 | from typing_extensions import Buffer
15 |
16 |
17 | __all__ = ("encode", "decode")
18 |
19 |
20 | def __dir__():
21 | return __all__
22 |
23 |
24 | def _import_tomllib():
25 | try:
26 | import tomllib # type: ignore
27 |
28 | return tomllib
29 | except ImportError:
30 | pass
31 |
32 | try:
33 | import tomli # type: ignore
34 |
35 | return tomli
36 | except ImportError:
37 | raise ImportError(
38 | "`msgspec.toml.decode` requires `tomli` be installed.\n\n"
39 | "Please either `pip` or `conda` install it as follows:\n\n"
40 | " $ python -m pip install tomli # using pip\n"
41 | " $ conda install tomli # or using conda"
42 | ) from None
43 |
44 |
45 | def _import_tomli_w():
46 | try:
47 | import tomli_w # type: ignore
48 |
49 | return tomli_w
50 | except ImportError:
51 | raise ImportError(
52 | "`msgspec.toml.encode` requires `tomli_w` be installed.\n\n"
53 | "Please either `pip` or `conda` install it as follows:\n\n"
54 | " $ python -m pip install tomli_w # using pip\n"
55 | " $ conda install tomli_w # or using conda"
56 | ) from None
57 |
58 |
59 | def encode(
60 | obj: Any,
61 | *,
62 | enc_hook: Optional[Callable[[Any], Any]] = None,
63 | order: Literal[None, "deterministic", "sorted"] = None,
64 | ) -> bytes:
65 | """Serialize an object as TOML.
66 |
67 | Parameters
68 | ----------
69 | obj : Any
70 | The object to serialize.
71 | enc_hook : callable, optional
72 | A callable to call for objects that aren't supported msgspec types.
73 | Takes the unsupported object and should return a supported object, or
74 | raise a ``NotImplementedError`` if unsupported.
75 | order : {None, 'deterministic', 'sorted'}, optional
76 | The ordering to use when encoding unordered compound types.
77 |
78 | - ``None``: All objects are encoded in the most efficient manner
79 | matching their in-memory representations. The default.
80 | - `'deterministic'`: Unordered collections (sets, dicts) are sorted to
81 | ensure a consistent output between runs. Useful when
82 | comparison/hashing of the encoded binary output is necessary.
83 | - `'sorted'`: Like `'deterministic'`, but *all* object-like types
84 | (structs, dataclasses, ...) are also sorted by field name before
85 | encoding. This is slower than `'deterministic'`, but may produce more
86 | human-readable output.
87 |
88 | Returns
89 | -------
90 | data : bytes
91 | The serialized object.
92 |
93 | See Also
94 | --------
95 | decode
96 | """
97 | toml = _import_tomli_w()
98 | msg = _to_builtins(
99 | obj,
100 | builtin_types=(_datetime.datetime, _datetime.date, _datetime.time),
101 | str_keys=True,
102 | enc_hook=enc_hook,
103 | order=order,
104 | )
105 | return toml.dumps(msg).encode("utf-8")
106 |
107 |
108 | T = TypeVar("T")
109 |
110 |
111 | @overload
112 | def decode(
113 | buf: Union[Buffer, str],
114 | *,
115 | strict: bool = True,
116 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
117 | ) -> Any:
118 | pass
119 |
120 |
121 | @overload
122 | def decode(
123 | buf: Union[Buffer, str],
124 | *,
125 | type: Type[T] = ...,
126 | strict: bool = True,
127 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
128 | ) -> T:
129 | pass
130 |
131 |
132 | @overload
133 | def decode(
134 | buf: Union[Buffer, str],
135 | *,
136 | type: Any = ...,
137 | strict: bool = True,
138 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
139 | ) -> Any:
140 | pass
141 |
142 |
143 | def decode(buf, *, type=Any, strict=True, dec_hook=None):
144 | """Deserialize an object from TOML.
145 |
146 | Parameters
147 | ----------
148 | buf : bytes-like or str
149 | The message to decode.
150 | type : type, optional
151 | A Python type (in type annotation form) to decode the object as. If
152 | provided, the message will be type checked and decoded as the specified
153 | type. Defaults to `Any`, in which case the message will be decoded
154 | using the default TOML types.
155 | strict : bool, optional
156 | Whether type coercion rules should be strict. Setting to False enables
157 | a wider set of coercion rules from string to non-string types for all
158 | values. Default is True.
159 | dec_hook : callable, optional
160 | An optional callback for handling decoding custom types. Should have
161 | the signature ``dec_hook(type: Type, obj: Any) -> Any``, where ``type``
162 | is the expected message type, and ``obj`` is the decoded representation
163 | composed of only basic TOML types. This hook should transform ``obj``
164 | into type ``type``, or raise a ``NotImplementedError`` if unsupported.
165 |
166 | Returns
167 | -------
168 | obj : Any
169 | The deserialized object.
170 |
171 | See Also
172 | --------
173 | encode
174 | """
175 | toml = _import_tomllib()
176 | if isinstance(buf, str):
177 | str_buf = buf
178 | elif isinstance(buf, (bytes, bytearray)):
179 | str_buf = buf.decode("utf-8")
180 | else:
181 | # call `memoryview` first, since `bytes(1)` is actually valid
182 | str_buf = bytes(memoryview(buf)).decode("utf-8")
183 | try:
184 | obj = toml.loads(str_buf)
185 | except toml.TOMLDecodeError as exc:
186 | raise _DecodeError(str(exc)) from None
187 |
188 | if type is Any:
189 | return obj
190 | return _convert(
191 | obj,
192 | type,
193 | builtin_types=(_datetime.datetime, _datetime.date, _datetime.time),
194 | str_keys=True,
195 | strict=strict,
196 | dec_hook=dec_hook,
197 | )
198 |
--------------------------------------------------------------------------------
/msgspec/yaml.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import datetime as _datetime
4 | from typing import TYPE_CHECKING, overload, TypeVar, Any
5 |
6 | from . import (
7 | DecodeError as _DecodeError,
8 | convert as _convert,
9 | to_builtins as _to_builtins,
10 | )
11 |
12 | if TYPE_CHECKING:
13 | from typing import Callable, Optional, Type, Union, Literal
14 | from typing_extensions import Buffer
15 |
16 |
17 | __all__ = ("encode", "decode")
18 |
19 |
20 | def __dir__():
21 | return __all__
22 |
23 |
24 | def _import_pyyaml(name):
25 | try:
26 | import yaml # type: ignore
27 | except ImportError:
28 | raise ImportError(
29 | f"`msgspec.yaml.{name}` requires PyYAML be installed.\n\n"
30 | "Please either `pip` or `conda` install it as follows:\n\n"
31 | " $ python -m pip install pyyaml # using pip\n"
32 | " $ conda install pyyaml # or using conda"
33 | ) from None
34 | else:
35 | return yaml
36 |
37 |
38 | def encode(
39 | obj: Any,
40 | *,
41 | enc_hook: Optional[Callable[[Any], Any]] = None,
42 | order: Literal[None, "deterministic", "sorted"] = None,
43 | ) -> bytes:
44 | """Serialize an object as YAML.
45 |
46 | Parameters
47 | ----------
48 | obj : Any
49 | The object to serialize.
50 | enc_hook : callable, optional
51 | A callable to call for objects that aren't supported msgspec types.
52 | Takes the unsupported object and should return a supported object, or
53 | raise a ``NotImplementedError`` if unsupported.
54 | order : {None, 'deterministic', 'sorted'}, optional
55 | The ordering to use when encoding unordered compound types.
56 |
57 | - ``None``: All objects are encoded in the most efficient manner
58 | matching their in-memory representations. The default.
59 | - `'deterministic'`: Unordered collections (sets, dicts) are sorted to
60 | ensure a consistent output between runs. Useful when
61 | comparison/hashing of the encoded binary output is necessary.
62 | - `'sorted'`: Like `'deterministic'`, but *all* object-like types
63 | (structs, dataclasses, ...) are also sorted by field name before
64 | encoding. This is slower than `'deterministic'`, but may produce more
65 | human-readable output.
66 |
67 | Returns
68 | -------
69 | data : bytes
70 | The serialized object.
71 |
72 | Notes
73 | -----
74 | This function requires that the third-party `PyYAML library
75 | `_ is installed.
76 |
77 | See Also
78 | --------
79 | decode
80 | """
81 | yaml = _import_pyyaml("encode")
82 | # Use the C extension if available
83 | Dumper = getattr(yaml, "CSafeDumper", yaml.SafeDumper)
84 |
85 | return yaml.dump_all(
86 | [
87 | _to_builtins(
88 | obj,
89 | builtin_types=(_datetime.datetime, _datetime.date),
90 | enc_hook=enc_hook,
91 | order=order,
92 | )
93 | ],
94 | encoding="utf-8",
95 | Dumper=Dumper,
96 | allow_unicode=True,
97 | sort_keys=False,
98 | )
99 |
100 |
101 | T = TypeVar("T")
102 |
103 |
104 | @overload
105 | def decode(
106 | buf: Union[Buffer, str],
107 | *,
108 | strict: bool = True,
109 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
110 | ) -> Any:
111 | pass
112 |
113 |
114 | @overload
115 | def decode(
116 | buf: Union[bytes, str],
117 | *,
118 | type: Type[T] = ...,
119 | strict: bool = True,
120 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
121 | ) -> T:
122 | pass
123 |
124 |
125 | @overload
126 | def decode(
127 | buf: Union[bytes, str],
128 | *,
129 | type: Any = ...,
130 | strict: bool = True,
131 | dec_hook: Optional[Callable[[type, Any], Any]] = None,
132 | ) -> Any:
133 | pass
134 |
135 |
136 | def decode(buf, *, type=Any, strict=True, dec_hook=None):
137 | """Deserialize an object from YAML.
138 |
139 | Parameters
140 | ----------
141 | buf : bytes-like or str
142 | The message to decode.
143 | type : type, optional
144 | A Python type (in type annotation form) to decode the object as. If
145 | provided, the message will be type checked and decoded as the specified
146 | type. Defaults to `Any`, in which case the message will be decoded
147 | using the default YAML types.
148 | strict : bool, optional
149 | Whether type coercion rules should be strict. Setting to False enables
150 | a wider set of coercion rules from string to non-string types for all
151 | values. Default is True.
152 | dec_hook : callable, optional
153 | An optional callback for handling decoding custom types. Should have
154 | the signature ``dec_hook(type: Type, obj: Any) -> Any``, where ``type``
155 | is the expected message type, and ``obj`` is the decoded representation
156 | composed of only basic YAML types. This hook should transform ``obj``
157 | into type ``type``, or raise a ``NotImplementedError`` if unsupported.
158 |
159 | Returns
160 | -------
161 | obj : Any
162 | The deserialized object.
163 |
164 | Notes
165 | -----
166 | This function requires that the third-party `PyYAML library
167 | `_ is installed.
168 |
169 | See Also
170 | --------
171 | encode
172 | """
173 | yaml = _import_pyyaml("decode")
174 | # Use the C extension if available
175 | Loader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
176 | if not isinstance(buf, (str, bytes)):
177 | # call `memoryview` first, since `bytes(1)` is actually valid
178 | buf = bytes(memoryview(buf))
179 | try:
180 | obj = yaml.load(buf, Loader)
181 | except yaml.YAMLError as exc:
182 | raise _DecodeError(str(exc)) from None
183 |
184 | if type is Any:
185 | return obj
186 | return _convert(
187 | obj,
188 | type,
189 | builtin_types=(_datetime.datetime, _datetime.date),
190 | strict=strict,
191 | dec_hook=dec_hook,
192 | )
193 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.ruff]
2 | exclude = [
3 | "*.pyi",
4 | "__init__.py",
5 | "_version.py",
6 | "versioneer.py",
7 | "basic_typing_examples.py",
8 | "json.py",
9 | "msgpack.py",
10 | "test_JSONTestSuite.py",
11 | "conf.py",
12 | ]
13 | line-length = 88
14 |
15 | [tool.ruff.lint]
16 | ignore = [
17 | "E721", # Comparing types instead of isinstance
18 | "E741", # Ambiguous variable names
19 | "E501", # Conflicts with ruff format
20 | "W191", # Conflicts with ruff format
21 | ]
22 | select = [
23 | "E", # PEP8 Errors
24 | "F", # Pyflakes
25 | "W", # PEP8 Warnings
26 | ]
27 |
28 | [tool.ruff.lint.isort]
29 | combine-as-imports = true
30 |
--------------------------------------------------------------------------------
/scripts/generate_atof_consts.py:
--------------------------------------------------------------------------------
1 | """This script generates msgspec/atof_consts.h"""
2 |
3 | import math
4 | import os
5 | import textwrap
6 |
7 |
8 | def gen_hpd_tables():
9 | log2log10 = math.log(2) / math.log(10)
10 | shifts = ["0x0000"]
11 | powers = []
12 | for i in range(1, 61):
13 | offset = len(powers)
14 | assert offset <= 0x07FF
15 | num_new_digits = int(log2log10 * float(i)) + 1
16 | assert num_new_digits <= 31
17 | code = (num_new_digits << 11) | offset
18 | p = str(5**i)
19 | powers.extend(p)
20 | shifts.append("0x%04X" % code)
21 |
22 | for i in range(61, 65):
23 | shifts.append("0x%04X" % len(powers))
24 |
25 | n_shifts = len(shifts)
26 | n_powers = len(powers)
27 | assert n_powers <= 0x07FF
28 |
29 | shifts_str = "\n".join(textwrap.wrap(", ".join(shifts), width=78))
30 | powers_str = "\n".join(textwrap.wrap(", ".join(powers), width=78))
31 |
32 | return n_shifts, shifts_str, n_powers, powers_str
33 |
34 |
35 | def gen_row(e):
36 | z = 1 << 2048
37 | if e >= 0:
38 | exp = 10**e
39 | z = z * exp
40 | else:
41 | exp = 10 ** (-e)
42 | z = z // exp
43 |
44 | n = -2048
45 |
46 | while z >= (1 << 128):
47 | z = z >> 1
48 | n += 1
49 |
50 | h = hex(z)[2:]
51 | assert len(h) == 32
52 |
53 | approx_n = ((217706 * e) >> 16) + 1087
54 | biased_n = 1214 + n
55 |
56 | assert approx_n == biased_n
57 |
58 | return "{0x%s, 0x%s}, // 1e%-04d" % (h[16:], h[:16], e)
59 |
60 |
61 | table_rows = [gen_row(e) for e in range(-307, 289)]
62 |
63 | f64_powers = [f"1e{i}" for i in range(23)]
64 |
65 | n_shifts, shifts, n_powers, powers = gen_hpd_tables()
66 |
67 | text = """\
68 | /* DO NOT EDIT - generated by scripts/generate_atof_consts.py */
69 |
70 | #ifndef MSGSPEC_ATOF_CONSTS_H
71 | #define MSGSPEC_ATOF_CONSTS_H
72 |
73 | static const uint64_t ms_atof_powers_of_10[%d][2] = {
74 | %s
75 | };
76 |
77 | static const double ms_atof_f64_powers_of_10[%d] = {
78 | %s
79 | };
80 |
81 | static const uint16_t ms_atof_left_shift[%d] = {
82 | %s
83 | };
84 |
85 | static const uint8_t ms_atof_powers_of_5[%d] = {
86 | %s
87 | };
88 |
89 | #endif
90 | """ % (
91 | len(table_rows),
92 | "\n".join(table_rows),
93 | len(f64_powers),
94 | "\n".join(textwrap.wrap(", ".join(f64_powers), width=78)),
95 | n_shifts,
96 | shifts,
97 | n_powers,
98 | powers,
99 | )
100 |
101 |
102 | if __name__ == "__main__":
103 | repo = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
104 | path = os.path.join(repo, "msgspec", "atof_consts.h")
105 | with open(path, "wb") as f:
106 | f.write(text.encode("utf-8"))
107 |
--------------------------------------------------------------------------------
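As an aside: each row of `ms_atof_powers_of_10` generated above holds the truncated 128-bit significand of a power of ten, emitted as a (low, high) pair of 64-bit words. A small self-contained check of that invariant, mirroring `gen_row` (an illustrative sketch, not part of the repo):

def significand_128(e):
    # Mirrors gen_row above: scale 2**2048 by 10**e, then normalize the
    # result into the range [2**127, 2**128).
    z = 1 << 2048
    z = z * 10**e if e >= 0 else z // 10**-e
    while z >= (1 << 128):
        z >>= 1
    return z

# 10**0 normalizes to exactly 2**127, i.e. high word 0x8000..., low word 0.
assert significand_128(0) == 0x80000000000000000000000000000000

# 10**1 normalizes to 1.25 * 2**127, the familiar 0xA000... entry found in
# powers-of-ten tables used by float parsers.
assert significand_128(1) == 0xA0000000000000000000000000000000
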
/setup.cfg:
--------------------------------------------------------------------------------
1 | [codespell]
2 | skip=*.py,*.c,*.h
3 |
4 | [coverage:run]
5 | omit =
6 | msgspec/_version.py
7 | tests/basic_typing_examples.py
8 | tests/test_mypy.py
9 | tests/test_pyright.py
10 |
11 | [tool:pytest]
12 | markers =
13 | mypy
14 | pyright
15 | filterwarnings =
16 | error
17 |
18 | [versioneer]
19 | VCS = git
20 | style = pep440
21 | versionfile_source = msgspec/_version.py
22 | versionfile_build = msgspec/_version.py
23 | tag_prefix =
24 | parentdir_prefix = msgspec-
25 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 |
4 | from setuptools import setup
5 | from setuptools.extension import Extension
6 |
7 | import versioneer
8 |
9 | # Check for 32-bit windows builds, which currently aren't supported. We can't
10 | # rely on `platform.architecture` here since users can still run 32-bit python
11 | # builds on 64 bit architectures.
12 | if sys.platform == "win32" and sys.maxsize == (2**31 - 1):
13 | import textwrap
14 |
15 | error = """
16 | ====================================================================
17 | `msgspec` currently doesn't support 32-bit Python windows builds. If
18 | this is important for your use case, please open an issue on GitHub:
19 |
20 | https://github.com/jcrist/msgspec/issues
21 | ====================================================================
22 | """
23 | print(textwrap.dedent(error))
24 | exit(1)
25 |
26 |
27 | SANITIZE = os.environ.get("MSGSPEC_SANITIZE", False)
28 | COVERAGE = os.environ.get("MSGSPEC_COVERAGE", False)
29 | DEBUG = os.environ.get("MSGSPEC_DEBUG", SANITIZE or COVERAGE)
30 |
31 | extra_compile_args = []
32 | extra_link_args = []
33 | if SANITIZE:
34 | extra_compile_args.extend(["-fsanitize=address", "-fsanitize=undefined"])
35 | extra_link_args.extend(["-lasan", "-lubsan"])
36 | if COVERAGE:
37 | extra_compile_args.append("--coverage")
38 | extra_link_args.append("-lgcov")
39 | if DEBUG:
40 | extra_compile_args.extend(["-O0", "-g", "-UNDEBUG"])
41 |
42 | ext_modules = [
43 | Extension(
44 | "msgspec._core",
45 | [os.path.join("msgspec", "_core.c")],
46 | extra_compile_args=extra_compile_args,
47 | extra_link_args=extra_link_args,
48 | )
49 | ]
50 |
51 | yaml_deps = ["pyyaml"]
52 | toml_deps = ['tomli ; python_version < "3.11"', "tomli_w"]
53 | doc_deps = ["sphinx", "furo", "sphinx-copybutton", "sphinx-design", "ipython"]
54 | test_deps = [
55 | "pytest",
56 | "msgpack",
57 | "attrs",
58 | 'eval-type-backport ; python_version < "3.10"',
59 | *yaml_deps,
60 | *toml_deps,
61 | ]
62 | dev_deps = ["pre-commit", "coverage", "mypy", "pyright", *doc_deps, *test_deps]
63 |
64 | extras_require = {
65 | "yaml": yaml_deps,
66 | "toml": toml_deps,
67 | "doc": doc_deps,
68 | "test": test_deps,
69 | "dev": dev_deps,
70 | }
71 |
72 | setup(
73 | name="msgspec",
74 | version=versioneer.get_version(),
75 | cmdclass=versioneer.get_cmdclass(),
76 | maintainer="Jim Crist-Harif",
77 | maintainer_email="jcristharif@gmail.com",
78 | url="https://jcristharif.com/msgspec/",
79 | project_urls={
80 | "Documentation": "https://jcristharif.com/msgspec/",
81 | "Source": "https://github.com/jcrist/msgspec/",
82 | "Issue Tracker": "https://github.com/jcrist/msgspec/issues",
83 | },
84 | description=(
85 | "A fast serialization and validation library, with builtin support for "
86 | "JSON, MessagePack, YAML, and TOML."
87 | ),
88 | keywords="JSON msgpack MessagePack TOML YAML serialization validation schema",
89 | classifiers=[
90 | "License :: OSI Approved :: BSD License",
91 | "Development Status :: 4 - Beta",
92 | "Programming Language :: Python :: 3.9",
93 | "Programming Language :: Python :: 3.10",
94 | "Programming Language :: Python :: 3.11",
95 | "Programming Language :: Python :: 3.12",
96 | "Programming Language :: Python :: 3.13",
97 | ],
98 | extras_require=extras_require,
99 | license="BSD",
100 | packages=["msgspec"],
101 | package_data={"msgspec": ["py.typed", "*.pyi"]},
102 | ext_modules=ext_modules,
103 | long_description=(
104 | open("README.md", encoding="utf-8").read()
105 | if os.path.exists("README.md")
106 | else ""
107 | ),
108 | long_description_content_type="text/markdown",
109 | python_requires=">=3.9",
110 | zip_safe=False,
111 | )
112 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 | import string
4 | import struct
5 |
6 | import pytest
7 |
8 |
9 | class Rand:
10 | """Random source, pulled out into fixture with repr so the seed is
11 | displayed on failing tests"""
12 |
13 | def __init__(self, seed=0):
14 | self.seed = seed or random.randint(0, 2**32 - 1)
15 | self.rand = random.Random(self.seed)
16 |
17 | def __repr__(self):
18 | return f"Rand({self.seed})"
19 |
20 | def str(self, n, m=0):
21 | """
22 | str(n) -> random string of length `n`.
23 | str(n, m) -> random string between lengths `n` & `m`
24 | """
25 | if m:
26 | n = self.rand.randint(n, m)
27 | return "".join(self.rand.choices(string.ascii_letters, k=n))
28 |
29 | def bytes(self, n):
30 | """random bytes of length `n`"""
31 | return self.rand.getrandbits(8 * n).to_bytes(n, "little")
32 |
33 | def float(self):
34 | """random finite float"""
35 | while True:
36 | dbytes = self.rand.getrandbits(64).to_bytes(8, "big")
37 | x = struct.unpack("!d", dbytes)[0]
38 | if math.isfinite(x):
39 | return x
40 |
41 | def shuffle(self, obj):
42 | """random shuffle"""
43 | self.rand.shuffle(obj)
44 |
45 |
46 | @pytest.fixture
47 | def rand():
48 | yield Rand()
49 |
--------------------------------------------------------------------------------
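A hypothetical test using the `rand` fixture above might look like the following (illustrative only, not from the repo); because the fixture reprs as `Rand(<seed>)`, a failure report includes the seed needed to reproduce the run:

def test_shuffle_roundtrip(rand):
    # Shuffling must only permute, never drop or duplicate, elements.
    items = list(range(100))
    shuffled = items.copy()
    rand.shuffle(shuffled)
    assert sorted(shuffled) == items

    # str(n, m) yields a random length in [n, m]; bytes(n) yields n bytes.
    assert 5 <= len(rand.str(5, 10)) <= 10
    assert len(rand.bytes(16)) == 16
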
/tests/test_cpylint.py:
--------------------------------------------------------------------------------
1 | """This file contains some simple linters for catching some common but easy to
2 | catch cpython capi bugs. These are naive string-munging checks, if you write
3 | some code that _is_ correct but is failing, add `/* cpylint-ignore */` on the
4 | failing source line and it will be ignored."""
5 |
6 | import os
7 |
8 | import pytest
9 |
10 | MSGSPEC_CORE_PATH = os.path.join(
11 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "msgspec", "_core.c"
12 | )
13 |
14 |
15 | @pytest.fixture
16 | def source():
17 | with open(MSGSPEC_CORE_PATH, "r") as f:
18 | return f.read().splitlines()
19 |
20 |
21 | def test_recursive_call_blocks(source):
22 | """Ensure all code that calls `Py_EnterRecursiveCall` doesn't return
23 | without calling `Py_LeaveRecursiveCall`"""
24 |
25 | in_block = False
26 | for lineno, line in enumerate(source, 1):
27 | if "cpylint-ignore" in line:
28 | continue
29 |
30 | if "Py_EnterRecursiveCall" in line:
31 | in_block = True
32 | elif "return " in line and in_block:
33 | raise ValueError(
34 | f"return without calling Py_LeaveRecursiveCall on line {lineno}"
35 | )
36 | elif "Py_LeaveRecursiveCall" in line:
37 | in_block = False
38 |
39 |
40 | def test_recursive_repr_blocks(source):
41 | """Ensure all code that calls `Py_ReprEnter` doesn't return without
42 | calling `Py_ReprLeave`"""
43 | in_block = False
44 | for lineno, line in enumerate(source, 1):
45 | if "cpylint-ignore" in line:
46 | continue
47 |
48 | if "Py_ReprEnter" in line:
49 | in_block = True
50 | elif "return " in line and in_block:
51 | raise ValueError(f"return without calling Py_ReprLeave on line {lineno}")
52 | elif "Py_ReprLeave" in line:
53 | in_block = False
54 |
--------------------------------------------------------------------------------
/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import sys
3 |
4 | import pytest
5 |
6 | import msgspec
7 |
8 |
9 | @pytest.fixture(params=["json", "msgpack"])
10 | def proto(request):
11 | if request.param == "json":
12 | return msgspec.json
13 | elif request.param == "msgpack":
14 | return msgspec.msgpack
15 |
16 |
17 | def test_decode_naive_datetime(proto):
18 | """See https://github.com/jcrist/msgspec/issues/408"""
19 | dt = datetime.datetime(2001, 2, 3, 4, 5, 6, 7)
20 | msg = proto.encode(dt)
21 |
22 | start = sys.getrefcount(None)
23 | for _ in range(1000):
24 | proto.decode(msg, type=datetime.datetime)
25 | end = sys.getrefcount(None)
26 | assert start == end
27 |
28 |
29 | def test_decode_naive_time(proto):
30 | """See https://github.com/jcrist/msgspec/issues/408"""
31 | dt = datetime.time(12, 20)
32 | msg = proto.encode(dt)
33 |
34 | start = sys.getrefcount(None)
35 | for _ in range(1000):
36 | proto.decode(msg, type=datetime.time)
37 | end = sys.getrefcount(None)
38 | assert start == end
39 |
--------------------------------------------------------------------------------
/tests/test_mypy.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 |
4 | import pytest
5 |
6 | pytestmark = pytest.mark.mypy
7 |
8 | api = pytest.importorskip("mypy.api")
9 |
10 | PATH = os.path.join(os.path.dirname(__file__), "basic_typing_examples.py")
11 |
12 |
13 | def get_lineno_type(line):
14 | assert "revealed type" in line.lower()
15 | _, lineno, msg = line.split(":", 2)
16 | lineno = int(lineno)
17 | pat = re.search("[\"'](.*)[\"']", msg)
18 | typ = pat.groups()[0]
19 | return lineno, typ
20 |
21 |
22 | def test_mypy():
23 | with open(PATH, "r") as fil:
24 | ex_lines = fil.readlines()
25 |
26 | stdout, stderr, code = api.run([PATH])
27 | lines = stdout.splitlines()
28 | for line in lines:
29 | if "revealed type" in line.lower():
30 | lineno, typ = get_lineno_type(line)
31 | check = ex_lines[lineno - 1].split("#")[1].strip()
32 | try:
33 | exec(check, {"typ": typ})
34 | except Exception:
35 | assert (
36 | False
37 | ), f"Failed check at {PATH}:{lineno}: {check!r}, where 'typ' is {typ!r}"
38 | elif "success" not in line.lower():
39 | assert False, line
40 |
--------------------------------------------------------------------------------
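Both this test and `test_pyright.py` further down exercise `tests/basic_typing_examples.py`: for every line on which the type checker reveals a type, the trailing `#` comment is exec'd with the revealed type bound to `typ`. That file is not shown in this dump; a line following the convention would look roughly like this (an illustrative sketch, not copied from the repo):

from typing import reveal_type  # runtime no-op on 3.11+; mypy/pyright special-case it

import msgspec

o = msgspec.json.decode(b"null")
reveal_type(o)  # assert typ == "Any"
# test_mypy/test_pyright parse the reported type, bind it to `typ`, and exec
# the comment text above as the assertion.
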
/tests/test_performance.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 |
3 | import msgspec
4 |
5 | from utils import temp_module
6 |
7 |
8 | def test_process_large_recursive_union():
9 | """
10 | A recursive schema processing perf test from
11 | https://github.com/pydantic/pydantic/issues/8499
12 |
13 | This test is mostly to ensure that processing deeply recursive schemas with
14 | unions succeeds.
15 | """
16 |
17 | def gen_code():
18 | yield "from __future__ import annotations"
19 | yield "from msgspec import Struct"
20 | yield "from typing import Union"
21 |
22 | for i in range(50):
23 | yield textwrap.dedent(
24 | f"""
25 | class Node{i}(Struct, tag='node{i}'):
26 | data: Union[Node, None]
27 | """
28 | )
29 | yield "Node = Union["
30 | for i in range(50):
31 | yield f" Node{i},"
32 | yield "]"
33 |
34 | code = "\n".join(gen_code())
35 |
36 | with temp_module(code) as mod:
37 | dec = msgspec.json.Decoder(mod.Node)
38 |
39 | msg = b"""
40 | {
41 | "type": "node25",
42 | "data": {
43 | "type": "node13",
44 | "data": null
45 | }
46 | }
47 | """
48 |
49 | sol = mod.Node25(mod.Node13(None))
50 |
51 | assert dec.decode(msg) == sol
52 |
--------------------------------------------------------------------------------
/tests/test_pyright.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import subprocess
4 |
5 | import pytest
6 |
7 | pytestmark = pytest.mark.pyright
8 |
9 | pyright = pytest.importorskip("pyright")
10 |
11 | PATH = os.path.join(os.path.dirname(__file__), "basic_typing_examples.py")
12 |
13 |
14 | def test_pyright():
15 | with open(PATH, "r") as fil:
16 | ex_lines = fil.readlines()
17 |
18 | result = pyright.run(PATH, stdout=subprocess.PIPE)
19 | if result.returncode != 0:
20 | assert False, f"Unexpected pyright error:\n{result.stdout}"
21 | for line in result.stdout.decode().splitlines():
22 | try:
23 | _, lineno, _, msg = line.split(":", 3)
24 | except ValueError:
25 | continue
26 | lineno = int(lineno)
27 | pat = re.search("[\"'](.*)[\"']", msg)
28 | typ = pat.groups()[0]
29 | check = ex_lines[lineno - 1].split("#")[1].strip()
30 | try:
31 | exec(check, {"typ": typ})
32 | except Exception:
33 | assert (
34 | False
35 | ), f"Failed check at {PATH}:{lineno}: {check!r}, where 'typ' is {typ!r}"
36 |
--------------------------------------------------------------------------------
/tests/test_raw.py:
--------------------------------------------------------------------------------
1 | import operator
2 | import subprocess
3 | import sys
4 | import textwrap
5 | import weakref
6 |
7 | import pytest
8 |
9 | import msgspec
10 |
11 |
12 | def test_raw_noargs():
13 | r = msgspec.Raw()
14 | assert bytes(r) == b""
15 | assert len(r) == 0
16 | assert not r
17 |
18 |
19 | @pytest.mark.parametrize("type", [bytes, bytearray, memoryview, str])
20 | def test_raw_constructor(type):
21 | msg = "test" if type is str else type(b"test")
22 | r = msgspec.Raw(msg)
23 | assert bytes(r) == b"test"
24 | assert len(r) == 4
25 | assert r
26 |
27 |
28 | def test_raw_constructor_errors():
29 | with pytest.raises(TypeError):
30 | msgspec.Raw(1)
31 |
32 | with pytest.raises(TypeError):
33 | msgspec.Raw(msg=b"test")
34 |
35 | with pytest.raises(TypeError):
36 | msgspec.Raw(b"test", b"extra")
37 |
38 |
39 | def test_raw_from_view():
40 | r = msgspec.Raw(memoryview(b"123456")[:3])
41 | assert bytes(r) == b"123"
42 | assert len(r) == 3
43 | assert r
44 |
45 |
46 | def test_raw_copy():
47 | r = msgspec.Raw(b"test")
48 | c1 = sys.getrefcount(r)
49 | r2 = r.copy()
50 | c2 = sys.getrefcount(r)
51 | assert c1 + 1 == c2
52 | assert r2 is r
53 |
54 | r = msgspec.Raw()
55 | assert r.copy() is r
56 |
57 | m = memoryview(b"test")
58 | ref = weakref.ref(m)
59 | r = msgspec.Raw(m)
60 | del m
61 | # Raw holds a ref
62 | assert ref() is not None
63 | r2 = r.copy()
64 | # Actually copied
65 | assert r2 is not r
66 | assert bytes(r2) == b"test"
67 | # Copy doesn't accidentally release buffer
68 | assert ref() is not None
69 | del r
70 | # Copy doesn't hold a reference to original view
71 | assert ref() is None
72 |
73 |
74 | def test_raw_copy_doesnt_leak():
75 | """See https://github.com/jcrist/msgspec/pull/709"""
76 | script = textwrap.dedent(
77 | """
78 | import msgspec
79 | import tracemalloc
80 |
81 | tracemalloc.start()
82 |
83 | raw = msgspec.Raw(bytearray(1000))
84 | for _ in range(10000):
85 | raw.copy()
86 |
87 | _, peak = tracemalloc.get_traced_memory()
88 | print(peak)
89 | """
90 | )
91 |
92 | output = subprocess.check_output([sys.executable, "-c", script])
93 | peak = int(output.decode().strip())
94 | assert peak < 10_000 # should really be ~2000
95 |
96 |
97 | def test_raw_pickle_bytes():
98 | orig_buffer = b"test"
99 | r = msgspec.Raw(orig_buffer)
100 | o = r.__reduce__()
101 | assert o == (msgspec.Raw, (b"test",))
102 | assert o[1][0] is orig_buffer
103 |
104 |
105 | def test_raw_pickle_str():
106 | orig_buffer = "test"
107 | r = msgspec.Raw(orig_buffer)
108 | o = r.__reduce__()
109 | assert o == (msgspec.Raw, ("test",))
110 | assert o[1][0] is orig_buffer
111 |
112 |
113 | def test_raw_pickle_view():
114 | r = msgspec.Raw(memoryview(b"test")[:3])
115 | o = r.__reduce__()
116 | assert o == (msgspec.Raw, (b"tes",))
117 |
118 |
119 | def test_raw_comparison():
120 | r = msgspec.Raw()
121 | assert r == r
122 | assert not r != r
123 | assert msgspec.Raw() == msgspec.Raw()
124 | assert msgspec.Raw(b"") == msgspec.Raw()
125 | assert not msgspec.Raw(b"") == msgspec.Raw(b"other")
126 | assert msgspec.Raw(b"test") == msgspec.Raw(memoryview(b"testy")[:4])
127 | assert msgspec.Raw(b"test") != msgspec.Raw(b"tesp")
128 | assert msgspec.Raw(b"test") != msgspec.Raw(b"")
129 | assert msgspec.Raw(b"") != msgspec.Raw(b"test")
130 | assert msgspec.Raw() != 1
131 | assert 1 != msgspec.Raw()
132 |
133 | for op in [operator.lt, operator.gt, operator.le, operator.ge]:
134 | with pytest.raises(TypeError):
135 | op(msgspec.Raw(), msgspec.Raw())
136 |
--------------------------------------------------------------------------------
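The tests above focus on `Raw`'s buffer, copy, and pickle semantics. For context, a typical reason `Raw` exists is to defer decoding part of a message; a small illustrative sketch (the `Wrapper` type and payload are assumptions, not from the repo):

import msgspec

class Wrapper(msgspec.Struct):
    kind: str
    payload: msgspec.Raw

msg = b'{"kind": "point", "payload": {"x": 1, "y": 2}}'
w = msgspec.json.decode(msg, type=Wrapper)
assert w.kind == "point"

# The payload bytes are kept verbatim (Raw exposes the buffer protocol) and
# can be decoded later once the concrete type is known.
assert msgspec.json.decode(w.payload, type=dict) == {"x": 1, "y": 2}
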
/tests/test_toml.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import datetime
3 | import enum
4 | import sys
5 | import uuid
6 | from decimal import Decimal
7 | from typing import Dict, FrozenSet, List, Set, Tuple
8 |
9 | import pytest
10 |
11 | import msgspec
12 |
13 | try:
14 | import tomllib
15 | except ImportError:
16 | try:
17 | import tomli as tomllib
18 | except ImportError:
19 | tomllib = None
20 |
21 | try:
22 | import tomli_w
23 | except ImportError:
24 | tomli_w = None
25 |
26 |
27 | needs_decode = pytest.mark.skipif(
28 | tomllib is None, reason="Neither tomllib or tomli are installed"
29 | )
30 | needs_encode = pytest.mark.skipif(tomli_w is None, reason="tomli_w is not installed")
31 |
32 | PY311 = sys.version_info[:2] >= (3, 11)
33 |
34 | UTC = datetime.timezone.utc
35 |
36 |
37 | class ExStruct(msgspec.Struct):
38 | x: int
39 | y: str
40 |
41 |
42 | @dataclasses.dataclass
43 | class ExDataclass:
44 | x: int
45 | y: str
46 |
47 |
48 | class ExEnum(enum.Enum):
49 | one = "one"
50 | two = "two"
51 |
52 |
53 | class ExIntEnum(enum.IntEnum):
54 | one = 1
55 | two = 2
56 |
57 |
58 | def test_module_dir():
59 | assert set(dir(msgspec.toml)) == {"encode", "decode"}
60 |
61 |
62 | @pytest.mark.skipif(PY311, reason="tomllib is builtin in 3.11+")
63 | def test_tomli_not_installed_error(monkeypatch):
64 | monkeypatch.setitem(sys.modules, "tomli", None)
65 |
66 | with pytest.raises(ImportError, match="conda install"):
67 | msgspec.toml.decode("a = 1", type=int)
68 |
69 |
70 | def test_tomli_w_not_installed_error(monkeypatch):
71 | monkeypatch.setitem(sys.modules, "tomli_w", None)
72 |
73 | with pytest.raises(ImportError, match="conda install"):
74 | msgspec.toml.encode({"a": 1})
75 |
76 |
77 | @pytest.mark.parametrize(
78 | "val",
79 | [
80 | True,
81 | False,
82 | 1,
83 | 1.5,
84 | "fizz",
85 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6),
86 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC),
87 | datetime.date(2022, 1, 2),
88 | datetime.time(12, 34),
89 | [1, 2],
90 | {"one": 2},
91 | ],
92 | )
93 | @needs_encode
94 | @needs_decode
95 | def test_roundtrip_any(val):
96 | msg = msgspec.toml.encode({"x": val})
97 | res = msgspec.toml.decode(msg)["x"]
98 | assert res == val
99 |
100 |
101 | @pytest.mark.parametrize(
102 | "val, type",
103 | [
104 | (True, bool),
105 | (False, bool),
106 | (1, int),
107 | (1.5, float),
108 | ("fizz", str),
109 | (b"fizz", bytes),
110 | (b"fizz", bytearray),
111 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6), datetime.datetime),
112 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC), datetime.datetime),
113 | (datetime.date(2022, 1, 2), datetime.date),
114 | (datetime.time(12, 34), datetime.time),
115 | (uuid.uuid4(), uuid.UUID),
116 | (ExEnum.one, ExEnum),
117 | (ExIntEnum.one, ExIntEnum),
118 | ([1, 2], List[int]),
119 | ((1, 2), Tuple[int, ...]),
120 | ({1, 2}, Set[int]),
121 | (frozenset({1, 2}), FrozenSet[int]),
122 | (("one", 2), Tuple[str, int]),
123 | ({"one": 2}, Dict[str, int]),
124 | ({1: "two"}, Dict[int, str]),
125 | (ExStruct(1, "two"), ExStruct),
126 | (ExDataclass(1, "two"), ExDataclass),
127 | ],
128 | )
129 | @needs_encode
130 | @needs_decode
131 | def test_roundtrip_typed(val, type):
132 | msg = msgspec.toml.encode({"x": val})
133 | res = msgspec.toml.decode(msg, type=Dict[str, type])["x"]
134 | assert res == val
135 |
136 |
137 | @needs_encode
138 | def test_encode_output_type():
139 | msg = msgspec.toml.encode({"x": 1})
140 | assert isinstance(msg, bytes)
141 |
142 |
143 | @needs_encode
144 | def test_encode_error():
145 | class Oops:
146 | pass
147 |
148 | with pytest.raises(TypeError, match="Encoding objects of type Oops is unsupported"):
149 | msgspec.toml.encode({"x": Oops()})
150 |
151 |
152 | @needs_encode
153 | @needs_decode
154 | def test_encode_enc_hook():
155 | msg = msgspec.toml.encode({"x": Decimal(1.5)}, enc_hook=str)
156 | assert msgspec.toml.decode(msg) == {"x": "1.5"}
157 |
158 |
159 | @needs_encode
160 | @pytest.mark.parametrize("order", [None, "deterministic"])
161 | def test_encode_order(order):
162 | msg = {"y": 1, "x": ({"n": 1, "m": 2},), "z": [{"b": 1, "a": 2}]}
163 | res = msgspec.toml.encode(msg, order=order)
164 | if order:
165 | sol_msg = {"x": ({"m": 2, "n": 1},), "y": 1, "z": [{"a": 2, "b": 1}]}
166 | else:
167 | sol_msg = msg
168 | sol = tomli_w.dumps(sol_msg).encode("utf-8")
169 | assert res == sol
170 |
171 |
172 | @needs_decode
173 | def test_decode_str_or_bytes_like():
174 | assert msgspec.toml.decode("a = 1") == {"a": 1}
175 | assert msgspec.toml.decode(b"a = 1") == {"a": 1}
176 | assert msgspec.toml.decode(bytearray(b"a = 1")) == {"a": 1}
177 | assert msgspec.toml.decode(memoryview(b"a = 1")) == {"a": 1}
178 | with pytest.raises(TypeError):
179 | msgspec.toml.decode(1)
180 |
181 |
182 | @needs_decode
183 | @pytest.mark.parametrize("msg", [b"{{", b"!!binary 123"])
184 | def test_decode_parse_error(msg):
185 | with pytest.raises(msgspec.DecodeError):
186 | msgspec.toml.decode(msg)
187 |
188 |
189 | @needs_decode
190 | def test_decode_validation_error():
191 | with pytest.raises(msgspec.ValidationError, match="Expected `str`"):
192 | msgspec.toml.decode(b"a = [1, 2, 3]", type=Dict[str, List[str]])
193 |
194 |
195 | @needs_decode
196 | @pytest.mark.parametrize("strict", [True, False])
197 | def test_decode_strict_or_lax(strict):
198 | msg = b"a = ['1', '2']"
199 | typ = Dict[str, List[int]]
200 |
201 | if strict:
202 | with pytest.raises(msgspec.ValidationError, match="Expected `int`"):
203 | msgspec.toml.decode(msg, type=typ, strict=strict)
204 | else:
205 | res = msgspec.toml.decode(msg, type=typ, strict=strict)
206 | assert res == {"a": [1, 2]}
207 |
208 |
209 | @needs_decode
210 | def test_decode_dec_hook():
211 | def dec_hook(typ, val):
212 | if typ is Decimal:
213 | return Decimal(val)
214 | raise TypeError
215 |
216 | res = msgspec.toml.decode("a = '1.5'", type=Dict[str, Decimal], dec_hook=dec_hook)
217 | assert res == {"a": Decimal("1.5")}
218 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from typing import Generic, List, Optional, Set, TypeVar
5 |
6 | import pytest
7 | from utils import temp_module, package_not_installed
8 |
9 | from msgspec._utils import get_class_annotations
10 |
11 | PY310 = sys.version_info[:2] >= (3, 10)
12 |
13 | T = TypeVar("T")
14 | S = TypeVar("S")
15 | U = TypeVar("U")
16 |
17 |
18 | class Base(Generic[T]):
19 | x: T
20 |
21 |
22 | class Base2(Generic[T, S]):
23 | a: T
24 | b: S
25 |
26 |
27 | class TestGetClassAnnotations:
28 | @pytest.mark.parametrize("future_annotations", [False, True])
29 | def test_eval_scopes(self, future_annotations):
30 | header = "from __future__ import annotations" if future_annotations else ""
31 | source = f"""
32 | {header}
33 | STR = str
34 |
35 | class Ex:
36 | LOCAL = float
37 | x: int
38 | y: LOCAL
39 | z: STR
40 | """
41 | with temp_module(source) as mod:
42 | assert get_class_annotations(mod.Ex) == {"x": int, "y": float, "z": str}
43 |
44 | def test_none_to_nonetype(self):
45 | class Ex:
46 | x: None
47 |
48 | assert get_class_annotations(Ex) == {"x": type(None)}
49 |
50 | def test_subclass(self):
51 | class Base:
52 | x: int
53 | y: str
54 |
55 | class Sub(Base):
56 | x: float
57 | z: list
58 |
59 | class Base2:
60 | a: int
61 |
62 | class Sub2(Sub, Base2):
63 | b: float
64 | y: list
65 |
66 | assert get_class_annotations(Base) == {"x": int, "y": str}
67 | assert get_class_annotations(Sub) == {"x": float, "y": str, "z": list}
68 | assert get_class_annotations(Sub2) == {
69 | "x": float,
70 | "y": list,
71 | "z": list,
72 | "a": int,
73 | "b": float,
74 | }
75 |
76 | def test_simple_generic(self):
77 | class Test(Generic[T]):
78 | x: T
79 | y: List[T]
80 | z: int
81 |
82 | assert get_class_annotations(Test) == {"x": T, "y": List[T], "z": int}
83 | assert get_class_annotations(Test[int]) == {"x": int, "y": List[int], "z": int}
84 | assert get_class_annotations(Test[Set[T]]) == {
85 | "x": Set[T],
86 | "y": List[Set[T]],
87 | "z": int,
88 | }
89 |
90 | def test_generic_sub1(self):
91 | class Sub(Base):
92 | y: int
93 |
94 | assert get_class_annotations(Sub) == {"x": T, "y": int}
95 |
96 | def test_generic_sub2(self):
97 | class Sub(Base, Generic[T]):
98 | y: List[T]
99 |
100 | assert get_class_annotations(Sub) == {"x": T, "y": List[T]}
101 | assert get_class_annotations(Sub[int]) == {"x": T, "y": List[int]}
102 |
103 | def test_generic_sub3(self):
104 | class Sub(Base[int], Generic[T]):
105 | y: List[T]
106 |
107 | assert get_class_annotations(Sub) == {"x": int, "y": List[T]}
108 | assert get_class_annotations(Sub[float]) == {"x": int, "y": List[float]}
109 |
110 | def test_generic_sub4(self):
111 | class Sub(Base[T]):
112 | y: List[T]
113 |
114 | assert get_class_annotations(Sub) == {"x": T, "y": List[T]}
115 | assert get_class_annotations(Sub[int]) == {"x": int, "y": List[int]}
116 |
117 | def test_generic_sub5(self):
118 | class Sub(Base[T], Generic[T]):
119 | y: List[T]
120 |
121 | assert get_class_annotations(Sub) == {"x": T, "y": List[T]}
122 | assert get_class_annotations(Sub[int]) == {"x": int, "y": List[int]}
123 |
124 | def test_generic_sub6(self):
125 | class Sub(Base[S]):
126 | y: List[S]
127 |
128 | assert get_class_annotations(Sub) == {"x": S, "y": List[S]}
129 | assert get_class_annotations(Sub[int]) == {"x": int, "y": List[int]}
130 |
131 | def test_generic_sub7(self):
132 | class Sub(Base[List[T]]):
133 | y: Set[T]
134 |
135 | assert get_class_annotations(Sub) == {"x": List[T], "y": Set[T]}
136 | assert get_class_annotations(Sub[int]) == {"x": List[int], "y": Set[int]}
137 |
138 | def test_generic_sub8(self):
139 | class Sub(Base[int], Base2[float, str]):
140 | pass
141 |
142 | assert get_class_annotations(Sub) == {"x": int, "a": float, "b": str}
143 |
144 | def test_generic_sub9(self):
145 | class Sub(Base[U], Base2[List[U], U]):
146 | y: str
147 |
148 | assert get_class_annotations(Sub) == {"y": str, "x": U, "a": List[U], "b": U}
149 | assert get_class_annotations(Sub[int]) == {
150 | "y": str,
151 | "x": int,
152 | "a": List[int],
153 | "b": int,
154 | }
155 |
156 | class Sub2(Sub[int]):
157 | x: list
158 |
159 | assert get_class_annotations(Sub2) == {
160 | "x": list,
161 | "y": str,
162 | "a": List[int],
163 | "b": int,
164 | }
165 |
166 | def test_generic_sub10(self):
167 | class Sub(Base[U], Base2[List[U], U]):
168 | y: str
169 |
170 | class Sub3(Sub[List[T]]):
171 | c: T
172 |
173 | assert get_class_annotations(Sub3) == {
174 | "c": T,
175 | "y": str,
176 | "x": List[T],
177 | "a": List[List[T]],
178 | "b": List[T],
179 | }
180 | assert get_class_annotations(Sub3[int]) == {
181 | "c": int,
182 | "y": str,
183 | "x": List[int],
184 | "a": List[List[int]],
185 | "b": List[int],
186 | }
187 |
188 | def test_generic_sub11(self):
189 | class Sub(Base[int]):
190 | y: float
191 |
192 | class Sub2(Sub, Base[int]):
193 | z: str
194 |
195 | assert get_class_annotations(Sub2) == {"x": int, "y": float, "z": str}
196 |
197 | def test_generic_invalid_parameters(self):
198 | class Invalid:
199 | @property
200 | def __parameters__(self):
201 | pass
202 |
203 | class Sub(Base[Invalid]):
204 | pass
205 |
206 | assert get_class_annotations(Sub) == {"x": Invalid}
207 |
208 | @pytest.mark.skipif(PY310, reason="<3.10 only")
209 | def test_union_backport_not_installed(self):
210 | class Ex:
211 | x: int | None = None
212 |
213 | with package_not_installed("eval_type_backport"):
214 | with pytest.raises(
215 | TypeError, match=r"or install the `eval_type_backport` package."
216 | ):
217 | get_class_annotations(Ex)
218 |
219 | @pytest.mark.skipif(PY310, reason="<3.10 only")
220 | def test_union_backport_installed(self):
221 | class Ex:
222 | x: int | None = None
223 |
224 | pytest.importorskip("eval_type_backport")
225 |
226 | assert get_class_annotations(Ex) == {"x": Optional[int]}
227 |
--------------------------------------------------------------------------------
/tests/test_yaml.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import datetime
3 | import enum
4 | import sys
5 | import uuid
6 | from decimal import Decimal
7 | from typing import Dict, FrozenSet, List, Set, Tuple
8 |
9 | import pytest
10 |
11 | import msgspec
12 |
13 | try:
14 | import yaml # noqa
15 | except ImportError:
16 | pytestmark = pytest.mark.skip(reason="PyYAML is not installed")
17 |
18 |
19 | UTC = datetime.timezone.utc
20 |
21 |
22 | class ExStruct(msgspec.Struct):
23 | x: int
24 | y: str
25 |
26 |
27 | @dataclasses.dataclass
28 | class ExDataclass:
29 | x: int
30 | y: str
31 |
32 |
33 | class ExEnum(enum.Enum):
34 | one = "one"
35 | two = "two"
36 |
37 |
38 | class ExIntEnum(enum.IntEnum):
39 | one = 1
40 | two = 2
41 |
42 |
43 | def test_module_dir():
44 | assert set(dir(msgspec.yaml)) == {"encode", "decode"}
45 |
46 |
47 | def test_pyyaml_not_installed_error(monkeypatch):
48 | monkeypatch.setitem(sys.modules, "yaml", None)
49 |
50 | with pytest.raises(ImportError, match="PyYAML"):
51 | msgspec.yaml.encode(1)
52 |
53 | with pytest.raises(ImportError, match="PyYAML"):
54 | msgspec.yaml.decode("1", type=int)
55 |
56 |
57 | @pytest.mark.parametrize(
58 | "val",
59 | [
60 | None,
61 | True,
62 | False,
63 | 1,
64 | 1.5,
65 | "fizz",
66 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6),
67 | datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC),
68 | datetime.date(2022, 1, 2),
69 | [1, 2],
70 | {"one": 2},
71 | {1: "two"},
72 | ],
73 | )
74 | def test_roundtrip_any(val):
75 | msg = msgspec.yaml.encode(val)
76 | res = msgspec.yaml.decode(msg)
77 | assert res == val
78 |
79 |
80 | @pytest.mark.parametrize(
81 | "val, type",
82 | [
83 | (None, None),
84 | (True, bool),
85 | (False, bool),
86 | (1, int),
87 | (1.5, float),
88 | ("fizz", str),
89 | (b"fizz", bytes),
90 | (b"fizz", bytearray),
91 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6), datetime.datetime),
92 | (datetime.datetime(2022, 1, 2, 3, 4, 5, 6, UTC), datetime.datetime),
93 | (datetime.date(2022, 1, 2), datetime.date),
94 | (datetime.time(12, 34), datetime.time),
95 | (uuid.uuid4(), uuid.UUID),
96 | (ExEnum.one, ExEnum),
97 | (ExIntEnum.one, ExIntEnum),
98 | ([1, 2], List[int]),
99 | ((1, 2), Tuple[int, ...]),
100 | ({1, 2}, Set[int]),
101 | (frozenset({1, 2}), FrozenSet[int]),
102 | (("one", 2), Tuple[str, int]),
103 | ({"one": 2}, Dict[str, int]),
104 | ({1: "two"}, Dict[int, str]),
105 | (ExStruct(1, "two"), ExStruct),
106 | (ExDataclass(1, "two"), ExDataclass),
107 | ],
108 | )
109 | def test_roundtrip_typed(val, type):
110 | msg = msgspec.yaml.encode(val)
111 | res = msgspec.yaml.decode(msg, type=type)
112 | assert res == val
113 |
114 |
115 | def test_encode_error():
116 | class Oops:
117 | pass
118 |
119 | with pytest.raises(TypeError, match="Encoding objects of type Oops is unsupported"):
120 | msgspec.yaml.encode(Oops())
121 |
122 |
123 | def test_encode_enc_hook():
124 | msg = msgspec.yaml.encode(Decimal(1.5), enc_hook=str)
125 | assert msgspec.yaml.decode(msg) == "1.5"
126 |
127 |
128 | @pytest.mark.parametrize("order", [None, "deterministic"])
129 | def test_encode_order(order):
130 | msg = {"y": 1, "x": 2, "z": 3}
131 | res = msgspec.yaml.encode(msg, order=order)
132 | sol = yaml.safe_dump(msg, sort_keys=bool(order)).encode("utf-8")
133 | assert res == sol
134 |
135 |
136 | def test_decode_str_or_bytes_like():
137 | assert msgspec.yaml.decode("[1, 2]") == [1, 2]
138 | assert msgspec.yaml.decode(b"[1, 2]") == [1, 2]
139 | assert msgspec.yaml.decode(bytearray(b"[1, 2]")) == [1, 2]
140 | assert msgspec.yaml.decode(memoryview(b"[1, 2]")) == [1, 2]
141 | with pytest.raises(TypeError):
142 | msgspec.yaml.decode(1)
143 |
144 |
145 | @pytest.mark.parametrize("msg", [b"{{", b"!!binary 123"])
146 | def test_decode_parse_error(msg):
147 | with pytest.raises(msgspec.DecodeError):
148 | msgspec.yaml.decode(msg)
149 |
150 |
151 | def test_decode_validation_error():
152 | with pytest.raises(msgspec.ValidationError, match="Expected `str`"):
153 | msgspec.yaml.decode(b"[1, 2, 3]", type=List[str])
154 |
155 |
156 | @pytest.mark.parametrize("strict", [True, False])
157 | def test_decode_strict_or_lax(strict):
158 | msg = b"a: ['1', '2']"
159 | typ = Dict[str, List[int]]
160 |
161 | if strict:
162 | with pytest.raises(msgspec.ValidationError, match="Expected `int`"):
163 | msgspec.yaml.decode(msg, type=typ, strict=strict)
164 | else:
165 | res = msgspec.yaml.decode(msg, type=typ, strict=strict)
166 | assert res == {"a": [1, 2]}
167 |
168 |
169 | def test_decode_dec_hook():
170 | def dec_hook(typ, val):
171 | if typ is Decimal:
172 | return Decimal(val)
173 | raise TypeError
174 |
175 | res = msgspec.yaml.decode("'1.5'", type=Decimal, dec_hook=dec_hook)
176 | assert res == Decimal("1.5")
177 |
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import inspect
3 | import textwrap
4 | import types
5 | import uuid
6 | from contextlib import contextmanager
7 |
8 |
9 | @contextmanager
10 | def temp_module(code):
11 | """Mutually recursive struct types defined inside functions don't work (and
12 | probably never will). To avoid populating a bunch of test structs in the
13 | top level of this module, we instead create a temporary module per test to
14 | exec whatever is needed for that test"""
15 | code = textwrap.dedent(code)
16 | name = f"temp_{uuid.uuid4().hex}"
17 | mod = types.ModuleType(name)
18 | sys.modules[name] = mod
19 | try:
20 | exec(code, mod.__dict__)
21 | yield mod
22 | finally:
23 | sys.modules.pop(name, None)
24 |
25 |
26 | @contextmanager
27 | def max_call_depth(n):
28 | cur_depth = len(inspect.stack(0))
29 | orig = sys.getrecursionlimit()
30 | try:
31 | # Our measure of the current stack depth can be off by a bit. Trying to
32 | # set a recursionlimit < the current depth will raise a RecursionError.
33 | # We just try again with a slightly higher limit, bailing after an
34 | # unreasonable amount of adjustments.
35 | for i in range(64):
36 | try:
37 | sys.setrecursionlimit(cur_depth + i + n)
38 | break
39 | except RecursionError:
40 | pass
41 | else:
42 | raise ValueError(
43 | "Failed to set low recursion limit, something is wrong here"
44 | )
45 | yield
46 | finally:
47 | sys.setrecursionlimit(orig)
48 |
49 |
50 | @contextmanager
51 | def package_not_installed(name):
52 | try:
53 | orig = sys.modules.get(name)
54 | sys.modules[name] = None
55 | yield
56 | finally:
57 | if orig is not None:
58 | sys.modules[name] = orig
59 | else:
60 | del sys.modules[name]
61 |
--------------------------------------------------------------------------------