├── .rustfmt.toml ├── crates ├── jiter │ ├── LICENSE │ ├── benches │ │ ├── pass2.json │ │ ├── x100.json │ │ ├── README.md │ │ ├── sentence.json │ │ ├── unicode.json │ │ ├── true_array.json │ │ ├── generate_big.py │ │ ├── string_array.json │ │ ├── true_object.json │ │ ├── pass1.json │ │ ├── medium_response.json │ │ ├── short_numbers.json │ │ ├── python.rs │ │ ├── main.rs │ │ ├── bigints_array.json │ │ └── floats_array.json │ ├── build.rs │ ├── Cargo.toml │ ├── src │ │ ├── py_lossless_float.rs │ │ ├── lib.rs │ │ ├── parse.rs │ │ ├── py_string_cache.rs │ │ ├── simd_aarch64.rs │ │ ├── errors.rs │ │ ├── jiter.rs │ │ └── python.rs │ └── tests │ │ └── python.rs ├── jiter-python │ ├── LICENSE │ ├── requirements.txt │ ├── tests │ │ ├── requirements.txt │ │ ├── emscripten_runner.js │ │ └── test_jiter.py │ ├── package.json │ ├── Cargo.toml │ ├── pyproject.toml │ ├── src │ │ └── lib.rs │ ├── jiter.pyi │ ├── bench.py │ └── README.md └── fuzz │ ├── .gitignore │ ├── Cargo.toml │ └── fuzz_targets │ ├── compare_skip.rs │ └── compare_to_serde.rs ├── .github ├── FUNDING.yml └── actions │ └── build-pgo-wheel │ └── action.yml ├── .hyperlint └── styles │ └── config │ └── vocabularies │ └── hyperlint │ └── accept.txt ├── .gitignore ├── codecov.yml ├── Cargo.toml ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile └── README.md /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | -------------------------------------------------------------------------------- /crates/jiter/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /crates/jiter-python/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /.github/FUNDING.yml: 
-------------------------------------------------------------------------------- 1 | github: samuelcolvin 2 | -------------------------------------------------------------------------------- /crates/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | /scratch.* 5 | -------------------------------------------------------------------------------- /crates/jiter-python/requirements.txt: -------------------------------------------------------------------------------- 1 | maturin 2 | orjson 3 | ujson 4 | -------------------------------------------------------------------------------- /crates/jiter/benches/pass2.json: -------------------------------------------------------------------------------- 1 | [[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] -------------------------------------------------------------------------------- /crates/jiter-python/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-pretty 3 | dirty_equals 4 | -------------------------------------------------------------------------------- /.hyperlint/styles/config/vocabularies/hyperlint/accept.txt: -------------------------------------------------------------------------------- 1 | batson 2 | Postgres 3 | Jiter 4 | Serde 5 | -------------------------------------------------------------------------------- /crates/jiter/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | #[cfg(feature = "python")] 3 | pyo3_build_config::use_pyo3_cfgs(); 4 | } 5 | -------------------------------------------------------------------------------- /crates/jiter/benches/x100.json: -------------------------------------------------------------------------------- 1 | "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 2 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea/ 3 | **/big.json 4 | /profiling/ 5 | /scratch/ 6 | /scratch-project/ 7 | /*.bm 8 | env*/ 9 | 10 | **/Cargo.lock 11 | profile.json 12 | __pycache__ 13 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: 'header, diff, flags, files, footer' 3 | coverage: 4 | precision: 2 5 | status: 6 | project: 7 | default: 8 | target: auto 9 | threshold: 5% 10 | patch: 11 | default: 12 | target: 80% 13 | -------------------------------------------------------------------------------- /crates/jiter/benches/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | Before running benchmarks, make sure to generate `big.json`: 4 | 5 | ```shell 6 | python3 ./crates/jiter/benches/generate_big.py 7 | ``` 8 | 9 | To run benchmarks, run: 10 | 11 | ```shell 12 | cargo bench -p jiter 13 | ``` 14 | -------------------------------------------------------------------------------- /crates/jiter/benches/sentence.json: -------------------------------------------------------------------------------- 1 | "Fast iterable JSON parser.\n\nDocumentation is available at [docs.rs/jiter](https://docs.rs/jiter).\n\njiter has three interfaces:\n* [`JsonValue`] an enum representing JSON data\n* [`Jiter`] an iterator over JSON data\n* [`python_parse`] which parses a JSON string into a Python object\n\n## JsonValue Example\n\nSee [the `JsonValue` docs][JsonValue] for more details.\n" 2 | -------------------------------------------------------------------------------- /crates/jiter/benches/unicode.json: -------------------------------------------------------------------------------- 1 | "Fast iterable JSON 
parser.\n\nDocumentation is available at £ [docs.rs/jiter](https://docs.rs/jiter).\n\njiter has three interfaces:\n* [`JsonValue`] an enum representing JSON data\n* [`Jiter`] an iterator over JSON data\n* [`python_parse`] which parses a JSON string into a Python object\n\n## JsonValue Example\n\nSee [the `JsonValue` docs][JsonValue] for more details.\n💩" 2 | -------------------------------------------------------------------------------- /crates/jiter/benches/true_array.json: -------------------------------------------------------------------------------- 1 | [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true] -------------------------------------------------------------------------------- /crates/jiter/benches/generate_big.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | from random import random 4 | from pathlib import Path 5 | 6 | THIS_DIR = Path(__file__).parent 7 | 8 | data = [] 9 | no_strings = True 10 | for i in range(1_000): 11 | if random() > 0.5: 12 | if no_strings: 13 | data.append([v*random() for v in range(int(random()*500))]) 14 | else: 15 | data.append({str(random()): v*random() for v in range(int(random()*500))}) 16 | else: 17 | data.append(list(range(int(random()*500)))) 18 | 19 | (THIS_DIR / 'big.json').write_text(json.dumps(data, separators=(',', ':'))) 20 | 
-------------------------------------------------------------------------------- /crates/jiter/benches/string_array.json: -------------------------------------------------------------------------------- 1 | ["xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx", "xxx"] -------------------------------------------------------------------------------- /crates/fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fuzz" 3 | publish = false 4 | version = {workspace = true} 5 | edition = {workspace = true} 6 | authors = {workspace = true} 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = "0.4.7" 13 | serde_json = {version = "1.0.107", features = ["preserve_order", "float_roundtrip"]} 14 | serde = "1.0.190" 15 | indexmap = "2.0.0" 16 | num-bigint = "0.4.4" 17 | num-traits = "0.2.17" 18 | jiter = {path = "../jiter"} 19 | 20 | [[bin]] 21 | name = "compare_to_serde" 22 | path = "fuzz_targets/compare_to_serde.rs" 23 | test = false 24 | doc = false 25 | 26 | [[bin]] 27 | name = "compare_skip" 28 | path = "fuzz_targets/compare_skip.rs" 29 | test = false 30 | doc = false 31 | -------------------------------------------------------------------------------- /Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["crates/jiter", "crates/jiter-python", "crates/fuzz"] 3 | resolver = "2" 4 | 5 | [workspace.package] 6 | authors = ["Samuel Colvin "] 7 | version = "0.12.0" 8 | edition = "2021" 9 | license = "MIT" 10 | keywords = ["JSON", "parsing", "deserialization", "iter"] 11 | categories = ["parser-implementations", "parsing"] 12 | homepage = "https://github.com/pydantic/jiter/" 13 | repository = "https://github.com/pydantic/jiter/" 14 | # MSRV should match pydantic-core 15 | rust-version = "1.75" 16 | 17 | [profile.bench] 18 | debug = true 19 | lto = true 20 | opt-level = 3 21 | codegen-units = 1 22 | 23 | [profile.profiling] 24 | inherits = "release" 25 | debug = true 26 | 27 | [workspace.dependencies] 28 | pyo3 = { version = "0.27" } 29 | pyo3-build-config = { version = "0.27" } 30 | -------------------------------------------------------------------------------- /crates/jiter-python/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jiter", 3 | "version": "1.0.0", 4 | "description": "for running wasm tests.", 5 | "author": "Samuel Colvin", 6 | "license": "MIT", 7 | "homepage": "https://github.com/pydantic/jiter#readme", 8 | "main": "tests/emscripten_runner.js", 9 | "dependencies": { 10 | "prettier": "^2.7.1", 11 | "pyodide": "^0.26.3" 12 | }, 13 | "scripts": { 14 | "test": "node tests/emscripten_runner.js", 15 | "format": "prettier --write 'tests/emscripten_runner.js' 'wasm-preview/*.{html,js}'", 16 | "lint": "prettier --check 'tests/emscripten_runner.js' 'wasm-preview/*.{html,js}'" 17 | }, 18 | "prettier": { 19 | "singleQuote": true, 20 | "trailingComma": "all", 21 | "tabWidth": 2, 22 | "printWidth": 119, 23 | "bracketSpacing": false, 24 | "arrowParens": "avoid" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /crates/jiter-python/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "jiter-python" 3 | version = {workspace = true} 4 | edition = {workspace = true} 5 | authors = {workspace = true} 6 | license = {workspace = true} 7 | keywords = {workspace = true} 8 | categories = {workspace = true} 9 | homepage = {workspace = true} 10 | repository = {workspace = true} 11 | rust-version = { workspace = true } 12 | 13 | [dependencies] 14 | pyo3 = { workspace = true, features = ["num-bigint"] } 15 | jiter = { path = "../jiter", features = ["python", "num-bigint"] } 16 | 17 | [features] 18 | # make extensions visible to cargo vendor 19 | extension-module = ["pyo3/generate-import-lib"] 20 | 21 | [lib] 22 | name = "jiter_python" 23 | crate-type = ["cdylib", "rlib"] 24 | 25 | [lints.clippy] 26 | dbg_macro = "deny" 27 | print_stdout = "deny" 28 | print_stderr = "deny" 29 | # in general we lint against the pedantic group, but we will whitelist 30 | # certain lints which we don't want to enforce (for now) 31 | pedantic = { level = "deny", priority = -1 } 32 | missing_errors_doc = "allow" 33 | must_use_candidate = "allow" 34 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.0.1 6 | hooks: 7 | - id: check-yaml 8 | - id: check-toml 9 | - id: end-of-file-fixer 10 | exclude: 'benches/.*\.json' 11 | - id: trailing-whitespace 12 | - id: check-added-large-files 13 | 14 | - repo: local 15 | hooks: 16 | - id: format-check 17 | name: Format Check 18 | entry: cargo fmt 19 | types: [rust] 20 | language: system 21 | pass_filenames: false 22 | - id: clippy 23 | name: Clippy 24 | entry: cargo clippy -F python --workspace --all-targets -- -D warnings 25 | types: [rust] 26 | language: system 27 | pass_filenames: false 28 | - id: 
check-without-num-bigint 29 | name: Check without num-bigint feature 30 | entry: cargo check --no-default-features --package jiter 31 | types: [rust] 32 | language: system 33 | pass_filenames: false 34 | - id: test 35 | name: Test 36 | entry: cargo test --test main 37 | types: [rust] 38 | language: system 39 | pass_filenames: false 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 to present Samuel Colvin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := all 2 | 3 | .PHONY: format 4 | format: 5 | @cargo fmt --version 6 | cargo fmt 7 | 8 | .PHONY: lint 9 | lint: 10 | @cargo clippy --version 11 | cargo clippy -- -D warnings 12 | cargo doc 13 | 14 | .PHONY: test 15 | test: 16 | cargo test 17 | 18 | .PHONY: python-install 19 | python-install: 20 | pip install -U maturin ujson orjson 21 | pip install -r crates/jiter-python/tests/requirements.txt 22 | 23 | .PHONY: python-dev 24 | python-dev: 25 | maturin develop -m crates/jiter-python/Cargo.toml 26 | 27 | .PHONY: python-test 28 | python-test: python-dev 29 | pytest crates/jiter-python/tests 30 | 31 | .PHONY: python-dev-release 32 | python-dev-release: 33 | maturin develop -m crates/jiter-python/Cargo.toml --release 34 | 35 | .PHONY: python-bench 36 | python-bench: python-dev-release 37 | python crates/jiter-python/bench.py 38 | 39 | .PHONY: bench 40 | bench: 41 | cargo bench -p jiter -F python 42 | 43 | .PHONY: fuzz 44 | fuzz: 45 | cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_to_serde --release 46 | 47 | .PHONY: fuzz-skip 48 | fuzz-skip: 49 | cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_skip --release 50 | 51 | .PHONY: all 52 | all: format lint test python-test 53 | -------------------------------------------------------------------------------- /crates/fuzz/fuzz_targets/compare_skip.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use jiter::{Jiter, JiterError, JiterErrorType, JsonError, JsonValue}; 4 | 5 | use libfuzzer_sys::fuzz_target; 6 | fn errors_equal(value_error: &JsonError, jiter_error: &JiterError) { 7 | let jiter_error_type = match &jiter_error.error_type { 8 | JiterErrorType::JsonError(json_error_type) => json_error_type, 9 | JiterErrorType::WrongType { ..
} => panic!("Expected JsonError, found WrongType"), 10 | }; 11 | assert_eq!(&value_error.error_type, jiter_error_type); 12 | assert_eq!(value_error.index, jiter_error.index); 13 | } 14 | 15 | fuzz_target!(|json: String| { 16 | let json_data = json.as_bytes(); 17 | match JsonValue::parse(json_data, false) { 18 | Ok(_) => { 19 | let mut jiter = Jiter::new(json_data); 20 | jiter.next_skip().unwrap(); 21 | jiter.finish().unwrap(); 22 | } 23 | Err(json_error) => { 24 | let mut jiter = Jiter::new(json_data); 25 | let jiter_error = match jiter.next_skip() { 26 | Ok(_) => jiter.finish().unwrap_err(), 27 | Err(e) => e, 28 | }; 29 | errors_equal(&json_error, &jiter_error); 30 | } 31 | }; 32 | }); 33 | -------------------------------------------------------------------------------- /crates/jiter/benches/true_object.json: -------------------------------------------------------------------------------- 1 | {"k_0": true, "k_1": true, "k_2": true, "k_3": true, "k_4": true, "k_5": true, "k_6": true, "k_7": true, "k_8": true, "k_9": true, "k_10": true, "k_11": true, "k_12": true, "k_13": true, "k_14": true, "k_15": true, "k_16": true, "k_17": true, "k_18": true, "k_19": true, "k_20": true, "k_21": true, "k_22": true, "k_23": true, "k_24": true, "k_25": true, "k_26": true, "k_27": true, "k_28": true, "k_29": true, "k_30": true, "k_31": true, "k_32": true, "k_33": true, "k_34": true, "k_35": true, "k_36": true, "k_37": true, "k_38": true, "k_39": true, "k_40": true, "k_41": true, "k_42": true, "k_43": true, "k_44": true, "k_45": true, "k_46": true, "k_47": true, "k_48": true, "k_49": true, "k_50": true, "k_51": true, "k_52": true, "k_53": true, "k_54": true, "k_55": true, "k_56": true, "k_57": true, "k_58": true, "k_59": true, "k_60": true, "k_61": true, "k_62": true, "k_63": true, "k_64": true, "k_65": true, "k_66": true, "k_67": true, "k_68": true, "k_69": true, "k_70": true, "k_71": true, "k_72": true, "k_73": true, "k_74": true, "k_75": true, "k_76": true, "k_77": true, "k_78": 
true, "k_79": true, "k_80": true, "k_81": true, "k_82": true, "k_83": true, "k_84": true, "k_85": true, "k_86": true, "k_87": true, "k_88": true, "k_89": true, "k_90": true, "k_91": true, "k_92": true, "k_93": true, "k_94": true, "k_95": true, "k_96": true, "k_97": true, "k_98": true, "k_99": true} -------------------------------------------------------------------------------- /crates/jiter-python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=1.9.4,<2"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "jiter" 7 | description = "Fast iterable JSON parser." 8 | requires-python = ">=3.9" 9 | authors = [ 10 | {name = "Samuel Colvin", email = "s@muelcolvin.com"} 11 | ] 12 | license = {file = "LICENSE"} 13 | readme = "README.md" 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3 :: Only", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Programming Language :: Python :: 3.13", 24 | "Programming Language :: Python :: 3.14", 25 | "Programming Language :: Python :: Implementation :: CPython", 26 | "Programming Language :: Python :: Implementation :: GraalPy", 27 | "Intended Audience :: Developers", 28 | "Intended Audience :: Information Technology", 29 | "Intended Audience :: System Administrators", 30 | "License :: OSI Approved :: MIT License", 31 | "Operating System :: Unix", 32 | "Operating System :: POSIX :: Linux", 33 | "Environment :: Console", 34 | "Environment :: MacOS X", 35 | "Topic :: File Formats :: JSON", 36 | "Framework :: Pydantic :: 2", 37 | ] 38 | dynamic = ["version"] 39 | 40 | [tool.maturin] 41 | module-name = "jiter" 42 | bindings = "pyo3" 43 | features = 
["pyo3/generate-import-lib"] 44 | 45 | [tool.ruff] 46 | target-version = "py39" 47 | 48 | [tool.ruff.format] 49 | quote-style = "single" 50 | -------------------------------------------------------------------------------- /crates/jiter/benches/pass1.json: -------------------------------------------------------------------------------- 1 | [ 2 | "JSON Test Pattern pass1", 3 | {"object with 1 member":["array with 1 element"]}, 4 | {}, 5 | [], 6 | -42, 7 | true, 8 | false, 9 | null, 10 | { 11 | "integer": 1234567890, 12 | "real": -9876.543210, 13 | "e": 0.123456789e-12, 14 | "E": 1.234567890E+34, 15 | "": 23456789012E66, 16 | "zero": 0, 17 | "one": 1, 18 | "space": " ", 19 | "quote": "\"", 20 | "backslash": "\\", 21 | "controls": "\b\f\n\r\t", 22 | "slash": "/ & \/", 23 | "alpha": "abcdefghijklmnopqrstuvwyz", 24 | "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", 25 | "digit": "0123456789", 26 | "0123456789": "digit", 27 | "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?", 28 | "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", 29 | "true": true, 30 | "false": false, 31 | "null": null, 32 | "array":[ ], 33 | "object":{ }, 34 | "address": "50 St. James Street", 35 | "url": "http://www.JSON.org/", 36 | "comment": "// /* <!-- --", 37 | "# -- --> */": " ", 38 | " s p a c e d " :[1,2 , 3 39 | 40 | , 41 | 42 | 4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7], 43 | "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", 44 | "quotes": "&#34; \u0022 %22 0x22 034 &#x22;", 45 | "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
46 | : "A key can be any string" 47 | }, 48 | 0.5 ,98.6 49 | , 50 | 99.44 51 | , 52 | 53 | 1066, 54 | 1e1, 55 | 0.1e1, 56 | 1e-1, 57 | 1e00,2e+00,2e-00 58 | ,"rosebud"] -------------------------------------------------------------------------------- /crates/jiter-python/tests/emscripten_runner.js: -------------------------------------------------------------------------------- 1 | const {opendir} = require('node:fs/promises'); 2 | const {loadPyodide} = require('pyodide'); 3 | const path = require('path'); 4 | 5 | async function find_wheel(dist_dir) { 6 | const dir = await opendir(dist_dir); 7 | for await (const dirent of dir) { 8 | if (dirent.name.endsWith('.whl')) { 9 | return path.join(dist_dir, dirent.name); 10 | } 11 | } 12 | } 13 | 14 | async function main() { 15 | const root_dir = path.resolve(__dirname, '..'); 16 | const wheel_path = await find_wheel(path.join(root_dir, 'dist')); 17 | const stdout = [] 18 | const stderr = [] 19 | let errcode = 1; 20 | try { 21 | const pyodide = await loadPyodide({ 22 | stdout: (msg) => { 23 | stdout.push(msg) 24 | }, 25 | stderr: (msg) => { 26 | stderr.push(msg) 27 | } 28 | }); 29 | const FS = pyodide.FS; 30 | FS.mkdir('/test_dir'); 31 | FS.mount(FS.filesystems.NODEFS, {root: path.join(root_dir, 'tests')}, '/test_dir'); 32 | FS.chdir('/test_dir'); 33 | 34 | // mount jiter crate source for benchmark data 35 | FS.mkdir('/jiter'); 36 | FS.mount(FS.filesystems.NODEFS, {root: path.resolve(root_dir, "..", "jiter")}, '/jiter'); 37 | 38 | await pyodide.loadPackage(['micropip', 'pytest']); 39 | // language=python 40 | errcode = await pyodide.runPythonAsync(` 41 | import micropip 42 | import importlib 43 | 44 | # ugly hack to get tests to work on arm64 (my m1 mac) 45 | # see https://github.com/pyodide/pyodide/issues/2840 46 | # import sys; sys.setrecursionlimit(200) 47 | 48 | await micropip.install([ 49 | 'dirty_equals', 50 | 'file:${wheel_path}' 51 | ]) 52 | importlib.invalidate_caches() 53 | 54 | print('installed packages:', 
micropip.list()) 55 | 56 | import pytest 57 | pytest.main() 58 | `); 59 | } catch (e) { 60 | console.error(e); 61 | process.exit(1); 62 | } 63 | let out = stdout.join('\n') 64 | let err = stderr.join('\n') 65 | console.log('stdout:\n', out) 66 | console.log('stderr:\n', err) 67 | 68 | process.exit(errcode); 69 | } 70 | 71 | main(); 72 | -------------------------------------------------------------------------------- /crates/jiter/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jiter" 3 | description = "Fast Iterable JSON parser" 4 | readme = "../../README.md" 5 | version = { workspace = true } 6 | edition = { workspace = true } 7 | authors = { workspace = true } 8 | license = { workspace = true } 9 | keywords = { workspace = true } 10 | categories = { workspace = true } 11 | homepage = { workspace = true } 12 | repository = { workspace = true } 13 | rust-version = { workspace = true } 14 | 15 | [dependencies] 16 | num-bigint = { version = "0.4.4", optional = true } 17 | num-traits = "0.2.16" 18 | ahash = "0.8.0" 19 | smallvec = "1.11.0" 20 | pyo3 = { workspace = true, optional = true } 21 | lexical-parse-float = { version = "1.0.5", features = ["format"] } 22 | bitvec = "1.0.1" 23 | 24 | [features] 25 | default = ["num-bigint"] 26 | python = ["dep:pyo3", "dep:pyo3-build-config"] 27 | num-bigint = ["dep:num-bigint", "pyo3?/num-bigint"] 28 | 29 | [dev-dependencies] 30 | paste = "1.0.7" 31 | serde_json = { version = "1.0.87", features = [ 32 | "preserve_order", 33 | "arbitrary_precision", 34 | "float_roundtrip", 35 | ] } 36 | serde = "1.0.147" 37 | pyo3 = { workspace = true, features = ["auto-initialize"] } 38 | codspeed-criterion-compat = "2.7.2" 39 | 40 | [build-dependencies] 41 | pyo3-build-config = { workspace = true, optional = true } 42 | 43 | [[test]] 44 | name = "python" 45 | required-features = ["python"] 46 | 47 | [[bench]] 48 | name = "main" 49 | harness = false 50 | 51 | [[bench]] 52 | 
name = "python" 53 | required-features = ["python"] 54 | harness = false 55 | 56 | # get docs.rs to include python docs 57 | [package.metadata.docs.rs] 58 | all-features = true 59 | 60 | [lints.clippy] 61 | dbg_macro = "deny" 62 | print_stdout = "deny" 63 | print_stderr = "deny" 64 | # in general we lint against the pedantic group, but we will whitelist 65 | # certain lints which we don't want to enforce (for now) 66 | pedantic = { level = "deny", priority = -1 } 67 | missing_errors_doc = "allow" 68 | module_name_repetitions = "allow" 69 | must_use_candidate = "allow" 70 | if_not_else = "allow" 71 | cast_lossless = "allow" 72 | cast_possible_wrap = "allow" 73 | cast_possible_truncation = "allow" 74 | cast_precision_loss = "allow" 75 | match_bool = "allow" 76 | doc_markdown = "allow" 77 | implicit_clone = "allow" 78 | iter_without_into_iter = "allow" 79 | return_self_not_must_use = "allow" 80 | inline_always = "allow" # TODO remove? 81 | match_same_arms = "allow" # TODO remove? 82 | -------------------------------------------------------------------------------- /crates/jiter-python/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::sync::OnceLock; 2 | 3 | pub fn get_jiter_version() -> &'static str { 4 | static JITER_VERSION: OnceLock<String> = OnceLock::new(); 5 | 6 | JITER_VERSION.get_or_init(|| { 7 | let version = env!("CARGO_PKG_VERSION"); 8 | // cargo uses "1.0-alpha1" etc. while python uses "1.0.0a1", this is not full compatibility, 9 | // but it's good enough for now 10 | // see https://docs.rs/semver/1.0.9/semver/struct.Version.html#method.parse for rust spec 11 | // see https://peps.python.org/pep-0440/ for python spec 12 | // it seems the dot after "alpha/beta" e.g.
"-alpha.1" is not necessary, hence why this works 13 | version.replace("-alpha", "a").replace("-beta", "b") 14 | }) 15 | } 16 | 17 | #[pyo3::pymodule(gil_used = false)] 18 | #[pyo3(name = "jiter")] 19 | mod jiter_python { 20 | use pyo3::prelude::*; 21 | 22 | use jiter::{map_json_error, FloatMode, LosslessFloat, PartialMode, PythonParse, StringCacheMode}; 23 | 24 | use super::get_jiter_version; 25 | 26 | #[allow(clippy::fn_params_excessive_bools)] 27 | #[pyfunction( 28 | signature = ( 29 | json_data, 30 | /, 31 | *, 32 | allow_inf_nan=true, 33 | cache_mode=StringCacheMode::All, 34 | partial_mode=PartialMode::Off, 35 | catch_duplicate_keys=false, 36 | float_mode=FloatMode::Float, 37 | ) 38 | )] 39 | pub fn from_json<'py>( 40 | py: Python<'py>, 41 | json_data: &[u8], 42 | allow_inf_nan: bool, 43 | cache_mode: StringCacheMode, 44 | partial_mode: PartialMode, 45 | catch_duplicate_keys: bool, 46 | float_mode: FloatMode, 47 | ) -> PyResult<Bound<'py, PyAny>> { 48 | let parse_builder = PythonParse { 49 | allow_inf_nan, 50 | cache_mode, 51 | partial_mode, 52 | catch_duplicate_keys, 53 | float_mode, 54 | }; 55 | parse_builder 56 | .python_parse(py, json_data) 57 | .map_err(|e| map_json_error(json_data, &e)) 58 | } 59 | 60 | #[pyfunction] 61 | pub fn cache_clear() { 62 | jiter::cache_clear(); 63 | } 64 | 65 | #[pyfunction] 66 | pub fn cache_usage() -> usize { 67 | jiter::cache_usage() 68 | } 69 | 70 | #[pymodule_init] 71 | fn init_jiter_python(m: &Bound<'_, PyModule>) -> PyResult<()> { 72 | m.add("__version__", get_jiter_version())?; 73 | m.add_class::<LosslessFloat>()?; 74 | Ok(()) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /crates/jiter-python/jiter.pyi: -------------------------------------------------------------------------------- 1 | import decimal 2 | from typing import Any, Literal 3 | 4 | def from_json( 5 | json_data: bytes, 6 | /, 7 | *, 8 | allow_inf_nan: bool = True, 9 | cache_mode: Literal[True, False, "all", "keys", "none"] = "all", 10 |
partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False, 11 | catch_duplicate_keys: bool = False, 12 | float_mode: Literal["float", "decimal", "lossless-float"] = "float", 13 | ) -> Any: 14 | """ 15 | Parse input bytes into a JSON object. 16 | 17 | Arguments: 18 | json_data: The JSON data to parse 19 | allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields. 20 | Defaults to True. 21 | cache_mode: cache Python strings to improve performance at the cost of some memory usage 22 | - True / 'all' - cache all strings 23 | - 'keys' - cache only object keys 24 | - False / 'none' - cache nothing 25 | partial_mode: How to handle incomplete strings: 26 | - False / 'off' - raise an exception if the input is incomplete 27 | - True / 'on' - allow incomplete JSON but discard the last string if it is incomplete 28 | - 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output 29 | catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times 30 | float_mode: How to return floats: as a `float`, `Decimal` or `LosslessFloat` 31 | 32 | Returns: 33 | Python object built from the JSON input. 34 | """ 35 | 36 | def cache_clear() -> None: 37 | """ 38 | Reset the string cache. 39 | """ 40 | 41 | def cache_usage() -> int: 42 | """ 43 | Get the size of the string cache. 44 | 45 | Returns: 46 | Size of the string cache in bytes. 47 | """ 48 | 49 | 50 | class LosslessFloat: 51 | """ 52 | Represents a float from JSON, by holding the underlying bytes representing a float from JSON.
53 | """ 54 | def __init__(self, json_float: bytes): 55 | """Construct a LosslessFloat object from a JSON bytes slice""" 56 | 57 | def as_decimal(self) -> decimal.Decimal: 58 | """Construct a Python Decimal from the JSON bytes slice""" 59 | 60 | def __float__(self) -> float: 61 | """Construct a Python float from the JSON bytes slice""" 62 | 63 | def __bytes__(self) -> bytes: 64 | """Return the JSON bytes slice as bytes""" 65 | 66 | def __str__(self): 67 | """Return the JSON bytes slice as a string""" 68 | 69 | def __repr__(self): 70 | ... 71 | -------------------------------------------------------------------------------- /crates/jiter/benches/medium_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "person": { 3 | "id": "d50887ca-a6ce-4e59-b89f-14f0b5d03b03", 4 | "name": { 5 | "fullName": "Leonid Bugaev", 6 | "givenName": "Leonid", 7 | "familyName": "Bugaev" 8 | }, 9 | "email": "leonsbox@gmail.com", 10 | "gender": "male", 11 | "location": "Saint Petersburg, Saint Petersburg, RU", 12 | "geo": { 13 | "city": "Saint Petersburg", 14 | "state": "Saint Petersburg", 15 | "country": "Russia", 16 | "lat": 59.9342802, 17 | "lng": 30.3350986 18 | }, 19 | "bio": "Senior engineer at Granify.com", 20 | "site": "http://flickfaver.com", 21 | "avatar": "https://d1ts43dypk8bqh.cloudfront.net/v1/avatars/d50887ca-a6ce-4e59-b89f-14f0b5d03b03", 22 | "employment": { 23 | "name": "www.latera.ru", 24 | "title": "Software Engineer", 25 | "domain": "gmail.com" 26 | }, 27 | "facebook": { 28 | "handle": "leonid.bugaev" 29 | }, 30 | "github": { 31 | "handle": "buger", 32 | "id": 14009, 33 | "avatar": "https://avatars.githubusercontent.com/u/14009?v=3", 34 | "company": "Granify", 35 | "blog": "http://leonsbox.com", 36 | "followers": 95, 37 | "following": 10 38 | }, 39 | "twitter": { 40 | "handle": "flickfaver", 41 | "id": 77004410, 42 | "bio": null, 43 | "followers": 2, 44 | "following": 1, 45 | "statuses": 5, 46 | "favorites": 0, 47 | 
"location": "", 48 | "site": "http://flickfaver.com", 49 | "avatar": null 50 | }, 51 | "linkedin": { 52 | "handle": "in/leonidbugaev" 53 | }, 54 | "googleplus": { 55 | "handle": null 56 | }, 57 | "angellist": { 58 | "handle": "leonid-bugaev", 59 | "id": 61541, 60 | "bio": "Senior engineer at Granify.com", 61 | "blog": "http://buger.github.com", 62 | "site": "http://buger.github.com", 63 | "followers": 41, 64 | "avatar": "https://d1qb2nb5cznatu.cloudfront.net/users/61541-medium_jpg?1405474390" 65 | }, 66 | "klout": { 67 | "handle": null, 68 | "score": null 69 | }, 70 | "foursquare": { 71 | "handle": null 72 | }, 73 | "aboutme": { 74 | "handle": "leonid.bugaev", 75 | "bio": null, 76 | "avatar": null 77 | }, 78 | "gravatar": { 79 | "handle": "buger", 80 | "urls": [ 81 | ], 82 | "avatar": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510", 83 | "avatars": [ 84 | { 85 | "url": "http://1.gravatar.com/avatar/f7c8edd577d13b8930d5522f28123510", 86 | "type": "thumbnail" 87 | } 88 | ] 89 | }, 90 | "fuzzy": false 91 | }, 92 | "company": null 93 | } 94 | -------------------------------------------------------------------------------- /crates/jiter/src/py_lossless_float.rs: -------------------------------------------------------------------------------- 1 | use pyo3::exceptions::{PyTypeError, PyValueError}; 2 | use pyo3::prelude::*; 3 | use pyo3::sync::PyOnceLock; 4 | use pyo3::types::PyType; 5 | 6 | use crate::Jiter; 7 | 8 | #[derive(Debug, Clone, Copy, Default)] 9 | pub enum FloatMode { 10 | #[default] 11 | Float, 12 | Decimal, 13 | LosslessFloat, 14 | } 15 | 16 | const FLOAT_ERROR: &str = "Invalid float mode, should be `'float'`, `'decimal'` or `'lossless-float'`"; 17 | 18 | impl<'py> FromPyObject<'_, 'py> for FloatMode { 19 | type Error = PyErr; 20 | fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult { 21 | if let Ok(str_mode) = ob.extract::<&str>() { 22 | match str_mode { 23 | "float" => Ok(Self::Float), 24 | "decimal" => Ok(Self::Decimal), 25 | 
"lossless-float" => Ok(Self::LosslessFloat), 26 | _ => Err(PyValueError::new_err(FLOAT_ERROR)), 27 | } 28 | } else { 29 | Err(PyTypeError::new_err(FLOAT_ERROR)) 30 | } 31 | } 32 | } 33 | 34 | /// Represents a float from JSON, by holding the underlying bytes representing a float from JSON. 35 | #[derive(Debug, Clone)] 36 | #[pyclass(module = "jiter")] 37 | pub struct LosslessFloat(Vec); 38 | 39 | impl LosslessFloat { 40 | pub fn new_unchecked(raw: Vec) -> Self { 41 | Self(raw) 42 | } 43 | } 44 | 45 | #[pymethods] 46 | impl LosslessFloat { 47 | #[new] 48 | fn new(raw: Vec) -> PyResult { 49 | let s = Self(raw); 50 | // check the string is valid by calling `as_float` 51 | s.__float__()?; 52 | Ok(s) 53 | } 54 | 55 | fn as_decimal<'py>(&self, py: Python<'py>) -> PyResult> { 56 | let decimal = get_decimal_type(py)?; 57 | let float_str = self.__str__()?; 58 | decimal.call1((float_str,)) 59 | } 60 | 61 | fn __float__(&self) -> PyResult { 62 | let bytes = &self.0; 63 | let mut jiter = Jiter::new(bytes).with_allow_inf_nan(); 64 | let f = jiter 65 | .next_float() 66 | .map_err(|e| PyValueError::new_err(e.description(&jiter)))?; 67 | jiter 68 | .finish() 69 | .map_err(|e| PyValueError::new_err(e.description(&jiter)))?; 70 | Ok(f) 71 | } 72 | 73 | fn __bytes__(&self) -> &[u8] { 74 | &self.0 75 | } 76 | 77 | fn __str__(&self) -> PyResult<&str> { 78 | std::str::from_utf8(&self.0).map_err(|_| PyValueError::new_err("Invalid UTF-8")) 79 | } 80 | 81 | fn __repr__(&self) -> PyResult { 82 | self.__str__().map(|s| format!("LosslessFloat({s})")) 83 | } 84 | } 85 | 86 | static DECIMAL_TYPE: PyOnceLock> = PyOnceLock::new(); 87 | 88 | pub fn get_decimal_type(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { 89 | DECIMAL_TYPE.import(py, "decimal", "Decimal") 90 | } 91 | -------------------------------------------------------------------------------- /.github/actions/build-pgo-wheel/action.yml: -------------------------------------------------------------------------------- 1 | name: Build PGO 
wheel 2 | description: Builds a PGO-optimized wheel 3 | inputs: 4 | interpreter: 5 | description: 'Interpreter to build the wheel for' 6 | required: true 7 | rust-toolchain: 8 | description: 'Rust toolchain to use' 9 | required: true 10 | outputs: 11 | wheel: 12 | description: 'Path to the built wheel' 13 | value: ${{ steps.find_wheel.outputs.path }} 14 | runs: 15 | using: "composite" 16 | steps: 17 | - name: prepare profiling directory 18 | shell: bash 19 | # making this ahead of the compile ensures that the local user can write to this 20 | # directory; the maturin action (on linux) runs in docker so would create as root 21 | run: mkdir -p ${{ github.workspace }}/profdata 22 | 23 | - name: build initial wheel 24 | uses: PyO3/maturin-action@v1 25 | with: 26 | manylinux: auto 27 | args: > 28 | --release 29 | --out pgo-wheel 30 | --interpreter ${{ inputs.interpreter }} 31 | rust-toolchain: ${{ inputs.rust-toolchain }} 32 | docker-options: -e CI 33 | working-directory: crates/jiter-python 34 | env: 35 | RUSTFLAGS: '-Cprofile-generate=${{ github.workspace }}/profdata' 36 | 37 | - name: detect rust host 38 | run: echo RUST_HOST=$(rustc -Vv | grep host | cut -d ' ' -f 2) >> "$GITHUB_ENV" 39 | shell: bash 40 | 41 | - name: generate pgo data 42 | run: | 43 | python -m pip install -U pip 44 | python -m pip install -r tests/requirements.txt 45 | python -m pip install jiter --no-index --no-deps --find-links pgo-wheel --force-reinstall 46 | python bench.py jiter jiter-cache 47 | RUST_HOST=$(rustc -Vv | grep host | cut -d ' ' -f 2) 48 | rustup run ${{ inputs.rust-toolchain }} bash -c 'echo LLVM_PROFDATA=$RUSTUP_HOME/toolchains/$RUSTUP_TOOLCHAIN/lib/rustlib/$RUST_HOST/bin/llvm-profdata >> "$GITHUB_ENV"' 49 | shell: bash 50 | working-directory: crates/jiter-python 51 | 52 | - name: merge pgo data 53 | run: ${{ env.LLVM_PROFDATA }} merge -o ${{ github.workspace }}/merged.profdata ${{ github.workspace }}/profdata 54 | shell: pwsh # because it handles paths on windows better, and 
works well enough on unix for this step 55 | 56 | - name: build pgo-optimized wheel 57 | uses: PyO3/maturin-action@v1 58 | with: 59 | manylinux: auto 60 | args: > 61 | --release 62 | --out dist 63 | --interpreter ${{ inputs.interpreter }} 64 | rust-toolchain: ${{inputs.rust-toolchain}} 65 | docker-options: -e CI 66 | working-directory: crates/jiter-python 67 | env: 68 | RUSTFLAGS: '-Cprofile-use=${{ github.workspace }}/merged.profdata' 69 | 70 | - name: find built wheel 71 | id: find_wheel 72 | run: echo "path=$(ls dist/*.whl)" | tee -a "$GITHUB_OUTPUT" 73 | shell: bash 74 | working-directory: crates/jiter-python 75 | -------------------------------------------------------------------------------- /crates/jiter/tests/python.rs: -------------------------------------------------------------------------------- 1 | use pyo3::prelude::*; 2 | use pyo3::types::PyString; 3 | 4 | use jiter::{pystring_ascii_new, JsonValue, PythonParse, StringCacheMode}; 5 | 6 | #[cfg(feature = "num-bigint")] 7 | #[test] 8 | fn test_to_py_object_numeric() { 9 | let value = JsonValue::parse( 10 | br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#, 11 | false, 12 | ) 13 | .unwrap(); 14 | Python::attach(|py| { 15 | let python_value = value.into_pyobject(py).unwrap(); 16 | let string = python_value.to_string(); 17 | assert_eq!( 18 | string, 19 | "{'int': 1, 'bigint': 123456789012345678901234567890, 'float': 1.2}" 20 | ); 21 | }); 22 | } 23 | 24 | #[test] 25 | fn test_to_py_object_other() { 26 | let value = JsonValue::parse( 27 | br#"["string", "\u00a3", true, false, null, NaN, Infinity, -Infinity]"#, 28 | true, 29 | ) 30 | .unwrap(); 31 | Python::attach(|py| { 32 | let python_value = value.into_pyobject(py).unwrap(); 33 | let string = python_value.to_string(); 34 | assert_eq!(string, "['string', '£', True, False, None, nan, inf, -inf]"); 35 | }); 36 | } 37 | 38 | #[test] 39 | fn test_cache_into() { 40 | Python::attach(|py| { 41 | let c: StringCacheMode = 
true.into_pyobject(py).unwrap().extract().unwrap(); 42 | assert!(matches!(c, StringCacheMode::All)); 43 | 44 | let c: StringCacheMode = false.into_pyobject(py).unwrap().extract().unwrap(); 45 | assert!(matches!(c, StringCacheMode::None)); 46 | 47 | let c: StringCacheMode = PyString::new(py, "all").extract().unwrap(); 48 | assert!(matches!(c, StringCacheMode::All)); 49 | 50 | let c: StringCacheMode = PyString::new(py, "keys").extract().unwrap(); 51 | assert!(matches!(c, StringCacheMode::Keys)); 52 | 53 | let c: StringCacheMode = PyString::new(py, "none").extract().unwrap(); 54 | assert!(matches!(c, StringCacheMode::None)); 55 | 56 | let e = PyString::new(py, "wrong").extract::().unwrap_err(); 57 | assert_eq!( 58 | e.to_string(), 59 | "ValueError: Invalid string cache mode, should be `'all'`, '`keys`', `'none`' or a `bool`" 60 | ); 61 | let e = 123i32 62 | .into_pyobject(py) 63 | .unwrap() 64 | .extract::() 65 | .unwrap_err(); 66 | assert_eq!( 67 | e.to_string(), 68 | "TypeError: Invalid string cache mode, should be `'all'`, '`keys`', `'none`' or a `bool`" 69 | ); 70 | }); 71 | } 72 | 73 | #[test] 74 | fn test_pystring_ascii_new() { 75 | let json = "100abc"; 76 | Python::attach(|py| { 77 | let s = unsafe { pystring_ascii_new(py, json) }; 78 | assert_eq!(s.to_string(), "100abc"); 79 | }); 80 | } 81 | 82 | #[test] 83 | fn test_python_parse_default() { 84 | Python::attach(|py| { 85 | let v = PythonParse::default().python_parse(py, b"[123]").unwrap(); 86 | assert_eq!(v.to_string(), "[123]"); 87 | }); 88 | } 89 | -------------------------------------------------------------------------------- /crates/jiter-python/bench.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import timeit 4 | from pathlib import Path 5 | 6 | import json 7 | 8 | CASES = { 9 | "array_short_strings": "[{}]".format(", ".join('"123"' for _ in range(100_000))), 10 | "object_short_strings": "{%s}" % ", ".join(f'"{i}": "{i}x"' for 
i in range(100_000)), 11 | "array_short_arrays": "[{}]".format(", ".join('["a", "b", "c", "d"]' for _ in range(10_000))), 12 | "one_long_string": json.dumps("x" * 100), 13 | "one_short_string": b'"foobar"', 14 | "1m_strings": json.dumps([str(i) for i in range(1_000_000)]), 15 | } 16 | 17 | BENCHES_DIR = Path(__file__).parent.parent / "jiter/benches/" 18 | 19 | for p in BENCHES_DIR.glob('*.json'): 20 | CASES[p.stem] = p.read_bytes() 21 | 22 | 23 | def run_bench(func, d, fast: bool): 24 | if isinstance(d, str): 25 | d = d.encode() 26 | timer = timeit.Timer( 27 | "func(json_data)", setup="", globals={"func": func, "json_data": d} 28 | ) 29 | if fast: 30 | return timer.timeit(1) 31 | else: 32 | n, t = timer.autorange() 33 | iter_time = t / n 34 | # print(f'{func.__module__}.{func.__name__}', iter_time) 35 | return iter_time 36 | 37 | 38 | def setup_orjson(): 39 | import orjson 40 | 41 | return lambda data: orjson.loads(data) 42 | 43 | 44 | def setup_jiter_cache(): 45 | import jiter 46 | 47 | return lambda data: jiter.from_json(data, cache_mode=True) 48 | 49 | 50 | def setup_jiter(): 51 | import jiter 52 | 53 | return lambda data: jiter.from_json(data, cache_mode=False) 54 | 55 | 56 | def setup_ujson(): 57 | import ujson 58 | 59 | return lambda data: ujson.loads(data) 60 | 61 | 62 | def setup_json(): 63 | import json 64 | 65 | return lambda data: json.loads(data) 66 | 67 | 68 | PARSERS = { 69 | "orjson": setup_orjson, 70 | "jiter-cache": setup_jiter_cache, 71 | "jiter": setup_jiter, 72 | "ujson": setup_ujson, 73 | "json": setup_json, 74 | } 75 | 76 | 77 | def main(): 78 | parser = argparse.ArgumentParser() 79 | parser.add_argument("--case", default="all", choices=[*CASES.keys(), "all"]) 80 | parser.add_argument("--fast", action="store_true", default=False) 81 | parser.add_argument( 82 | "parsers", nargs="*", default="all", choices=[*PARSERS.keys(), "all"] 83 | ) 84 | args = parser.parse_args() 85 | 86 | parsers = [*PARSERS.keys()] if "all" in args.parsers else 
args.parsers 87 | cases = [*CASES.keys()] if args.case == "all" else [args.case] 88 | 89 | for name in cases: 90 | print(f"Case: {name}") 91 | 92 | json_data = CASES[name] 93 | times = [(parser, run_bench(PARSERS[parser](), json_data, args.fast)) for parser in parsers] 94 | 95 | times.sort(key=lambda x: x[1]) 96 | best = times[0][1] 97 | 98 | print(f'{"package":>12} | {"time µs":>10} | slowdown') 99 | print(f'{"-" * 13}|{"-" * 12}|{"-" * 9}') 100 | for name, time in times: 101 | print(f"{name:>12} | {time * 1_000_000:10.2f} | {time / best:8.2f}") 102 | print("") 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /crates/jiter-python/README.md: -------------------------------------------------------------------------------- 1 | # jiter 2 | 3 | [![CI](https://github.com/pydantic/jiter/workflows/CI/badge.svg?event=push)](https://github.com/pydantic/jiter/actions?query=event%3Apush+branch%3Amain+workflow%3ACI) 4 | [![pypi](https://img.shields.io/pypi/v/jiter.svg)](https://pypi.python.org/pypi/jiter) 5 | [![versions](https://img.shields.io/pypi/pyversions/jiter.svg)](https://github.com/pydantic/jiter) 6 | [![license](https://img.shields.io/github/license/pydantic/jiter.svg)](https://github.com/pydantic/jiter/blob/main/LICENSE) 7 | 8 | This is a standalone version of the JSON parser used in `pydantic-core`. The recommendation is to only use this package directly if you do not use `pydantic`. 9 | 10 | The API is extremely minimal: 11 | 12 | ```python 13 | def from_json( 14 | json_data: bytes, 15 | /, 16 | *, 17 | allow_inf_nan: bool = True, 18 | cache_mode: Literal[True, False, "all", "keys", "none"] = "all", 19 | partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False, 20 | catch_duplicate_keys: bool = False, 21 | float_mode: Literal["float", "decimal", "lossless-float"] = "float", 22 | ) -> Any: 23 | """ 24 | Parse input bytes into a JSON object. 
25 | 26 | Arguments: 27 | json_data: The JSON data to parse 28 | allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields. 29 | Defaults to True. 30 | cache_mode: cache Python strings to improve performance at the cost of some memory usage 31 | - True / 'all' - cache all strings 32 | - 'keys' - cache only object keys 33 | - False / 'none' - cache nothing 34 | partial_mode: How to handle incomplete strings: 35 | - False / 'off' - raise an exception if the input is incomplete 36 | - True / 'on' - allow incomplete JSON but discard the last string if it is incomplete 37 | - 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output 38 | catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times 39 | float_mode: How to return floats: as a `float`, `Decimal` or `LosslessFloat` 40 | 41 | Returns: 42 | Python object built from the JSON input. 43 | """ 44 | 45 | def cache_clear() -> None: 46 | """ 47 | Reset the string cache. 48 | """ 49 | 50 | def cache_usage() -> int: 51 | """ 52 | Get the size of the string cache. 53 | 54 | Returns: 55 | Size of the string cache in bytes. 56 | """ 57 | ``` 58 | ## Examples 59 | 60 | The main function provided by Jiter is `from_json()`, which accepts a bytes object containing JSON and returns a Python dictionary, list or other value. 61 | 62 | ```python 63 | import jiter 64 | 65 | json_data = b'{"name": "John", "age": 30}' 66 | parsed_data = jiter.from_json(json_data) 67 | print(parsed_data) # Output: {'name': 'John', 'age': 30} 68 | ``` 69 | 70 | ### Handling Partial JSON 71 | 72 | Incomplete JSON objects can be parsed using the `partial_mode=` parameter.
73 | 74 | ```python 75 | import jiter 76 | 77 | partial_json = b'{"name": "John", "age": 30, "city": "New Yor' 78 | 79 | # Raise error on incomplete JSON 80 | try: 81 | jiter.from_json(partial_json, partial_mode=False) 82 | except ValueError as e: 83 | print(f"Error: {e}") 84 | 85 | # Parse incomplete JSON, discarding incomplete last field 86 | result = jiter.from_json(partial_json, partial_mode=True) 87 | print(result) # Output: {'name': 'John', 'age': 30} 88 | 89 | # Parse incomplete JSON, including incomplete last field 90 | result = jiter.from_json(partial_json, partial_mode='trailing-strings') 91 | print(result) # Output: {'name': 'John', 'age': 30, 'city': 'New Yor'} 92 | ``` 93 | 94 | ### Catching Duplicate Keys 95 | 96 | The `catch_duplicate_keys=True` option can be used to raise a `ValueError` if an object contains duplicate keys. 97 | 98 | ```python 99 | import jiter 100 | 101 | json_with_dupes = b'{"foo": 1, "foo": 2}' 102 | 103 | # Default behavior (last value wins) 104 | result = jiter.from_json(json_with_dupes) 105 | print(result) # Output: {'foo': 2} 106 | 107 | # Catch duplicate keys 108 | try: 109 | jiter.from_json(json_with_dupes, catch_duplicate_keys=True) 110 | except ValueError as e: 111 | print(f"Error: {e}") 112 | ``` 113 | -------------------------------------------------------------------------------- /crates/jiter/benches/short_numbers.json: -------------------------------------------------------------------------------- 1 | [0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 
942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 
542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 
142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942, 0, 142, 242, 342, 442, 542, 642, 742, 842, 942] 2 | -------------------------------------------------------------------------------- /crates/jiter/benches/python.rs: -------------------------------------------------------------------------------- 1 | use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; 2 | 3 | use std::fs::File; 4 | use std::io::Read; 5 | use std::path::Path; 6 | 7 | use pyo3::Python; 8 | 9 | use jiter::{cache_clear, PythonParse, StringCacheMode}; 10 | 11 | fn python_parse_numeric(c: &mut Criterion) { 12 | Python::attach(|py| { 13 | cache_clear(); 14 | c.bench_function("python_parse_numeric", |bench| { 15 | bench.iter(|| { 16 | PythonParse::default() 17 | .python_parse( 18 | py, 19 | br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#, 20 | ) 21 | .unwrap() 22 | }); 23 | }); 24 | }); 25 | } 26 | 27 | fn python_parse_other(c: &mut Criterion) { 28 | Python::attach(|py| { 29 | cache_clear(); 30 | c.bench_function("python_parse_other", |bench| { 31 | bench.iter(|| { 32 | PythonParse::default() 33 | .python_parse(py, br#"["string", true, false, null]"#) 34 | .unwrap() 35 | }); 36 | }); 37 | }); 38 | } 39 | 40 | fn python_parse_file(path: &str, c: &mut Criterion, cache_mode: StringCacheMode) { 41 | let path = Path::new(path); 42 | let mut file = File::open(path).unwrap(); 43 | let mut contents = String::new(); 44 | file.read_to_string(&mut contents).unwrap(); 45 | let json_data = contents.as_bytes(); 46 | 47 | let title = { 48 | let file_stem = path.file_stem().unwrap().to_str().unwrap(); 49 | 50 | let cache_mode = match cache_mode { 51 | StringCacheMode::None => "_not_cached", 52 | _ => "", 53 | }; 54 | 55 | "python_parse_".to_owned() + file_stem + cache_mode 56 | }; 57 | 
58 | Python::attach(|py| { 59 | cache_clear(); 60 | 61 | c.bench_function(&title, |bench| { 62 | bench.iter(|| { 63 | PythonParse { 64 | cache_mode, 65 | ..Default::default() 66 | } 67 | .python_parse(py, json_data) 68 | .unwrap() 69 | }); 70 | }); 71 | }); 72 | } 73 | 74 | fn python_parse_massive_ints_array(c: &mut Criterion) { 75 | python_parse_file("./benches/massive_ints_array.json", c, StringCacheMode::All); 76 | } 77 | 78 | fn python_parse_medium_response_not_cached(c: &mut Criterion) { 79 | python_parse_file("./benches/medium_response.json", c, StringCacheMode::None); 80 | } 81 | 82 | fn python_parse_medium_response(c: &mut Criterion) { 83 | python_parse_file("./benches/medium_response.json", c, StringCacheMode::All); 84 | } 85 | 86 | fn python_parse_true_object_not_cached(c: &mut Criterion) { 87 | python_parse_file("./benches/true_object.json", c, StringCacheMode::None); 88 | } 89 | 90 | fn python_parse_string_array_not_cached(c: &mut Criterion) { 91 | python_parse_file("./benches/string_array.json", c, StringCacheMode::None); 92 | } 93 | 94 | fn python_parse_string_array(c: &mut Criterion) { 95 | python_parse_file("./benches/string_array.json", c, StringCacheMode::All); 96 | } 97 | 98 | fn python_parse_x100_not_cached(c: &mut Criterion) { 99 | python_parse_file("./benches/x100.json", c, StringCacheMode::None); 100 | } 101 | 102 | fn python_parse_x100(c: &mut Criterion) { 103 | python_parse_file("./benches/x100.json", c, StringCacheMode::All); 104 | } 105 | 106 | fn python_parse_string_array_unique_not_cached(c: &mut Criterion) { 107 | python_parse_file("./benches/string_array_unique.json", c, StringCacheMode::None); 108 | } 109 | 110 | fn python_parse_string_array_unique(c: &mut Criterion) { 111 | python_parse_file("./benches/string_array_unique.json", c, StringCacheMode::All); 112 | } 113 | 114 | fn python_parse_true_object(c: &mut Criterion) { 115 | python_parse_file("./benches/true_object.json", c, StringCacheMode::All); 116 | } 117 | 118 | /// Note - 
caching strings should make no difference here 119 | fn python_parse_true_array(c: &mut Criterion) { 120 | python_parse_file("./benches/true_array.json", c, StringCacheMode::All); 121 | } 122 | 123 | criterion_group!( 124 | benches, 125 | python_parse_numeric, 126 | python_parse_other, 127 | python_parse_medium_response_not_cached, 128 | python_parse_medium_response, 129 | python_parse_true_object_not_cached, 130 | python_parse_string_array_not_cached, 131 | python_parse_string_array, 132 | python_parse_string_array_unique_not_cached, 133 | python_parse_string_array_unique, 134 | python_parse_x100_not_cached, 135 | python_parse_x100, 136 | python_parse_true_object, 137 | python_parse_true_array, 138 | python_parse_massive_ints_array, 139 | ); 140 | criterion_main!(benches); 141 | -------------------------------------------------------------------------------- /crates/fuzz/fuzz_targets/compare_to_serde.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #![allow(clippy::dbg_macro)] 3 | 4 | use indexmap::IndexMap; 5 | use jiter::{JsonError as JiterError, JsonErrorType as JiterJsonErrorType, JsonValue as JiterValue}; 6 | use serde_json::{Error as SerdeError, Number as SerdeNumber, Value as SerdeValue}; 7 | 8 | use libfuzzer_sys::fuzz_target; 9 | use num_traits::ToPrimitive; 10 | 11 | pub fn values_equal(jiter_value: &JiterValue, serde_value: &SerdeValue) -> bool { 12 | match (jiter_value, serde_value) { 13 | (JiterValue::Null, SerdeValue::Null) => true, 14 | (JiterValue::Bool(b1), SerdeValue::Bool(b2)) => b1 == b2, 15 | (JiterValue::Int(i1), SerdeValue::Number(n2)) => ints_equal(i1, n2), 16 | (JiterValue::BigInt(i1), SerdeValue::Number(n2)) => floats_approx(i1.to_f64(), n2.as_f64()), 17 | (JiterValue::Float(f1), SerdeValue::Number(n2)) => floats_approx(Some(*f1), n2.as_f64()), 18 | (JiterValue::Str(s1), SerdeValue::String(s2)) => s1 == s2, 19 | (JiterValue::Array(a1), SerdeValue::Array(a2)) => { 20 | if a1.len() != 
a2.len() { 21 | return false; 22 | } 23 | for (v1, v2) in a1.iter().zip(a2.iter()) { 24 | if !values_equal(v1, v2) { 25 | return false; 26 | } 27 | } 28 | true 29 | } 30 | (JiterValue::Object(o1), SerdeValue::Object(o2)) => { 31 | // deduplicate, as `jiter` doesn't do this during parsing 32 | let o1: IndexMap<_, _> = o1.iter().map(|(k, v)| (k, v)).collect(); 33 | if o1.len() != o2.len() { 34 | return false; 35 | } 36 | for (k1, v1) in o1 { 37 | if let Some(v2) = o2.get::(k1.as_ref()) { 38 | if !values_equal(v1, v2) { 39 | return false; 40 | } 41 | } else { 42 | return false; 43 | } 44 | } 45 | true 46 | } 47 | _ => false, 48 | } 49 | } 50 | 51 | fn floats_approx(f1: Option, f2: Option) -> bool { 52 | match (f1, f2) { 53 | (Some(f1), Some(f2)) => { 54 | let mut threshold = f1.abs() / 1_000_000_f64; 55 | if threshold < 0.000_000_1 { 56 | threshold = 0.000_000_1; 57 | } 58 | let diff = f1 - f2; 59 | diff.abs() <= threshold 60 | } 61 | _ => false, 62 | } 63 | } 64 | 65 | fn ints_equal(i1: &i64, n2: &SerdeNumber) -> bool { 66 | let i1 = *i1; 67 | if let Some(i2) = n2.as_i64() { 68 | if i1 == i2 { 69 | return true; 70 | } 71 | } 72 | floats_approx(i1.to_f64(), n2.as_f64()) 73 | } 74 | 75 | fn remove_suffix(s: &str) -> &str { 76 | match s.find("line ") { 77 | Some(line_index) => &s[..line_index], 78 | None => s, 79 | } 80 | } 81 | 82 | fn errors_equal(jiter_error: &JiterError, serde_error: &SerdeError, json_data: &[u8]) -> bool { 83 | let jiter_error_str = jiter_error.description(json_data); 84 | let serde_error_str = serde_error.to_string(); 85 | if serde_error_str.starts_with("number out of range") { 86 | // ignore this case as serde is stricter so fails on this before jiter does 87 | true 88 | } else if serde_error_str.starts_with("recursion limit exceeded") { 89 | // serde has a different recursion limit to jiter 90 | true 91 | } else if matches!(jiter_error.error_type, JiterJsonErrorType::InvalidUnicodeCodePoint) { 92 | // https://github.com/serde-rs/json/issues/1083 
93 | remove_suffix(&jiter_error_str) == remove_suffix(&serde_error_str) 94 | } else if jiter_error_str.starts_with("invalid escape at line") 95 | && serde_error_str.starts_with("invalid escape at line") 96 | { 97 | // see fuzz failures on #130 98 | true 99 | } else { 100 | jiter_error_str == serde_error_str 101 | } 102 | } 103 | 104 | // fuzz_target!(|json: String| { 105 | // let json_data = json.as_bytes(); 106 | fuzz_target!(|json_data: &[u8]| { 107 | let jiter_value = match JiterValue::parse(json_data, false) { 108 | Ok(v) => v, 109 | Err(jiter_error) => { 110 | match serde_json::from_slice::(json_data) { 111 | Ok(serde_value) => { 112 | dbg!(json_data, serde_value, jiter_error); 113 | panic!("jiter failed to parse when serde passed"); 114 | } 115 | Err(serde_error) => { 116 | if errors_equal(&jiter_error, &serde_error, json_data) { 117 | return; 118 | } else { 119 | eprintln!("============================"); 120 | dbg!( 121 | &jiter_error, 122 | jiter_error.description(json_data), 123 | &serde_error, 124 | serde_error.to_string() 125 | ); 126 | panic!("errors not equal"); 127 | // return 128 | } 129 | } 130 | } 131 | } 132 | }; 133 | let serde_value: SerdeValue = match serde_json::from_slice(json_data) { 134 | Ok(v) => v, 135 | Err(error) => { 136 | let error_string = error.to_string(); 137 | if error_string.starts_with("number out of range") { 138 | // this happens because of stricter behaviour on exponential floats in serde 139 | return; 140 | } else if error_string.starts_with("recursion limit exceeded") { 141 | // serde has a different recursion limit to jiter 142 | return; 143 | } else { 144 | dbg!(error, error_string, jiter_value); 145 | panic!("serde_json failed to parse json that Jiter did"); 146 | } 147 | } 148 | }; 149 | 150 | if !values_equal(&jiter_value, &serde_value) { 151 | dbg!(jiter_value, serde_value); 152 | panic!("values not equal"); 153 | } 154 | }); 155 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # jiter 2 | 3 | [![CI](https://github.com/pydantic/jiter/actions/workflows/ci.yml/badge.svg?event=push)](https://github.com/pydantic/jiter/actions/workflows/ci.yml?query=branch%3Amain) 4 | [![Crates.io](https://img.shields.io/crates/v/jiter?color=green)](https://crates.io/crates/jiter) 5 | [![CodSpeed Badge](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/pydantic/jiter) 6 | 7 | Fast iterable JSON parser. 8 | 9 | Documentation is available at [docs.rs/jiter](https://docs.rs/jiter). 10 | 11 | jiter has three interfaces: 12 | * `JsonValue` an enum representing JSON data 13 | * `Jiter` an iterator over JSON data 14 | * `PythonParse` which parses a JSON string into a Python object 15 | 16 | ## JsonValue Example 17 | 18 | See [the `JsonValue` docs](https://docs.rs/jiter/latest/jiter/enum.JsonValue.html) for more details. 19 | 20 | ```rust 21 | use jiter::JsonValue; 22 | 23 | let json_data = r#" 24 | { 25 | "name": "John Doe", 26 | "age": 43, 27 | "phones": [ 28 | "+44 1234567", 29 | "+44 2345678" 30 | ] 31 | }"#; 32 | let json_value = JsonValue::parse(json_data.as_bytes(), true).unwrap(); 33 | println!("{:#?}", json_value); 34 | ``` 35 | 36 | returns: 37 | 38 | ```text 39 | Object( 40 | { 41 | "name": Str("John Doe"), 42 | "age": Int(43), 43 | "phones": Array( 44 | [ 45 | Str("+44 1234567"), 46 | Str("+44 2345678"), 47 | ], 48 | ), 49 | }, 50 | ) 51 | ``` 52 | 53 | ## Jiter Example 54 | 55 | To use [Jiter](https://docs.rs/jiter/latest/jiter/struct.Jiter.html), you need to know what schema you're expecting: 56 | 57 | ```rust 58 | use jiter::{Jiter, NumberInt, Peek}; 59 | 60 | let json_data = r#" 61 | { 62 | "name": "John Doe", 63 | "age": 43, 64 | "phones": [ 65 | "+44 1234567", 66 | "+44 2345678" 67 | ] 68 | }"#; 69 | let mut jiter = Jiter::new(json_data.as_bytes()); 70 | assert_eq!(jiter.next_object().unwrap(), Some("name")); 71 | 
assert_eq!(jiter.next_str().unwrap(), "John Doe"); 72 | assert_eq!(jiter.next_key().unwrap(), Some("age")); 73 | assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); 74 | assert_eq!(jiter.next_key().unwrap(), Some("phones")); 75 | assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); 76 | // we know the next value is a string as we just asserted so 77 | assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); 78 | assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); 79 | // same again 80 | assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); 81 | // next we'll get `None` from `array_step` as the array is finished 82 | assert_eq!(jiter.array_step().unwrap(), None); 83 | // and `None` from `next_key` as the object is finished 84 | assert_eq!(jiter.next_key().unwrap(), None); 85 | // and we check there's nothing else in the input 86 | jiter.finish().unwrap(); 87 | ``` 88 | 89 | ## Benchmarks 90 | 91 | _There are lies, damned lies and benchmarks._ 92 | 93 | In particular, serde-json benchmarks use `serde_json::Value` which is significantly slower than deserializing 94 | to a string. 95 | 96 | For more details, see [the benchmarks](https://github.com/pydantic/jiter/tree/main/crates/jiter/benches). 97 | 98 | ```text 99 | running 48 tests 100 | test big_jiter_iter ... bench: 3,662,616 ns/iter (+/- 88,878) 101 | test big_jiter_value ... bench: 6,998,605 ns/iter (+/- 292,383) 102 | test big_serde_value ... bench: 29,793,191 ns/iter (+/- 576,173) 103 | test bigints_array_jiter_iter ... bench: 11,836 ns/iter (+/- 414) 104 | test bigints_array_jiter_value ... bench: 28,979 ns/iter (+/- 938) 105 | test bigints_array_serde_value ... bench: 129,797 ns/iter (+/- 5,096) 106 | test floats_array_jiter_iter ... bench: 19,302 ns/iter (+/- 631) 107 | test floats_array_jiter_value ... bench: 31,083 ns/iter (+/- 921) 108 | test floats_array_serde_value ... bench: 208,932 ns/iter (+/- 6,167) 109 | test lazy_map_lookup_1_10 ... 
bench: 615 ns/iter (+/- 15) 110 | test lazy_map_lookup_2_20 ... bench: 1,776 ns/iter (+/- 36) 111 | test lazy_map_lookup_3_50 ... bench: 4,291 ns/iter (+/- 77) 112 | test massive_ints_array_jiter_iter ... bench: 62,244 ns/iter (+/- 1,616) 113 | test massive_ints_array_jiter_value ... bench: 82,889 ns/iter (+/- 1,916) 114 | test massive_ints_array_serde_value ... bench: 498,650 ns/iter (+/- 47,759) 115 | test medium_response_jiter_iter ... bench: 0 ns/iter (+/- 0) 116 | test medium_response_jiter_value ... bench: 3,521 ns/iter (+/- 101) 117 | test medium_response_jiter_value_owned ... bench: 6,088 ns/iter (+/- 180) 118 | test medium_response_serde_value ... bench: 9,383 ns/iter (+/- 342) 119 | test pass1_jiter_iter ... bench: 0 ns/iter (+/- 0) 120 | test pass1_jiter_value ... bench: 3,048 ns/iter (+/- 79) 121 | test pass1_serde_value ... bench: 6,588 ns/iter (+/- 232) 122 | test pass2_jiter_iter ... bench: 384 ns/iter (+/- 9) 123 | test pass2_jiter_value ... bench: 1,259 ns/iter (+/- 44) 124 | test pass2_serde_value ... bench: 1,237 ns/iter (+/- 38) 125 | test sentence_jiter_iter ... bench: 283 ns/iter (+/- 10) 126 | test sentence_jiter_value ... bench: 357 ns/iter (+/- 15) 127 | test sentence_serde_value ... bench: 428 ns/iter (+/- 9) 128 | test short_numbers_jiter_iter ... bench: 0 ns/iter (+/- 0) 129 | test short_numbers_jiter_value ... bench: 18,085 ns/iter (+/- 613) 130 | test short_numbers_serde_value ... bench: 87,253 ns/iter (+/- 1,506) 131 | test string_array_jiter_iter ... bench: 615 ns/iter (+/- 18) 132 | test string_array_jiter_value ... bench: 1,410 ns/iter (+/- 44) 133 | test string_array_jiter_value_owned ... bench: 2,863 ns/iter (+/- 151) 134 | test string_array_serde_value ... bench: 3,467 ns/iter (+/- 60) 135 | test true_array_jiter_iter ... bench: 299 ns/iter (+/- 8) 136 | test true_array_jiter_value ... bench: 995 ns/iter (+/- 29) 137 | test true_array_serde_value ... bench: 1,207 ns/iter (+/- 36) 138 | test true_object_jiter_iter ... 
bench: 2,482 ns/iter (+/- 84) 139 | test true_object_jiter_value ... bench: 2,058 ns/iter (+/- 45) 140 | test true_object_serde_value ... bench: 7,991 ns/iter (+/- 370) 141 | test unicode_jiter_iter ... bench: 315 ns/iter (+/- 7) 142 | test unicode_jiter_value ... bench: 389 ns/iter (+/- 6) 143 | test unicode_serde_value ... bench: 445 ns/iter (+/- 6) 144 | test x100_jiter_iter ... bench: 12 ns/iter (+/- 0) 145 | test x100_jiter_value ... bench: 20 ns/iter (+/- 1) 146 | test x100_serde_iter ... bench: 72 ns/iter (+/- 3) 147 | test x100_serde_value ... bench: 83 ns/iter (+/- 3) 148 | ``` 149 | -------------------------------------------------------------------------------- /crates/jiter/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # jiter 2 | //! 3 | //! [![CI](https://github.com/pydantic/jiter/actions/workflows/ci.yml/badge.svg?event=push)](https://github.com/pydantic/jiter/actions/workflows/ci.yml?query=branch%3Amain) 4 | //! [![Crates.io](https://img.shields.io/crates/v/jiter?color=green)](https://crates.io/crates/jiter) 5 | //! [![CodSpeed Badge](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/pydantic/jiter) 6 | //! 7 | //! Fast iterable JSON parser. 8 | //! 9 | //! Documentation is available at [docs.rs/jiter](https://docs.rs/jiter). 10 | //! 11 | //! jiter has three interfaces: 12 | //! * [`JsonValue`] an enum representing JSON data 13 | //! * [`Jiter`] an iterator over JSON data 14 | //! * [`PythonParse`] which parses a JSON string into a Python object 15 | //! 16 | //! ## JsonValue Example 17 | //! 18 | //! See [the `JsonValue` docs][JsonValue] for more details. 19 | //! 20 | //! ```rust 21 | //! use jiter::JsonValue; 22 | //! 23 | //! let json_data = r#" 24 | //! { 25 | //! "name": "John Doe", 26 | //! "age": 43, 27 | //! "phones": [ 28 | //! "+44 1234567", 29 | //! "+44 2345678" 30 | //! ] 31 | //! }"#; 32 | //! 
let json_value = JsonValue::parse(json_data.as_bytes(), true).unwrap(); 33 | //! println!("{:#?}", json_value); 34 | //! 35 | //! ``` 36 | //! 37 | //! returns: 38 | //! 39 | //! ```text 40 | //! Object( 41 | //! { 42 | //! "name": Str("John Doe"), 43 | //! "age": Int(43), 44 | //! "phones": Array( 45 | //! [ 46 | //! Str("+44 1234567"), 47 | //! Str("+44 2345678"), 48 | //! ], 49 | //! ), 50 | //! }, 51 | //! ) 52 | //! ``` 53 | //! 54 | //! ## Jiter Example 55 | //! 56 | //! To use [Jiter], you need to know what schema you're expecting: 57 | //! 58 | //! ```rust 59 | //! use jiter::{Jiter, NumberInt, Peek}; 60 | //! 61 | //! let json_data = r#" 62 | //! { 63 | //! "name": "John Doe", 64 | //! "age": 43, 65 | //! "phones": [ 66 | //! "+44 1234567", 67 | //! "+44 2345678" 68 | //! ] 69 | //! }"#; 70 | //! let mut jiter = Jiter::new(json_data.as_bytes()).with_allow_inf_nan(); 71 | //! assert_eq!(jiter.next_object().unwrap(), Some("name")); 72 | //! assert_eq!(jiter.next_str().unwrap(), "John Doe"); 73 | //! assert_eq!(jiter.next_key().unwrap(), Some("age")); 74 | //! assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); 75 | //! assert_eq!(jiter.next_key().unwrap(), Some("phones")); 76 | //! assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); 77 | //! // we know the next value is a string as we just asserted so 78 | //! assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); 79 | //! assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); 80 | //! // same again 81 | //! assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); 82 | //! // next we'll get `None` from `array_step` as the array is finished 83 | //! assert_eq!(jiter.array_step().unwrap(), None); 84 | //! // and `None` from `next_key` as the object is finished 85 | //! assert_eq!(jiter.next_key().unwrap(), None); 86 | //! // and we check there's nothing else in the input 87 | //! jiter.finish().unwrap(); 88 | //! ``` 89 | //! 90 | //! ## Benchmarks 91 | //! 92 | //! 
_There are lies, damned lies and benchmarks._ 93 | //! 94 | //! In particular, serde-json benchmarks use `serde_json::Value` which is significantly slower than deserializing 95 | //! to a string. 96 | //! 97 | //! For more details, see [the benchmarks](https://github.com/pydantic/jiter/tree/main/crates/jiter/benches). 98 | //! 99 | //! ```text 100 | //! running 48 tests 101 | //! test big_jiter_iter ... bench: 3,662,616 ns/iter (+/- 88,878) 102 | //! test big_jiter_value ... bench: 6,998,605 ns/iter (+/- 292,383) 103 | //! test big_serde_value ... bench: 29,793,191 ns/iter (+/- 576,173) 104 | //! test bigints_array_jiter_iter ... bench: 11,836 ns/iter (+/- 414) 105 | //! test bigints_array_jiter_value ... bench: 28,979 ns/iter (+/- 938) 106 | //! test bigints_array_serde_value ... bench: 129,797 ns/iter (+/- 5,096) 107 | //! test floats_array_jiter_iter ... bench: 19,302 ns/iter (+/- 631) 108 | //! test floats_array_jiter_value ... bench: 31,083 ns/iter (+/- 921) 109 | //! test floats_array_serde_value ... bench: 208,932 ns/iter (+/- 6,167) 110 | //! test lazy_map_lookup_1_10 ... bench: 615 ns/iter (+/- 15) 111 | //! test lazy_map_lookup_2_20 ... bench: 1,776 ns/iter (+/- 36) 112 | //! test lazy_map_lookup_3_50 ... bench: 4,291 ns/iter (+/- 77) 113 | //! test massive_ints_array_jiter_iter ... bench: 62,244 ns/iter (+/- 1,616) 114 | //! test massive_ints_array_jiter_value ... bench: 82,889 ns/iter (+/- 1,916) 115 | //! test massive_ints_array_serde_value ... bench: 498,650 ns/iter (+/- 47,759) 116 | //! test medium_response_jiter_iter ... bench: 0 ns/iter (+/- 0) 117 | //! test medium_response_jiter_value ... bench: 3,521 ns/iter (+/- 101) 118 | //! test medium_response_jiter_value_owned ... bench: 6,088 ns/iter (+/- 180) 119 | //! test medium_response_serde_value ... bench: 9,383 ns/iter (+/- 342) 120 | //! test pass1_jiter_iter ... bench: 0 ns/iter (+/- 0) 121 | //! test pass1_jiter_value ... bench: 3,048 ns/iter (+/- 79) 122 | //! test pass1_serde_value ...
bench: 6,588 ns/iter (+/- 232) 123 | //! test pass2_jiter_iter ... bench: 384 ns/iter (+/- 9) 124 | //! test pass2_jiter_value ... bench: 1,259 ns/iter (+/- 44) 125 | //! test pass2_serde_value ... bench: 1,237 ns/iter (+/- 38) 126 | //! test sentence_jiter_iter ... bench: 283 ns/iter (+/- 10) 127 | //! test sentence_jiter_value ... bench: 357 ns/iter (+/- 15) 128 | //! test sentence_serde_value ... bench: 428 ns/iter (+/- 9) 129 | //! test short_numbers_jiter_iter ... bench: 0 ns/iter (+/- 0) 130 | //! test short_numbers_jiter_value ... bench: 18,085 ns/iter (+/- 613) 131 | //! test short_numbers_serde_value ... bench: 87,253 ns/iter (+/- 1,506) 132 | //! test string_array_jiter_iter ... bench: 615 ns/iter (+/- 18) 133 | //! test string_array_jiter_value ... bench: 1,410 ns/iter (+/- 44) 134 | //! test string_array_jiter_value_owned ... bench: 2,863 ns/iter (+/- 151) 135 | //! test string_array_serde_value ... bench: 3,467 ns/iter (+/- 60) 136 | //! test true_array_jiter_iter ... bench: 299 ns/iter (+/- 8) 137 | //! test true_array_jiter_value ... bench: 995 ns/iter (+/- 29) 138 | //! test true_array_serde_value ... bench: 1,207 ns/iter (+/- 36) 139 | //! test true_object_jiter_iter ... bench: 2,482 ns/iter (+/- 84) 140 | //! test true_object_jiter_value ... bench: 2,058 ns/iter (+/- 45) 141 | //! test true_object_serde_value ... bench: 7,991 ns/iter (+/- 370) 142 | //! test unicode_jiter_iter ... bench: 315 ns/iter (+/- 7) 143 | //! test unicode_jiter_value ... bench: 389 ns/iter (+/- 6) 144 | //! test unicode_serde_value ... bench: 445 ns/iter (+/- 6) 145 | //! test x100_jiter_iter ... bench: 12 ns/iter (+/- 0) 146 | //! test x100_jiter_value ... bench: 20 ns/iter (+/- 1) 147 | //! test x100_serde_iter ... bench: 72 ns/iter (+/- 3) 148 | //! test x100_serde_value ... bench: 83 ns/iter (+/- 3) 149 | //! 
``` 150 | 151 | mod errors; 152 | mod jiter; 153 | mod number_decoder; 154 | mod parse; 155 | #[cfg(feature = "python")] 156 | mod py_lossless_float; 157 | #[cfg(feature = "python")] 158 | mod py_string_cache; 159 | #[cfg(feature = "python")] 160 | mod python; 161 | #[cfg(target_arch = "aarch64")] 162 | mod simd_aarch64; 163 | mod string_decoder; 164 | mod value; 165 | 166 | pub use errors::{JiterError, JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition}; 167 | pub use jiter::{Jiter, JiterResult}; 168 | pub use number_decoder::{NumberAny, NumberInt}; 169 | pub use parse::Peek; 170 | pub use value::{JsonArray, JsonObject, JsonValue}; 171 | 172 | #[cfg(feature = "python")] 173 | pub use py_lossless_float::{FloatMode, LosslessFloat}; 174 | #[cfg(feature = "python")] 175 | pub use py_string_cache::{ 176 | cache_clear, cache_usage, cached_py_string, cached_py_string_ascii, pystring_ascii_new, StringCacheMode, 177 | }; 178 | #[cfg(feature = "python")] 179 | pub use python::{map_json_error, PythonParse}; 180 | 181 | #[derive(Debug, Clone, Copy, Default)] 182 | pub enum PartialMode { 183 | #[default] 184 | Off, 185 | On, 186 | TrailingStrings, 187 | } 188 | 189 | impl From for PartialMode { 190 | fn from(mode: bool) -> Self { 191 | if mode { 192 | Self::On 193 | } else { 194 | Self::Off 195 | } 196 | } 197 | } 198 | 199 | impl PartialMode { 200 | pub fn is_active(self) -> bool { 201 | !matches!(self, Self::Off) 202 | } 203 | 204 | pub fn allow_trailing_str(self) -> bool { 205 | matches!(self, Self::TrailingStrings) 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /crates/jiter/src/parse.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::ops::Range; 3 | 4 | use crate::errors::{json_err, JsonResult, LinePosition}; 5 | use crate::number_decoder::AbstractNumberDecoder; 6 | use crate::string_decoder::{AbstractStringDecoder, Tape}; 7 | 
8 | #[derive(Copy, Clone, PartialEq, Eq)] 9 | pub struct Peek(u8); 10 | 11 | #[allow(non_upper_case_globals)] // while testing 12 | impl Peek { 13 | pub const Null: Self = Self(b'n'); 14 | pub const True: Self = Self(b't'); 15 | pub const False: Self = Self(b'f'); 16 | pub const Minus: Self = Self(b'-'); 17 | pub const Infinity: Self = Self(b'I'); 18 | pub const NaN: Self = Self(b'N'); 19 | pub const String: Self = Self(b'"'); 20 | pub const Array: Self = Self(b'['); 21 | pub const Object: Self = Self(b'{'); 22 | } 23 | 24 | impl fmt::Debug for Peek { 25 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 26 | match self.0 { 27 | b'n' => write!(f, "Null"), 28 | b't' => write!(f, "True"), 29 | b'f' => write!(f, "False"), 30 | b'-' => write!(f, "Minus"), 31 | b'I' => write!(f, "Infinity"), 32 | b'N' => write!(f, "NaN"), 33 | b'"' => write!(f, "String"), 34 | b'[' => write!(f, "Array"), 35 | b'{' => write!(f, "Object"), 36 | _ => write!(f, "Peek({:?})", self.0 as char), 37 | } 38 | } 39 | } 40 | 41 | impl Peek { 42 | pub const fn new(next: u8) -> Self { 43 | Self(next) 44 | } 45 | 46 | pub const fn is_num(self) -> bool { 47 | self.0.is_ascii_digit() || matches!(self, Self::Minus | Self::Infinity | Self::NaN) 48 | } 49 | 50 | pub const fn into_inner(self) -> u8 { 51 | self.0 52 | } 53 | } 54 | 55 | static TRUE_REST: [u8; 3] = [b'r', b'u', b'e']; 56 | static FALSE_REST: [u8; 4] = [b'a', b'l', b's', b'e']; 57 | static NULL_REST: [u8; 3] = [b'u', b'l', b'l']; 58 | static NAN_REST: [u8; 2] = [b'a', b'N']; 59 | static INFINITY_REST: [u8; 7] = [b'n', b'f', b'i', b'n', b'i', b't', b'y']; 60 | 61 | #[derive(Debug, Clone)] 62 | pub(crate) struct Parser<'j> { 63 | data: &'j [u8], 64 | pub index: usize, 65 | } 66 | 67 | impl<'j> Parser<'j> { 68 | pub fn new(data: &'j [u8]) -> Self { 69 | Self { data, index: 0 } 70 | } 71 | 72 | #[allow(dead_code)] 73 | pub fn slice(&self, range: Range) -> Option<&[u8]> { 74 | self.data.get(range) 75 | } 76 | 77 | pub fn 
current_position(&self) -> LinePosition { 78 | LinePosition::find(self.data, self.index) 79 | } 80 | 81 | pub fn peek(&mut self) -> JsonResult { 82 | if let Some(next) = self.eat_whitespace() { 83 | Ok(Peek::new(next)) 84 | } else { 85 | json_err!(EofWhileParsingValue, self.index) 86 | } 87 | } 88 | 89 | pub fn array_first(&mut self) -> JsonResult> { 90 | self.index += 1; 91 | if let Some(next) = self.eat_whitespace() { 92 | if next == b']' { 93 | self.index += 1; 94 | Ok(None) 95 | } else { 96 | Ok(Some(Peek::new(next))) 97 | } 98 | } else { 99 | json_err!(EofWhileParsingList, self.index) 100 | } 101 | } 102 | 103 | pub fn array_step(&mut self) -> JsonResult> { 104 | if let Some(next) = self.eat_whitespace() { 105 | match next { 106 | b',' => { 107 | self.index += 1; 108 | let next = self.array_peek()?; 109 | if next.is_none() { 110 | json_err!(TrailingComma, self.index) 111 | } else { 112 | Ok(next) 113 | } 114 | } 115 | b']' => { 116 | self.index += 1; 117 | Ok(None) 118 | } 119 | _ => { 120 | json_err!(ExpectedListCommaOrEnd, self.index) 121 | } 122 | } 123 | } else { 124 | json_err!(EofWhileParsingList, self.index) 125 | } 126 | } 127 | 128 | pub fn object_first<'t, D: AbstractStringDecoder<'t, 'j>>( 129 | &mut self, 130 | tape: &'t mut Tape, 131 | ) -> JsonResult> 132 | where 133 | 'j: 't, 134 | { 135 | self.index += 1; 136 | if let Some(next) = self.eat_whitespace() { 137 | match next { 138 | b'"' => self.object_key::(tape).map(Some), 139 | b'}' => { 140 | self.index += 1; 141 | Ok(None) 142 | } 143 | _ => json_err!(KeyMustBeAString, self.index), 144 | } 145 | } else { 146 | json_err!(EofWhileParsingObject, self.index) 147 | } 148 | } 149 | 150 | pub fn object_step<'t, D: AbstractStringDecoder<'t, 'j>>( 151 | &mut self, 152 | tape: &'t mut Tape, 153 | ) -> JsonResult> 154 | where 155 | 'j: 't, 156 | { 157 | if let Some(next) = self.eat_whitespace() { 158 | match next { 159 | b',' => { 160 | self.index += 1; 161 | match self.eat_whitespace() { 162 | 
Some(b'"') => self.object_key::(tape).map(Some), 163 | Some(b'}') => json_err!(TrailingComma, self.index), 164 | Some(_) => json_err!(KeyMustBeAString, self.index), 165 | None => json_err!(EofWhileParsingValue, self.index), 166 | } 167 | } 168 | b'}' => { 169 | self.index += 1; 170 | Ok(None) 171 | } 172 | _ => json_err!(ExpectedObjectCommaOrEnd, self.index), 173 | } 174 | } else { 175 | json_err!(EofWhileParsingObject, self.index) 176 | } 177 | } 178 | 179 | pub fn finish(&mut self) -> JsonResult<()> { 180 | if self.eat_whitespace().is_none() { 181 | Ok(()) 182 | } else { 183 | json_err!(TrailingCharacters, self.index) 184 | } 185 | } 186 | 187 | pub fn consume_true(&mut self) -> JsonResult<()> { 188 | self.consume_ident(TRUE_REST) 189 | } 190 | 191 | pub fn consume_false(&mut self) -> JsonResult<()> { 192 | self.consume_ident(FALSE_REST) 193 | } 194 | 195 | pub fn consume_null(&mut self) -> JsonResult<()> { 196 | self.consume_ident(NULL_REST) 197 | } 198 | 199 | pub fn consume_string<'t, D: AbstractStringDecoder<'t, 'j>>( 200 | &mut self, 201 | tape: &'t mut Tape, 202 | allow_partial: bool, 203 | ) -> JsonResult 204 | where 205 | 'j: 't, 206 | { 207 | let (output, index) = D::decode(self.data, self.index, tape, allow_partial)?; 208 | self.index = index; 209 | Ok(output) 210 | } 211 | 212 | pub fn consume_number( 213 | &mut self, 214 | first: u8, 215 | allow_inf_nan: bool, 216 | ) -> JsonResult { 217 | let (output, index) = D::decode(self.data, self.index, first, allow_inf_nan)?; 218 | self.index = index; 219 | Ok(output) 220 | } 221 | 222 | /// private method to get an object key, then consume the colon which should follow 223 | fn object_key<'t, D: AbstractStringDecoder<'t, 'j>>(&mut self, tape: &'t mut Tape) -> JsonResult 224 | where 225 | 'j: 't, 226 | { 227 | let (output, index) = D::decode(self.data, self.index, tape, false)?; 228 | self.index = index; 229 | if let Some(next) = self.eat_whitespace() { 230 | if next == b':' { 231 | self.index += 1; 232 | 
Ok(output) 233 | } else { 234 | json_err!(ExpectedColon, self.index) 235 | } 236 | } else { 237 | json_err!(EofWhileParsingObject, self.index) 238 | } 239 | } 240 | 241 | fn consume_ident(&mut self, expected: [u8; SIZE]) -> JsonResult<()> { 242 | self.index = consume_ident(self.data, self.index, expected)?; 243 | Ok(()) 244 | } 245 | 246 | fn array_peek(&mut self) -> JsonResult> { 247 | if let Some(next) = self.eat_whitespace() { 248 | match next { 249 | b']' => Ok(None), 250 | _ => Ok(Some(Peek::new(next))), 251 | } 252 | } else { 253 | json_err!(EofWhileParsingValue, self.index) 254 | } 255 | } 256 | 257 | fn eat_whitespace(&mut self) -> Option { 258 | while let Some(next) = self.data.get(self.index) { 259 | match next { 260 | b' ' | b'\r' | b'\t' | b'\n' => self.index += 1, 261 | _ => return Some(*next), 262 | } 263 | } 264 | None 265 | } 266 | } 267 | 268 | pub(crate) fn consume_infinity(data: &[u8], index: usize) -> JsonResult { 269 | consume_ident(data, index, INFINITY_REST) 270 | } 271 | 272 | pub(crate) fn consume_nan(data: &[u8], index: usize) -> JsonResult { 273 | consume_ident(data, index, NAN_REST) 274 | } 275 | 276 | fn consume_ident(data: &[u8], mut index: usize, expected: [u8; SIZE]) -> JsonResult { 277 | match data.get(index + 1..=index + SIZE) { 278 | Some(s) if s == expected => Ok(index + SIZE + 1), 279 | // TODO very sadly iterating over expected cause extra branches in the generated assembly 280 | // and is significantly slower than just returning an error 281 | _ => { 282 | index += 1; 283 | for c in &expected { 284 | match data.get(index) { 285 | Some(v) if v == c => index += 1, 286 | Some(_) => return json_err!(ExpectedSomeIdent, index), 287 | _ => break, 288 | } 289 | } 290 | json_err!(EofWhileParsingValue, index) 291 | } 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /crates/jiter/src/py_string_cache.rs: -------------------------------------------------------------------------------- 1 
| use std::sync::{Mutex, MutexGuard, OnceLock}; 2 | 3 | use ahash::random_state::RandomState; 4 | use pyo3::exceptions::{PyTypeError, PyValueError}; 5 | use pyo3::prelude::*; 6 | use pyo3::types::{PyBool, PyString}; 7 | 8 | use crate::string_decoder::StringOutput; 9 | 10 | #[derive(Debug, Clone, Copy, Default)] 11 | pub enum StringCacheMode { 12 | #[default] 13 | All, 14 | Keys, 15 | None, 16 | } 17 | 18 | impl<'py> FromPyObject<'_, 'py> for StringCacheMode { 19 | type Error = PyErr; 20 | 21 | fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult { 22 | if let Ok(bool_mode) = ob.cast::() { 23 | Ok(bool_mode.is_true().into()) 24 | } else if let Ok(str_mode) = ob.extract::<&str>() { 25 | match str_mode { 26 | "all" => Ok(Self::All), 27 | "keys" => Ok(Self::Keys), 28 | "none" => Ok(Self::None), 29 | _ => Err(PyValueError::new_err( 30 | "Invalid string cache mode, should be `'all'`, '`keys`', `'none`' or a `bool`", 31 | )), 32 | } 33 | } else { 34 | Err(PyTypeError::new_err( 35 | "Invalid string cache mode, should be `'all'`, '`keys`', `'none`' or a `bool`", 36 | )) 37 | } 38 | } 39 | } 40 | 41 | impl From for StringCacheMode { 42 | fn from(mode: bool) -> Self { 43 | if mode { 44 | Self::All 45 | } else { 46 | Self::None 47 | } 48 | } 49 | } 50 | 51 | pub trait StringMaybeCache { 52 | fn get_key<'py>(py: Python<'py>, string_output: StringOutput<'_, '_>) -> Bound<'py, PyString>; 53 | 54 | fn get_value<'py>(py: Python<'py>, string_output: StringOutput<'_, '_>) -> Bound<'py, PyString> { 55 | Self::get_key(py, string_output) 56 | } 57 | } 58 | 59 | pub struct StringCacheAll; 60 | 61 | impl StringMaybeCache for StringCacheAll { 62 | fn get_key<'py>(py: Python<'py>, string_output: StringOutput<'_, '_>) -> Bound<'py, PyString> { 63 | // Safety: string_output carries the safety information 64 | unsafe { cached_py_string_maybe_ascii(py, string_output.as_str(), string_output.ascii_only()) } 65 | } 66 | } 67 | 68 | pub struct StringCacheKeys; 69 | 70 | impl StringMaybeCache for 
StringCacheKeys { 71 | fn get_key<'py>(py: Python<'py>, string_output: StringOutput<'_, '_>) -> Bound<'py, PyString> { 72 | // Safety: string_output carries the safety information 73 | unsafe { cached_py_string_maybe_ascii(py, string_output.as_str(), string_output.ascii_only()) } 74 | } 75 | 76 | fn get_value<'py>(py: Python<'py>, string_output: StringOutput<'_, '_>) -> Bound<'py, PyString> { 77 | unsafe { pystring_fast_new_maybe_ascii(py, string_output.as_str(), string_output.ascii_only()) } 78 | } 79 | } 80 | 81 | pub struct StringNoCache; 82 | 83 | impl StringMaybeCache for StringNoCache { 84 | fn get_key<'py>(py: Python<'py>, string_output: StringOutput<'_, '_>) -> Bound<'py, PyString> { 85 | unsafe { pystring_fast_new_maybe_ascii(py, string_output.as_str(), string_output.ascii_only()) } 86 | } 87 | } 88 | 89 | static STRING_CACHE: OnceLock> = OnceLock::new(); 90 | 91 | #[inline] 92 | fn get_string_cache() -> MutexGuard<'static, PyStringCache> { 93 | match STRING_CACHE.get_or_init(|| Mutex::new(PyStringCache::default())).lock() { 94 | Ok(cache) => cache, 95 | Err(poisoned) => { 96 | let mut cache = poisoned.into_inner(); 97 | // worst case if we panic while the cache is held, we just clear and keep going 98 | cache.clear(); 99 | cache 100 | } 101 | } 102 | } 103 | 104 | pub fn cache_usage() -> usize { 105 | get_string_cache().usage() 106 | } 107 | 108 | pub fn cache_clear() { 109 | get_string_cache().clear(); 110 | } 111 | 112 | /// Create a cached Python `str` from a string slice 113 | #[inline] 114 | pub fn cached_py_string<'py>(py: Python<'py>, s: &str) -> Bound<'py, PyString> { 115 | // SAFETY: not setting ascii-only 116 | unsafe { cached_py_string_maybe_ascii(py, s, false) } 117 | } 118 | 119 | /// Create a cached Python `str` from a string slice. 120 | /// 121 | /// # Safety 122 | /// 123 | /// Caller must pass ascii-only string. 
124 | #[inline] 125 | pub unsafe fn cached_py_string_ascii<'py>(py: Python<'py>, s: &str) -> Bound<'py, PyString> { 126 | // SAFETY: caller upholds invariant 127 | unsafe { cached_py_string_maybe_ascii(py, s, true) } 128 | } 129 | 130 | /// # Safety 131 | /// 132 | /// Caller must match the ascii_only flag to the string passed in. 133 | unsafe fn cached_py_string_maybe_ascii<'py>(py: Python<'py>, s: &str, ascii_only: bool) -> Bound<'py, PyString> { 134 | // from tests, 0 and 1 character strings are faster not cached 135 | if (2..64).contains(&s.len()) { 136 | get_string_cache().get_or_insert(py, s, ascii_only) 137 | } else { 138 | pystring_fast_new_maybe_ascii(py, s, ascii_only) 139 | } 140 | } 141 | 142 | // capacity should be a power of 2 so the compiler can convert `%` to a right shift below 143 | // Using a smaller number here (e.g. 1024) seems to be faster in many cases than a larger number (like 65536) 144 | // and also avoids stack overflow risks 145 | const CAPACITY: usize = 16_384; 146 | type Entry = Option<(u64, Py)>; 147 | 148 | /// This is a Fully associative cache with LRU replacement policy. 149 | /// See https://en.wikipedia.org/wiki/Cache_placement_policies#Fully_associative_cache 150 | #[derive(Debug)] 151 | struct PyStringCache { 152 | entries: Box<[Entry; CAPACITY]>, 153 | hash_builder: RandomState, 154 | } 155 | 156 | const ARRAY_REPEAT_VALUE: Entry = None; 157 | 158 | impl Default for PyStringCache { 159 | fn default() -> Self { 160 | Self { 161 | #[allow(clippy::large_stack_arrays)] 162 | entries: Box::new([ARRAY_REPEAT_VALUE; CAPACITY]), 163 | hash_builder: RandomState::default(), 164 | } 165 | } 166 | } 167 | 168 | impl PyStringCache { 169 | /// Lookup the cache for an entry with the given string. If it exists, return it. 170 | /// If it is not set or has a different string, insert it and return it. 171 | /// 172 | /// # Safety 173 | /// 174 | /// `ascii_only` must only be set to `true` if the string is guaranteed to be ASCII only. 
175 | unsafe fn get_or_insert<'py>(&mut self, py: Python<'py>, s: &str, ascii_only: bool) -> Bound<'py, PyString> { 176 | let hash = self.hash_builder.hash_one(s); 177 | 178 | let hash_index = hash as usize % CAPACITY; 179 | 180 | let set_entry = |entry: &mut Entry| { 181 | // SAFETY: caller upholds invariant 182 | let py_str = unsafe { pystring_fast_new_maybe_ascii(py, s, ascii_only) }; 183 | if let Some((_, old_py_str)) = entry.replace((hash, py_str.clone().unbind())) { 184 | // micro-optimization: bind the old entry before dropping it so that PyO3 can 185 | // fast-path the drop (Bound::drop is faster than Py::drop) 186 | drop(old_py_str.into_bound(py)); 187 | } 188 | py_str 189 | }; 190 | 191 | // we try up to 5 contiguous slots to find a match or an empty slot 192 | for index in hash_index..hash_index.wrapping_add(5) { 193 | if let Some(entry) = self.entries.get_mut(index) { 194 | if let Some((entry_hash, py_str_ob)) = entry { 195 | // to avoid a string comparison, we first compare the hashes 196 | if *entry_hash == hash { 197 | // if the hashes match, we compare the strings to be absolutely sure - as a hashmap would do 198 | if py_str_ob.bind(py) == s { 199 | // the strings matched, return the cached string object 200 | return py_str_ob.bind(py).to_owned(); 201 | } 202 | } 203 | } else { 204 | // we got to an empty entry, use it 205 | return set_entry(entry); 206 | } 207 | } else { 208 | // we reached the end of entries, break 209 | break; 210 | } 211 | } 212 | // we tried all 5 slots (or got to the end of entries) without finding a match 213 | // or an empty slot, make this LRU by replacing the first entry 214 | let entry = self.entries.get_mut(hash_index).unwrap(); 215 | set_entry(entry) 216 | } 217 | 218 | /// get the number of entries in the cache that are set 219 | fn usage(&self) -> usize { 220 | self.entries.iter().filter(|e| e.is_some()).count() 221 | } 222 | 223 | /// clear the cache by resetting all entries to `None` 224 | fn clear(&mut self) { 225 
| self.entries.fill_with(|| None); 226 | } 227 | } 228 | 229 | /// Creatate a new Python `str` from a string slice, with a fast path for ASCII strings 230 | /// 231 | /// # Safety 232 | /// 233 | /// `ascii_only` must only be set to `true` if the string is guaranteed to be ASCII only. 234 | unsafe fn pystring_fast_new_maybe_ascii<'py>(py: Python<'py>, s: &str, ascii_only: bool) -> Bound<'py, PyString> { 235 | if ascii_only { 236 | // SAFETY: caller upholds invariant 237 | unsafe { pystring_ascii_new(py, s) } 238 | } else { 239 | PyString::new(py, s) 240 | } 241 | } 242 | 243 | /// Faster creation of PyString from an ASCII string, inspired by 244 | /// https://github.com/ijl/orjson/blob/3.10.0/src/str/create.rs#L41 245 | /// 246 | /// # Safety 247 | /// 248 | /// `s` must be ASCII only 249 | pub unsafe fn pystring_ascii_new<'py>(py: Python<'py>, s: &str) -> Bound<'py, PyString> { 250 | #[cfg(not(any(PyPy, GraalPy, Py_LIMITED_API)))] 251 | { 252 | let ptr = pyo3::ffi::PyUnicode_New(s.len() as isize, 127); 253 | // see https://github.com/pydantic/jiter/pull/72#discussion_r1545485907 254 | debug_assert_eq!(pyo3::ffi::PyUnicode_KIND(ptr), pyo3::ffi::PyUnicode_1BYTE_KIND); 255 | let data_ptr = pyo3::ffi::PyUnicode_DATA(ptr).cast(); 256 | core::ptr::copy_nonoverlapping(s.as_ptr(), data_ptr, s.len()); 257 | core::ptr::write(data_ptr.add(s.len()), 0); 258 | Bound::from_owned_ptr(py, ptr).cast_into_unchecked() 259 | } 260 | 261 | #[cfg(any(PyPy, GraalPy, Py_LIMITED_API))] 262 | { 263 | PyString::new(py, s) 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /crates/jiter/src/simd_aarch64.rs: -------------------------------------------------------------------------------- 1 | use std::mem::transmute; 2 | #[rustfmt::skip] 3 | use std::arch::aarch64::{ 4 | uint8x16_t, 5 | uint16x8_t, 6 | uint32x4_t, 7 | uint64x2_t, 8 | uint8x8_t, 9 | uint16x4_t, 10 | uint32x2_t, 11 | uint64x1_t, 12 | // 16 byte methods 13 | vld1q_u8 as 
simd_load_16, 14 | vcgtq_u8 as simd_gt_16, 15 | vcltq_u8 as simd_lt_16, 16 | vorrq_u8 as simd_or_16, 17 | vceqq_u8 as simd_eq_16, 18 | vextq_u8 as combine_vecs_16, 19 | vsubq_u8 as simd_sub_16, 20 | vmulq_u8 as simd_mul_16, 21 | vpaddlq_u8 as simd_add_16, 22 | vmulq_u16 as simd_mul_u16_8, 23 | vpaddlq_u16 as simd_add_u16_8, 24 | vmulq_u32 as simd_mul_u32_4, 25 | vpaddlq_u32 as simd_add_u32_4, 26 | // 8 byte methods 27 | vget_low_u8 as simd_get_low, 28 | vext_u8 as combine_vecs_8, 29 | vsub_u8 as simd_sub_8, 30 | vmul_u8 as simd_mul_8, 31 | vpaddl_u8 as simd_add_8, 32 | vmul_u16 as simd_mul_u16_4, 33 | vpaddl_u16 as simd_add_u16_4, 34 | vmul_u32 as simd_mul_u32_2, 35 | vpaddl_u32 as simd_add_u32_2, 36 | }; 37 | use crate::JsonResult; 38 | 39 | use crate::number_decoder::{decode_int_chunk_fallback, IntChunk}; 40 | use crate::string_decoder::StringChunk; 41 | 42 | type SimdVecu8_16 = uint8x16_t; 43 | type SimdVecu16_8 = uint16x8_t; 44 | type SimdVecu32_4 = uint32x4_t; 45 | type SimdVecu64_2 = uint64x2_t; 46 | 47 | type SimdVecu8_8 = uint8x8_t; 48 | type SimdVecu16_4 = uint16x4_t; 49 | type SimdVecu32_2 = uint32x2_t; 50 | type SimdVecu64_1 = uint64x1_t; 51 | const SIMD_STEP: usize = 16; 52 | 53 | macro_rules! 
simd_const { 54 | ($array:expr) => { 55 | unsafe { transmute($array) } 56 | }; 57 | } 58 | 59 | const ZERO_DIGIT_U8_8: SimdVecu8_8 = simd_const!([b'0'; 8]); 60 | const ZERO_VAL_U8_8: SimdVecu8_8 = simd_const!([0u8; 8]); 61 | const ALT_MUL_U8_8: SimdVecu8_8 = simd_const!([10u8, 1u8, 10u8, 1u8, 10u8, 1u8, 10u8, 1u8]); 62 | const ALT_MUL_U16_4: SimdVecu16_4 = simd_const!([100u16, 1u16, 100u16, 1u16]); 63 | const ALT_MUL_U32_2: SimdVecu32_2 = simd_const!([10000u32, 1u32]); 64 | const ZERO_DIGIT_16: SimdVecu8_16 = simd_const!([b'0'; 16]); 65 | const NINE_DIGIT_16: SimdVecu8_16 = simd_const!([b'9'; 16]); 66 | 67 | const ZERO_VAL_U8_16: SimdVecu8_16 = simd_const!([0u8; 16]); 68 | const ALT_MUL_U8_16: SimdVecu8_16 = 69 | simd_const!([10u8, 1u8, 10u8, 1u8, 10u8, 1u8, 10u8, 1u8, 10u8, 1u8, 10u8, 1u8, 10u8, 1u8, 10u8, 1u8]); 70 | const ALT_MUL_U16_8: SimdVecu16_8 = simd_const!([100u16, 1u16, 100u16, 1u16, 100u16, 1u16, 100u16, 1u16]); 71 | const ALT_MUL_U32_4: SimdVecu32_4 = simd_const!([10000u32, 1u32, 10000u32, 1u32]); 72 | 73 | #[inline(always)] 74 | pub(crate) fn decode_int_chunk(data: &[u8], index: usize) -> (IntChunk, usize) { 75 | if let Some(byte_chunk) = data.get(index..index + SIMD_STEP) { 76 | let byte_vec = load_slice(byte_chunk); 77 | 78 | let digit_mask = get_digit_mask(byte_vec); 79 | if is_zero(digit_mask) { 80 | // all lanes are digits, parse the full vector 81 | let value = unsafe { full_calc(byte_vec, 16) }; 82 | (IntChunk::Ongoing(value), index + SIMD_STEP) 83 | } else { 84 | // some lanes are not digits, transmute to a pair of u64 and find the first non-digit 85 | let last_digit = find_end(digit_mask); 86 | let index = index + last_digit as usize; 87 | if next_is_float(data, index) { 88 | (IntChunk::Float, index) 89 | } else if last_digit <= 8 { 90 | // non-digit in the first 8 bytes 91 | let value = unsafe { first_half_calc(byte_vec, last_digit) }; 92 | (IntChunk::Done(value), index) 93 | } else { 94 | // non-digit in the last 8 bytes 95 | let value =
unsafe { full_calc(byte_vec, last_digit) }; 96 | (IntChunk::Done(value), index) 97 | } 98 | } 99 | } else { 100 | // we got near the end of the string, fall back to the slow path 101 | decode_int_chunk_fallback(data, index, 0) 102 | } 103 | } 104 | 105 | #[rustfmt::skip] 106 | fn get_digit_mask(byte_vec: SimdVecu8_16) -> SimdVecu8_16 { 107 | unsafe { 108 | simd_or_16( 109 | simd_lt_16(byte_vec, ZERO_DIGIT_16), 110 | simd_gt_16(byte_vec, NINE_DIGIT_16), 111 | ) 112 | } 113 | } 114 | 115 | unsafe fn first_half_calc(byte_vec: SimdVecu8_16, last_digit: u32) -> u64 { 116 | let small_byte_vec = simd_get_low(byte_vec); 117 | // subtract ascii '0' from every byte to get the digit values 118 | let digits: SimdVecu8_8 = simd_sub_8(small_byte_vec, ZERO_DIGIT_U8_8); 119 | let digits = match last_digit { 120 | 0 => return 0, 121 | 1 => { 122 | let t: [u8; 8] = transmute(digits); 123 | return t[0] as u64; 124 | } 125 | 2 => combine_vecs_8::<2>(ZERO_VAL_U8_8, digits), 126 | 3 => combine_vecs_8::<3>(ZERO_VAL_U8_8, digits), 127 | 4 => combine_vecs_8::<4>(ZERO_VAL_U8_8, digits), 128 | 5 => combine_vecs_8::<5>(ZERO_VAL_U8_8, digits), 129 | 6 => combine_vecs_8::<6>(ZERO_VAL_U8_8, digits), 130 | 7 => combine_vecs_8::<7>(ZERO_VAL_U8_8, digits), 131 | 8 => digits, 132 | _ => unreachable!("last_digit should be less than 8"), 133 | }; 134 | // multiply every other digit by 10 135 | let x: SimdVecu8_8 = simd_mul_8(digits, ALT_MUL_U8_8); 136 | // add the value together and combine the 8x8-bit lanes into 4x16-bit lanes 137 | let x: SimdVecu16_4 = simd_add_8(x); 138 | // multiply every other digit by 100 139 | let x: SimdVecu16_4 = simd_mul_u16_4(x, ALT_MUL_U16_4); 140 | // add the value together and combine the 4x16-bit lanes into 2x32-bit lanes 141 | let x: SimdVecu32_2 = simd_add_u16_4(x); 142 | // multiply the first value by 10000 143 | let x: SimdVecu32_2 = simd_mul_u32_2(x, ALT_MUL_U32_2); 144 | // add the value together and combine the 2x32-bit lanes into 1x64-bit lane 145 | let x:
SimdVecu64_1 = simd_add_u32_2(x); 146 | // transmute the 64-bit lane into a u64 147 | transmute(x) 148 | } 149 | 150 | unsafe fn full_calc(byte_vec: SimdVecu8_16, last_digit: u32) -> u64 { 151 | // subtract ascii '0' from every byte to get the digit values 152 | let digits: SimdVecu8_16 = simd_sub_16(byte_vec, ZERO_DIGIT_16); 153 | let digits = match last_digit { 154 | 9 => combine_vecs_16::<9>(ZERO_VAL_U8_16, digits), 155 | 10 => combine_vecs_16::<10>(ZERO_VAL_U8_16, digits), 156 | 11 => combine_vecs_16::<11>(ZERO_VAL_U8_16, digits), 157 | 12 => combine_vecs_16::<12>(ZERO_VAL_U8_16, digits), 158 | 13 => combine_vecs_16::<13>(ZERO_VAL_U8_16, digits), 159 | 14 => combine_vecs_16::<14>(ZERO_VAL_U8_16, digits), 160 | 15 => combine_vecs_16::<15>(ZERO_VAL_U8_16, digits), 161 | 16 => digits, 162 | _ => unreachable!("last_digit should be between 9 and 16"), 163 | }; 164 | // multiply every other digit by 10 165 | let x: SimdVecu8_16 = simd_mul_16(digits, ALT_MUL_U8_16); 166 | // add the value together and combine the 16x8-bit lanes into 8x16-bit lanes 167 | let x: SimdVecu16_8 = simd_add_16(x); 168 | // multiply every other digit by 100 169 | let x: SimdVecu16_8 = simd_mul_u16_8(x, ALT_MUL_U16_8); 170 | // add the value together and combine the 8x16-bit lanes into 4x32-bit lanes 171 | let x: SimdVecu32_4 = simd_add_u16_8(x); 172 | // multiply every other digit by 10000 173 | let x: SimdVecu32_4 = simd_mul_u32_4(x, ALT_MUL_U32_4); 174 | // add the value together and combine the 4x32-bit lanes into 2x64-bit lanes 175 | let x: SimdVecu64_2 = simd_add_u32_4(x); 176 | 177 | // transmute the 2x64-bit lanes into an array; 178 | let t: [u64; 2] = transmute(x); 179 | // since the data started out as digits, it's safe to assume the result fits in a u64 180 | t[0].wrapping_mul(100_000_000).wrapping_add(t[1]) 181 | } 182 | 183 | fn next_is_float(data: &[u8], index: usize) -> bool { 184 | let next = unsafe { data.get_unchecked(index) }; 185 | matches!(next, b'.'
| b'e' | b'E') 186 | } 187 | 188 | const QUOTE_16: SimdVecu8_16 = simd_const!([b'"'; 16]); 189 | const BACKSLASH_16: SimdVecu8_16 = simd_const!([b'\\'; 16]); 190 | // values below 32 are control characters 191 | const CONTROL_16: SimdVecu8_16 = simd_const!([32u8; 16]); 192 | const ASCII_MAX_16: SimdVecu8_16 = simd_const!([127u8; 16]); 193 | 194 | #[inline(always)] 195 | pub(crate) fn decode_string_chunk( 196 | data: &[u8], 197 | mut index: usize, 198 | mut ascii_only: bool, 199 | allow_partial: bool, 200 | ) -> JsonResult<(StringChunk, bool, usize)> { 201 | while let Some(byte_chunk) = data.get(index..index + SIMD_STEP) { 202 | let byte_vec = load_slice(byte_chunk); 203 | 204 | let ascii_mask = string_ascii_mask(byte_vec); 205 | if is_zero(ascii_mask) { 206 | // this chunk is just ascii, continue to the next chunk 207 | index += SIMD_STEP; 208 | } else { 209 | // this chunk contains either a stop character or a non-ascii character 210 | let a: [u8; 16] = unsafe { transmute(byte_vec) }; 211 | #[allow(clippy::redundant_else)] 212 | if let Some(r) = StringChunk::decode_array(a, &mut index, ascii_only) { 213 | return r; 214 | } else { 215 | ascii_only = false; 216 | } 217 | } 218 | } 219 | // we got near the end of the string, fall back to the slow path 220 | StringChunk::decode_fallback(data, index, ascii_only, allow_partial) 221 | } 222 | 223 | #[rustfmt::skip] 224 | /// returns a mask where any non-zero byte means we don't have a simple ascii character, either 225 | /// quote, backslash, control character, or non-ascii (above 127) 226 | fn string_ascii_mask(byte_vec: SimdVecu8_16) -> SimdVecu8_16 { 227 | unsafe { 228 | simd_or_16( 229 | simd_eq_16(byte_vec, QUOTE_16), 230 | simd_or_16( 231 | simd_eq_16(byte_vec, BACKSLASH_16), 232 | simd_or_16( 233 | simd_gt_16(byte_vec, ASCII_MAX_16), 234 | simd_lt_16(byte_vec, CONTROL_16), 235 | ) 236 | ) 237 | ) 238 | } 239 | } 240 | 241 | fn find_end(digit_mask: SimdVecu8_16) -> u32 { 242 | let t: [u64; 2] = unsafe { 
transmute(digit_mask) }; 243 | if t[0] != 0 { 244 | // non-digit in the first 8 bytes 245 | t[0].trailing_zeros() / 8 246 | } else { 247 | t[1].trailing_zeros() / 8 + 8 248 | } 249 | } 250 | 251 | /// return true if all bytes are zero 252 | fn is_zero(vec: SimdVecu8_16) -> bool { 253 | let t: [u64; 2] = unsafe { transmute(vec) }; 254 | t[0] == 0 && t[1] == 0 255 | } 256 | 257 | fn load_slice(bytes: &[u8]) -> SimdVecu8_16 { 258 | debug_assert_eq!(bytes.len(), 16); 259 | unsafe { simd_load_16(bytes.as_ptr()) } 260 | } 261 | -------------------------------------------------------------------------------- /crates/jiter/src/errors.rs: -------------------------------------------------------------------------------- 1 | /// Enum representing all possible errors in JSON syntax. 2 | /// 3 | /// Almost all of `JsonErrorType` is copied from [serde_json](https://github.com/serde-rs) so errors match 4 | /// those expected from `serde_json`. 5 | #[derive(Debug, PartialEq, Eq, Clone)] 6 | pub enum JsonErrorType { 7 | /// float value was found where an int was expected 8 | FloatExpectingInt, 9 | 10 | /// duplicate keys in an object 11 | DuplicateKey(String), 12 | 13 | /// happens when getting the `Decimal` type or constructing a decimal fails 14 | InternalError(String), 15 | 16 | /// NOTE: all errors from here on are copied from serde_json 17 | /// [src/error.rs](https://github.com/serde-rs/json/blob/v1.0.107/src/error.rs#L236) 18 | /// with `Io` and `Message` removed 19 | /// 20 | /// EOF while parsing a list. 21 | EofWhileParsingList, 22 | 23 | /// EOF while parsing an object. 24 | EofWhileParsingObject, 25 | 26 | /// EOF while parsing a string. 27 | EofWhileParsingString, 28 | 29 | /// EOF while parsing a JSON value. 30 | EofWhileParsingValue, 31 | 32 | /// Expected this character to be a `':'`. 33 | ExpectedColon, 34 | 35 | /// Expected this character to be either a `','` or a `']'`. 
36 | ExpectedListCommaOrEnd, 37 | 38 | /// Expected this character to be either a `','` or a `'}'`. 39 | ExpectedObjectCommaOrEnd, 40 | 41 | /// Expected to parse either a `true`, `false`, or a `null`. 42 | ExpectedSomeIdent, 43 | 44 | /// Expected this character to start a JSON value. 45 | ExpectedSomeValue, 46 | 47 | /// Invalid hex escape code. 48 | InvalidEscape, 49 | 50 | /// Invalid number. 51 | InvalidNumber, 52 | 53 | /// Number is bigger than the maximum value of its type. 54 | NumberOutOfRange, 55 | 56 | /// Invalid unicode code point. 57 | InvalidUnicodeCodePoint, 58 | 59 | /// Control character found while parsing a string. 60 | ControlCharacterWhileParsingString, 61 | 62 | /// Object key is not a string. 63 | KeyMustBeAString, 64 | 65 | /// Lone leading surrogate in hex escape. 66 | LoneLeadingSurrogateInHexEscape, 67 | 68 | /// JSON has a comma after the last value in an array or map. 69 | TrailingComma, 70 | 71 | /// JSON has non-whitespace trailing characters after the value. 72 | TrailingCharacters, 73 | 74 | /// Unexpected end of hex escape. 75 | UnexpectedEndOfHexEscape, 76 | 77 | /// Encountered nesting of JSON maps and arrays deeper than the recursion limit.
78 | RecursionLimitExceeded, 79 | } 80 | 81 | impl std::fmt::Display for JsonErrorType { 82 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 83 | // Messages for enum members copied from serde_json are unchanged 84 | match self { 85 | Self::FloatExpectingInt => f.write_str("float value was found where an int was expected"), 86 | Self::DuplicateKey(s) => write!(f, "Detected duplicate key {s:?}"), 87 | Self::InternalError(s) => write!(f, "Internal error: {s:?}"), 88 | Self::EofWhileParsingList => f.write_str("EOF while parsing a list"), 89 | Self::EofWhileParsingObject => f.write_str("EOF while parsing an object"), 90 | Self::EofWhileParsingString => f.write_str("EOF while parsing a string"), 91 | Self::EofWhileParsingValue => f.write_str("EOF while parsing a value"), 92 | Self::ExpectedColon => f.write_str("expected `:`"), 93 | Self::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"), 94 | Self::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"), 95 | Self::ExpectedSomeIdent => f.write_str("expected ident"), 96 | Self::ExpectedSomeValue => f.write_str("expected value"), 97 | Self::InvalidEscape => f.write_str("invalid escape"), 98 | Self::InvalidNumber => f.write_str("invalid number"), 99 | Self::NumberOutOfRange => f.write_str("number out of range"), 100 | Self::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"), 101 | Self::ControlCharacterWhileParsingString => { 102 | f.write_str("control character (\\u0000-\\u001F) found while parsing a string") 103 | } 104 | Self::KeyMustBeAString => f.write_str("key must be a string"), 105 | Self::LoneLeadingSurrogateInHexEscape => f.write_str("lone leading surrogate in hex escape"), 106 | Self::TrailingComma => f.write_str("trailing comma"), 107 | Self::TrailingCharacters => f.write_str("trailing characters"), 108 | Self::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"), 109 | Self::RecursionLimitExceeded => f.write_str("recursion limit 
exceeded"), 110 | } 111 | } 112 | } 113 | 114 | pub type JsonResult<T> = Result<T, JsonError>; 115 | 116 | /// Represents an error from parsing JSON 117 | #[derive(Debug, Clone, Eq, PartialEq)] 118 | pub struct JsonError { 119 | /// The type of error. 120 | pub error_type: JsonErrorType, 121 | /// The index in the data where the error occurred. 122 | pub index: usize, 123 | } 124 | 125 | impl JsonError { 126 | pub(crate) fn new(error_type: JsonErrorType, index: usize) -> Self { 127 | Self { error_type, index } 128 | } 129 | 130 | pub fn get_position(&self, json_data: &[u8]) -> LinePosition { 131 | LinePosition::find(json_data, self.index) 132 | } 133 | 134 | pub fn description(&self, json_data: &[u8]) -> String { 135 | let position = self.get_position(json_data); 136 | format!("{} at {}", self.error_type, position) 137 | } 138 | 139 | pub(crate) fn allowed_if_partial(&self) -> bool { 140 | matches!( 141 | self.error_type, 142 | JsonErrorType::EofWhileParsingList 143 | | JsonErrorType::EofWhileParsingObject 144 | | JsonErrorType::EofWhileParsingString 145 | | JsonErrorType::EofWhileParsingValue 146 | | JsonErrorType::ExpectedListCommaOrEnd 147 | | JsonErrorType::ExpectedObjectCommaOrEnd 148 | ) 149 | } 150 | } 151 | 152 | impl std::fmt::Display for JsonError { 153 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 154 | write!(f, "{} at index {}", self.error_type, self.index) 155 | } 156 | } 157 | 158 | impl std::error::Error for JsonError {} 159 | 160 | macro_rules! json_error { 161 | ($error_type:ident, $index:expr) => { 162 | crate::errors::JsonError::new(crate::errors::JsonErrorType::$error_type, $index) 163 | }; 164 | } 165 | 166 | pub(crate) use json_error; 167 | 168 | macro_rules!
json_err { 169 | ($error_type:ident, $index:expr) => { 170 | Err(crate::errors::json_error!($error_type, $index)) 171 | }; 172 | } 173 | 174 | use crate::Jiter; 175 | pub(crate) use json_err; 176 | 177 | pub(crate) const DEFAULT_RECURSION_LIMIT: u8 = 200; 178 | 179 | /// Enum representing all JSON types. 180 | #[derive(Debug, Clone, Eq, PartialEq)] 181 | pub enum JsonType { 182 | Null, 183 | Bool, 184 | Int, 185 | Float, 186 | String, 187 | Array, 188 | Object, 189 | } 190 | 191 | impl std::fmt::Display for JsonType { 192 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 193 | match self { 194 | Self::Null => f.write_str("null"), 195 | Self::Bool => f.write_str("bool"), 196 | Self::Int => f.write_str("int"), 197 | Self::Float => f.write_str("float"), 198 | Self::String => f.write_str("string"), 199 | Self::Array => f.write_str("array"), 200 | Self::Object => f.write_str("object"), 201 | } 202 | } 203 | } 204 | 205 | /// Enum representing either a [JsonErrorType] or a WrongType error. 206 | #[derive(Debug, Clone, Eq, PartialEq)] 207 | pub enum JiterErrorType { 208 | JsonError(JsonErrorType), 209 | WrongType { expected: JsonType, actual: JsonType }, 210 | } 211 | 212 | impl std::fmt::Display for JiterErrorType { 213 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 214 | match self { 215 | Self::JsonError(error_type) => write!(f, "{error_type}"), 216 | Self::WrongType { expected, actual } => { 217 | write!(f, "expected {expected} but found {actual}") 218 | } 219 | } 220 | } 221 | } 222 | 223 | /// An error from the Jiter iterator. 
224 | #[derive(Debug, Clone, Eq, PartialEq)] 225 | pub struct JiterError { 226 | pub error_type: JiterErrorType, 227 | pub index: usize, 228 | } 229 | 230 | impl std::fmt::Display for JiterError { 231 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 232 | write!(f, "{} at index {}", self.error_type, self.index) 233 | } 234 | } 235 | 236 | impl std::error::Error for JiterError {} 237 | 238 | impl JiterError { 239 | pub(crate) fn new(error_type: JiterErrorType, index: usize) -> Self { 240 | Self { error_type, index } 241 | } 242 | 243 | pub fn get_position(&self, jiter: &Jiter) -> LinePosition { 244 | jiter.error_position(self.index) 245 | } 246 | 247 | pub fn description(&self, jiter: &Jiter) -> String { 248 | let position = self.get_position(jiter); 249 | format!("{} at {}", self.error_type, position) 250 | } 251 | 252 | pub(crate) fn wrong_type(expected: JsonType, actual: JsonType, index: usize) -> Self { 253 | Self::new(JiterErrorType::WrongType { expected, actual }, index) 254 | } 255 | } 256 | 257 | impl From<JsonError> for JiterError { 258 | fn from(error: JsonError) -> Self { 259 | Self { 260 | error_type: JiterErrorType::JsonError(error.error_type), 261 | index: error.index, 262 | } 263 | } 264 | } 265 | 266 | /// Represents a line and column in a file or input string, used for both errors and value positions. 267 | #[derive(Debug, Clone, PartialEq, Eq)] 268 | pub struct LinePosition { 269 | /// Line number, starting at 1. 270 | pub line: usize, 271 | /// Column number, starting at 1. 272 | pub column: usize, 273 | } 274 | 275 | impl std::fmt::Display for LinePosition { 276 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 277 | write!(f, "line {} column {}", self.line, self.column) 278 | } 279 | } 280 | 281 | impl LinePosition { 282 | pub fn new(line: usize, column: usize) -> Self { 283 | Self { line, column } 284 | } 285 | 286 | /// Find the line and column of a byte index in a string.
287 | pub fn find(json_data: &[u8], find: usize) -> Self { 288 | let mut line = 1; 289 | let mut last_line_start = 0; 290 | let mut index = 0; 291 | while let Some(next) = json_data.get(index) { 292 | if *next == b'\n' { 293 | line += 1; 294 | last_line_start = index + 1; 295 | } 296 | if index == find { 297 | return Self { 298 | line, 299 | column: index + 1 - last_line_start, 300 | }; 301 | } 302 | index += 1; 303 | } 304 | Self { 305 | line, 306 | column: index.saturating_sub(last_line_start), 307 | } 308 | } 309 | 310 | pub fn short(&self) -> String { 311 | format!("{}:{}", self.line, self.column) 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /crates/jiter/benches/main.rs: -------------------------------------------------------------------------------- 1 | use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion}; 2 | 3 | use std::fs::File; 4 | use std::hint::black_box; 5 | use std::io::Read; 6 | use std::path::Path; 7 | 8 | use jiter::{Jiter, JsonValue, PartialMode, Peek}; 9 | use serde_json::Value; 10 | 11 | fn read_title(path: &str) -> String { 12 | let path = Path::new(path); 13 | let file_stem = path.file_stem().unwrap(); 14 | 15 | file_stem.to_str().unwrap().to_owned() 16 | } 17 | 18 | fn read_file(path: &str) -> String { 19 | let mut file = File::open(path).unwrap(); 20 | let mut contents = String::new(); 21 | file.read_to_string(&mut contents).unwrap(); 22 | contents 23 | } 24 | 25 | fn jiter_value(path: &str, c: &mut Criterion) { 26 | let title = read_title(path) + "_jiter_value"; 27 | let json = read_file(path); 28 | let json_data = json.as_bytes(); 29 | 30 | c.bench_function(&title, |bench| { 31 | bench.iter(|| { 32 | let v = JsonValue::parse(black_box(json_data), false).unwrap(); 33 | black_box(v) 34 | }); 35 | }); 36 | } 37 | 38 | fn jiter_skip(path: &str, c: &mut Criterion) { 39 | let title = read_title(path) + "_jiter_skip"; 40 | let json = read_file(path); 41 | let 
json_data = black_box(json.as_bytes()); 42 | 43 | c.bench_function(&title, |bench| { 44 | bench.iter(|| { 45 | let mut jiter = Jiter::new(json_data); 46 | jiter.next_skip().unwrap(); 47 | }); 48 | }); 49 | } 50 | 51 | fn jiter_iter_big(path: &str, c: &mut Criterion) { 52 | let title = read_title(path) + "_jiter_iter"; 53 | let json = read_file(path); 54 | let json_data = black_box(json.as_bytes()); 55 | 56 | c.bench_function(&title, |bench| { 57 | bench.iter(|| { 58 | let mut jiter = Jiter::new(json_data); 59 | jiter.next_array().unwrap(); 60 | 61 | loop { 62 | if let Some(peek) = jiter.next_array().unwrap() { 63 | let i = jiter.known_float(peek).unwrap(); 64 | black_box(i); 65 | while let Some(peek) = jiter.array_step().unwrap() { 66 | let i = jiter.known_float(peek).unwrap(); 67 | black_box(i); 68 | } 69 | } 70 | if jiter.array_step().unwrap().is_none() { 71 | break; 72 | } 73 | } 74 | }); 75 | }); 76 | } 77 | 78 | fn find_string(jiter: &mut Jiter) -> String { 79 | let peek = jiter.peek().unwrap(); 80 | match peek { 81 | Peek::String => jiter.known_str().unwrap().to_string(), 82 | Peek::Array => { 83 | assert!(jiter.known_array().unwrap().is_some()); 84 | let s = find_string(jiter).to_string(); 85 | assert!(jiter.array_step().unwrap().is_none()); 86 | s 87 | } 88 | _ => panic!("Expected string or array"), 89 | } 90 | } 91 | 92 | fn jiter_iter_pass2(path: &str, c: &mut Criterion) { 93 | let title = read_title(path) + "_jiter_iter"; 94 | let json = read_file(path); 95 | let json_data = black_box(json.as_bytes()); 96 | 97 | c.bench_function(&title, |bench| { 98 | bench.iter(|| { 99 | let mut jiter = Jiter::new(json_data); 100 | let string = find_string(&mut jiter); 101 | jiter.finish().unwrap(); 102 | black_box(string) 103 | }); 104 | }); 105 | } 106 | 107 | fn jiter_iter_string_array(path: &str, c: &mut Criterion) { 108 | let title = read_title(path) + "_jiter_iter"; 109 | let json = read_file(path); 110 | let json_data = black_box(json.as_bytes()); 111 | 112 | 
c.bench_function(&title, |bench| { 113 | bench.iter(|| { 114 | let mut jiter = Jiter::new(json_data); 115 | jiter.next_array().unwrap(); 116 | let i = jiter.known_str().unwrap(); 117 | // record len instead of allocating the string to simulate something like constructing a PyString 118 | black_box(i.len()); 119 | while jiter.array_step().unwrap().is_some() { 120 | let i = jiter.known_str().unwrap(); 121 | black_box(i.len()); 122 | } 123 | jiter.finish().unwrap(); 124 | }); 125 | }); 126 | } 127 | 128 | fn jiter_iter_true_array(path: &str, c: &mut Criterion) { 129 | let title = read_title(path) + "_jiter_iter"; 130 | let json = read_file(path); 131 | let json_data = black_box(json.as_bytes()); 132 | 133 | c.bench_function(&title, |bench| { 134 | bench.iter(|| { 135 | let mut jiter = Jiter::new(json_data); 136 | let first_peek = jiter.next_array().unwrap().unwrap(); 137 | let i = jiter.known_bool(first_peek).unwrap(); 138 | black_box(i); 139 | while let Some(peek) = jiter.array_step().unwrap() { 140 | let i = jiter.known_bool(peek).unwrap(); 141 | black_box(i); 142 | } 143 | }); 144 | }); 145 | } 146 | 147 | fn jiter_iter_true_object(path: &str, c: &mut Criterion) { 148 | let title = read_title(path) + "_jiter_iter"; 149 | let json = read_file(path); 150 | let json_data = black_box(json.as_bytes()); 151 | 152 | c.bench_function(&title, |bench| { 153 | bench.iter(|| { 154 | let mut jiter = Jiter::new(json_data); 155 | if let Some(first_key) = jiter.next_object().unwrap() { 156 | let first_key = first_key.to_string(); 157 | let first_value = jiter.next_bool().unwrap(); 158 | black_box((first_key, first_value)); 159 | while let Some(key) = jiter.next_key().unwrap() { 160 | let key = key.to_string(); 161 | let value = jiter.next_bool().unwrap(); 162 | black_box((key, value)); 163 | } 164 | } 165 | }); 166 | }); 167 | } 168 | 169 | fn jiter_iter_ints_array(path: &str, c: &mut Criterion) { 170 | let title = read_title(path) + "_jiter_iter"; 171 | let json = 
read_file(path); 172 | let json_data = black_box(json.as_bytes()); 173 | 174 | c.bench_function(&title, |bench| { 175 | bench.iter(|| { 176 | let mut jiter = Jiter::new(json_data); 177 | let first_peek = jiter.next_array().unwrap().unwrap(); 178 | let i = jiter.known_int(first_peek).unwrap(); 179 | black_box(i); 180 | while let Some(peek) = jiter.array_step().unwrap() { 181 | let i = jiter.known_int(peek).unwrap(); 182 | black_box(i); 183 | } 184 | }); 185 | }); 186 | } 187 | 188 | fn jiter_iter_floats_array(path: &str, c: &mut Criterion) { 189 | let title = read_title(path) + "_jiter_iter"; 190 | let json = read_file(path); 191 | let json_data = black_box(json.as_bytes()); 192 | 193 | c.bench_function(&title, |bench| { 194 | bench.iter(|| { 195 | let mut jiter = Jiter::new(json_data); 196 | let first_peek = jiter.next_array().unwrap().unwrap(); 197 | let i = jiter.known_float(first_peek).unwrap(); 198 | black_box(i); 199 | while let Some(peek) = jiter.array_step().unwrap() { 200 | let i = jiter.known_float(peek).unwrap(); 201 | black_box(i); 202 | } 203 | }); 204 | }); 205 | } 206 | 207 | fn jiter_string(path: &str, c: &mut Criterion) { 208 | let title = read_title(path) + "_jiter_iter"; 209 | let json = read_file(path); 210 | let json_data = black_box(json.as_bytes()); 211 | 212 | c.bench_function(&title, |bench| { 213 | bench.iter(|| { 214 | let mut jiter = Jiter::new(json_data); 215 | let string = jiter.next_str().unwrap(); 216 | black_box(string); 217 | jiter.finish().unwrap(); 218 | }); 219 | }); 220 | } 221 | 222 | fn serde_value(path: &str, c: &mut Criterion) { 223 | let title = read_title(path) + "_serde_value"; 224 | let json = read_file(path); 225 | let json_data = black_box(json.as_bytes()); 226 | 227 | c.bench_function(&title, |bench| { 228 | bench.iter(|| { 229 | let value: Value = serde_json::from_slice(json_data).unwrap(); 230 | black_box(value); 231 | }); 232 | }); 233 | } 234 | 235 | fn serde_str(path: &str, c: &mut Criterion) { 236 | let title = 
read_title(path) + "_serde_iter"; 237 | let json = read_file(path); 238 | let json_data = black_box(json.as_bytes()); 239 | 240 | c.bench_function(&title, |bench| { 241 | bench.iter(|| { 242 | let value: String = serde_json::from_slice(json_data).unwrap(); 243 | black_box(value); 244 | }); 245 | }); 246 | } 247 | 248 | macro_rules! test_cases { 249 | ($file_name:ident) => { 250 | paste::item! { 251 | fn [< $file_name _jiter_value >](c: &mut Criterion) { 252 | let file_path = format!("./benches/{}.json", stringify!($file_name)); 253 | jiter_value(&file_path, c); 254 | } 255 | 256 | fn [< $file_name _jiter_iter >](c: &mut Criterion) { 257 | let file_name = stringify!($file_name); 258 | let file_path = format!("./benches/{}.json", file_name); 259 | if file_name == "big" { 260 | jiter_iter_big(&file_path, c); 261 | } else if file_name == "pass2" { 262 | jiter_iter_pass2(&file_path, c); 263 | } else if file_name == "string_array" { 264 | jiter_iter_string_array(&file_path, c); 265 | } else if file_name == "true_array" { 266 | jiter_iter_true_array(&file_path, c); 267 | } else if file_name == "true_object" { 268 | jiter_iter_true_object(&file_path, c); 269 | } else if file_name == "bigints_array" { 270 | jiter_iter_ints_array(&file_path, c); 271 | } else if file_name == "massive_ints_array" { 272 | jiter_iter_ints_array(&file_path, c); 273 | } else if file_name == "floats_array" { 274 | jiter_iter_floats_array(&file_path, c); 275 | } else if file_name == "x100" || file_name == "sentence" || file_name == "unicode" { 276 | jiter_string(&file_path, c); 277 | } 278 | } 279 | fn [< $file_name _jiter_skip >](c: &mut Criterion) { 280 | let file_path = format!("./benches/{}.json", stringify!($file_name)); 281 | jiter_skip(&file_path, c); 282 | } 283 | 284 | fn [< $file_name _serde_value >](c: &mut Criterion) { 285 | let file_path = format!("./benches/{}.json", stringify!($file_name)); 286 | serde_value(&file_path, c); 287 | } 288 | } 289 | }; 290 | } 291 | 292 | // 
https://json.org/JSON_checker/test/pass1.json 293 | // see https://github.com/python/cpython/blob/main/Lib/test/test_json/test_pass1.py 294 | test_cases!(pass1); 295 | // this needs ./benches/generate_big.py to be called 296 | test_cases!(big); 297 | // https://json.org/JSON_checker/test/pass2.json 298 | test_cases!(pass2); 299 | test_cases!(string_array); 300 | test_cases!(true_array); 301 | test_cases!(true_object); 302 | test_cases!(bigints_array); 303 | test_cases!(massive_ints_array); 304 | test_cases!(floats_array); 305 | // from https://github.com/json-iterator/go-benchmark/blob/179abe5e3f72acce34fb5a16f3473b901fbdd6b9/ 306 | // src/github.com/json-iterator/go-benchmark/benchmark.go#L30C17-L30C29 307 | test_cases!(medium_response); 308 | test_cases!(x100); 309 | test_cases!(sentence); 310 | test_cases!(unicode); 311 | test_cases!(short_numbers); 312 | 313 | fn string_array_jiter_value_owned(c: &mut Criterion) { 314 | let json = read_file("./benches/string_array.json"); 315 | let json_data = json.as_bytes(); 316 | 317 | c.bench_function("string_array_jiter_value_owned", |bench| { 318 | bench.iter(|| { 319 | let v = JsonValue::parse_owned(black_box(json_data), false, PartialMode::Off).unwrap(); 320 | black_box(v) 321 | }); 322 | }); 323 | } 324 | 325 | fn medium_response_jiter_value_owned(c: &mut Criterion) { 326 | let json = read_file("./benches/medium_response.json"); 327 | let json_data = json.as_bytes(); 328 | 329 | c.bench_function("medium_response_jiter_value_owned", |bench| { 330 | bench.iter(|| { 331 | let v = JsonValue::parse_owned(black_box(json_data), false, PartialMode::Off).unwrap(); 332 | black_box(v) 333 | }); 334 | }); 335 | } 336 | 337 | fn x100_serde_iter(c: &mut Criterion) { 338 | serde_str("./benches/x100.json", c); 339 | } 340 | 341 | criterion_group!( 342 | benches, 343 | big_jiter_iter, 344 | big_jiter_skip, 345 | big_jiter_value, 346 | big_serde_value, 347 | bigints_array_jiter_iter, 348 | bigints_array_jiter_skip, 349 | 
bigints_array_jiter_value, 350 | bigints_array_serde_value, 351 | floats_array_jiter_iter, 352 | floats_array_jiter_skip, 353 | floats_array_jiter_value, 354 | floats_array_serde_value, 355 | massive_ints_array_jiter_iter, 356 | massive_ints_array_jiter_skip, 357 | massive_ints_array_jiter_value, 358 | massive_ints_array_serde_value, 359 | medium_response_jiter_iter, 360 | medium_response_jiter_skip, 361 | medium_response_jiter_value, 362 | medium_response_jiter_value_owned, 363 | medium_response_serde_value, 364 | x100_jiter_iter, 365 | x100_jiter_skip, 366 | x100_jiter_value, 367 | x100_serde_iter, 368 | x100_serde_value, 369 | sentence_jiter_iter, 370 | sentence_jiter_skip, 371 | sentence_jiter_value, 372 | sentence_serde_value, 373 | unicode_jiter_iter, 374 | unicode_jiter_skip, 375 | unicode_jiter_value, 376 | unicode_serde_value, 377 | pass1_jiter_iter, 378 | pass1_jiter_skip, 379 | pass1_jiter_value, 380 | pass1_serde_value, 381 | pass2_jiter_iter, 382 | pass2_jiter_skip, 383 | pass2_jiter_value, 384 | pass2_serde_value, 385 | string_array_jiter_iter, 386 | string_array_jiter_skip, 387 | string_array_jiter_value, 388 | string_array_jiter_value_owned, 389 | string_array_serde_value, 390 | true_array_jiter_iter, 391 | true_array_jiter_skip, 392 | true_array_jiter_value, 393 | true_array_serde_value, 394 | true_object_jiter_iter, 395 | true_object_jiter_skip, 396 | true_object_jiter_value, 397 | true_object_serde_value, 398 | short_numbers_jiter_iter, 399 | short_numbers_jiter_skip, 400 | short_numbers_jiter_value, 401 | short_numbers_serde_value, 402 | ); 403 | criterion_main!(benches); 404 | -------------------------------------------------------------------------------- /crates/jiter-python/tests/test_jiter.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures import ThreadPoolExecutor 2 | import json 3 | from decimal import Decimal 4 | from pathlib import Path 5 | import sys 6 | from typing import 
# Raw bytes of each benchmark JSON fixture, read once when the test module is imported.
JITER_BENCH_DATAS = [
    (JITER_BENCH_DIR / file_name).read_bytes()
    for file_name in (
        'bigints_array.json',
        'floats_array.json',
        'massive_ints_array.json',
        'medium_response.json',
        'pass1.json',
        'pass2.json',
        'sentence.json',
        'short_numbers.json',
        'string_array_unique.json',
        'string_array.json',
        'true_array.json',
        'true_object.json',
        'unicode.json',
        'x100.json',
    )
]
def test_partial_array():
    """A truncated array is an error by default but yields its complete items in partial mode."""
    truncated = b'["string", true, null, 1, "foo'

    with pytest.raises(ValueError, match='EOF while parsing a string at line 1 column 30'):
        jiter.from_json(truncated, partial_mode=False)

    assert jiter.from_json(truncated, partial_mode=True) == ['string', True, None, 1]

    # every non-empty proper prefix must still parse to a list
    for end in range(1, len(truncated)):
        assert isinstance(jiter.from_json(truncated[:end], partial_mode=True), list)
def test_partial_object():
    """Partial mode drops an unterminated trailing key and keeps the complete pairs."""
    json = b'{"a": 1, "b": 2, "c'
    parsed = jiter.from_json(json, partial_mode=True)
    assert parsed == {'a': 1, 'b': 2}

    # test that stopping at every point is ok
    # (slice the input: parsing the full `json` each time would never exercise the prefixes)
    for i in range(1, len(json)):
        parsed = jiter.from_json(json[:i], partial_mode=True)
        assert isinstance(parsed, dict)


def test_partial_object_string():
    """Partial mode (`True` / `'on'`) drops a key whose string value is unterminated."""
    json = b'{"a": 1, "b": 2, "c": "foo'
    parsed = jiter.from_json(json, partial_mode=True)
    assert parsed == {'a': 1, 'b': 2}
    parsed = jiter.from_json(json, partial_mode='on')
    assert parsed == {'a': 1, 'b': 2}

    # test that stopping at every point is ok
    for i in range(1, len(json)):
        parsed = jiter.from_json(json[:i], partial_mode=True)
        assert isinstance(parsed, dict)

    json = b'{"title": "Pride and Prejudice", "author": "Jane A'
    parsed = jiter.from_json(json, partial_mode=True)
    assert parsed == {'title': 'Pride and Prejudice'}


def test_partial_object_string_trailing_strings():
    """`'trailing-strings'` keeps the partial content of an unterminated trailing string value."""
    json = b'{"a": 1, "b": 2, "c": "foo'
    parsed = jiter.from_json(json, partial_mode='trailing-strings')
    assert parsed == {'a': 1, 'b': 2, 'c': 'foo'}

    # test that stopping at every point is ok, using the mode this test is about
    for i in range(1, len(json)):
        parsed = jiter.from_json(json[:i], partial_mode='trailing-strings')
        assert isinstance(parsed, dict)

    json = b'{"title": "Pride and Prejudice", "author": "Jane A'
    parsed = jiter.from_json(json, partial_mode='trailing-strings')
    assert parsed == {'title': 'Pride and Prejudice', 'author': 'Jane A'}
def test_partial_nested():
    """Partial mode closes every open container of a truncated nested document."""
    truncated = b'{"a": 1, "b": 2, "c": [1, 2, {"d": 1, '
    assert jiter.from_json(truncated, partial_mode=True) == {'a': 1, 'b': 2, 'c': [1, 2, {'d': 1}]}

    # every non-empty proper prefix must still parse to a dict
    for end in range(1, len(truncated)):
        assert isinstance(jiter.from_json(truncated[:end], partial_mode=True), dict)
def test_python_cache_usage_all():
    """With `cache_mode='all'` the reported cache usage after one parse is 3."""
    jiter.cache_clear()
    result = jiter.from_json(b'{"foo": "bar", "spam": 3}', cache_mode='all')
    assert result == {'foo': 'bar', 'spam': 3}
    usage = jiter.cache_usage()
    assert usage == 3
def test_json_float_scientific():
    """`LosslessFloat` keeps scientific notation verbatim and still converts numerically."""
    lossless = jiter.LosslessFloat(b'123e4')
    assert str(lossless) == '123e4'
    assert float(lossless) == 123e4
    assert lossless.as_decimal() == Decimal('123e4')
def test_catch_duplicate_keys():
    """Duplicate keys: last value wins by default, `catch_duplicate_keys=True` raises instead."""
    two_keys = b'{"foo": 1, "foo": 2}'
    three_keys = b'{"foo": 1, "bar": 2, "foo": 2}'

    assert jiter.from_json(two_keys) == {'foo': 2}

    with pytest.raises(ValueError, match='Detected duplicate key "foo" at line 1 column 18'):
        jiter.from_json(two_keys, catch_duplicate_keys=True)

    with pytest.raises(ValueError, match='Detected duplicate key "foo" at line 1 column 28'):
        jiter.from_json(three_keys, catch_duplicate_keys=True)
394 | """Basic sanity check that running a parse in multiple threads is fine.""" 395 | expected_datas = [json.loads(data) for data in JITER_BENCH_DATAS] 396 | 397 | def assert_jiter_ok(data: bytes, expected: Any) -> bool: 398 | return jiter.from_json(data) == expected 399 | 400 | with ThreadPoolExecutor(8) as pool: 401 | results = [] 402 | for _ in range(1000): 403 | for data, expected_result in zip(JITER_BENCH_DATAS, expected_datas): 404 | results.append(pool.submit(assert_jiter_ok, data, expected_result)) 405 | 406 | for result in results: 407 | assert result.result() 408 | -------------------------------------------------------------------------------- /crates/jiter/src/jiter.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT}; 2 | use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange}; 3 | use crate::parse::{Parser, Peek}; 4 | use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape}; 5 | use crate::value::{take_value_borrowed, take_value_owned, take_value_skip, JsonValue}; 6 | use crate::{JsonError, JsonErrorType, PartialMode}; 7 | 8 | pub type JiterResult = Result; 9 | 10 | /// A JSON iterator. 11 | #[derive(Debug)] 12 | pub struct Jiter<'j> { 13 | data: &'j [u8], 14 | parser: Parser<'j>, 15 | tape: Tape, 16 | allow_inf_nan: bool, 17 | allow_partial_strings: bool, 18 | } 19 | 20 | impl Clone for Jiter<'_> { 21 | /// Clone a `Jiter`. Like the default implementation, but a new empty `tape` is used. 22 | fn clone(&self) -> Self { 23 | Self { 24 | data: self.data, 25 | parser: self.parser.clone(), 26 | tape: Tape::default(), 27 | allow_inf_nan: self.allow_inf_nan, 28 | allow_partial_strings: self.allow_partial_strings, 29 | } 30 | } 31 | } 32 | 33 | impl<'j> Jiter<'j> { 34 | /// Constructs a new `Jiter`. 35 | /// 36 | /// # Arguments 37 | /// - `data`: The JSON data to be parsed. 
38 | /// - `allow_inf_nan`: Whether to allow `NaN`, `Infinity` and `-Infinity` as numbers. 39 | pub fn new(data: &'j [u8]) -> Self { 40 | Self { 41 | data, 42 | parser: Parser::new(data), 43 | tape: Tape::default(), 44 | allow_inf_nan: false, 45 | allow_partial_strings: false, 46 | } 47 | } 48 | 49 | pub fn with_allow_inf_nan(mut self) -> Self { 50 | self.allow_inf_nan = true; 51 | self 52 | } 53 | 54 | pub fn with_allow_partial_strings(mut self) -> Self { 55 | self.allow_partial_strings = true; 56 | self 57 | } 58 | 59 | /// Get the current [LinePosition] of the parser. 60 | pub fn current_position(&self) -> LinePosition { 61 | self.parser.current_position() 62 | } 63 | 64 | /// Get the current index of the parser. 65 | pub fn current_index(&self) -> usize { 66 | self.parser.index 67 | } 68 | 69 | /// Get a slice of the underlying JSON data from `start` to `current_index`. 70 | pub fn slice_to_current(&self, start: usize) -> &'j [u8] { 71 | &self.data[start..self.current_index()] 72 | } 73 | 74 | /// Convert an error index to a [LinePosition]. 75 | /// 76 | /// # Arguments 77 | /// - `index`: The index of the error to find the position of. 78 | pub fn error_position(&self, index: usize) -> LinePosition { 79 | LinePosition::find(self.data, index) 80 | } 81 | 82 | /// Peek at the next JSON value without consuming it. 83 | pub fn peek(&mut self) -> JiterResult { 84 | self.parser.peek().map_err(Into::into) 85 | } 86 | 87 | /// Assuming the next value is `null`, consume it. Error if it is not `null`, or is invalid JSON. 88 | pub fn next_null(&mut self) -> JiterResult<()> { 89 | let peek = self.peek()?; 90 | match peek { 91 | Peek::Null => self.known_null(), 92 | _ => Err(self.wrong_type(JsonType::Null, peek)), 93 | } 94 | } 95 | 96 | /// Knowing the next value is `null`, consume it. 97 | pub fn known_null(&mut self) -> JiterResult<()> { 98 | self.parser.consume_null()?; 99 | Ok(()) 100 | } 101 | 102 | /// Assuming the next value is `true` or `false`, consume it. 
Error if it is not a boolean, or is invalid JSON. 103 | /// 104 | /// # Returns 105 | /// The boolean value. 106 | pub fn next_bool(&mut self) -> JiterResult { 107 | let peek = self.peek()?; 108 | self.known_bool(peek) 109 | } 110 | 111 | /// Knowing the next value is `true` or `false`, parse it. 112 | pub fn known_bool(&mut self, peek: Peek) -> JiterResult { 113 | match peek { 114 | Peek::True => { 115 | self.parser.consume_true()?; 116 | Ok(true) 117 | } 118 | Peek::False => { 119 | self.parser.consume_false()?; 120 | Ok(false) 121 | } 122 | _ => Err(self.wrong_type(JsonType::Bool, peek)), 123 | } 124 | } 125 | 126 | /// Assuming the next value is a number, consume it. Error if it is not a number, or is invalid JSON. 127 | /// 128 | /// # Returns 129 | /// A [NumberAny] representing the number. 130 | pub fn next_number(&mut self) -> JiterResult { 131 | let peek = self.peek()?; 132 | self.known_number(peek) 133 | } 134 | 135 | /// Knowing the next value is a number, parse it. 136 | pub fn known_number(&mut self, peek: Peek) -> JiterResult { 137 | self.parser 138 | .consume_number::(peek.into_inner(), self.allow_inf_nan) 139 | .map_err(|e| self.maybe_number_error(e, JsonType::Int, peek)) 140 | } 141 | 142 | /// Assuming the next value is an integer, consume it. Error if it is not an integer, or is invalid JSON. 143 | pub fn next_int(&mut self) -> JiterResult { 144 | let peek = self.peek()?; 145 | self.known_int(peek) 146 | } 147 | 148 | /// Knowing the next value is an integer, parse it. 149 | pub fn known_int(&mut self, peek: Peek) -> JiterResult { 150 | self.parser 151 | .consume_number::(peek.into_inner(), self.allow_inf_nan) 152 | .map_err(|e| { 153 | if e.error_type == JsonErrorType::FloatExpectingInt { 154 | JiterError::wrong_type(JsonType::Int, JsonType::Float, self.parser.index) 155 | } else { 156 | self.maybe_number_error(e, JsonType::Int, peek) 157 | } 158 | }) 159 | } 160 | 161 | /// Assuming the next value is a float, consume it. 
Error if it is not a float, or is invalid JSON. 162 | pub fn next_float(&mut self) -> JiterResult { 163 | let peek = self.peek()?; 164 | self.known_float(peek) 165 | } 166 | 167 | /// Knowing the next value is a float, parse it. 168 | pub fn known_float(&mut self, peek: Peek) -> JiterResult { 169 | self.parser 170 | .consume_number::(peek.into_inner(), self.allow_inf_nan) 171 | .map_err(|e| self.maybe_number_error(e, JsonType::Float, peek)) 172 | } 173 | 174 | /// Assuming the next value is a number, consume it and return bytes from the original JSON data. 175 | pub fn next_number_bytes(&mut self) -> JiterResult<&[u8]> { 176 | let peek = self.peek()?; 177 | self.known_number_bytes(peek) 178 | } 179 | 180 | /// Knowing the next value is a number, parse it and return bytes from the original JSON data. 181 | fn known_number_bytes(&mut self, peek: Peek) -> JiterResult<&[u8]> { 182 | match self 183 | .parser 184 | .consume_number::(peek.into_inner(), self.allow_inf_nan) 185 | { 186 | Ok(numbe_range) => Ok(&self.data[numbe_range.range]), 187 | Err(e) => Err(self.maybe_number_error(e, JsonType::Float, peek)), 188 | } 189 | } 190 | 191 | /// Assuming the next value is a string, consume it. Error if it is not a string, or is invalid JSON. 192 | pub fn next_str(&mut self) -> JiterResult<&str> { 193 | let peek = self.peek()?; 194 | match peek { 195 | Peek::String => self.known_str(), 196 | _ => Err(self.wrong_type(JsonType::String, peek)), 197 | } 198 | } 199 | 200 | /// Knowing the next value is a string, parse it. 201 | pub fn known_str(&mut self) -> JiterResult<&str> { 202 | match self 203 | .parser 204 | .consume_string::(&mut self.tape, self.allow_partial_strings) 205 | { 206 | Ok(output) => Ok(output.as_str()), 207 | Err(e) => Err(e.into()), 208 | } 209 | } 210 | 211 | /// Assuming the next value is a string, consume it and return bytes from the original JSON data. 
212 | pub fn next_bytes(&mut self) -> JiterResult<&[u8]> { 213 | let peek = self.peek()?; 214 | match peek { 215 | Peek::String => self.known_bytes(), 216 | _ => Err(self.wrong_type(JsonType::String, peek)), 217 | } 218 | } 219 | 220 | /// Knowing the next value is a string, parse it and return bytes from the original JSON data. 221 | pub fn known_bytes(&mut self) -> JiterResult<&[u8]> { 222 | let range = self 223 | .parser 224 | .consume_string::(&mut self.tape, self.allow_partial_strings)?; 225 | Ok(&self.data[range]) 226 | } 227 | 228 | /// Parse the next JSON value and return it as a [JsonValue]. Error if it is invalid JSON. 229 | pub fn next_value(&mut self) -> JiterResult> { 230 | let peek = self.peek()?; 231 | self.known_value(peek) 232 | } 233 | 234 | /// Parse the next JSON value and return it as a [JsonValue]. Error if it is invalid JSON. 235 | /// 236 | /// # Arguments 237 | /// - `peek`: The [Peek] of the next JSON value. 238 | pub fn known_value(&mut self, peek: Peek) -> JiterResult> { 239 | take_value_borrowed( 240 | peek, 241 | &mut self.parser, 242 | &mut self.tape, 243 | DEFAULT_RECURSION_LIMIT, 244 | self.allow_inf_nan, 245 | PartialMode::Off, 246 | ) 247 | .map_err(Into::into) 248 | } 249 | 250 | /// Parse the next JSON value, but don't return it. 251 | /// This should be faster than returning the value, useful when you don't care about this value. 252 | /// Error if it is invalid JSON. 253 | /// 254 | /// *WARNING:* For performance reasons, this method does not check that strings would be valid UTF-8. 255 | pub fn next_skip(&mut self) -> JiterResult<()> { 256 | let peek = self.peek()?; 257 | self.known_skip(peek) 258 | } 259 | 260 | /// Parse the next JSON value, but don't return it. Error if it is invalid JSON. 261 | /// 262 | /// # Arguments 263 | /// - `peek`: The [Peek] of the next JSON value. 
264 | pub fn known_skip(&mut self, peek: Peek) -> JiterResult<()> { 265 | take_value_skip( 266 | peek, 267 | &mut self.parser, 268 | &mut self.tape, 269 | DEFAULT_RECURSION_LIMIT, 270 | self.allow_inf_nan, 271 | ) 272 | .map_err(Into::into) 273 | } 274 | 275 | /// Parse the next JSON value and return it as a [JsonValue] with static lifetime. Error if it is invalid JSON. 276 | pub fn next_value_owned(&mut self) -> JiterResult> { 277 | let peek = self.peek()?; 278 | self.known_value_owned(peek) 279 | } 280 | 281 | /// Parse the next JSON value and return it as a [JsonValue] with static lifetime. Error if it is invalid JSON. 282 | /// 283 | /// # Arguments 284 | /// - `peek`: The [Peek] of the next JSON value. 285 | pub fn known_value_owned(&mut self, peek: Peek) -> JiterResult> { 286 | take_value_owned( 287 | peek, 288 | &mut self.parser, 289 | &mut self.tape, 290 | DEFAULT_RECURSION_LIMIT, 291 | self.allow_inf_nan, 292 | PartialMode::Off, 293 | ) 294 | .map_err(Into::into) 295 | } 296 | 297 | /// Assuming the next value is an array, peek at the first value. 298 | /// Error if it is not an array, or is invalid JSON. 299 | /// 300 | /// # Returns 301 | /// The `Some(peek)` of the first value in the array is not empty, `None` if it is empty. 302 | pub fn next_array(&mut self) -> JiterResult> { 303 | let peek = self.peek()?; 304 | match peek { 305 | Peek::Array => self.known_array(), 306 | _ => Err(self.wrong_type(JsonType::Array, peek)), 307 | } 308 | } 309 | 310 | /// Assuming the next value is an array, peat at the first value. 311 | pub fn known_array(&mut self) -> JiterResult> { 312 | self.parser.array_first().map_err(Into::into) 313 | } 314 | 315 | /// Peek at the next value in an array. 316 | pub fn array_step(&mut self) -> JiterResult> { 317 | self.parser.array_step().map_err(Into::into) 318 | } 319 | 320 | /// Assuming the next value is an object, consume the first key. 321 | /// Error if it is not an object, or is invalid JSON. 
322 | /// 323 | /// # Returns 324 | /// The `Some(key)` of the first key in the object is not empty, `None` if it is empty. 325 | pub fn next_object(&mut self) -> JiterResult> { 326 | let peek = self.peek()?; 327 | match peek { 328 | Peek::Object => self.known_object(), 329 | _ => Err(self.wrong_type(JsonType::Object, peek)), 330 | } 331 | } 332 | 333 | /// Assuming the next value is an object, conssume the first key and return bytes from the original JSON data. 334 | pub fn known_object(&mut self) -> JiterResult> { 335 | let op_str = self.parser.object_first::(&mut self.tape)?; 336 | Ok(op_str.map(|s| s.as_str())) 337 | } 338 | 339 | /// Assuming the next value is an object, peek at the first key. 340 | pub fn next_object_bytes(&mut self) -> JiterResult> { 341 | let peek = self.peek()?; 342 | match peek { 343 | Peek::Object => { 344 | let op_range = self.parser.object_first::(&mut self.tape)?; 345 | Ok(op_range.map(|r| &self.data[r])) 346 | } 347 | _ => Err(self.wrong_type(JsonType::Object, peek)), 348 | } 349 | } 350 | 351 | /// Get the next key in an object, or `None` if there are no more keys. 352 | pub fn next_key(&mut self) -> JiterResult> { 353 | let strs = self.parser.object_step::(&mut self.tape)?; 354 | Ok(strs.map(|s| s.as_str())) 355 | } 356 | 357 | /// Get the next key in an object as bytes, or `None` if there are no more keys. 358 | pub fn next_key_bytes(&mut self) -> JiterResult> { 359 | let op_range = self.parser.object_step::(&mut self.tape)?; 360 | Ok(op_range.map(|r| &self.data[r])) 361 | } 362 | 363 | /// Finish parsing the JSON data. Error if there is more data to be parsed. 
364 | pub fn finish(&mut self) -> JiterResult<()> { 365 | self.parser.finish().map_err(Into::into) 366 | } 367 | 368 | fn wrong_type(&self, expected: JsonType, peek: Peek) -> JiterError { 369 | match peek { 370 | Peek::True | Peek::False => JiterError::wrong_type(expected, JsonType::Bool, self.parser.index), 371 | Peek::Null => JiterError::wrong_type(expected, JsonType::Null, self.parser.index), 372 | Peek::String => JiterError::wrong_type(expected, JsonType::String, self.parser.index), 373 | Peek::Array => JiterError::wrong_type(expected, JsonType::Array, self.parser.index), 374 | Peek::Object => JiterError::wrong_type(expected, JsonType::Object, self.parser.index), 375 | _ if peek.is_num() => self.wrong_num(peek.into_inner(), expected), 376 | _ => json_error!(ExpectedSomeValue, self.parser.index).into(), 377 | } 378 | } 379 | 380 | fn wrong_num(&self, first: u8, expected: JsonType) -> JiterError { 381 | let mut parser2 = self.parser.clone(); 382 | let actual = match parser2.consume_number::(first, self.allow_inf_nan) { 383 | Ok(NumberAny::Int { .. }) => JsonType::Int, 384 | Ok(NumberAny::Float { .. 
}) => JsonType::Float, 385 | Err(e) => return e.into(), 386 | }; 387 | JiterError::wrong_type(expected, actual, self.parser.index) 388 | } 389 | 390 | fn maybe_number_error(&self, e: JsonError, expected: JsonType, peek: Peek) -> JiterError { 391 | if peek.is_num() { 392 | e.into() 393 | } else { 394 | self.wrong_type(expected, peek) 395 | } 396 | } 397 | } 398 | -------------------------------------------------------------------------------- /crates/jiter/src/python.rs: -------------------------------------------------------------------------------- 1 | use ahash::AHashSet; 2 | use std::marker::PhantomData; 3 | 4 | use pyo3::exceptions::{PyTypeError, PyValueError}; 5 | use pyo3::ffi; 6 | use pyo3::prelude::*; 7 | use pyo3::types::{PyBool, PyDict, PyList, PyString}; 8 | 9 | use smallvec::SmallVec; 10 | 11 | use crate::errors::{json_err, json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; 12 | use crate::number_decoder::{AbstractNumberDecoder, NumberAny, NumberRange}; 13 | use crate::parse::{Parser, Peek}; 14 | use crate::py_lossless_float::{get_decimal_type, FloatMode}; 15 | use crate::py_string_cache::{StringCacheAll, StringCacheKeys, StringCacheMode, StringMaybeCache, StringNoCache}; 16 | use crate::string_decoder::{StringDecoder, Tape}; 17 | use crate::{JsonErrorType, LosslessFloat, PartialMode}; 18 | 19 | #[derive(Default)] 20 | #[allow(clippy::struct_excessive_bools)] 21 | pub struct PythonParse { 22 | /// Whether to allow `(-)Infinity` and `NaN` values. 23 | pub allow_inf_nan: bool, 24 | /// Whether to cache strings to avoid constructing new Python objects, 25 | pub cache_mode: StringCacheMode, 26 | /// Whether to allow partial JSON data. 27 | pub partial_mode: PartialMode, 28 | /// Whether to catch duplicate keys in objects. 
29 | pub catch_duplicate_keys: bool, 30 | /// How to return floats: as a `float` (`'float'`), `Decimal` (`'decimal'`) or 31 | /// [`LosslessFloat`] (`'lossless-float'`) 32 | pub float_mode: FloatMode, 33 | } 34 | 35 | impl PythonParse { 36 | /// Parse a JSON value from a byte slice and return a Python object. 37 | /// 38 | /// # Arguments 39 | /// 40 | /// - `py`: [Python](https://docs.rs/pyo3/latest/pyo3/marker/struct.Python.html) marker token. 41 | /// - `json_data`: The JSON data to parse. 42 | /// this should have a significant improvement on performance but increases memory slightly. 43 | /// 44 | /// # Returns 45 | /// 46 | /// A [PyObject](https://docs.rs/pyo3/latest/pyo3/type.PyObject.html) representing the parsed JSON value. 47 | pub fn python_parse<'py>(&self, py: Python<'py>, json_data: &[u8]) -> JsonResult> { 48 | macro_rules! ppp { 49 | ($string_cache:ident, $key_check:ident, $parse_number:ident) => { 50 | PythonParser::<$string_cache, $key_check, $parse_number>::parse( 51 | py, 52 | json_data, 53 | self.allow_inf_nan, 54 | self.partial_mode, 55 | ) 56 | }; 57 | } 58 | macro_rules! 
ppp_group { 59 | ($string_cache:ident) => { 60 | match (self.catch_duplicate_keys, self.float_mode) { 61 | (true, FloatMode::Float) => ppp!($string_cache, DuplicateKeyCheck, ParseNumberLossy), 62 | (true, FloatMode::Decimal) => ppp!($string_cache, DuplicateKeyCheck, ParseNumberDecimal), 63 | (true, FloatMode::LosslessFloat) => ppp!($string_cache, DuplicateKeyCheck, ParseNumberLossless), 64 | (false, FloatMode::Float) => ppp!($string_cache, NoopKeyCheck, ParseNumberLossy), 65 | (false, FloatMode::Decimal) => ppp!($string_cache, NoopKeyCheck, ParseNumberDecimal), 66 | (false, FloatMode::LosslessFloat) => ppp!($string_cache, NoopKeyCheck, ParseNumberLossless), 67 | } 68 | }; 69 | } 70 | 71 | match self.cache_mode { 72 | StringCacheMode::All => ppp_group!(StringCacheAll), 73 | StringCacheMode::Keys => ppp_group!(StringCacheKeys), 74 | StringCacheMode::None => ppp_group!(StringNoCache), 75 | } 76 | } 77 | } 78 | 79 | /// Map a `JsonError` to a `PyErr` which can be raised as an exception in Python as a `ValueError`. 
80 | pub fn map_json_error(json_data: &[u8], json_error: &JsonError) -> PyErr { 81 | PyValueError::new_err(json_error.description(json_data)) 82 | } 83 | 84 | struct PythonParser<'j, StringCache, KeyCheck, ParseNumber> { 85 | _string_cache: PhantomData, 86 | _key_check: PhantomData, 87 | _parse_number: PhantomData, 88 | parser: Parser<'j>, 89 | tape: Tape, 90 | recursion_limit: u8, 91 | allow_inf_nan: bool, 92 | partial_mode: PartialMode, 93 | } 94 | 95 | impl 96 | PythonParser<'_, StringCache, KeyCheck, ParseNumber> 97 | { 98 | fn parse<'py>( 99 | py: Python<'py>, 100 | json_data: &[u8], 101 | allow_inf_nan: bool, 102 | partial_mode: PartialMode, 103 | ) -> JsonResult> { 104 | let mut slf = PythonParser { 105 | _string_cache: PhantomData::, 106 | _key_check: PhantomData::, 107 | _parse_number: PhantomData::, 108 | parser: Parser::new(json_data), 109 | tape: Tape::default(), 110 | recursion_limit: DEFAULT_RECURSION_LIMIT, 111 | allow_inf_nan, 112 | partial_mode, 113 | }; 114 | 115 | let peek = slf.parser.peek()?; 116 | let v = slf.py_take_value(py, peek)?; 117 | if !slf.partial_mode.is_active() { 118 | slf.parser.finish()?; 119 | } 120 | Ok(v) 121 | } 122 | 123 | fn py_take_value<'py>(&mut self, py: Python<'py>, peek: Peek) -> JsonResult> { 124 | match peek { 125 | Peek::Null => { 126 | self.parser.consume_null()?; 127 | Ok(py.None().into_bound(py)) 128 | } 129 | Peek::True => { 130 | self.parser.consume_true()?; 131 | Ok(PyBool::new(py, true).to_owned().into_any()) 132 | } 133 | Peek::False => { 134 | self.parser.consume_false()?; 135 | Ok(PyBool::new(py, false).to_owned().into_any()) 136 | } 137 | Peek::String => { 138 | let s = self 139 | .parser 140 | .consume_string::(&mut self.tape, self.partial_mode.allow_trailing_str())?; 141 | Ok(StringCache::get_value(py, s).into_any()) 142 | } 143 | Peek::Array => { 144 | let peek_first = match self.parser.array_first() { 145 | Ok(Some(peek)) => peek, 146 | Err(e) if !self.allow_partial_err(&e) => return Err(e), 147 | 
Ok(None) | Err(_) => return Ok(PyList::empty(py).into_any()), 148 | }; 149 | 150 | let mut vec: SmallVec<[Bound<'_, PyAny>; 8]> = SmallVec::with_capacity(8); 151 | if let Err(e) = self.parse_array(py, peek_first, &mut vec) { 152 | if !self.allow_partial_err(&e) { 153 | return Err(e); 154 | } 155 | } 156 | 157 | Ok(PyList::new(py, vec) 158 | .map_err(|e| py_err_to_json_err(&e, self.parser.index))? 159 | .into_any()) 160 | } 161 | Peek::Object => { 162 | let dict = PyDict::new(py); 163 | if let Err(e) = self.parse_object(py, &dict) { 164 | if !self.allow_partial_err(&e) { 165 | return Err(e); 166 | } 167 | } 168 | Ok(dict.into_any()) 169 | } 170 | _ => ParseNumber::parse_number(py, &mut self.parser, peek, self.allow_inf_nan), 171 | } 172 | } 173 | 174 | fn parse_array<'py>( 175 | &mut self, 176 | py: Python<'py>, 177 | peek_first: Peek, 178 | vec: &mut SmallVec<[Bound<'py, PyAny>; 8]>, 179 | ) -> JsonResult<()> { 180 | let v = self.check_take_value(py, peek_first)?; 181 | vec.push(v); 182 | while let Some(peek) = self.parser.array_step()? { 183 | let v = self.check_take_value(py, peek)?; 184 | vec.push(v); 185 | } 186 | Ok(()) 187 | } 188 | 189 | fn parse_object<'py>(&mut self, py: Python<'py>, dict: &Bound<'py, PyDict>) -> JsonResult<()> { 190 | let set_item = |key: Bound<'py, PyString>, value: Bound<'py, PyAny>| { 191 | let r = unsafe { ffi::PyDict_SetItem(dict.as_ptr(), key.as_ptr(), value.as_ptr()) }; 192 | // AFAIK this shouldn't happen since the key will always be a string which is hashable 193 | // we panic here rather than returning a result and using `?` below as it's up to 14% faster 194 | // presumably because there are fewer branches 195 | assert_ne!(r, -1, "PyDict_SetItem failed"); 196 | }; 197 | let mut check_keys = KeyCheck::default(); 198 | if let Some(first_key) = self.parser.object_first::(&mut self.tape)? 
{ 199 | let first_key_s = first_key.as_str(); 200 | check_keys.check(first_key_s, self.parser.index)?; 201 | let first_key = StringCache::get_key(py, first_key); 202 | let peek = self.parser.peek()?; 203 | let first_value = self.check_take_value(py, peek)?; 204 | set_item(first_key, first_value); 205 | while let Some(key) = self.parser.object_step::(&mut self.tape)? { 206 | let key_s = key.as_str(); 207 | check_keys.check(key_s, self.parser.index)?; 208 | let key = StringCache::get_key(py, key); 209 | let peek = self.parser.peek()?; 210 | let value = self.check_take_value(py, peek)?; 211 | set_item(key, value); 212 | } 213 | } 214 | Ok(()) 215 | } 216 | 217 | fn allow_partial_err(&self, e: &JsonError) -> bool { 218 | if self.partial_mode.is_active() { 219 | e.allowed_if_partial() 220 | } else { 221 | false 222 | } 223 | } 224 | 225 | fn check_take_value<'py>(&mut self, py: Python<'py>, peek: Peek) -> JsonResult> { 226 | self.recursion_limit = match self.recursion_limit.checked_sub(1) { 227 | Some(limit) => limit, 228 | None => return json_err!(RecursionLimitExceeded, self.parser.index), 229 | }; 230 | 231 | let r = self.py_take_value(py, peek); 232 | 233 | self.recursion_limit += 1; 234 | r 235 | } 236 | } 237 | 238 | const PARTIAL_ERROR: &str = "Invalid partial mode, should be `'off'`, `'on'`, `'trailing-strings'` or a `bool`"; 239 | 240 | impl<'py> FromPyObject<'_, 'py> for PartialMode { 241 | type Error = PyErr; 242 | fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult { 243 | if let Ok(bool_mode) = ob.cast::() { 244 | Ok(bool_mode.is_true().into()) 245 | } else if let Ok(str_mode) = ob.extract::<&str>() { 246 | match str_mode { 247 | "off" => Ok(Self::Off), 248 | "on" => Ok(Self::On), 249 | "trailing-strings" => Ok(Self::TrailingStrings), 250 | _ => Err(PyValueError::new_err(PARTIAL_ERROR)), 251 | } 252 | } else { 253 | Err(PyTypeError::new_err(PARTIAL_ERROR)) 254 | } 255 | } 256 | } 257 | 258 | trait MaybeKeyCheck: Default { 259 | fn check(&mut self, key: 
&str, index: usize) -> JsonResult<()>; 260 | } 261 | 262 | #[derive(Default)] 263 | struct NoopKeyCheck; 264 | 265 | impl MaybeKeyCheck for NoopKeyCheck { 266 | fn check(&mut self, _key: &str, _index: usize) -> JsonResult<()> { 267 | Ok(()) 268 | } 269 | } 270 | 271 | #[derive(Default)] 272 | struct DuplicateKeyCheck(AHashSet); 273 | 274 | impl MaybeKeyCheck for DuplicateKeyCheck { 275 | fn check(&mut self, key: &str, index: usize) -> JsonResult<()> { 276 | if self.0.insert(key.to_owned()) { 277 | Ok(()) 278 | } else { 279 | Err(JsonError::new(JsonErrorType::DuplicateKey(key.to_owned()), index)) 280 | } 281 | } 282 | } 283 | 284 | trait MaybeParseNumber { 285 | fn parse_number<'py>( 286 | py: Python<'py>, 287 | parser: &mut Parser, 288 | peek: Peek, 289 | allow_inf_nan: bool, 290 | ) -> JsonResult>; 291 | } 292 | 293 | struct ParseNumberLossy; 294 | 295 | impl MaybeParseNumber for ParseNumberLossy { 296 | fn parse_number<'py>( 297 | py: Python<'py>, 298 | parser: &mut Parser, 299 | peek: Peek, 300 | allow_inf_nan: bool, 301 | ) -> JsonResult> { 302 | match parser.consume_number::(peek.into_inner(), allow_inf_nan) { 303 | Ok(number) => Ok(number 304 | .into_pyobject(py) 305 | .map_err(|e| py_err_to_json_err(&e, parser.index))? 306 | .into_any()), 307 | Err(e) => { 308 | if !peek.is_num() { 309 | Err(json_error!(ExpectedSomeValue, parser.index)) 310 | } else { 311 | Err(e) 312 | } 313 | } 314 | } 315 | } 316 | } 317 | 318 | struct ParseNumberLossless; 319 | 320 | impl MaybeParseNumber for ParseNumberLossless { 321 | fn parse_number<'py>( 322 | py: Python<'py>, 323 | parser: &mut Parser, 324 | peek: Peek, 325 | allow_inf_nan: bool, 326 | ) -> JsonResult> { 327 | match parser.consume_number::(peek.into_inner(), allow_inf_nan) { 328 | Ok(number_range) => { 329 | let bytes = parser.slice(number_range.range).unwrap(); 330 | let obj = if number_range.is_int { 331 | NumberAny::decode(bytes, 0, peek.into_inner(), allow_inf_nan)? 
332 | .0 333 | .into_pyobject(py) 334 | .map_err(|e| py_err_to_json_err(&e, parser.index))? 335 | } else { 336 | LosslessFloat::new_unchecked(bytes.to_vec()) 337 | .into_pyobject(py) 338 | .map_err(|e| py_err_to_json_err(&e, parser.index))? 339 | .into_any() 340 | }; 341 | Ok(obj) 342 | } 343 | Err(e) => { 344 | if !peek.is_num() { 345 | Err(json_error!(ExpectedSomeValue, parser.index)) 346 | } else { 347 | Err(e) 348 | } 349 | } 350 | } 351 | } 352 | } 353 | 354 | struct ParseNumberDecimal; 355 | 356 | impl MaybeParseNumber for ParseNumberDecimal { 357 | fn parse_number<'py>( 358 | py: Python<'py>, 359 | parser: &mut Parser, 360 | peek: Peek, 361 | allow_inf_nan: bool, 362 | ) -> JsonResult> { 363 | match parser.consume_number::(peek.into_inner(), allow_inf_nan) { 364 | Ok(number_range) => { 365 | let bytes = parser.slice(number_range.range).unwrap(); 366 | if number_range.is_int { 367 | let obj = NumberAny::decode(bytes, 0, peek.into_inner(), allow_inf_nan)? 368 | .0 369 | .into_pyobject(py) 370 | .map_err(|e| py_err_to_json_err(&e, parser.index))?; 371 | Ok(obj.into_any()) 372 | } else { 373 | let decimal_type = get_decimal_type(py).map_err(|e| py_err_to_json_err(&e, parser.index))?; 374 | // SAFETY: NumberRange::decode has already confirmed that bytes are a valid JSON number, 375 | // and therefore valid str 376 | let float_str = unsafe { std::str::from_utf8_unchecked(bytes) }; 377 | decimal_type 378 | .call1((float_str,)) 379 | .map_err(|e| py_err_to_json_err(&e, parser.index)) 380 | } 381 | } 382 | Err(e) => { 383 | if !peek.is_num() { 384 | Err(json_error!(ExpectedSomeValue, parser.index)) 385 | } else { 386 | Err(e) 387 | } 388 | } 389 | } 390 | } 391 | } 392 | 393 | fn py_err_to_json_err(e: &PyErr, index: usize) -> JsonError { 394 | JsonError::new(JsonErrorType::InternalError(e.to_string()), index) 395 | } 396 | -------------------------------------------------------------------------------- /crates/jiter/benches/bigints_array.json: 
-------------------------------------------------------------------------------- 1 | [-988705390355151, -905834754076502, -103632019616477, -316545588917286, 490779389534829, -953493388874162, -243006771752239, -744025192631980, -359016919000159, 732901781366502, -104773647740791, -286481351498865, 1027561665338462, -123563885683323, -778611441148419, 960167215353837, -535796870414128, -407290660154826, -783018892592292, -647509354641701, 919621526248276, 85546598576532, 247664997821096, -532964163462505, 1102930398831693, 511750375836837, 391566968564879, -30088081623192, 870074451521159, 422783352752786, -752404068034058, 273126039552480, -1112378526824179, -810678636525244, 96429053513874, -197153575112881, -338660527941979, 874279430413142, 959217801504784, -846627508529606, 145522168773349, -223488193094310, 768135463506990, 173123489075291, -514033032961709, 418626953186771, -279727614653123, 23333512608960, 859417391330454, 331877305422123, -976074027850508, -454976794242595, -825976206456296, 307854522721772, 733794815062524, 51500859319357, 50287208041289, 58656859135819, 892600618900566, 446631400051377, 875610606076539, 1112038204400279, -629213238648357, -577055498844378, -996684682754418, 330103393324719, 779413142604725, 387182940826300, -56542528643706, -730616366312838, 816370577509875, 448684358107563, -1050709454243847, -688917906175987, 1073319326402270, -791380589797488, 722637909805156, -411040270277397, 184176997363231, -869421991358315, -552508789984637, 1094976976592030, 269224830664724, 55754694134550, -645604911041525, 1120343995477345, -1122566252092263, 834477351701237, 192827316068643, -125522448780388, -552757882694521, -312055161426779, 319825229839468, -279715385042019, -533142725046953, -997790140659501, -100950642610622, -87766579331816, 980654702022165, -531831430847554, 4739046943333, -23128276099200, -106166459572754, 736408087587240, 289062642712291, 930233877126717, 77764357182416, -450174307894161, -296173809788624, 
931861580541581, -129851042962204, -293087464659959, 1116466762761964, -118822783385624, -931893015968007, -585335818262979, -249647342072052, -399005413403026, 290631332150680, -271235975257165, 960700470183818, 339724323258436, -778514562925105, -455578790229797, 820598153868011, -632361376871135, 170035906823228, -289961814723300, 1020092385539773, -212307506286549, 749065887545963, -1106751197579685, 694930372121405, -979416410281839, -627024301598283, -930462021479034, 907430974888217, 1008473792843562, -91216313350653, 109727626452659, 103314712168157, 509545786235923, -286784961619988, 1064251094762820, 997165842892216, 469944521626954, 1087278611407001, -1108452735291705, -864198430816471, -966722391151462, 151551631636749, 218908781840392, -508079597127107, 278257309671280, -211486365386816, -186778870987021, 363634300784002, 101621183780108, 26428167283446, -179642809987759, 645855656435636, 974276788414182, 151992650260855, -740683087569956, 1018656472882769, 45609525239378, -42622304242628, -754308828282822, -854028801556510, -999365670030395, 154057708609293, 374784775949716, 361495289515170, -707040739622560, -1068219573712335, -1024170497183496, 1070776055212169, 990690917208036, 932200709652558, -491563708056186, -49133681507950, -530789520927108, -593671028501760, -872895040066249, 470305380432482, 834557012161111, -857185072643503, -621982386272810, -298591002896505, 842219757498543, -514887262859796, 404320536881152, -641011955283082, 303239733234310, -1036887876000571, 1027489944852863, 981242602516223, -1104320219708144, -835193836849718, 340049831284306, -413604960517438, -780194068305202, -431981163540261, -298881545243623, 597648026587379, -317432940308790, -40807192145838, -1120785419310125, -330464192997345, 124351376233453, -800095904452143, -179579226223493, 992889224778654, 945915125052595, 980271928859414, 904690107713841, -899777257399666, 51343613909386, -688234023650781, 441204726684403, -371176624407714, 72300442591487, 
51971474177113, -16426545464487, -428366430720291, 669353970752476, -438189985582259, -39369546535377, -363546768164753, 337462631524285, 622213468329766, 166905513363491, -692581211765331, -1099357506067864, 395822698802187, -37595012442566, -1047580776936861, -1045600752163795, -335540916148738, -410343429708108, -450799582411571, 347117092995623, 765943595462423, 855757263854890, -399907166223604, 1054378928353332, 306658204592241, 191640680458107, 390455294206366, -941774346541233, 336367819998737, 1021400933405918, 953099346256185, 1073859748633945, 763678798334149, -798047780084254, -559079024522666, -993971216432208, 675476890597714, -1122840898879534, -317677438164615, 620422080081923, 222639279977067, 1002506258451267, 480602461693015, -830743537858656, -304633451961870, 218540745814260, -509152381299138, -933381794723113, 579623656225647, -482229312551939, -1113544765586269, -600219910225613, -466060866985367, 832364873470823, -32049108990677, 37167750430610, 35918485064459, 950401822458041, -824325213720244, -511489583450976, -542199451459094, 905595777966004, -343092238022947, -1117174074410652, -734011479521055, 76231098906867, -1055061432914948, -881232388917530, 292352484032022, 764856237718099, -695351086381417, -665657799052262, 686535513361552, -710322854171819, 8981036502547, 45817279305737, 187585171642842, 84456369830973, -67888321234800, -204174414236870, -195374036988284, 414583334911515, 397691078217491, 159655051501829, 730659396893397, 959028039889174, 480720505184102, 301693796002437, 587919005784982, -966938410192648, -286578850388856, -98631838582778, 1104482166663713, -1005526292425240, 418945357818386, 1059665704660883, 737377295031600, 291143017368096, -419753619071078, -398720616906262, 143937541787224, -628703503207113, 1009734705623195, -600275739305204, 821748259782539, -657135745816216, 958492700048475, -33451666225860, 344607420230813, 76230576190201, -341958452248121, 879377810295953, -620323674317212, -464891366206414, 
-622543062773193, -195543012182485, -772013173800212, -261995345084260, 977899057820640, 995174655321076, 596128802996829, 913469211517203, 540965627160582, 311095585905645, 885874489248488, 1008426969431472, 468572188275949, -317120792105793, 193797534596578, 741355975454540, 99526256200974, 853348362327402, 877537022129670, 493279492162893, 594351199946549, 1079603478773545, 611668544706423, 698453854647244, 632951364006977, -944071568600794, 307694378799760, 157014812453004, 853486045800829, 835194717224839, 675953569983438, 234311161087235, -886729140965650, -888788984102839, -689573086860369, 612369210280216, -865794167739272, -392930288109946, 1050514315725764, 295854810009161, 315378431848622, -885668466092261, -431412868864228, -96593989614625, -776621483732053, -811957966913861, 474102849977151, 52870621520725, -474964613706707, -408707939062299, 783709996586638, -724017097889866, -969301234849340, 751231395655793, 988992140186726, 112002546252954, 341505810689226, -549158727280033, 409101776227568, 972038339154912, 135566449730292, -42800573136166, -890269132324741, 654601051732390, -849839417809201, 501100857323250, 868071925910339, -915178253892999, 71814011280822, -1003030727463715, 1060232860192790, 934500166825949, -923450385976399, 1100048374533488, -31070546246066, 777072914429395, -402953127590, 461799827091248, 826993099752919, -1082104258172510, -1156161279358, -388273641923198, -694416833279147, 988850855553385, 1028554730280646, -554330909326732, 792386649307434, -697588204027121, 1017518864400011, -1024426130050646, -1098971895372240, 1021626197601787, -1070980679316579, -131275451604692, 259712829836542, -936188432025706, 753637181753623, 569897843493593, -314812166378166, -476934723343417, 29870929380610, 52758500586383, 262278900582783, 315296255584711, 513580881991793, -687140097592059, -54001891374895, -189572664503503, -190237170782316, -955843298417822, 920004157872909, 1009576728558253, -136175778721381, 624444902432239, 
-181488118030013, -62102097309187, 61934003608074, -311556431246389, -86814768044392, -406589343230392, 149741500055655, 558018021915558, -265295471195438, 363450791494565, -747879221019691, 492698776576997, 283462289724972, 1020618124699625, 716275193159212, 316920812076025, -735922556616389, 700988927735122, 880079991609422, 496810102751613, 961438822931072, -428250922138622, 815521557912831, 494395822898618, 743550805715955, 124995981587963, 1041377295982863, 773957515188259, 27555834856986, 202062307485285, -888774789373661, 278088985869791, 992902840246865, 446415571505556, -791227889405819, -368419392595369, -459101757093159, 168610514227098, -355223146890902, -857109874777313, -426722147106860, -467216217090754, 353787975389174, 519803233466040, -1089778639056042, 225873280389682, -1083106720500790, -1035944381632398, -591909761383445, -762559586517407, -168567237367256, -158232680479784, -826739846800799, 309588265886899, -933272676782316, -353373147714511, -50704034923487, -18632680557817, 742519357869834, 33620784260669, -695372925188600, -53887336671091, -1096415999069942, 30209233886529, -351067136661584, -367997739266235, 119796096837598, -859649087168824, 995887454612616, 970310391561146, 922978283016993, -286428401008868, -670701254258184, -327136126361668, -306476803359504, 978576268751174, -103287345735014, 633257843963675, -493003664352117, 805821124680191, 628081644444347, -464846777662615, 664215228684185, 1065811786350373, -510344289557500, 441509020585575, -1028585465842689, 199926130445187, -593492055778316, -174376421931430, -699353450032296, -194589682228716, 473580501238, 1095097005252818, -538291527687858, 1056763862994800, 300001529803084, 943344737566322, 343939148687535, 1107836994646394, 595214836771722, -427914314565199, 1120639238983515, -1041852347292077, 613426930353647, 495914984763308, -341594875117500, -981924211109605, 496293055096218, 726735729586034, 634848465666173, 669151768740188, 570188457069319, 552431000907780, 
-119908858477219, -1071962346200891, -820546796228010, -1123392627305213, -125624454440357, -326301510046154, -452192001905496, -569055770573081, 10552212107, 1106190180928626, -126996582967796, -266157078848538, 368735163030279, -1002624159347923, -891460796959439, -463015934934208, -330060939991584, -245980444433967, 209321423890816, 67345600919767, -560749190666689, 763300300099041, -860274932240710, -813652868375928, 817133225734306, 31268247365553, 1073729135598137, -662795386039320, -13856526634411, -502014819445974, -149621606877450, -342322468323003, -505944042149658, 338189356892039, 373619031271703, 42327338842811, 98378227351343, -1111070783557021, -1072319619915991, -991547088577104, -539379502464645, 695679279357450, 726938253871986, 26959650240761, 286412211112913, 937840561735032, -505417430169180, -267317383540102, 1083021222582566, 354847794325587, -349357710751512, 77460996925948, -417443999828308, 872786543082860, 392666608918147, -762669381417237, -475355312834064, 662064037301589, 962840933354910, 494562111612758, -14524811930117, -727382575753294, -134442888815051, 361516407684592, 647651712246476, 926986701828378, -533108657514294, 1050181646177121, 829411345065084, -588998510407499, -361975235014231, 89805361518005, -306919263772024, 360960388715134, 732784014961262, -186502480347380, 543066211915801, -320028797168274, -359452294407072, -118431417787989, -1106750691881705, -1090742264534790, -1100248941922456, 1091219142258980, -631672124074979, 1112358142001494, -938764124556259, 386977818014254, -1055843861066784, -997728070289152, -357411743472123, -221616666736268, -478185353769849, -112973178151888, -773022068481207, -2845403486878, -653705873455472, -65730822756372, 240491511824022, -557996790919487, -898223285285615, 1053565406691902, -691551798924319, -709117812698948, 987176589961859, 209611118951132, -267262137022231, -353593890942843, 180903076968367, -77284041972494, 576615768643418, -208389671580134, -202299754794491, 
13298971450010, 619945763694731, 779748573663556, -763470741717488, -414127160374701, 540418625569865, 122853754942691, 899410647296154, 359084330384040, -68417635999602, 462363975354085, 305935105799253, -782013796755724, 1020724104142023, -117942828824866, -437961250902590, 1060627763726651, 752703787958586, -546686685185147, 680052396737075, 202976162939771, 533081389620154, 1042369699052297, -462036935489911, -947800636049342, 478748215005727, -224155316063120, -736161718866411, 875641461719469, -882729158426870, -972429577996981, 463825136753312, -171265792085214, 520969678701714, -245735234127429, -56855020347059, -875113456203229, 460923088765317, 106466061790776, -612101869083070, -207466366705276, 1063751632102172, -263119738154847, -1046902855841426, 671943589814256, 309727229326078, -137836950211440, -710754923901197, -840059174715819, -1060957251111180, -364514631332549, -161420382162189, 162451504597593, -260170449264758, -637206251869872, -235913472870681, -565459997524097, -690607133679932, -337416654540828, 368621347360870, 596723446312724, -707514816394475, -496820741803964, 779697765781302, -595993825810763, 334105987847154, -1117025826233719, -573131218261301, 542955914214666, 235620166523698, -1075857935831740, -108648207115215, 754992864212238, -391966122664867, -1115561736753876, 312309427398057, -268027426614228, 316796350077952, 316882832411623, 215328709863222, -555441606177047, -367577818243664, -867906387333377, -1108665360936363, 91377028687721, 847355850947164, 152812258264154, 869249600912182, -863425547254824, -107492769515576, 447528055736208, -778980467343931, 716581059284762, -878441641795297, 917083020929485, 44869964520767, 632773893345282, -256604478366975, -645991188109760, -485045130900478, -296344973813878, 801023777071707, 936946268289448, 236656254772612, 837179615223556, 890096658841842, 311927132226557, -187366197774278, -825990601456241, -284501055396602, 1053133546048411, -379895591941564, 48781942472674, 4923355449226, 
502969039830554, -618664083073569, 924387513057356, 564006988224030, 476729859360055, 1057565377286361, -716748293983190, 740903464538272, -105515379762826, -600500967999974, -156275192506458, -247601715583665, -973740842021948, -889934005321609, 296901571425922, -81192051432994, -593028951574836, 75989294161335, 395753186496214, 558090843358897, -486911184209278, 673720853106159, 821584460385320, 290818038033403, 561513563958724, -668451217223360, -833583437570069, 901717147410008, -322311956856341, -952886582187034, 869576854284795, 796436200216732, -884220500138601, 609387292720141, 141517382610779, 936843410761798, -577081736567526, 307724524252743, 649135433354797, 1119850375947736, -25593479984473, -1082755337446368, -478242108070817, -1028598074056038, 982996655131851, 520523344838071, -1121914759131742, 13263114442106, 457644715960836, 343193238425350, 177936067081613, 67292921503309, 452545542877090, 485982325910914, -353934059869849, 1042538393917458, -12736317105416, 1086021543696114, -951085655090452, -530414715595890, -627861055250282, 989726059853174, -834125835710196, -175133158498594, -777736761593318, 431913467504048, -204676474586795, 151988307210251, -296473171728292, -616405342285736, -99070037785964, 749916922018030, 945142332885495, -478885425976300, 853942641941112, 601516202244742, -876451854840070, 519271382363313, -247338585156031, 773823924321169, 176603093825403, -843398849203056, -1003460663385920, -342030241800761, -1105697391113173, 1010522363958957, -996422913823888, 63920563911400, 606255200853280, -468259567556262, 281002308407639, 641544060861186, 306143760393413, -149628546536953, 878036210906797, -835433375253030, -814163980606393, -475714273366660, 431887015184789, 823930583336596, -53137669700119, 18114364036133, -691502159654486, 283015682866272, -674499118545381, -528435012669823, -259952414045377, -311379042119019, 88700564470642, -90705392833427, -411499629134461, 78450246558806, -922957151753702, -70067789041383, 
574747773849631, 1111843764747681, 556486123769620, -1102878946698377, -136863044866596, -1106393145917732, -30729440223262, -530628460754164, 1010925210117085, 305282964816319, -1029269181752756, 972624487451147, -71376930688293, -137125442366393, 430536724204647, -589383792392177, -351135375495573, 166286385101848, -1063536508945647, -1004002241541499, -670079022967573, -1030176100132425, 39468984860645, 936217348957610, -1107643885397440, -972004404673293, -909172614639342, 411106699067221, 1063814249447753, -271733839114761, -481499727798729, 842681564887068, -319415976457250, -821257397999660, 636357070303509, 139822338006841, 107548476263711, 703220413010103, -378414167905101, -347441424591530, -890410645913867, -107094605916117, -208418142479036, -699133308954750, -327801389504978, 498481408354366, 714087744115672, 425978817385295, 356534027337197, 1112443171047230, 231581211410989, 626509626576886, 948595981359844, -69356712568210, -1081511210769377, 692827020282321, -990289210235591, 231350112712985, 1124061038793010, -756300580677405, 992651606088817, -454333571608461, 53539606592668, 994968393656004, 1011246234850529, -862881811975678, 1020005580371674, -392150246809229, -46912353662655, 441831211339677, 631688758508998, -263995624443529, 557081705721439, 1087625288208526, 636200280550649, -892185600125844, -459063820818901, 43356432917366, 279405045624695, 356179155634132, -244935345524274, -496287643105961, -393830676213120, -219367044162349, 498982785542215, 341005061245918, 1096792390042542, -1095280901962219, -1117811041784134, -364813500830627, 956064967397004, -838210316488918, -866043274507890, -789343783501549, 367795352831366] -------------------------------------------------------------------------------- /crates/jiter/benches/floats_array.json: -------------------------------------------------------------------------------- 1 | [445.32175578228936, 71.26165361584043, 327.3961663419041, 552.5762343844564, 203.16202449559069, 61.87158509781195, 
181.1916474886267, 411.0600805327249, 51.13357400226682, 18.3569347847777, 397.06198965403996, 66.00996749548081, 59.93629503208731, 44.614894665393464, 242.32589396940202, 669.1281195067415, 154.16982516961266, 127.77699886854342, 434.14278477745677, 477.91542865948026, 64.44252945381155, 8.548677011139418, 1.1350098088662484, 7.269712236660293, 21.794314862404388, 383.74186840042296, 432.56987579061195, 33.02688406073059, 575.9371925935393, 123.11786931870564, 734.9658356313023, 67.64746113803403, 428.6298352558726, 130.64525490551839, 323.65366624957187, 545.6337078740843, 569.1013227633094, 134.62065923071816, 28.312826901890894, 260.8438877163715, 100.6948540283588, 5.023193545521323, 8.603472950782084, 30.74057348441437, 653.4814966711328, 28.932473592297622, 210.07809804847903, 613.938957468352, 171.7510567509926, 64.64728141895893, 155.20849467683027, 651.4746311462322, 596.6951839878459, 11.983122725302033, 34.579429488940804, 116.54478000347623, 378.17157975953626, 305.72105986103526, 825.0352254017706, 1.3170242008271678, 138.94953959508095, 179.88775582540754, 166.28428670416034, 248.47782480523526, 330.12477798070637, 737.9809075726357, 901.8302778717807, 180.68345550251252, 200.28929446791864, 258.6982218419452, 463.4004033949486, 303.64877563084156, 536.4559771676156, 228.39105264757083, 307.7602900320842, 37.337718277367145, 4.60984622412697, 23.279278122501392, 503.9696628330601, 215.32987516848362, 916.3454701042798, 49.756933922770315, 5.500754890483946, 97.51393525933204, 470.1389768909831, 13.331209722468516, 135.32191680089636, 242.91097572453853, 10.167697114335331, 644.2102588612323, 129.9680411483549, 637.4137672624372, 5.362137634109969, 715.5942994951512, 256.7003824855991, 17.10296093067267, 212.52046743220842, 156.61539411364026, 8.800214256755925, 558.1822911309316, 623.968417517606, 300.6310830784058, 633.0099257352596, 239.01544612193908, 132.68085404884633, 35.394232426497915, 291.13995021084116, 267.4396403266604, 
443.7809198443098, 585.5339263475531, 460.1870316484034, 706.8439896002166, 683.7038228645612, 133.15532509422064, 50.88067734316441, 92.92091164555448, 99.0304944941999, 5.742396426514162, 725.9186676208419, 304.0366123252118, 49.90557098745663, 2.6447287431641087, 364.6316587087531, 208.98427596599137, 655.2159087331534, 16.87684003844798, 82.21618832447284, 38.572345028892514, 169.29611711296815, 9.735889876392571, 214.58982194472063, 3.977812808283264, 438.6197210319962, 24.020756838329767, 226.24105955600166, 205.45097309738105, 159.74774481170462, 428.97995073158893, 286.41088148512125, 40.16999689158506, 28.680136347126, 319.101348713989, 251.17135417011968, 128.2294466057225, 294.4304429191598, 831.2232685675125, 767.0257776268041, 494.0037399961167, 177.22525872545108, 0.6390616994176508, 174.68268639141112, 315.18071411687004, 4.339411116961037, 59.58905996161401, 192.6500585083209, 10.735943566452566, 731.4657441347897, 63.93227557517375, 245.55517114740573, 52.497405922783834, 27.705481306404725, 50.93575675897262, 547.7704184185275, 808.5333746720481, 684.399785422834, 391.6466965270882, 843.9941237149388, 344.92859299895804, 368.1067876071478, 37.037748349881774, 85.90344508360413, 53.845186188853674, 42.856028161455974, 147.13052571126943, 57.53539888364649, 64.62376454747512, 110.51377245382618, 64.82491944076041, 840.8739042702485, 655.2348022942175, 111.91766527356444, 126.20472491835368, 400.9180266103902, 157.06014324818338, 6.52942516720119, 240.38712817414566, 350.0664591158972, 307.3525988041587, 247.22826612339287, 0.09707591533940096, 8.038989605185723, 271.33494719166276, 35.77477103521802, 355.0131377535041, 252.2661375056943, 349.8262199779361, 113.35339768677981, 209.29224745383817, 201.06427662090374, 498.19413593238596, 377.5036366547618, 244.54811620786288, 1.8739820315741376, 173.43122504807266, 569.0628632698871, 81.74599620803372, 16.214016006071503, 626.7271276149148, 477.82121028777306, 235.6920758045615, 60.48939401072075, 
273.00229488032426, 211.77411474051496, 22.683679352293517, 143.60624805613136, 73.35060317117896, 567.6573148045935, 7.988195323291154, 53.789466671443435, 7.624632833972108, 240.9597235479452, 9.681496016531796, 94.98454305453856, 826.5274857444654, 170.2392952942253, 290.2713746302211, 468.1933424606523, 525.4170527234169, 12.18532300607243, 20.121013158954945, 165.56164194015525, 46.39985228909274, 133.83541556008748, 513.7234038710303, 344.0954979206531, 44.80254384297389, 703.7420063041723, 52.49137002442659, 216.70064896672613, 245.6461339429587, 76.38158400848377, 102.34738302686796, 33.22396788168437, 460.9342035319171, 381.28572125335995, 132.39091064735555, 165.3530756462124, 388.4223105152839, 549.3294792617493, 203.54342076712607, 304.8151652940343, 35.7173588480667, 77.69572823682614, 542.3636991313757, 448.7914744618647, 356.59005018249957, 697.7316466311345, 99.20261646058884, 797.5826364514579, 93.34527458618311, 799.1228668091054, 740.0693609607321, 22.4782034102246, 31.568432039761095, 17.339692119950417, 487.56183745759216, 562.4879784852417, 61.45982555161015, 37.61834609225103, 205.9003745784705, 1.1172467724096915, 58.48697087019619, 408.0579406989306, 153.63745535960948, 71.12371992011731, 339.7875400364345, 247.47697957761235, 42.14594685893278, 769.7873485517104, 269.8086511202552, 278.56746262882723, 562.9234936446597, 168.1596704204697, 104.90184446492593, 73.06235730081491, 818.6183411795297, 4.082963817029678, 331.5370213847028, 657.1931071346148, 121.78184300989133, 98.98476973613162, 438.0472967026435, 73.4959875645188, 83.16286028804947, 257.62357658797816, 260.1044410403613, 303.95751085437814, 259.8028536039712, 101.58422151232044, 76.56448091172797, 42.77646283155196, 23.707347991848838, 123.57804479555688, 12.79425388258776, 150.0050545898871, 55.20727389033323, 357.29356054258517, 560.3794255500779, 386.5821248583325, 805.9098106952594, 799.0796419953177, 175.23290693368196, 93.98676416431299, 392.85860435387576, 
212.1437124829545, 86.43687791106021, 674.2617231864782, 505.5736914268391, 31.14415677409285, 5.1925448454011445, 55.832889602430896, 112.18022376561106, 69.27544830073845, 413.35461070441517, 173.81135848438694, 675.7712211397725, 57.569456088410334, 260.09117419583345, 29.19011778251706, 168.17424337132087, 98.7591450116124, 15.707141435753542, 199.8398650358158, 432.5001130031162, 84.38073312175776, 330.72075965782204, 793.3265682815392, 700.5982264954687, 194.37548080747482, 316.5707551567287, 198.38036493043975, 43.602650587057816, 209.46250917155712, 296.0962418545932, 308.5457180935021, 221.09074519918875, 275.1667316824933, 237.7974509234259, 116.02715471216071, 19.83346120206655, 204.51351164615107, 116.65552017864772, 188.88030558765766, 73.26771746771084, 223.25728657235996, 21.479295649508575, 903.7940249888794, 426.2410500627051, 120.04139724670023, 164.25380529520285, 592.3130669739119, 77.52865552471147, 240.81401844271446, 206.70168811877562, 214.97738222878564, 419.2408103972946, 510.0962975026028, 141.74399576806118, 12.592112839650765, 78.66392901923666, 699.3035783747086, 133.82498225216352, 36.271321804535646, 730.8885621023458, 139.18098081011954, 531.7229472686662, 185.82193999703554, 408.5294808293853, 242.28078595870915, 490.5542212062478, 938.292022251062, 313.2637799866804, 250.32313314144497, 92.67139265313966, 284.5699646225197, 2.8724761931867615, 77.33030028939044, 566.9700335068081, 38.23954278944219, 11.62573149142236, 36.37691799060333, 97.87773229868303, 12.43146326053684, 277.4080850208361, 268.7387311215239, 236.74183599949504, 67.378525215967, 308.61758705859245, 212.80630809503018, 224.18350535048037, 137.52742211289342, 7.554168968332686, 3.684803384618582, 748.8130977421112, 28.693889817989746, 12.382770846505519, 703.1479691879869, 82.82315980434639, 584.4564989184796, 166.70618599626317, 352.58948019899594, 66.50472763781937, 50.69119488390004, 346.9713598757074, 16.056166391295914, 230.04690091747406, 525.7752742011932, 
97.71593236029803, 302.48621091587154, 551.1131081822043, 584.3818579484189, 150.72093442202817, 253.01420633573397, 170.3511289138442, 66.44739845404273, 566.1068665767139, 209.31365722257257, 54.4725477199841, 406.8630316097342, 99.01383592874018, 372.8570497659973, 628.2854043077125, 117.06086928249445, 693.7735597098745, 82.00163209737896, 367.9368360681948, 7.115302301189714, 34.915661590352066, 6.672573371596743, 109.66196276712851, 383.0440527514553, 3.6800923096904414, 145.59364163389316, 5.210060018754351, 376.30763904885384, 303.4957043183403, 267.80005776866875, 594.6929220595489, 19.02734873988605, 518.8492554417912, 302.5910932776552, 705.0800947085012, 180.6575863491539, 612.308361553487, 214.6376069164151, 150.2614361798107, 213.53871295383206, 108.98630669395709, 315.7505522332845, 13.98689948529595, 288.8620204514602, 241.6615756413137, 308.74664546558887, 356.78030409999957, 423.47442300866265, 475.1033904214788, 557.5184248637977, 14.01824822208497, 192.46596374707278, 122.89020057544461, 355.47839068460803, 212.7230572106499, 554.2817956062656, 5.839216846382353, 27.6008171640561, 31.861534326911652, 91.8838590961286, 304.9207726040466, 495.31796328649096, 209.24896482482842, 55.71375022253595, 12.251492752625195, 362.75600248702636, 470.6675179884329, 281.68582669750623, 123.83502666060463, 214.66885650712845, 798.1426101698205, 142.80789382379317, 123.32879813824428, 550.3700190654939, 773.625506951824, 67.24261050884704, 176.8156172114429, 189.7978830258287, 176.24197850116468, 460.64820974914164, 912.2548729947903, 2.9112203646286208, 191.00378195584227, 791.8572814264063, 628.4840162113923, 828.5252185673664, 129.99282343915021, 273.3616641301517, 58.46634792077818, 131.7115933550258, 283.1301404428292, 690.0991680164408, 10.815804978729538, 282.37476171127673, 308.1937141637751, 47.4772475341292, 722.6035632742331, 221.7478768063414, 102.1830297913463, 21.27021569453931, 481.5100544835709, 468.45080676662485, 202.52176708912805, 
439.4282552683798, 335.5550314798056, 43.017931706854064, 384.1832767197708, 102.4993239761275, 348.17850840995175, 312.45633754722553, 98.69450196909517, 0.36862192922553005, 68.83599996816186, 744.3244645818734, 345.54863483914806, 28.18491482671706, 12.576573811346918, 14.625978296198937, 379.31830551019874, 282.2807070213743, 89.5894197597805, 716.872568195045, 4.548077404616239, 21.587812150366908, 310.6776428759006, 165.6241289722075, 521.3146051357534, 319.08710544794764, 164.462445548839, 39.00129751158379, 78.09717702601645, 345.4360599262151, 60.854288465417554, 127.29441913097031, 58.857860394574885, 41.72105738192397, 146.14247265993112, 399.3502403830897, 163.7087652157697, 543.3228568606839, 524.8044040506846, 264.05318458670547, 65.55627409504241, 727.5017499650331, 490.1001534145944, 736.6487913245434, 397.6917478844272, 272.38432169050964, 42.49861362995276, 89.456439822773, 34.742524070489296, 248.3908047549283, 191.73532921997628, 67.32465535875617, 293.9379092749961, 371.1705560237598, 31.167011997191516, 217.58680959618232, 141.81028128425461, 267.2333323016145, 300.27775978019605, 406.3912214851415, 635.6799059118379, 81.25370515151691, 366.81602572582614, 219.3892928078017, 621.7710159155212, 118.85700302197844, 172.29676564446535, 397.17992672489066, 285.61128754346413, 94.36052369119474, 307.0280067936588, 202.43177178505368, 249.61864809543076, 6.394538208814945, 578.7400045719694, 6.308963717494551, 337.11401800616767, 429.10661894745266, 127.60790949145945, 213.57567605529798, 519.5852014490839, 4.612957447866025, 116.57974199638272, 65.52498598964927, 107.01594881974272, 289.56584258847636, 265.2824868323073, 829.7757422274365, 219.00028451425106, 206.12310755613925, 145.66813867741322, 772.2380401868755, 769.7758958742545, 451.40725270056987, 106.69133981279079, 75.39655876966403, 63.7449791431225, 84.48122117266087, 629.5314248981857, 606.4293987903917, 741.4516434151102, 31.29827444105853, 73.36061979151333, 466.3070386480815, 
33.686993809903825, 270.0084400784738, 193.06991796596347, 450.43095889805653, 64.25344123190116, 147.73739835023673, 120.90498667265022, 566.41449662011, 134.65427899309913, 502.17768892789064, 163.94943331625603, 170.97306137289084, 253.8643482001839, 14.2954685323058, 810.6203260910991, 714.0271144902787, 24.936684221838906, 415.03291379665194, 695.6877968263259, 366.5330448028534, 48.88978784736481, 299.05402539997175, 255.2850118264632, 817.3129953729247, 353.5327539332803, 182.64144870676049, 601.5747570710837, 34.98302496818031, 23.139366009887873, 392.65651576412057, 115.95520250702147, 573.8014925918196, 202.0648784759529, 326.8535201604161, 84.8319627692996, 919.2534044244509, 105.28034683490789, 135.9744191736028, 83.43097334760246, 0.19802736401193632, 798.9520572949897, 499.2797319693313, 67.98284796434831, 46.93019462272395, 42.46459224270669, 368.3137945139897, 295.5678548565214, 106.92120216312038, 54.36158185378826, 464.6622366470618, 587.2540732122891, 333.241136260474, 0.6541126876010326, 369.42253834376527, 126.08952546012058, 752.382306789718, 424.2937344413127, 4.768318152561781, 52.82198582141276, 418.91268311140345, 594.2028347205832, 365.3420635164094, 43.278597408913654, 2.4368715275803066, 734.1320130671853, 8.02950629502742, 76.94832141301544, 592.3102098420899, 243.57039052714504, 404.78212655469775, 547.8522682917043, 105.93238936992397, 583.6557578630186, 122.19770948909616, 74.99520771880002, 53.70966257599581, 582.5097722565954, 28.252006192818442, 88.12416011921736, 377.89713308314094, 81.39623604675376, 2.3838529168302225, 214.8844226703588, 163.42273563531907, 62.12373827261207, 114.76588828718678, 226.75508511643687, 470.89290590869865, 235.60174639446066, 312.2114899286692, 75.12316178232729, 375.8585434937535, 9.410723249433914, 16.964406175641454, 153.9735190891219, 95.59111600490377, 292.376237231672, 567.9932795461151, 39.0446920715525, 16.6985512851649, 73.54119954060134, 480.90031594131364, 21.08816047213026, 
557.4061443602437, 5.691566717745852, 115.51747801599385, 145.94859308775406, 54.148234325683376, 22.370992646888908, 336.5583595169807, 804.4926238978975, 38.3906857077388, 234.78529049987583, 152.9453648496889, 412.1942062269916, 216.94242213423487, 676.8164819032093, 547.0176864564813, 205.41043078622795, 208.30376160173154, 385.2333123726966, 709.5060852943249, 70.71333429523138, 363.77014004185725, 32.621262594824515, 315.8889379985525, 490.5225175314045, 141.36262244423807, 298.3426657788165, 86.89048761087594, 9.70213823303741, 185.61392529311803, 132.10671583162357, 472.52473629674006, 741.7269294933733, 52.66430688028271, 162.013077868942, 123.55068519630767, 55.713777718996134, 507.0054522099067, 108.62167237699882, 0.08502341553352755, 178.06355578576984, 282.3531438526133, 117.04929841392241, 445.0724350009568, 132.85597814284756, 5.932498114572707, 754.6790846053725, 767.8886219652737, 88.61891585831583, 60.88302666680231, 251.0264966058801, 319.97387264795884, 60.97062056439855, 99.26191737448633, 467.7797367349051, 531.5437120386682, 66.58994574173593, 128.49052834308227, 66.13497589110179, 793.5321246803908, 230.45644474922156, 288.909337464941, 14.885362245625343, 44.68435732254761, 120.0317849088159, 599.7498918080062, 59.68244284708463, 760.4806511901286, 289.97756610197905, 107.45790741289119, 42.384564293847085, 555.5483649030391, 421.39833297960934, 144.86270716766282, 483.6710863582512, 823.5324650740175, 281.3022944090922, 48.05934416183614, 278.1201146803361, 43.60474325173029, 50.82102509459469, 243.67667808104412, 696.6530515720195, 740.525974576397, 433.0000072893932, 582.2107707544733, 339.0381274940293, 44.64367677694766, 125.6156336880198, 33.703435313268734, 157.62228642808887, 334.31400699968736, 226.94988596479487, 449.55559047572996, 24.132118126868313, 324.6534013693348, 370.62021066478167, 76.0391920324559, 589.9782842429947, 684.070743093206, 108.379298695269, 42.65503780575997, 154.59504796936565, 13.574049466559753, 
96.59102328222231, 576.0053124205883, 854.7102190036604, 219.42384576133136, 300.3138976065237, 555.5733973973445, 293.3484338725666, 306.75126837434914, 162.27960005368183, 230.44106110650068, 894.0923692929359, 296.64685465716406, 131.54075642506146, 712.0619549863698, 243.6174186353626, 337.687988989945, 24.771431640549164, 157.56048565738294, 438.0797964127827, 4.676713964829472, 372.9939673692317, 75.05947453146541, 98.62647780839376, 66.95664900916918, 575.4488046474394, 162.8526341939642, 703.9616863875566, 207.8689231599697, 132.50987888354217, 177.46609707080503, 91.03929964239619, 29.46224143458952, 383.43354147521035, 332.9767748985491, 55.88498235502823, 113.38583994306536, 143.3447900810264, 30.49163891078285, 399.6558110103479, 5.417270310158783, 61.344303896304865, 176.90370383958336, 66.88125932598086, 704.5622426089699, 507.9479981888596, 499.0776024346966, 543.9642935895987, 490.43759669230286, 278.5753787447815, 281.1616233186713, 616.7127508474805, 24.038741414422425, 687.6568325140552, 271.39119861391174, 173.99631483905978, 172.49774991021178, 436.61927591379515, 840.9865048203441, 459.1131435967032, 817.0672430968907, 44.31250829329462, 24.967703098111272, 10.061816697594633, 171.69685019271316, 92.03313368732067, 501.1661828719429, 224.67821338027613, 44.300757973524696, 107.77933879241503, 385.6363105416585, 57.790632482981515, 408.83118253852535, 39.429424278195654, 30.424264793719, 382.55358924905596, 72.01747599112, 406.97352323560136, 278.62330120725693, 133.5940915626496, 348.00999916543066, 50.893860033579536, 1.6638368413799753, 87.14940924794156, 1.5632610710493595, 337.51249281818724, 29.30058114932345, 136.5368526757847, 711.7255015153168, 32.235551435475415, 515.2061778746104, 50.57301010706302, 320.62585844839083, 209.75728757645558, 153.12143816133332, 297.9202390535216, 31.295893959714483, 402.5438384911535, 463.86930755779616, 26.611251590970358, 73.48363522344313, 111.52791362153408, 378.15838968304075, 281.02573528780624, 
283.67205612075134, 355.53262585938035, 218.00667297175713, 452.673094429716, 67.80552089566407, 92.02922247712017, 192.96373571821178, 10.540440369438004, 505.2239747261604, 77.83013661653726, 153.93437833693838, 98.31900241098504, 604.7719735395757, 227.16150132650824, 354.48082871190627, 723.8178792654102, 46.669712561768286, 382.2807089938516, 88.07486261761102, 80.4909653057407, 509.92721907561634, 257.1758029384339, 66.0322767223044, 29.301157989997012, 757.5157400503309, 376.74602321581045, 400.6846627405593, 598.7645767511439, 408.5197918313817, 10.309133641859743, 672.0131049050337, 433.8989397721204, 16.62778638768705, 21.812484225397565, 122.01441100689564, 421.36773515058155, 437.82604717794544, 1.6896492370711003, 70.66543492924538, 95.25192721759788, 173.36378934716478, 10.051993123808485, 867.1879282408413, 71.63682445913597, 527.4072798679473, 319.32356093949784, 22.460599064757094, 48.739512637401255, 504.9141187050938, 45.477106107759944, 230.01956811385023, 45.35182174856778, 41.903542226103625, 352.60623774158523, 249.32248076432614, 0.45828722743258477, 79.61687709391326, 493.64972642333686, 10.23756975734348, 578.399256484223, 219.68276457281706, 3.296330883059834, 50.53600167391153, 280.2729005848514, 402.65157259977497, 809.8789488403143, 333.1244916676535, 28.247777108336294] --------------------------------------------------------------------------------