├── .gitmodules ├── benchmarks ├── src │ └── lib.rs ├── benches │ ├── schema │ │ ├── README.md │ │ ├── src │ │ │ ├── lib.rs │ │ │ ├── canada.rs │ │ │ ├── empty.rs │ │ │ ├── prim_str.rs │ │ │ ├── enumstr.rs │ │ │ ├── citm_catalog.rs │ │ │ ├── color.rs │ │ │ └── twitter.rs │ │ ├── Cargo.toml │ │ └── licences │ │ │ └── LICENSE-json-benchmark │ ├── testdata │ │ └── book.json │ ├── common.rs │ ├── get_from.rs │ ├── serialize_struct.rs │ ├── serialize_value.rs │ ├── deserialize_struct.rs │ └── deserialize_value.rs ├── .cargo │ └── config.toml └── Cargo.toml ├── fuzz ├── .gitignore ├── fuzz_targets │ └── from_slice.rs └── Cargo.toml ├── clippy.toml ├── sonic-number ├── README.md ├── Cargo.toml ├── src │ ├── arch │ │ ├── aarch64.rs │ │ ├── fallback.rs │ │ ├── mod.rs │ │ └── x86_64.rs │ ├── common.rs │ └── slow.rs └── licenses │ └── LICENSE-yyjson ├── examples ├── testdata │ └── person.json ├── get_from.rs ├── json_number.rs ├── get_by_schema.rs ├── serde.rs ├── lazyvalue.rs ├── get_many.rs ├── json_filter.rs ├── value.rs ├── iterator.rs └── handle_error.rs ├── taplo.toml ├── bindings ├── README.md └── ffi │ ├── Cargo.toml │ ├── build.rs │ ├── include │ └── sonic_ffi.h │ └── src │ └── lib.rs ├── scripts ├── fuzz.sh ├── fmt_fix.sh ├── sanitize.sh └── test.sh ├── src ├── pointer │ ├── mod.rs │ ├── from.rs │ ├── point.rs │ └── tree.rs ├── util │ ├── mod.rs │ ├── private.rs │ ├── arch │ │ ├── fallback.rs │ │ ├── mod.rs │ │ ├── x86_64.rs │ │ └── aarch64.rs │ ├── utf8.rs │ ├── mock.rs │ └── reborrow.rs ├── config.rs ├── prelude.rs ├── value │ ├── shared.rs │ ├── mod.rs │ ├── visitor.rs │ ├── tls_buffer.rs │ └── tryfrom.rs ├── lazyvalue │ ├── mod.rs │ ├── de.rs │ └── ser.rs ├── lib.rs ├── input.rs ├── serde │ └── rawnumber.rs └── writer.rs ├── typos.toml ├── sonic-simd ├── README.md ├── Cargo.toml └── src │ ├── lib.rs │ ├── traits.rs │ ├── bits.rs │ ├── avx512.rs │ ├── sse2.rs │ ├── avx2.rs │ ├── v128.rs │ ├── v256.rs │ ├── v512.rs │ └── neon.rs ├── .gitignore ├── .cargo └── 
config.toml ├── assets └── pngs │ └── flamegraph.sh ├── rustfmt.toml ├── .github ├── workflows │ ├── dependency-review.yaml │ ├── security.yml │ └── ci.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── docs ├── serdejson_compatibility.md ├── for_Golang_user_zh.md ├── for_Golang_user.md ├── benchmark_aarch64.md └── performance_zh.md ├── ROADMAP.md ├── licenses ├── serde_json │ └── LICENSE-MIT └── LICENSE-yyjson ├── profile └── README.md ├── Cargo.toml ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | artifacts/ 2 | corpus/ 3 | coverage/ 4 | target/ 5 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | ignore-interior-mutability = ["bytes::Bytes", "faststr::FastStr"] 2 | -------------------------------------------------------------------------------- /sonic-number/README.md: -------------------------------------------------------------------------------- 1 | # sonic_number 2 | 3 | A fast number parsing library based on SIMD. 
-------------------------------------------------------------------------------- /examples/testdata/person.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Xiaoming", 3 | "age": 18, 4 | "phones": ["+123456"] 5 | } 6 | -------------------------------------------------------------------------------- /taplo.toml: -------------------------------------------------------------------------------- 1 | [formatting] 2 | align_comments = true 3 | align_entries = true 4 | indent_string = " " 5 | reorder_keys = true 6 | -------------------------------------------------------------------------------- /bindings/README.md: -------------------------------------------------------------------------------- 1 | # A collection of bindings for sonic-rs 2 | 3 | 1. ffi: the low-level APIs of sonic-rs, should not be used directly 4 | ... -------------------------------------------------------------------------------- /benchmarks/benches/schema/README.md: -------------------------------------------------------------------------------- 1 | 2 | # JSON Schema 3 | 4 | Schema used in benches, copied from `https://github.com/serde-rs/json-benchmark`. 
5 | -------------------------------------------------------------------------------- /scripts/fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | cargo install cargo-fuzz 6 | 7 | RUST_BACKTRACE=full cargo +nightly fuzz run fuzz_value -- -max_total_time=20m -------------------------------------------------------------------------------- /src/pointer/mod.rs: -------------------------------------------------------------------------------- 1 | mod from; 2 | pub(crate) mod point; 3 | pub(crate) mod tree; 4 | 5 | pub use point::{JsonPointer, PointerNode}; 6 | pub use tree::PointerTree; 7 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod canada; 2 | pub mod citm_catalog; 3 | pub mod enumstr; 4 | pub mod twitter; 5 | 6 | mod color; 7 | mod empty; 8 | mod prim_str; 9 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2021" 3 | name = "schema" 4 | version = "0.1.0" 5 | 6 | [dependencies] 7 | serde = { version = "1.0", features = ["derive"] } 8 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/from_slice.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | use sonic_rs_fuzz::sonic_rs_fuzz_data; 5 | 6 | fuzz_target!(|data: &[u8]| sonic_rs_fuzz_data(data)); 7 | -------------------------------------------------------------------------------- /typos.toml: -------------------------------------------------------------------------------- 1 | # Typo check: https://github.com/crate-ci/typos 2 | 3 | [files] 4 | extend-exclude = 
["assets/", "benchmarks/benches/testdata/"] 5 | 6 | [default.extend-words] 7 | alse = "alse" 8 | -------------------------------------------------------------------------------- /sonic-simd/README.md: -------------------------------------------------------------------------------- 1 | 2 | # sonic_simd 3 | 4 | A portable SIMD library that provides low-level APIs for x86, ARM. Other platforms will use the fallback scalar implementation. 5 | 6 | TODO: 7 | 8 | 1. support RISC-V. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | build/ 3 | .vscode/ 4 | perf.data* 5 | .tmp/ 6 | bench.sh 7 | rust- 8 | *.data 9 | opensource_git_commit.log 10 | Cargo.lock 11 | *.profraw 12 | *.profdata 13 | *.svg 14 | *.diff 15 | .DS_Store -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod arch; 2 | 3 | pub(crate) mod private; 4 | pub(crate) mod reborrow; 5 | pub(crate) mod string; 6 | pub(crate) mod unicode; 7 | pub(crate) mod utf8; 8 | 9 | #[cfg(test)] 10 | pub(crate) mod mock; 11 | -------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | rustdocflags = ["-C", "target-cpu=native"] 3 | rustflags = ["-g", "-C", "target-cpu=native"] 4 | 5 | [bench] 6 | rustdocflags = ["-C", "target-cpu=native"] 7 | rustflags = ["-g", "-C", "target-cpu=native"] 8 | -------------------------------------------------------------------------------- /benchmarks/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | rustdocflags = ["-C", "target-cpu=native"] 3 | rustflags = ["-g", "-C", "target-cpu=native"] 4 | 5 | [bench] 6 | 
rustdocflags = ["-C", "target-cpu=native"] 7 | rustflags = ["-g", "-C", "target-cpu=native"] 8 | -------------------------------------------------------------------------------- /bindings/ffi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2021" 3 | name = "sonic_rs_ffi" 4 | version = "0.1.0" 5 | 6 | [dependencies] 7 | sonic-rs = { path = "../../" } 8 | 9 | [build] 10 | lib = ["staticlib"] 11 | 12 | [build-dependencies] 13 | cbindgen = "0.27" 14 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Copy, Default)] 2 | pub(crate) struct DeserializeCfg { 3 | pub use_rawnumber: bool, 4 | pub utf8_lossy: bool, 5 | } 6 | 7 | #[derive(Debug, Clone, Copy, Default)] 8 | pub(crate) struct SerializeCfg { 9 | pub sort_map_keys: bool, 10 | } 11 | -------------------------------------------------------------------------------- /assets/pngs/flamegraph.sh: -------------------------------------------------------------------------------- 1 | 2 | # the command to profiling sonic-rs benchmarks 3 | 4 | CARGO_PROFILE_BENCH_DEBUG=true cargo flamegraph --bench deserialize_struct -- --bench citm_catalog/sonic --profile-time 5 5 | 6 | CARGO_PROFILE_BENCH_DEBUG=true cargo flamegraph --bench deserialize_struct -- --bench citm_catalog/simd_json --profile-time 5 -------------------------------------------------------------------------------- /sonic-number/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Volo Team "] 3 | description = "Fast number parsing based on SIMD" 4 | edition = "2021" 5 | license = "Apache-2.0" 6 | name = "sonic-number" 7 | repository = "https://github.com/cloudwego/sonic-rs" 8 | version = "0.1.0" 9 | 10 | 11 | [dependencies] 12 | cfg-if = "1.0" 13 | 
-------------------------------------------------------------------------------- /benchmarks/benches/testdata/book.json: -------------------------------------------------------------------------------- 1 | {"id":12125925,"ids":[-2147483648,2147483647],"title":"未来简史-从智人到智神","titles":["hello","world"],"price":345.67,"prices":[-0.1,0.1],"hot":true,"hots":[true,true,true],"author":{"name":"json","age":99,"male":true},"authors":[{"name":"json","age":99,"male":true},{"name":"json","age":99,"male":true},{"name":"json","age":99,"male":true}],"weights":[]} -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | comment_width = 100 2 | format_code_in_doc_comments = true 3 | format_strings = true 4 | group_imports = "StdExternalCrate" 5 | imports_granularity = "Crate" 6 | newline_style = "Unix" 7 | reorder_imports = true 8 | reorder_modules = true 9 | wrap_comments = true 10 | -------------------------------------------------------------------------------- /sonic-number/src/arch/aarch64.rs: -------------------------------------------------------------------------------- 1 | #[inline(always)] 2 | pub unsafe fn simd_str2int(c: &[u8], need: usize) -> (u64, usize) { 3 | debug_assert!(need < 17); 4 | let mut sum = 0u64; 5 | let mut i = 0; 6 | while i < need && c.get_unchecked(i).is_ascii_digit() { 7 | sum = (c.get_unchecked(i) - b'0') as u64 + sum * 10; 8 | i += 1; 9 | } 10 | (sum, i) 11 | } 12 | -------------------------------------------------------------------------------- /sonic-number/src/arch/fallback.rs: -------------------------------------------------------------------------------- 1 | #[inline(always)] 2 | pub unsafe fn simd_str2int(c: &[u8], need: usize) -> (u64, usize) { 3 | debug_assert!(need < 17); 4 | let mut sum = 0u64; 5 | let mut i = 0; 6 | while i < need && c.get_unchecked(i).is_ascii_digit() { 7 | sum = (c.get_unchecked(i) - b'0') as u64 
+ sum * 10; 8 | i += 1; 9 | } 10 | (sum, i) 11 | } 12 | -------------------------------------------------------------------------------- /scripts/fmt_fix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 6 | REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" 7 | 8 | cd "${REPO_ROOT}" 9 | 10 | echo "Updating Rust toolchain..." 11 | rustup update 12 | 13 | if [[ "${1:-}" == "--check" ]]; then 14 | cargo fmt --all -- --check 15 | else 16 | cargo fmt --all 17 | fi 18 | 19 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | //! Imports the various traits about JSON. `use sonic_rs::prelude::*` to make the 2 | //! various traits and methods imported if you need. 3 | 4 | pub use crate::{ 5 | index::Index, 6 | input::JsonInput, 7 | reader::{Read, Reader}, 8 | serde::JsonNumberTrait, 9 | value::{JsonContainerTrait, JsonValueMutTrait, JsonValueTrait}, 10 | writer::WriteExt, 11 | }; 12 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yaml: -------------------------------------------------------------------------------- 1 | name: 'Dependency Review' 2 | on: [pull_request] 3 | 4 | permissions: 5 | contents: read 6 | 7 | jobs: 8 | dependency-review: 9 | runs-on: [self-hosted, Linux, amd64] 10 | steps: 11 | - name: 'Checkout Repository' 12 | uses: actions/checkout@v4 13 | - name: 'Dependency Review' 14 | uses: actions/dependency-review-action@v1 -------------------------------------------------------------------------------- /bindings/ffi/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use cbindgen::Language::C; 4 | 5 | fn main() { 6 | setup_cbindgen(); 7 | } 8 | 9 | fn 
setup_cbindgen() { 10 | let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 11 | cbindgen::Builder::new() 12 | .with_crate(crate_dir) 13 | .with_language(C) 14 | .generate() 15 | .expect("Unable to generate bindings") 16 | .write_to_file("include/sonic_ffi.h"); 17 | } 18 | -------------------------------------------------------------------------------- /sonic-number/src/arch/mod.rs: -------------------------------------------------------------------------------- 1 | cfg_if::cfg_if! { 2 | if #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq", target_feature = "avx2", target_feature = "sse2"))] { 3 | mod x86_64; 4 | pub use x86_64::*; 5 | } else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] { 6 | mod aarch64; 7 | pub use aarch64::*; 8 | } else { 9 | mod fallback; 10 | pub use fallback::*; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /sonic-simd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Volo Team "] 3 | description = "Portable SIMD API for sonic-rs" 4 | edition = "2021" 5 | license = "Apache-2.0" 6 | name = "sonic-simd" 7 | repository = "https://github.com/cloudwego/sonic-rs" 8 | version = "0.1.2" 9 | 10 | [features] 11 | avx512 = [] # enable avx512, requires Rust 1.89 or later, and also enable `avx512f` target feature 12 | 13 | [dependencies] 14 | cfg-if = "1.0" 15 | -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: "Security Audit" 2 | on: 3 | pull_request: 4 | push: 5 | paths: 6 | - "**/Cargo.toml" 7 | - "**/Cargo.lock" 8 | jobs: 9 | security-audit: 10 | permissions: write-all 11 | runs-on: [self-hosted, Linux, amd64] 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: dtolnay/rust-toolchain@stable 15 | - uses: actions-rs/audit-check@v1 16 | 
with: 17 | token: ${{ secrets.GITHUB_TOKEN }} 18 | -------------------------------------------------------------------------------- /scripts/sanitize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | export ASAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1:abort_on_error=1" 6 | 7 | run_tests() { 8 | cargo +nightly test --release --target x86_64-unknown-linux-gnu --features sanitize,"$1" 9 | cargo +nightly test --doc --package sonic-rs --target x86_64-unknown-linux-gnu --features sanitize,"$1" 10 | } 11 | 12 | echo "Running tests with $1 and $2" 13 | RUSTFLAGS="-Zsanitizer=$1" RUSTDOCFLAGS="-Zsanitizer=$1" run_tests "$2" 14 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | cargo test 6 | 7 | cargo test --features arbitrary_precision 8 | 9 | cargo test --features sort_keys 10 | 11 | cargo test --features utf8_lossy 12 | 13 | cargo test --features non_trailing_zero 14 | 15 | cargo test --features avx512 16 | 17 | examples=$(cargo build --example 2>&1 | grep -v ":") 18 | 19 | for example in $examples; do 20 | echo "Running example $example" 21 | cargo run --example $example 22 | done 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/serdejson_compatibility.md: -------------------------------------------------------------------------------- 1 | # A quick guide to migrating from serde_json 2 | 3 | The goal of sonic-rs is performance and ease of use (more APIs and ALLINONE). Otherwise, it is recommended to use `serde_json`. 
4 | 5 | Just replace as follows: 6 | 7 | - `&'a serde_json::RawValue` -> `sonic_rs::LazyValue<'a>` 8 | 9 | - `Box<serde_json::RawValue>` -> `sonic_rs::OwnedLazyValue` 10 | 11 | - `serde_json::Value` -> `sonic_rs::Value` (Note: different when JSON has duplicate keys) 12 | 13 | - `serde_json::RawNumber` -> `sonic_rs::RawNumber` 14 | 15 | -------------------------------------------------------------------------------- /src/value/shared.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use bumpalo::Bump; 4 | 5 | // Represent a shared allocator. 6 | #[derive(Debug, Default)] 7 | #[repr(C, align(8))] 8 | #[doc(hidden)] 9 | pub struct Shared { 10 | json: Vec, 11 | alloc: Bump, 12 | } 13 | 14 | impl Shared { 15 | pub fn get_alloc(&mut self) -> &mut Bump { 16 | &mut self.alloc 17 | } 18 | 19 | pub fn set_json(&mut self, json: Vec) { 20 | self.json = json; 21 | } 22 | } 23 | 24 | // #safety 25 | // we do not export the immutable bump allocator, so `Sync` is always safe here 26 | unsafe impl Sync for Shared {} 27 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2021" 3 | name = "sonic_rs-fuzz" 4 | publish = false 5 | version = "0.0.0" 6 | 7 | [package.metadata] 8 | cargo-fuzz = true 9 | 10 | # Prevent this from interfering with workspaces 11 | [workspace] 12 | members = ["."] 13 | 14 | [dependencies] 15 | faststr = "0.2" 16 | libfuzzer-sys = "0.4" 17 | serde = { version = "1.0", features = ["derive"] } 18 | serde_json = { version = "1.0", features = ["float_roundtrip"] } 19 | simdutf8 = "0.1" 20 | sonic-rs = { path = ".." 
} 21 | 22 | [[bin]] 23 | doc = false 24 | name = "fuzz_value" 25 | path = "fuzz_targets/from_slice.rs" 26 | test = false 27 | -------------------------------------------------------------------------------- /src/util/private.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use faststr::FastStr; 3 | 4 | use crate::{ 5 | reader::{PaddedSliceRead, Read}, 6 | PointerNode, 7 | }; 8 | 9 | // Prevent users from implementing the trait in sonic-rs. 10 | pub trait Sealed {} 11 | impl Sealed for usize {} 12 | impl Sealed for str {} 13 | impl Sealed for std::string::String {} 14 | impl Sealed for FastStr {} 15 | impl Sealed for Bytes {} 16 | impl Sealed for u8 {} 17 | impl<'de> Sealed for Read<'de> {} 18 | impl<'de> Sealed for PaddedSliceRead<'de> {} 19 | impl<'a, T> Sealed for &'a T where T: ?Sized + Sealed {} 20 | impl Sealed for [T] where T: Sized + Sealed {} 21 | impl Sealed for PointerNode {} 22 | -------------------------------------------------------------------------------- /benchmarks/benches/common.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Copy)] 2 | struct SonicConfig { 3 | use_rawnum: bool, 4 | } 5 | 6 | static SONIC_DEFAULT_CFG: SonicConfig = SonicConfig { use_rawnum: false }; 7 | 8 | static SONIC_USE_RAWNUM_CFG: SonicConfig = SonicConfig { use_rawnum: true }; 9 | 10 | static SONIC_USE_RAW_CFG: SonicConfig = SonicConfig { use_rawnum: false }; 11 | 12 | fn do_sonic_rs_from_slice(data: &[u8], cfg: SonicConfig) -> sonic_rs::Result { 13 | let mut de = sonic_rs::Deserializer::new(sonic_rs::Read::from(data)); 14 | if cfg.use_rawnum { 15 | de = de.use_rawnumber(); 16 | } 17 | sonic_rs::Deserialize::deserialize(&mut de) 18 | } 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 
2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | 12 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 13 | 14 | **Describe the solution you'd like** 15 | 16 | A clear and concise description of what you want to happen. 17 | 18 | **Describe alternatives you've considered** 19 | 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | **Additional context** 23 | 24 | Add any other context or screenshots about the feature request here. 25 | -------------------------------------------------------------------------------- /src/util/arch/fallback.rs: -------------------------------------------------------------------------------- 1 | // Does not use PMULL instructions, but it is apparently slow. 2 | // This is copied from simdjson. 3 | pub unsafe fn prefix_xor(bitmask: u64) -> u64 { 4 | let mut bitmask = bitmask; 5 | bitmask ^= bitmask << 1; 6 | bitmask ^= bitmask << 2; 7 | bitmask ^= bitmask << 4; 8 | bitmask ^= bitmask << 8; 9 | bitmask ^= bitmask << 16; 10 | bitmask ^= bitmask << 32; 11 | bitmask 12 | } 13 | 14 | #[inline(always)] 15 | pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 { 16 | let mut mask: u64 = 0; 17 | for (i, p) in data.iter().enumerate() { 18 | if !matches!(*p, b'\t' | b'\n' | b'\r' | b' ') { 19 | mask |= 1 << i; 20 | } 21 | } 22 | mask 23 | } 24 | -------------------------------------------------------------------------------- /src/value/mod.rs: -------------------------------------------------------------------------------- 1 | //! A dynamic type representing any valid JSON value. 
2 | 3 | pub mod array; 4 | pub(crate) mod de; 5 | mod from; 6 | pub(crate) mod node; 7 | #[doc(hidden)] 8 | pub mod shared; 9 | mod tryfrom; 10 | #[macro_use] 11 | mod macros; 12 | pub mod get; 13 | pub mod object; 14 | mod partial_eq; 15 | mod ser; 16 | mod tls_buffer; 17 | mod value_trait; 18 | pub(crate) mod visitor; 19 | 20 | #[doc(inline)] 21 | pub use self::array::Array; 22 | #[doc(inline)] 23 | pub use self::de::from_value; 24 | #[doc(inline)] 25 | pub use self::node::{Value, ValueRef}; 26 | #[doc(inline)] 27 | pub use self::object::Object; 28 | #[doc(inline)] 29 | pub use self::ser::to_value; 30 | #[doc(inline)] 31 | pub use self::value_trait::{JsonContainerTrait, JsonType, JsonValueMutTrait, JsonValueTrait}; 32 | -------------------------------------------------------------------------------- /src/pointer/from.rs: -------------------------------------------------------------------------------- 1 | use faststr::FastStr; 2 | 3 | use crate::PointerNode; 4 | 5 | impl From for PointerNode { 6 | fn from(value: usize) -> Self { 7 | PointerNode::Index(value) 8 | } 9 | } 10 | 11 | impl From<&usize> for PointerNode { 12 | fn from(value: &usize) -> Self { 13 | PointerNode::Index(*value) 14 | } 15 | } 16 | 17 | impl From<&str> for PointerNode { 18 | fn from(value: &str) -> Self { 19 | PointerNode::Key(FastStr::new(value)) 20 | } 21 | } 22 | 23 | impl From for PointerNode { 24 | fn from(value: FastStr) -> Self { 25 | PointerNode::Key(value) 26 | } 27 | } 28 | 29 | impl From<&FastStr> for PointerNode { 30 | fn from(value: &FastStr) -> Self { 31 | PointerNode::Key(value.clone()) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/util/utf8.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{Error, ErrorCode, Result}; 2 | 3 | // simdutf8 will cause `out-of-bounds pointer arithmetic` when using Miri tests 4 | #[cfg(not(miri))] 5 | #[inline] 6 | pub(crate) fn 
from_utf8(data: &[u8]) -> Result<&str> { 7 | simdutf8::basic::from_utf8(data).or_else(|_| from_utf8_compat(data)) 8 | } 9 | 10 | #[cfg(miri)] 11 | pub(crate) fn from_utf8(data: &[u8]) -> Result<&str> { 12 | std::str::from_utf8(data) 13 | .map_err(|e| Error::syntax(ErrorCode::InvalidUTF8, data, e.valid_up_to())) 14 | } 15 | 16 | #[cfg(not(miri))] 17 | #[cold] 18 | fn from_utf8_compat(data: &[u8]) -> Result<&str> { 19 | // compat::from_utf8 is slower than basic::from_utf8 20 | simdutf8::compat::from_utf8(data) 21 | .map_err(|e| Error::syntax(ErrorCode::InvalidUTF8, data, e.valid_up_to())) 22 | } 23 | -------------------------------------------------------------------------------- /examples/get_from.rs: -------------------------------------------------------------------------------- 1 | use sonic_rs::{get, get_unchecked, pointer, JsonValueTrait}; 2 | 3 | fn main() { 4 | let path = pointer!["a", "b", "c", 1]; 5 | let json = r#" 6 | {"u": 123, "a": {"b" : {"c": [null, "found"]}}} 7 | "#; 8 | let target = unsafe { get_unchecked(json, &path).unwrap() }; 9 | assert_eq!(target.as_raw_str(), r#""found""#); 10 | assert_eq!(target.as_str().unwrap(), "found"); 11 | 12 | let target = get(json, &path); 13 | assert_eq!(target.as_str().unwrap(), "found"); 14 | assert_eq!(target.unwrap().as_raw_str(), r#""found""#); 15 | 16 | let path = pointer!["a", "b", "c", "d"]; 17 | let json = r#" 18 | {"u": 123, "a": {"b" : {"c": [null, "found"]}}} 19 | "#; 20 | // not found from json 21 | let target = get(json, &path); 22 | assert!(target.is_err()); 23 | } 24 | -------------------------------------------------------------------------------- /src/util/mock.rs: -------------------------------------------------------------------------------- 1 | pub(crate) struct MockString(String); 2 | 3 | impl std::ops::Deref for MockString { 4 | type Target = str; 5 | 6 | fn deref(&self) -> &str { 7 | &self.0 8 | } 9 | } 10 | 11 | impl std::ops::DerefMut for MockString { 12 | fn deref_mut(&mut self) -> &mut 
str { 13 | &mut self.0 14 | } 15 | } 16 | 17 | impl From for MockString { 18 | fn from(s: String) -> Self { 19 | Self(s) 20 | } 21 | } 22 | 23 | impl From<&str> for MockString { 24 | fn from(s: &str) -> Self { 25 | Self(s.to_string()) 26 | } 27 | } 28 | 29 | impl Drop for MockString { 30 | fn drop(&mut self) { 31 | // clear memory explicitly before drop 32 | let bs = unsafe { self.0.as_bytes_mut() }; 33 | for b in bs.iter_mut() { 34 | *b = 0; 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | **To Reproduce** 15 | 16 | Steps to reproduce the behavior: 17 | 1. Go to '...' 18 | 2. Click on '....' 19 | 3. Scroll down to '....' 20 | 4. See error 21 | 22 | **Expected behavior** 23 | 24 | A clear and concise description of what you expected to happen. 25 | 26 | **Screenshots** 27 | 28 | If applicable, add screenshots to help explain your problem. 29 | 30 | **sonic-rs version:** 31 | 32 | Please provide the version of sonic-rs you are using. 33 | 34 | **Environment:** 35 | 36 | The environment. 37 | 38 | **Additional context** 39 | 40 | Add any other context about the problem here. 
41 | -------------------------------------------------------------------------------- /examples/json_number.rs: -------------------------------------------------------------------------------- 1 | use sonic_rs::{from_str, to_string, JsonNumberTrait, Number, RawNumber}; 2 | 3 | fn main() { 4 | // parse RawNumber from JSON number 5 | let number: RawNumber = from_str(" 123").unwrap(); 6 | assert_eq!(number.as_str(), "123"); 7 | assert_eq!(to_string(&number).unwrap(), "123"); 8 | 9 | // parse RawNumber from JSON string 10 | let number: RawNumber = from_str(r#""0.123""#).unwrap(); 11 | assert_eq!(number.as_str(), "0.123"); 12 | assert_eq!(to_string(&number).unwrap(), "0.123"); 13 | assert!(number.is_f64()); 14 | assert_eq!(number.as_f64().unwrap(), 0.123); 15 | assert_eq!(number.as_u64(), None); 16 | 17 | // convert RawNumber to Number 18 | let num: Number = number.try_into().unwrap(); 19 | assert_eq!(num.as_f64().unwrap(), 0.123); 20 | assert_eq!(num.as_u64(), None); 21 | } 22 | -------------------------------------------------------------------------------- /src/util/arch/mod.rs: -------------------------------------------------------------------------------- 1 | cfg_if::cfg_if! 
{ 2 | if #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq", target_feature = "avx2", target_feature = "sse2"))] { 3 | mod x86_64; 4 | pub use x86_64::*; 5 | } else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] { 6 | mod aarch64; 7 | pub use aarch64::*; 8 | } else { 9 | mod fallback; 10 | pub use fallback::*; 11 | } 12 | } 13 | 14 | #[cfg(test)] 15 | mod test { 16 | use super::*; 17 | 18 | #[test] 19 | fn test_get_non_space_bits() { 20 | let input = b"\t\r\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; 21 | let non_space_bits = unsafe { get_nonspace_bits(input) }; 22 | let expected_bits = 0b1111111111111111111111111111111111111111111111111111111111110000; 23 | assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}"); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /examples/get_by_schema.rs: -------------------------------------------------------------------------------- 1 | use sonic_rs::json; 2 | 3 | fn main() { 4 | let schema = json!({ 5 | "a": null, // default value is `null` 6 | "b": { 7 | "b1": {}, 8 | "b2": "default string" // default value is string 9 | }, 10 | "c": [], // default value is [] 11 | }); 12 | 13 | let data = r#" 14 | { 15 | "a": {}, 16 | "b": { 17 | "b1": 123 18 | }, 19 | "c": [1, 2, 3], 20 | "d": "balabala..." 
21 | }"#; 22 | 23 | // parse json data by schema; we can parse into the schema value in place 24 | let got = sonic_rs::get_by_schema(data, schema).unwrap(); 25 | assert_eq!( 26 | got, 27 | json!({ 28 | "a": {}, 29 | "b": { 30 | "b1": 123, 31 | "b2": "default string" 32 | }, 33 | "c": [1, 2, 3] 34 | }) 35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /examples/serde.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | 3 | use sonic_rs::{Deserialize, Serialize}; 4 | 5 | #[derive(Serialize, Deserialize)] 6 | struct Person { 7 | name: String, 8 | age: u8, 9 | phones: Vec, 10 | } 11 | 12 | fn main() { 13 | // parse a string 14 | let data = r#"{ 15 | "name": "Xiaoming", 16 | "age": 18, 17 | "phones": [ 18 | "+123456" 19 | ] 20 | }"#; 21 | let p: Person = sonic_rs::from_str(data).unwrap(); 22 | assert_eq!(p.age, 18); 23 | assert_eq!(p.name, "Xiaoming"); 24 | let out = sonic_rs::to_string_pretty(&p).unwrap(); 25 | assert_eq!(out, data); 26 | 27 | // parse a file reader 28 | let p: Person = 29 | sonic_rs::from_reader(File::open("examples/testdata/person.json").unwrap()).unwrap(); 30 | assert_eq!(p.age, 18); 31 | assert_eq!(p.name, "Xiaoming"); 32 | let out = sonic_rs::to_string_pretty(&p).unwrap(); 33 | assert_eq!(out, data); 34 | } 35 | -------------------------------------------------------------------------------- /src/lazyvalue/mod.rs: -------------------------------------------------------------------------------- 1 | //! A lazy type representing unparsed raw JSON text for lazy operations. 
2 | 3 | mod get; 4 | mod iterator; 5 | pub(crate) mod owned; 6 | pub(crate) mod value; 7 | 8 | #[doc(inline)] 9 | pub use self::{ 10 | get::{ 11 | get, get_from_bytes, get_from_bytes_unchecked, get_from_faststr, 12 | get_from_faststr_unchecked, get_from_slice, get_from_slice_unchecked, get_from_str, 13 | get_from_str_unchecked, get_many, get_many_unchecked, get_unchecked, 14 | }, 15 | iterator::{ 16 | to_array_iter, to_array_iter_unchecked, to_object_iter, to_object_iter_unchecked, 17 | ArrayJsonIter, ObjectJsonIter, 18 | }, 19 | owned::{LazyArray, LazyObject, OwnedLazyValue}, 20 | value::LazyValue, 21 | }; 22 | 23 | pub(crate) mod de; 24 | pub(crate) mod ser; 25 | pub(crate) const TOKEN: &str = "$sonic_rs::LazyValue"; 26 | 27 | pub(crate) const OWNED_LAZY_VALUE_TOKEN: &str = "$sonic::OwnedLv"; 28 | -------------------------------------------------------------------------------- /examples/lazyvalue.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use serde_json::value::RawValue; 3 | use sonic_rs::{LazyValue, OwnedLazyValue}; 4 | 5 | fn main() { 6 | let input = r#"{ "borrowed": "hello", "owned": "world" }"#; 7 | 8 | // use sonic_rs 9 | #[derive(Debug, Deserialize, Serialize)] 10 | struct TestLazyValue<'a> { 11 | #[serde(borrow)] 12 | borrowed: LazyValue<'a>, 13 | owned: OwnedLazyValue, 14 | } 15 | let data: TestLazyValue = sonic_rs::from_str(input).unwrap(); 16 | assert_eq!(data.borrowed.as_raw_str(), "\"hello\""); 17 | 18 | // use serde_json 19 | #[derive(Debug, Deserialize, Serialize)] 20 | struct TestRawValue<'a> { 21 | #[serde(borrow)] 22 | borrowed: &'a RawValue, 23 | owned: Box, 24 | } 25 | 26 | let data: TestRawValue = serde_json::from_str(input).unwrap(); 27 | assert_eq!(data.borrowed.get(), "\"hello\""); 28 | assert_eq!(data.owned.get(), "\"world\""); 29 | } 30 | -------------------------------------------------------------------------------- /ROADMAP.md: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | # Sonic-rs RoadMap 4 | 5 | This document shows the key roadmap of `sonic-rs` development. It may help users learn more about upcoming features. However, the actual work is driven by real-world needs, so we may adjust our goals from time to time. 6 | 7 | ## Stability 8 | 9 | 1. ~~support utf-8 validation~~ 10 | 11 | 2. ~~add more fuzzing tests~~ 12 | 13 | 3. raise unit test coverage to 90% 14 | 15 | 16 | ## Portability 17 | 18 | 0. ~~make sonic-rs support stable Rust~~ 19 | 20 | 1. ~~optimize the performance in aarch64 (WIP: 50%)~~ 21 | 22 | 2. runtime CPU detection 23 | 24 | 3. ~~support fallback in unsupported arch~~ 25 | 26 | 27 | ## Features 28 | 29 | 1. support more JSON RFCs: 30 | - [`JSON Path`](https://datatracker.ietf.org/wg/jsonpath/about/). 31 | - [`JSON Merge Patch`](https://www.rfc-editor.org/rfc/rfc7396). 32 | 33 | 2. support the `Deserializer` trait for document (document can be deserialized into a Rust type). 34 | 35 | ## Performance 36 | 37 | 1. support zero-copy for FastStr 38 | 39 | 2. maybe reimplement the `Deserialize` or `Serialize` trait? 
-------------------------------------------------------------------------------- /examples/get_many.rs: -------------------------------------------------------------------------------- 1 | use sonic_rs::pointer; 2 | 3 | fn main() { 4 | let json = r#" 5 | {"u": 123, "a": {"b" : {"c": [null, "found"]}}}"#; 6 | 7 | // build a pointer tree, representing multiple json path 8 | let mut tree = sonic_rs::PointerTree::new(); 9 | 10 | tree.add_path(&["u"]); 11 | tree.add_path(&["unknown_key"]); 12 | tree.add_path(pointer!["a", "b", "c", 1]); 13 | 14 | let nodes = unsafe { sonic_rs::get_many_unchecked(json, &tree) }; 15 | 16 | match nodes { 17 | Ok(vals) => { 18 | assert_eq!(vals[0].as_ref().unwrap().as_raw_str(), "123"); 19 | assert!(vals[1].is_none()); 20 | assert_eq!(vals[2].as_ref().unwrap().as_raw_str(), "\"found\""); 21 | for val in vals { 22 | match val { 23 | Some(_) => println!("{}", val.as_ref().unwrap().as_raw_str()), 24 | None => println!("None"), 25 | }; 26 | } 27 | } 28 | Err(e) => { 29 | println!("err: {e:?}") 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/canada.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap as Map; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | use crate::enum_str; 6 | 7 | pub type Canada = FeatureCollection; 8 | 9 | #[derive(Serialize, Deserialize)] 10 | #[serde(deny_unknown_fields)] 11 | pub struct FeatureCollection { 12 | #[serde(rename = "type")] 13 | pub obj_type: ObjType, 14 | pub features: Vec, 15 | } 16 | 17 | #[derive(Serialize, Deserialize)] 18 | #[serde(deny_unknown_fields)] 19 | pub struct Feature { 20 | #[serde(rename = "type")] 21 | pub obj_type: ObjType, 22 | pub properties: Map, 23 | pub geometry: Geometry, 24 | } 25 | 26 | #[derive(Serialize, Deserialize)] 27 | #[serde(deny_unknown_fields)] 28 | pub struct Geometry { 29 | #[serde(rename = "type")] 30 | pub 
obj_type: ObjType, 31 | pub coordinates: Vec>, 32 | } 33 | 34 | pub type Latitude = f32; 35 | pub type Longitude = f32; 36 | 37 | enum_str!(ObjType { 38 | FeatureCollection("FeatureCollection"), 39 | Feature("Feature"), 40 | Polygon("Polygon"), 41 | }); 42 | -------------------------------------------------------------------------------- /licenses/serde_json/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/licences/LICENSE-json-benchmark: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/empty.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; 4 | 5 | #[derive(Clone, Copy)] 6 | pub struct Array; 7 | 8 | impl Serialize for Array { 9 | fn serialize(&self, serializer: S) -> Result 10 | where 11 | S: Serializer, 12 | { 13 | [(); 0].serialize(serializer) 14 | } 15 | } 16 | 17 | impl<'de> Deserialize<'de> for Array { 18 | fn deserialize(deserializer: D) -> Result 19 | where 20 | D: Deserializer<'de>, 21 | { 22 | struct Visitor; 23 | 24 | impl<'de> de::Visitor<'de> for Visitor { 25 | type Value = Array; 26 | 27 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 28 | formatter.write_str("empty array") 29 | } 30 | 31 | fn visit_seq(self, _: V) -> Result 32 | where 33 | V: de::SeqAccess<'de>, 34 | { 35 | Ok(Array) 36 | } 37 | } 38 | 39 | deserializer.deserialize_tuple(0, Visitor) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /licenses/LICENSE-yyjson: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 YaoYuan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /sonic-number/licenses/LICENSE-yyjson: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 YaoYuan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /examples/json_filter.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | use faststr::FastStr; 4 | use serde::{ser::SerializeMap, Serializer}; 5 | use sonic_rs::{to_object_iter, writer::WriteExt}; 6 | 7 | #[allow(clippy::mutable_key_type)] 8 | fn filter_json(json: &str, keys: HashSet, w: W) -> sonic_rs::Result<()> { 9 | // create a new serialize from writer 10 | let mut outer = sonic_rs::Serializer::new(w); 11 | 12 | // begin to serialize a map 13 | let mut maper = outer.serialize_map(None)?; 14 | for ret in to_object_iter(json) { 15 | let (name, value) = ret.expect("invalid json"); 16 | if keys.contains(name.as_ref()) { 17 | maper.serialize_entry(&name, &value)?; 18 | } 19 | } 20 | maper.end() 21 | } 22 | 23 | fn main() { 24 | let json = r#"{ 25 | "a": 1, 26 | "b": "2", 27 | "c": [3, 4, 5], 28 | "d": { 29 | "e": 6, 30 | "f": "7", 31 | "g": [8, 9, 10] 32 | } 33 | }"#; 34 | #[allow(clippy::mutable_key_type)] 35 | let keys = ["a", "c"].iter().map(|s| FastStr::from(*s)).collect(); 36 | let mut buf = Vec::new(); 37 | filter_json(json, keys, &mut buf).unwrap(); 38 | assert_eq!(buf, br#"{"a":1,"c":[3, 4, 5]}"#); 39 | } 40 | -------------------------------------------------------------------------------- /sonic-simd/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_camel_case_types)] 2 | 3 | pub mod bits; 4 | mod traits; 5 | 6 | // pick v128 simd 7 | cfg_if::cfg_if! { 8 | if #[cfg(target_feature = "sse2")] { 9 | mod sse2; 10 | use self::sse2::*; 11 | } else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] { 12 | pub mod neon; 13 | use self::neon::*; 14 | } else { 15 | // TODO: support wasm 16 | mod v128; 17 | use self::v128::*; 18 | } 19 | } 20 | 21 | // pick v256 simd 22 | cfg_if::cfg_if! 
{ 23 | if #[cfg(target_feature = "avx2")] { 24 | mod avx2; 25 | use self::avx2::*; 26 | } else { 27 | mod v256; 28 | use self::v256::*; 29 | } 30 | } 31 | 32 | pub use self::traits::{BitMask, Mask, Simd}; 33 | // pick v512 simd 34 | cfg_if::cfg_if! { 35 | if #[cfg(all(target_feature = "avx512f", feature = "avx512"))] { 36 | mod avx512; 37 | use self::avx512::*; 38 | } else { 39 | mod v512; 40 | use self::v512::*; 41 | } 42 | } 43 | 44 | pub type u8x16 = Simd128u; 45 | pub type u8x32 = Simd256u; 46 | pub type u8x64 = Simd512u; 47 | 48 | pub type i8x16 = Simd128i; 49 | pub type i8x32 = Simd256i; 50 | pub type i8x64 = Simd512i; 51 | 52 | pub type m8x32 = Mask256; 53 | -------------------------------------------------------------------------------- /profile/README.md: -------------------------------------------------------------------------------- 1 | ## Hi there 👋 2 | 3 | 🙋‍♀️ A short introduction - CloudWeGo is an open-source middleware set launched by ByteDance that can be used to quickly build enterprise-class cloud native architectures. The common characteristics of CloudWeGo projects are high performance, high scalability, high reliability and focusing on microservices communication and governance. 4 | 5 | 🌈 Community Membership - the [Responsibilities and Requirements](https://github.com/cloudwego/community/blob/main/COMMUNITY_MEMBERSHIP.md) of contributor roles in CloudWeGo. 
6 | 7 | 👩‍💻 Useful resources - [Portal](https://www.cloudwego.io/), [Community](https://www.cloudwego.io/zh/community/), [Blogs](https://www.cloudwego.io/zh/blog/), [Use Cases](https://www.cloudwego.io/zh/cooperation/) 8 | 9 | 🍿 Security - [Vulnerability Reporting](https://www.cloudwego.io/zh/security/vulnerability-reporting/), [Safety Bulletin](https://www.cloudwego.io/zh/security/safety-bulletin/) 10 | 11 | 🌲 Ecosystem - [Kitex-contrib](https://github.com/kitex-contrib), [Hertz-contrib](https://github.com/hertz-contrib), [Volo-rs](https://github.com/volo-rs) 12 | 13 | 🎊 Example - [kitex-example](https://github.com/cloudwego/kitex-examples), [hertz-example](https://github.com/cloudwego/hertz-examples), [biz-demo](https://github.com/cloudwego/biz-demo), [netpoll-example](https://github.com/cloudwego/netpoll-examples) 14 | -------------------------------------------------------------------------------- /docs/for_Golang_user_zh.md: -------------------------------------------------------------------------------- 1 | ## Golang 迁移 Rust 2 | 3 | 目前版本: 4 | 5 | `sonic-rs = "0.3"` 6 | 7 | 对应 API 参考: 8 | 9 | - 解析到 Golang 结构体等强类型: 10 | 11 | sonic-go/encoding-json Unmarshal => sonic_rs::from_str/from_slice 12 | 13 | sonic-go/encoding-json Marshal => sonic_rs::to_string/to_vec 等 14 | 15 | - 解析到 Golang `interface{}/any` 或 sonic-go `ast.Node` 16 | 17 | 建议使用 `sonic_rs::Value` 替换,性能更优。 18 | 19 | - 使用 `gjson.Get` 或 `jsonparser.Get` 等API: 20 | gjson/jsonparser get API 本身未做严格的JSON 校验,因此可以使用 `sonic_rs::get_unchecked` 进行平替。 sonic_rs get API 会返回一个 `Result`. 如果没有找到该字段,会报错。 21 | 22 | `LazyValue` 可以用 `as_bool, as_str`等将 JSON 进一步**解析成对应的类型**。 23 | 24 | 如果只需要拿到原始的raw JSON, ***不做解析***,请使用 `as_raw_str, as_raw_faststr` 等 API. 参考例子: [get_from.rs](../examples/get_from.rs) 25 | 26 | 如果需要从 JSON 中拿到多个字段,推荐使用 `get_many`. 
参考例子: [get_many.rs](../examples/get_many.rs) 27 | 28 | - 使用 `gjson.ForEach` or `jsonparser.ObjectEach/ArrayEach` 等API: 29 | 30 | 这些 API 也没有对原始 JSON 做严格校验。因此可以使用 `sonic_rs::to_array/object_iter_unchecked` 等进行平替。参考例子 [iterator.rs](../examples/iterator.rs) 31 | 32 | - 解析到 Golang JsonNumber: 33 | 34 | 请直接使用 `sonic_rs::RawNumber` 35 | 36 | - 解析到 Golang RawMessage: 37 | 38 | 请直接使用 `sonic_rs::LazyValue<'a>`, 生命周期和输入的JSON绑定,会尽可能减少拷贝开销。如果不想带生命周期,可以使用 `sonic_rs::OwnedLazyValue`. 例如: [lazyvalue.rs](../examples/lazyvalue.rs) 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /bindings/ffi/include/sonic_ffi.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define SONIC_RS_DESERIALIZE_USE_RAWNUMBER 2 7 | 8 | #define SONIC_RS_DESERIALIZE_UTF8_LOSSY 4 9 | 10 | #define SONIC_RS_SERIALIZE_PRETTY 1 11 | 12 | /** 13 | * A string allocated in Rust, ending with `\0`. Used for serialize output and error message. 14 | */ 15 | typedef struct SonicCString { 16 | const void *buf; 17 | uintptr_t len; 18 | } SonicCString; 19 | 20 | typedef struct SonicDeserializeRet { 21 | const void *value; 22 | struct SonicCString err; 23 | } SonicDeserializeRet; 24 | 25 | typedef struct SonicSerializeRet { 26 | struct SonicCString json; 27 | struct SonicCString err; 28 | } SonicSerializeRet; 29 | 30 | /** 31 | * # Safety 32 | * 33 | * The caller should drop the returned `value` or `err`. 34 | */ 35 | struct SonicDeserializeRet sonic_rs_deserialize_value(const char *json, 36 | uintptr_t len, 37 | uint64_t cfg); 38 | 39 | /** 40 | * # Safety 41 | * 42 | * The caller should drop the returned `json` or `err`. 
43 | */ 44 | struct SonicSerializeRet sonic_rs_serialize_value(const void *value, uint64_t cfg); 45 | 46 | /** 47 | * # Safety 48 | */ 49 | void sonic_rs_drop_value(void *value); 50 | 51 | /** 52 | * # Safety 53 | */ 54 | void sonic_rs_drop_string(uint8_t *buf, uint64_t len); 55 | -------------------------------------------------------------------------------- /examples/value.rs: -------------------------------------------------------------------------------- 1 | // Parse json into sonic_rs `Value`. 2 | 3 | use sonic_rs::{from_str, json, pointer, JsonValueMutTrait, JsonValueTrait, Value}; 4 | 5 | fn main() { 6 | let json = r#"{ 7 | "name": "Xiaoming", 8 | "obj": {}, 9 | "arr": [], 10 | "age": 18, 11 | "address": { 12 | "city": "Beijing" 13 | }, 14 | "phones": [ 15 | "+123456" 16 | ] 17 | }"#; 18 | 19 | let mut root: Value = from_str(json).unwrap(); 20 | 21 | // get key from value 22 | let age = root.get("age").as_i64(); 23 | assert_eq!(age.unwrap_or_default(), 18); 24 | 25 | // get by index 26 | let first = root["phones"][0].as_str().unwrap(); 27 | assert_eq!(first, "+123456"); 28 | 29 | // get by pointer 30 | let phones = root.pointer(pointer!["phones", 0]); 31 | assert_eq!(phones.as_str().unwrap(), "+123456"); 32 | 33 | // convert to mutable object 34 | let obj = root.as_object_mut().unwrap(); 35 | obj.insert(&"inserted", true); 36 | assert!(obj.contains_key(&"inserted")); 37 | 38 | let mut object = json!({ "A": 65, "B": 66, "C": 67 }); 39 | *object.get_mut("A").unwrap() = json!({ 40 | "code": 123, 41 | "success": false, 42 | "payload": {} 43 | }); 44 | 45 | let mut val = json!(["A", "B", "C"]); 46 | *val.get_mut(2).unwrap() = json!("D"); 47 | 48 | // serialize 49 | assert_eq!(serde_json::to_string(&val).unwrap(), r#"["A","B","D"]"#); 50 | } 51 | -------------------------------------------------------------------------------- /examples/iterator.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use 
faststr::FastStr; 3 | use sonic_rs::{to_array_iter, to_object_iter_unchecked, JsonValueTrait}; 4 | fn main() { 5 | let json = Bytes::from(r#"[1, 2, 3, 4, 5, 6]"#); 6 | let iter = to_array_iter(&json); 7 | for (i, v) in iter.enumerate() { 8 | assert_eq!(i + 1, v.as_u64().unwrap() as usize); 9 | } 10 | 11 | let json = Bytes::from(r#"[1, 2, 3, 4, 5, 6"#); 12 | let iter = to_array_iter(&json); 13 | for elem in iter { 14 | // do something for each elem 15 | 16 | // deal with errors when invalid json 17 | if elem.is_err() { 18 | assert!(elem.err().unwrap().to_string().starts_with( 19 | "Expected this character to be either a ',' or a ']' while parsing at line 1 \ 20 | column 17" 21 | )); 22 | } 23 | } 24 | 25 | let json = FastStr::from(r#"{"a": null, "b":[1, 2, 3]}"#); 26 | let iter = unsafe { to_object_iter_unchecked(&json) }; 27 | for ret in iter { 28 | // deal with errors 29 | if let Err(e) = ret { 30 | println!("{}", e); 31 | return; 32 | } 33 | 34 | let (k, v) = ret.unwrap(); 35 | if k == "a" { 36 | assert!(v.is_null()); 37 | } else if k == "b" { 38 | let iter = to_array_iter(v.as_raw_str()); 39 | for (i, v) in iter.enumerate() { 40 | assert_eq!(i + 1, v.as_u64().unwrap() as usize); 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/handle_error.rs: -------------------------------------------------------------------------------- 1 | use sonic_rs::{from_slice, from_str, Deserialize}; 2 | 3 | fn main() { 4 | #[allow(dead_code)] 5 | #[derive(Debug, Deserialize)] 6 | struct Foo { 7 | a: Vec, 8 | c: String, 9 | } 10 | 11 | // deal with Eof errors 12 | let err = from_str::("{\"a\": [").unwrap_err(); 13 | assert!(err.is_eof()); 14 | eprintln!("{err}"); 15 | // EOF while parsing at line 1 column 7 16 | 17 | // {"a": [ 18 | // ......^ 19 | assert_eq!( 20 | format!("{err}"), 21 | "EOF while parsing at line 1 column 7\n\n\t{\"a\": [\n\t......^\n" 22 | ); 23 | 24 | // deal with unmatched type errors 25 | 
let err = from_str::("{ \"b\":[]}").unwrap_err(); 26 | eprintln!("{err}"); 27 | assert!(err.is_unmatched_type()); 28 | // println as follows: 29 | // missing field `a` at line 1 column 9 30 | // 31 | // { "b":[]} 32 | // ........^ 33 | assert_eq!( 34 | format!("{err}"), 35 | "missing field `a` at line 1 column 9\n\n\t{ \"b\":[]}\n\t........^\n" 36 | ); 37 | 38 | // deal with Syntax errors 39 | let err = from_slice::(b"{\"b\":\"\x80\"}").unwrap_err(); 40 | eprintln!("{err}"); 41 | assert!(err.is_syntax()); 42 | // println as follows: 43 | // Invalid UTF-8 characters in json at line 1 column 7 44 | // 45 | // {"b":"�"} 46 | // ......^.. 47 | assert_eq!( 48 | format!("{err}"), 49 | "Invalid UTF-8 characters in json at line 1 column 7\n\n\t{\"b\":\"�\"}\n\t......^..\n" 50 | ); 51 | } 52 | -------------------------------------------------------------------------------- /benchmarks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2024" 3 | name = "json_benchmarks" 4 | version = "0.1.0" 5 | 6 | [dependencies] 7 | 8 | 9 | [profile.release] 10 | codegen-units = 1 11 | debug = true 12 | debug-assertions = false 13 | incremental = false 14 | lto = true 15 | opt-level = 3 16 | overflow-checks = false 17 | panic = 'unwind' 18 | rpath = false 19 | 20 | 21 | [target.'cfg(not(target_env = "msvc"))'.dev-dependencies] 22 | jemallocator = "0.5" 23 | 24 | [dev-dependencies] 25 | core_affinity = "0.8" 26 | criterion = { version = "0.7", features = ["html_reports"] } 27 | encoding_rs = "0.8" 28 | gjson = "0.8" 29 | serde = { version = "1.0", features = ["rc", "derive"] } 30 | serde_derive = "1.0" 31 | serde_json = { version = "1.0", features = ["float_roundtrip", "raw_value"] } 32 | simd-json = "0.17" 33 | sonic-rs = { path = "../" } 34 | # This schema is used in benches and was copied from https://github.com/serde-rs/json-benchmark 35 | bytes = { version = "1.10", features = ["serde"] } 36 | chrono = { version = 
"0.4", features = ["serde"] } 37 | paste = "1.0" 38 | schema = { path = "benches/schema" } 39 | serde_bytes = "0.11" 40 | 41 | [[bench]] 42 | harness = false 43 | name = "deserialize_struct" 44 | 45 | [[bench]] 46 | harness = false 47 | name = "deserialize_value" 48 | 49 | [[bench]] 50 | harness = false 51 | name = "serialize_struct" 52 | 53 | [[bench]] 54 | harness = false 55 | name = "serialize_value" 56 | 57 | [[bench]] 58 | harness = false 59 | name = "get_from" 60 | 61 | [[bench]] 62 | harness = false 63 | name = "value_operator" 64 | 65 | [features] 66 | default = [] 67 | -------------------------------------------------------------------------------- /src/value/visitor.rs: -------------------------------------------------------------------------------- 1 | pub(crate) trait JsonVisitor<'de> { 2 | fn visit_dom_start(&mut self) -> bool { 3 | false 4 | } 5 | 6 | fn visit_null(&mut self) -> bool { 7 | false 8 | } 9 | 10 | fn visit_bool(&mut self, _val: bool) -> bool { 11 | false 12 | } 13 | 14 | #[allow(dead_code)] 15 | fn visit_u64(&mut self, _val: u64) -> bool { 16 | false 17 | } 18 | 19 | #[allow(dead_code)] 20 | fn visit_i64(&mut self, _val: i64) -> bool { 21 | false 22 | } 23 | 24 | #[allow(dead_code)] 25 | fn visit_f64(&mut self, _val: f64) -> bool { 26 | false 27 | } 28 | 29 | #[allow(dead_code)] 30 | fn visit_raw_number(&mut self, _val: &str) -> bool { 31 | false 32 | } 33 | 34 | #[allow(dead_code)] 35 | fn visit_borrowed_raw_number(&mut self, _val: &str) -> bool { 36 | false 37 | } 38 | 39 | fn visit_str(&mut self, _value: &str) -> bool { 40 | false 41 | } 42 | 43 | fn visit_borrowed_str(&mut self, _value: &'de str) -> bool { 44 | false 45 | } 46 | 47 | fn visit_object_start(&mut self, _hint: usize) -> bool { 48 | false 49 | } 50 | 51 | fn visit_object_end(&mut self, _len: usize) -> bool { 52 | false 53 | } 54 | 55 | fn visit_array_start(&mut self, _hint: usize) -> bool { 56 | false 57 | } 58 | 59 | fn visit_array_end(&mut self, _len: usize) -> bool { 60 
| false 61 | } 62 | 63 | #[allow(dead_code)] 64 | fn visit_key(&mut self, _key: &str) -> bool { 65 | false 66 | } 67 | 68 | #[allow(dead_code)] 69 | fn visit_borrowed_key(&mut self, _key: &'de str) -> bool { 70 | false 71 | } 72 | 73 | fn visit_dom_end(&mut self) -> bool { 74 | false 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/prim_str.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, fmt::Display, str::FromStr}; 2 | 3 | use serde::{ 4 | de::{self, Deserialize, Deserializer, Unexpected}, 5 | ser::{Serialize, Serializer}, 6 | }; 7 | 8 | #[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq)] 9 | pub struct PrimStr(T) 10 | where 11 | T: Copy + Ord + Display + FromStr; 12 | 13 | impl Serialize for PrimStr 14 | where 15 | T: Copy + Ord + Display + FromStr, 16 | { 17 | fn serialize(&self, serializer: S) -> Result 18 | where 19 | S: Serializer, 20 | { 21 | serializer.collect_str(&self.0) 22 | } 23 | } 24 | 25 | impl<'de, T> Deserialize<'de> for PrimStr 26 | where 27 | T: Copy + Ord + Display + FromStr, 28 | { 29 | fn deserialize(deserializer: D) -> Result 30 | where 31 | D: Deserializer<'de>, 32 | { 33 | use std::marker::PhantomData; 34 | struct Visitor(PhantomData); 35 | 36 | impl<'de, T> de::Visitor<'de> for Visitor 37 | where 38 | T: Copy + Ord + Display + FromStr, 39 | { 40 | type Value = PrimStr; 41 | 42 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 43 | formatter.write_str("number represented as string") 44 | } 45 | 46 | fn visit_str(self, value: &str) -> Result, E> 47 | where 48 | E: de::Error, 49 | { 50 | match T::from_str(value) { 51 | Ok(id) => Ok(PrimStr(id)), 52 | Err(_) => Err(E::invalid_value(Unexpected::Str(value), &self)), 53 | } 54 | } 55 | } 56 | 57 | deserializer.deserialize_str(Visitor(PhantomData)) 58 | } 59 | } 60 | 
-------------------------------------------------------------------------------- /src/value/tls_buffer.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | mem::{size_of, ManuallyDrop}, 3 | ptr::NonNull, 4 | }; 5 | 6 | use super::node::Value; 7 | 8 | // Using a `const` initializer makes thread-local access faster. 9 | 10 | thread_local! { 11 | static NODE_BUF: std::cell::RefCell>> = const { std::cell::RefCell::new(Vec::new()) }; 12 | } 13 | 14 | /// A thread-local buffer for temporary nodes. Avoids allocating temporary memory multiple times. 15 | pub struct TlsBuf { 16 | buf: NonNull>>, 17 | need_drop: bool, 18 | } 19 | 20 | impl TlsBuf { 21 | const MAX_TLS_SIZE: usize = (3 << 20) / size_of::(); // 3 MiB 22 | 23 | #[inline] 24 | pub fn with_capacity(n: usize) -> Self { 25 | if n >= Self::MAX_TLS_SIZE { 26 | let vec = Box::into_raw(Box::new(Vec::with_capacity(n))); 27 | Self { 28 | buf: unsafe { NonNull::new_unchecked(vec) }, 29 | need_drop: true, 30 | } 31 | } else { 32 | let vec = NODE_BUF.with(|buf| { 33 | let mut nodes = buf.borrow_mut(); 34 | nodes.clear(); 35 | nodes.reserve(n); 36 | (&mut *nodes) as *mut Vec> 37 | }); 38 | 39 | Self { 40 | buf: unsafe { NonNull::new_unchecked(vec) }, 41 | need_drop: false, 42 | } 43 | } 44 | } 45 | 46 | #[inline] 47 | pub fn as_vec_mut(&mut self) -> &mut Vec> { 48 | unsafe { self.buf.as_mut() } 49 | } 50 | } 51 | 52 | impl Drop for TlsBuf { 53 | fn drop(&mut self) { 54 | if self.need_drop { 55 | let boxed: Box>> = unsafe { Box::from_raw(self.buf.as_ptr()) }; 56 | drop(boxed); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/pointer/point.rs: -------------------------------------------------------------------------------- 1 | use faststr::FastStr; 2 | 3 | /// Represents a json pointer path. It can be created by the [`pointer!`] macro. 4 | pub type JsonPointer = [PointerNode]; 5 | 6 | /// Represents a node in a json pointer path. 
7 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 8 | pub enum PointerNode { 9 | Key(FastStr), 10 | Index(usize), 11 | } 12 | 13 | /// Represents a json pointer path. 14 | /// 15 | /// Used for indexing a [`Value`][`crate::Value`], [`LazyValue`][`crate::LazyValue`], 16 | /// [`get`][`crate::get`] or [`get_unchecked`][`crate::get_unchecked`]. 17 | /// 18 | /// The path can include both keys and indexes. 19 | /// - keys: string-like, used for indexing an object. 20 | /// - indexes: usize-like, used for indexing an array. 21 | /// 22 | /// # Examples 23 | /// 24 | /// ``` 25 | /// # use sonic_rs::pointer; 26 | /// use sonic_rs::JsonValueTrait; 27 | /// 28 | /// let value: sonic_rs::Value = sonic_rs::from_str( 29 | /// r#"{ 30 | /// "foo": [ 31 | /// 0, 32 | /// 1, 33 | /// { 34 | /// "bar": 123 35 | /// } 36 | /// ] 37 | /// }"#, 38 | /// ) 39 | /// .unwrap(); 40 | /// let path = pointer!["foo", 2, "bar"]; 41 | /// 42 | /// let got = value.pointer(&path).unwrap(); 43 | /// 44 | /// assert_eq!(got, 123); 45 | /// ``` 46 | #[macro_export] 47 | macro_rules! pointer { 48 | () => ( 49 | ([] as [$crate::PointerNode; 0]) 50 | ); 51 | ($($x:expr),+ $(,)?) 
=> ( 52 | [$($crate::PointerNode::from($x)),+] 53 | ); 54 | } 55 | 56 | #[cfg(test)] 57 | mod test { 58 | #[test] 59 | fn test_json_pointer() { 60 | let pointers = pointer![]; 61 | println!("{pointers:?}"); 62 | let mut pointers = pointer![1, 2, 3, "foo", "bar"].to_vec(); 63 | pointers.push(123.into()); 64 | println!("{pointers:?}"); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /docs/for_Golang_user.md: -------------------------------------------------------------------------------- 1 | ## Golang to Rust migration 2 | 3 | Current version: 4 | 5 | `sonic-rs = "0.3"` 6 | 7 | Corresponding API references: 8 | 9 | - Parsing into Golang structures or strong types: 10 | 11 | sonic-go/encoding-json Unmarshal => sonic_rs::from_str/from_slice 12 | 13 | sonic-go/encoding-json Marshal => sonic_rs::to_string/to_vec, etc. 14 | 15 | - Parsing into Golang `interface{}/any` or sonic-go `ast.Node`: 16 | 17 | It is recommended to replace it with `sonic_rs::Value` for better performance. 18 | 19 | - Using `gjson.Get` or `jsonparser.Get` APIs: 20 | 21 | The gjson/jsonparser get API itself does not perform strict JSON validation, so you can use `sonic_rs::get_unchecked` for replacement. 22 | 23 | The sonic_rs get API will return a `Result`. 24 | 25 | `LazyValue` can be further ***parsed into the corresponding type*** by using `as_bool, as_str`, etc. 26 | 27 | If you need to get the original raw JSON, ***without parsing***, please use `as_raw_str, as_raw_slice` API. Refer to the example: [get_from.rs](../examples/get_from.rs) 28 | 29 | If you need to get multiple fields from JSON, it is recommended to use `get_many`. Reference example: [get_many.rs](../examples/get_many.rs) 30 | 31 | - Using `gjson.ForEach` or `jsonparser.ObjectEach/ArrayEach` 32 | 33 | These APIs also do not perform strict JSON validation, so you can use `sonic_rs::to_object/array_iter_unchecked` for replacement. 
Refer to the example [iterator.rs](../examples/iterator.rs) 34 | 35 | - Parsing into Golang `json.Number`: 36 | 37 | Please use `sonic_rs::RawNumber` directly. 38 | 39 | - Parsing into Golang `json.RawMessage`: 40 | 41 | Please use `sonic_rs::LazyValue<'a>` directly. Its lifetime is tied to the original JSON. If you need an owned value, please use `sonic_rs::OwnedLazyValue`. For an example, see [lazyvalue.rs](../examples/lazyvalue.rs) 42 | -------------------------------------------------------------------------------- /benchmarks/benches/get_from.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | use std::io::Read; 4 | 5 | use criterion::{criterion_group, BatchSize, Criterion}; 6 | 7 | fn bench_get(c: &mut Criterion) { 8 | let core_ids = core_affinity::get_core_ids().unwrap(); 9 | core_affinity::set_for_current(core_ids[0]); 10 | 11 | let mut data = Vec::new(); 12 | let root = env!("CARGO_MANIFEST_DIR").to_owned(); 13 | std::fs::File::open(root + concat!("/benches/testdata/twitter.json")) 14 | .unwrap() 15 | .read_to_end(&mut data) 16 | .unwrap(); 17 | let data = unsafe { std::str::from_utf8_unchecked(&data) }; 18 | 19 | // verify sonic-rs parse 20 | let rpath = ["search_metadata", "count"]; 21 | let gpath = "search_metadata.count"; 22 | let gout = gjson::get(data, gpath); 23 | let rout = unsafe { sonic_rs::get_unchecked(data, &rpath) }; 24 | assert_eq!(rout.unwrap().as_raw_str(), gout.str()); 25 | 26 | let mut group = c.benchmark_group("twitter"); 27 | 28 | group.bench_with_input("sonic-rs::get_unchecked_from_str", data, |b, data| { 29 | b.iter_batched( 30 | || data, 31 | |json| unsafe { sonic_rs::get_unchecked(json, &rpath) }, 32 | BatchSize::SmallInput, 33 | ) 34 | }); 35 | 36 | group.bench_with_input("sonic-rs::get_from_str", data, |b, data| { 37 | b.iter_batched( 38 | || data, 39 | |json| sonic_rs::get(json, &rpath), 40 | BatchSize::SmallInput, 41 | ) 42 | }); 43 | 44 | 
group.bench_with_input("gjson::get_from_str", data, |b, data| { 45 | b.iter_batched( 46 | || data, 47 | |json| gjson::get(json, gpath), 48 | BatchSize::SmallInput, 49 | ) 50 | }); 51 | } 52 | 53 | criterion_group!(benches, bench_get); 54 | criterion_main!(benches); 55 | -------------------------------------------------------------------------------- /src/util/arch/x86_64.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64::*; 2 | 3 | #[inline(always)] 4 | pub unsafe fn prefix_xor(bitmask: u64) -> u64 { 5 | unsafe { 6 | let all_ones = _mm_set1_epi8(-1i8); 7 | let result = _mm_clmulepi64_si128(_mm_set_epi64x(0, bitmask as i64), all_ones, 0); 8 | _mm_cvtsi128_si64(result) as u64 9 | } 10 | } 11 | 12 | #[inline(always)] 13 | pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 { 14 | unsafe { 15 | let lo: std::arch::x86_64::__m256i = _mm256_loadu_si256(data.as_ptr() as *const __m256i); 16 | let hi: std::arch::x86_64::__m256i = 17 | _mm256_loadu_si256(data.as_ptr().offset(32) as *const __m256i); 18 | let whitespace_data = _mm256_setr_epi8( 19 | b' ' as i8, 20 | 100, 21 | 100, 22 | 100, 23 | 17, 24 | 100, 25 | 113, 26 | 2, 27 | 100, 28 | b'\t' as i8, 29 | b'\n' as i8, 30 | 112, 31 | 100, 32 | b'\r' as i8, 33 | 100, 34 | 100, 35 | b' ' as i8, 36 | 100, 37 | 100, 38 | 100, 39 | 17, 40 | 100, 41 | 113, 42 | 2, 43 | 100, 44 | b'\t' as i8, 45 | b'\n' as i8, 46 | 112, 47 | 100, 48 | b'\r' as i8, 49 | 100, 50 | 100, 51 | ); 52 | let shuf_lo = _mm256_shuffle_epi8(whitespace_data, lo); 53 | let shuf_hi = _mm256_shuffle_epi8(whitespace_data, hi); 54 | let lo = _mm256_cmpeq_epi8(lo, shuf_lo); 55 | let hi = _mm256_cmpeq_epi8(hi, shuf_hi); 56 | let space = _mm256_movemask_epi8(lo) as u32 as u64 57 | | ((_mm256_movemask_epi8(hi) as u32 as u64) << 32); 58 | !space 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/enumstr.rs: 
-------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! enum_str { 3 | ($name:ident { $($variant:ident($str:expr), )* }) => { 4 | #[derive(Clone, Copy)] 5 | pub enum $name { 6 | $($variant,)* 7 | } 8 | 9 | impl $name { 10 | fn as_str(self) -> &'static str { 11 | match self { 12 | $( $name::$variant => $str, )* 13 | } 14 | } 15 | } 16 | 17 | impl ::serde::Serialize for $name { 18 | fn serialize(&self, serializer: S) -> Result 19 | where S: ::serde::Serializer, 20 | { 21 | serializer.serialize_str(self.as_str()) 22 | } 23 | } 24 | 25 | impl<'de> ::serde::Deserialize<'de> for $name { 26 | fn deserialize(deserializer: D) -> Result 27 | where D: ::serde::Deserializer<'de>, 28 | { 29 | struct Visitor; 30 | 31 | impl<'de> ::serde::de::Visitor<'de> for Visitor { 32 | type Value = $name; 33 | 34 | fn expecting(&self, formatter: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { 35 | formatter.write_str("unit variant") 36 | } 37 | 38 | fn visit_str(self, value: &str) -> Result<$name, E> 39 | where E: ::serde::de::Error, 40 | { 41 | match value { 42 | $( $str => Ok($name::$variant), )* 43 | _ => Err(E::invalid_value(::serde::de::Unexpected::Str(value), &self)), 44 | } 45 | } 46 | } 47 | 48 | deserializer.deserialize_str(Visitor) 49 | } 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /sonic-simd/src/traits.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{BitAnd, BitOr, BitOrAssign}; 2 | 3 | /// Portable SIMD traits 4 | pub trait Simd: Sized { 5 | const LANES: usize; 6 | 7 | type Element; 8 | type Mask: Mask; 9 | 10 | /// # Safety 11 | unsafe fn from_slice_unaligned_unchecked(slice: &[u8]) -> Self { 12 | debug_assert!(slice.len() >= Self::LANES); 13 | Self::loadu(slice.as_ptr()) 14 | } 15 | 16 | /// # Safety 17 | unsafe fn write_to_slice_unaligned_unchecked(&self, slice: &mut [u8]) { 18 | 
debug_assert!(slice.len() >= Self::LANES); 19 | self.storeu(slice.as_mut_ptr()); 20 | } 21 | 22 | /// # Safety 23 | unsafe fn loadu(ptr: *const u8) -> Self; 24 | 25 | /// # Safety 26 | unsafe fn storeu(&self, ptr: *mut u8); 27 | 28 | fn eq(&self, rhs: &Self) -> Self::Mask; 29 | 30 | fn splat(elem: Self::Element) -> Self; 31 | 32 | /// greater than 33 | fn gt(&self, rhs: &Self) -> Self::Mask; 34 | 35 | /// less than or equal 36 | fn le(&self, rhs: &Self) -> Self::Mask; 37 | } 38 | 39 | /// Portable SIMD mask traits 40 | pub trait Mask: Sized + BitOr + BitOrAssign + BitAnd { 41 | type Element; 42 | type BitMask: BitMask; 43 | 44 | fn bitmask(self) -> Self::BitMask; 45 | 46 | fn splat(b: bool) -> Self; 47 | } 48 | 49 | /// Trait for the bitmask of a vector Mask. 50 | pub trait BitMask { 51 | /// Total bits in the bitmask. 52 | const LEN: usize; 53 | 54 | /// get the offset of the first `1` bit. 55 | fn first_offset(&self) -> usize; 56 | 57 | /// check if this bitmask is before the other bitmask. 58 | fn before(&self, rhs: &Self) -> bool; 59 | 60 | /// convert the bitmask to little endian 61 | fn as_little_endian(&self) -> Self; 62 | 63 | /// whether all bits are zero. 64 | fn all_zero(&self) -> bool; 65 | 66 | /// clear high n bits. 
67 | fn clear_high_bits(&self, n: usize) -> Self; 68 | } 69 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(not(doctest), doc = include_str!("../README.md"))] 2 | #![allow(clippy::needless_lifetimes)] 3 | #![doc(test(attr(warn(unused))))] 4 | 5 | mod config; 6 | pub mod error; 7 | mod index; 8 | mod input; 9 | mod pointer; 10 | pub mod reader; 11 | mod util; 12 | 13 | pub mod format; 14 | pub mod lazyvalue; 15 | pub mod parser; 16 | pub mod serde; 17 | pub mod value; 18 | pub mod writer; 19 | 20 | // re-export FastStr 21 | pub use ::faststr::FastStr; 22 | // re-export the serde trait 23 | pub use ::serde::{Deserialize, Serialize}; 24 | #[doc(inline)] 25 | pub use reader::Read; 26 | 27 | #[doc(inline)] 28 | pub use crate::error::{Error, Result}; 29 | #[doc(inline)] 30 | pub use crate::index::Index; 31 | #[doc(inline)] 32 | pub use crate::input::JsonInput; 33 | #[doc(inline)] 34 | pub use crate::lazyvalue::{ 35 | get, get_from_bytes, get_from_bytes_unchecked, get_from_faststr, get_from_faststr_unchecked, 36 | get_from_slice, get_from_slice_unchecked, get_from_str, get_from_str_unchecked, get_many, 37 | get_many_unchecked, get_unchecked, to_array_iter, to_array_iter_unchecked, to_object_iter, 38 | to_object_iter_unchecked, ArrayJsonIter, LazyArray, LazyObject, LazyValue, ObjectJsonIter, 39 | OwnedLazyValue, 40 | }; 41 | #[doc(inline)] 42 | pub use crate::pointer::{JsonPointer, PointerNode, PointerTree}; 43 | #[doc(inline)] 44 | pub use crate::serde::de::{MapAccess, SeqAccess}; 45 | #[doc(inline)] 46 | pub use crate::serde::{ 47 | from_reader, from_slice, from_slice_unchecked, from_str, to_lazyvalue, to_string, 48 | to_string_pretty, to_vec, to_vec_pretty, to_writer, to_writer_pretty, Deserializer, 49 | JsonNumberTrait, Number, RawNumber, Serializer, StreamDeserializer, 50 | }; 51 | #[doc(inline)] 52 | pub use crate::value::{ 
53 | from_value, get::get_by_schema, to_value, Array, JsonContainerTrait, JsonType, 54 | JsonValueMutTrait, JsonValueTrait, Object, Value, ValueRef, 55 | }; 56 | 57 | pub mod prelude; 58 | -------------------------------------------------------------------------------- /src/value/tryfrom.rs: -------------------------------------------------------------------------------- 1 | use super::Value; 2 | use crate::{LazyValue, Number}; 3 | 4 | impl TryFrom for Value { 5 | type Error = crate::Error; 6 | 7 | /// Try to convert an f32 to `Value`. If the float is NaN or infinity, return an error. 8 | /// 9 | /// # Examples 10 | /// 11 | /// ``` 12 | /// use sonic_rs::{JsonValueTrait, Value}; 13 | /// 14 | /// let f1: f32 = 2.333; 15 | /// let x1: Value = f1.try_into().unwrap(); 16 | /// assert_eq!(x1, f1); 17 | /// 18 | /// let x2: Value = f32::INFINITY.try_into().unwrap_or_default(); 19 | /// let x3: Value = f32::NAN.try_into().unwrap_or_default(); 20 | /// 21 | /// assert!(x2.is_null() && x3.is_null()); 22 | /// ``` 23 | #[inline] 24 | fn try_from(value: f32) -> Result { 25 | Number::try_from(value).map(Into::into) 26 | } 27 | } 28 | 29 | impl TryFrom for Value { 30 | /// Try to convert an f64 to `Value`. If the float is NaN or infinity, return an error. 31 | /// 32 | /// # Examples 33 | /// 34 | /// ``` 35 | /// use sonic_rs::{JsonValueTrait, Value}; 36 | /// 37 | /// let f1: f64 = 2.333; 38 | /// let x1: Value = f1.try_into().unwrap(); 39 | /// assert_eq!(x1, 2.333); 40 | /// 41 | /// let x2: Value = f64::INFINITY.try_into().unwrap_or_default(); 42 | /// let x3: Value = f64::NAN.try_into().unwrap_or_default(); 43 | /// 44 | /// assert!(x2.is_null() && x3.is_null()); 45 | /// ``` 46 | type Error = crate::Error; 47 | #[inline] 48 | fn try_from(value: f64) -> Result { 49 | Number::try_from(value).map(Into::into) 50 | } 51 | } 52 | 53 | /// Try to parse a `LazyValue` into a `Value`. A `LazyValue` is always valid JSON, or at least it 54 | /// follows the JSON syntax. 
55 | /// 56 | /// However, in some cases parsing will fail and return an error, for example when a float number in 57 | /// the JSON is infinity. 58 | impl<'de> TryFrom> for Value { 59 | type Error = crate::Error; 60 | fn try_from(value: LazyValue<'de>) -> Result { 61 | crate::from_str(value.as_raw_str()) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/citm_catalog.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap as Map; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | use crate::{empty, prim_str::PrimStr}; 6 | 7 | #[derive(Serialize, Deserialize)] 8 | #[serde(deny_unknown_fields, rename_all = "camelCase")] 9 | pub struct CitmCatalog { 10 | pub area_names: Map, 11 | pub audience_sub_category_names: Map, 12 | pub block_names: Map, 13 | pub events: Map, 14 | pub performances: Vec, 15 | pub seat_category_names: Map, 16 | pub sub_topic_names: Map, 17 | pub subject_names: Map, 18 | pub topic_names: Map, 19 | pub topic_sub_topics: Map>, 20 | pub venue_names: Map, 21 | } 22 | 23 | pub type Id = u32; 24 | pub type IdStr = PrimStr; 25 | 26 | #[derive(Serialize, Deserialize)] 27 | #[serde(deny_unknown_fields, rename_all = "camelCase")] 28 | pub struct Event { 29 | pub description: (), 30 | pub id: Id, 31 | pub logo: Option, 32 | pub name: String, 33 | pub sub_topic_ids: Vec, 34 | pub subject_code: (), 35 | pub subtitle: (), 36 | pub topic_ids: Vec, 37 | } 38 | 39 | #[derive(Serialize, Deserialize)] 40 | #[serde(deny_unknown_fields, rename_all = "camelCase")] 41 | pub struct Performance { 42 | pub event_id: Id, 43 | pub id: Id, 44 | pub logo: Option, 45 | pub name: (), 46 | pub prices: Vec, 47 | pub seat_categories: Vec, 48 | pub seat_map_image: (), 49 | pub start: u64, 50 | pub venue_code: String, 51 | } 52 | 53 | #[derive(Serialize, Deserialize)] 54 | #[serde(deny_unknown_fields, rename_all = "camelCase")] 55 | pub struct Price 
{ 56 | pub amount: u32, 57 | pub audience_sub_category_id: Id, 58 | pub seat_category_id: Id, 59 | } 60 | 61 | #[derive(Serialize, Deserialize)] 62 | #[serde(deny_unknown_fields, rename_all = "camelCase")] 63 | pub struct SeatCategory { 64 | pub areas: Vec, 65 | pub seat_category_id: Id, 66 | } 67 | 68 | #[derive(Serialize, Deserialize)] 69 | #[serde(deny_unknown_fields, rename_all = "camelCase")] 70 | pub struct Area { 71 | pub area_id: Id, 72 | pub block_ids: empty::Array, 73 | } 74 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Volo Team "] 3 | categories = ["encoding", "parser-implementations"] 4 | description = "Sonic-rs is a fast Rust JSON library based on SIMD" 5 | documentation = "https://docs.rs/sonic-rs" 6 | edition = "2021" 7 | exclude = ["benchmarks", "assets", "bindings"] 8 | keywords = ["json", "simd", "serde", "serialization"] 9 | license = "Apache-2.0" 10 | name = "sonic-rs" 11 | readme = "README.md" 12 | repository = "https://github.com/cloudwego/sonic-rs" 13 | version = "0.5.6" 14 | 15 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 16 | 17 | [dependencies] 18 | ahash = "0.8" 19 | bumpalo = "3.19" 20 | bytes = "1.10" 21 | cfg-if = "1.0" 22 | faststr = { version = "0.2", features = ["serde"] } 23 | itoa = "1.0" 24 | ref-cast = "1.0" 25 | ryu = "1.0" 26 | serde = { version = "1.0", features = ["rc", "derive"] } 27 | simdutf8 = "0.1" 28 | sonic-number = { path = "./sonic-number", version = "0.1" } 29 | sonic-simd = { path = "./sonic-simd", version = "0.1" } 30 | thiserror = "2.0" 31 | 32 | [dev-dependencies] 33 | bytes = { version = "1.4", features = ["serde"] } 34 | chrono = { version = "0.4", features = ["serde"] } 35 | encoding_rs = "0.8" 36 | paste = "1.0" 37 | schema = { path = "./benchmarks/benches/schema" } 38 | serde_bytes = "0.11" 39 
| serde_derive = "1.0" 40 | serde_json = { version = "1.0", features = ["float_roundtrip", "raw_value"] } 41 | 42 | [features] 43 | default = [] 44 | 45 | # Use an arbitrary precision number type representation when parsing JSON into `sonic_rs::Value`. 46 | # This allows JSON numbers to be serialized without loss of precision. 47 | arbitrary_precision = [] 48 | 49 | # Sort the keys when serializing `sonic_rs::Value`. 50 | sort_keys = [] 51 | 52 | # Allow parsing JSON with invalid UTF-8 and UTF-16 characters. They will be replaced with `\uFFFD` (displayed as �). 53 | utf8_lossy = [] 54 | 55 | # Enable sanitize; this may cause a 30% performance loss when serializing. 56 | sanitize = [] 57 | 58 | # Serialize floating point numbers without trailing zeros if the float can be represented as an integer without loss of precision. 59 | # For example, `18.0` will be serialized as `18` instead of `18.0`. 60 | non_trailing_zero = [] 61 | 62 | # Enable avx512; requires Rust 1.89 or later and the `avx512f` target feature to be enabled 63 | avx512 = ["sonic-simd/avx512"] 64 | -------------------------------------------------------------------------------- /sonic-number/src/common.rs: -------------------------------------------------------------------------------- 1 | //! Common utilities, for internal use only. 2 | 3 | // The code is cloned from [rust-lang](https://github.com/rust-lang/rust) and modified where necessary. 4 | 5 | /// Helper methods to process immutable bytes. 6 | pub(crate) trait ByteSlice { 7 | /// Read 8 bytes as a 64-bit integer in little-endian order. 8 | fn read_u64(&self) -> u64; 9 | 10 | /// Write a 64-bit integer as 8 bytes in little-endian order. 11 | fn write_u64(&mut self, value: u64); 12 | 13 | /// Iteratively parse and consume digits from bytes. 14 | /// Returns the same bytes with consumed digits being 15 | /// elided. 
16 | fn parse_digits(&self, func: impl FnMut(u8)) -> &Self; 17 | } 18 | 19 | impl ByteSlice for [u8] { 20 | #[inline(always)] // inlining this is crucial to remove bound checks 21 | fn read_u64(&self) -> u64 { 22 | let mut tmp = [0; 8]; 23 | tmp.copy_from_slice(&self[..8]); 24 | u64::from_le_bytes(tmp) 25 | } 26 | 27 | #[inline(always)] // inlining this is crucial to remove bound checks 28 | fn write_u64(&mut self, value: u64) { 29 | self[..8].copy_from_slice(&value.to_le_bytes()) 30 | } 31 | 32 | #[inline] 33 | fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self { 34 | let mut s = self; 35 | 36 | // FIXME: Can't use s.split_first() here yet, 37 | // see https://github.com/rust-lang/rust/issues/109328 38 | while let [c, s_next @ ..] = s { 39 | let c = c.wrapping_sub(b'0'); 40 | if c < 10 { 41 | func(c); 42 | s = s_next; 43 | } else { 44 | break; 45 | } 46 | } 47 | 48 | s 49 | } 50 | } 51 | 52 | /// Determine if 8 bytes are all decimal digits. 53 | /// This does not care about the order in which the bytes were loaded. 54 | pub(crate) fn is_8digits(v: u64) -> bool { 55 | let a = v.wrapping_add(0x4646_4646_4646_4646); 56 | let b = v.wrapping_sub(0x3030_3030_3030_3030); 57 | (a | b) & 0x8080_8080_8080_8080 == 0 58 | } 59 | 60 | /// A custom 64-bit floating point type, representing `f * 2^e`. 61 | /// e is biased, so it be directly shifted into the exponent bits. 62 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Default)] 63 | pub struct BiasedFp { 64 | /// The significant digits. 65 | pub f: u64, 66 | /// The biased, binary exponent. 67 | pub e: i32, 68 | } 69 | 70 | impl BiasedFp { 71 | #[inline] 72 | pub const fn zero_pow2(e: i32) -> Self { 73 | Self { f: 0, e } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | ## Your First Pull Request 4 | We use github for our codebase. 
You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests). 5 | 6 | ## Branch Organization 7 | We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, also known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development) 8 | 9 | ## Bugs 10 | ### 1. How to Find Known Issues 11 | We are using [GitHub Issues](https://github.com/cloudwego/sonic-rs/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. Before filing a new task, try to make sure your problem doesn’t already exist. 12 | 13 | ### 2. Reporting New Issues 14 | Providing reduced test code is the recommended way to report issues. It can be placed: 15 | - Directly in the issue 16 | 17 | ### 3. Security Bugs 18 | To ensure safe disclosure, please do not report security bugs in public issues. Contact us by [Support Email](mailto:conduct@cloudwego.io) 19 | 20 | ## How to Get in Touch 21 | - [Email](mailto:conduct@cloudwego.io) 22 | 23 | ## Submit a Pull Request 24 | Before you submit your Pull Request (PR) consider the following guidelines: 25 | 1. Search [GitHub](https://github.com/cloudwego/sonic-rs/pulls) for an open or closed PR that relates to your submission. You don't want to duplicate existing efforts. 26 | 2. Be sure that an issue describes the problem you're fixing, or documents the design for the feature you'd like to add. Discussing the design upfront helps to ensure that we're ready to accept your work. 27 | 3. [Fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) the cloudwego/sonic-rs repo. 28 | 4. In your forked repository, make your changes in a new git branch: 29 | ``` 30 | git checkout -b my-fix-branch develop 31 | ``` 32 | 5. Create your patch, including appropriate test cases. 33 | 6. Follow our [Style Guides](#code-style-guides). 34 | 7. 
Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit). 35 | Adherence to these conventions is necessary because release notes are automatically generated from these messages. 36 | 8. Push your branch to GitHub: 37 | ``` 38 | git push origin my-fix-branch 39 | ``` 40 | 9. In GitHub, send a pull request to `sonic-rs:develop` 41 | 42 | ## Contribution Prerequisites 43 | - You are familiar with [GitHub](https://github.com) 44 | - You may also need to be familiar with [Actions](https://github.com/features/actions) (our default workflow tool). 45 | 46 | ## Code Style Guides 47 | We use the `rustfmt` tool. 48 | -------------------------------------------------------------------------------- /src/util/reborrow.rs: -------------------------------------------------------------------------------- 1 | // Copied from Rust-lang BTreeMap implementation 2 | 3 | use core::{marker::PhantomData, ptr::NonNull}; 4 | 5 | /// Models a reborrow of some unique reference, when you know that the reborrow 6 | /// and all its descendants (i.e., all pointers and references derived from it) 7 | /// will not be used any more at some point, after which you want to use the 8 | /// original unique reference again. 9 | /// 10 | /// The borrow checker usually handles this stacking of borrows for you, but 11 | /// some control flows that accomplish this stacking are too complicated for 12 | /// the compiler to follow. A `DormantMutRef` allows you to check borrowing 13 | /// yourself, while still expressing its stacked nature, and encapsulating 14 | /// the raw pointer code needed to do this without undefined behavior. 
15 | pub struct DormantMutRef<'a, T> { 16 | ptr: NonNull, 17 | _marker: PhantomData<&'a mut T>, 18 | } 19 | 20 | unsafe impl<'a, T> Sync for DormantMutRef<'a, T> where &'a mut T: Sync {} 21 | unsafe impl<'a, T> Send for DormantMutRef<'a, T> where &'a mut T: Send {} 22 | 23 | impl<'a, T> DormantMutRef<'a, T> { 24 | /// Capture a unique borrow, and immediately reborrow it. For the compiler, 25 | /// the lifetime of the new reference is the same as the lifetime of the 26 | /// original reference, but you promise to use it for a shorter period. 27 | pub fn new(t: &'a mut T) -> (&'a mut T, Self) { 28 | let ptr = NonNull::from(t); 29 | // SAFETY: we hold the borrow throughout 'a via `_marker`, and we expose 30 | // only this reference, so it is unique. 31 | let new_ref = unsafe { &mut *ptr.as_ptr() }; 32 | ( 33 | new_ref, 34 | Self { 35 | ptr, 36 | _marker: PhantomData, 37 | }, 38 | ) 39 | } 40 | 41 | /// Revert to the unique borrow initially captured. 42 | /// 43 | /// # Safety 44 | /// 45 | /// The reborrow must have ended, i.e., the reference returned by `new` and 46 | /// all pointers and references derived from it, must not be used anymore. 47 | pub unsafe fn awaken(self) -> &'a mut T { 48 | // SAFETY: our own safety conditions imply this reference is again unique. 49 | unsafe { &mut *self.ptr.as_ptr() } 50 | } 51 | 52 | /// Borrows a new mutable reference from the unique borrow initially captured. 53 | /// 54 | /// # Safety 55 | /// 56 | /// The reborrow must have ended, i.e., the reference returned by `new` and 57 | /// all pointers and references derived from it, must not be used anymore. 58 | pub unsafe fn reborrow(&mut self) -> &'a mut T { 59 | // SAFETY: our own safety conditions imply this reference is again unique. 
60 | unsafe { &mut *self.ptr.as_ptr() } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/color.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, mem::MaybeUninit, ptr, slice, str}; 2 | 3 | use serde::{ 4 | de::{self, Deserialize, Deserializer, Unexpected}, 5 | ser::{Serialize, Serializer}, 6 | }; 7 | 8 | #[derive(Clone, Copy)] 9 | pub struct Color(u32); 10 | 11 | const HEX_LUT: &[u8] = b"\ 12 | 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F\ 13 | 202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F\ 14 | 404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F\ 15 | 606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F\ 16 | 808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F\ 17 | A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF\ 18 | C0C1C2C3C4C5C6C7C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF\ 19 | E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEFF0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; 20 | 21 | impl Color { 22 | fn as_str(self, buf: &mut MaybeUninit<[u8; 6]>) -> &str { 23 | let buf_len = 6; 24 | let buf_ptr = buf.as_mut_ptr() as *mut u8; 25 | let lut_ptr = HEX_LUT.as_ptr(); 26 | 27 | let r = ((self.0 & 0xFF0000) >> 15) as isize; 28 | let g = ((self.0 & 0x00FF00) >> 7) as isize; 29 | let b = ((self.0 & 0x0000FF) << 1) as isize; 30 | 31 | unsafe { 32 | ptr::copy_nonoverlapping(lut_ptr.offset(r), buf_ptr, 2); 33 | ptr::copy_nonoverlapping(lut_ptr.offset(g), buf_ptr.offset(2), 2); 34 | ptr::copy_nonoverlapping(lut_ptr.offset(b), buf_ptr.offset(4), 2); 35 | 36 | str::from_utf8(slice::from_raw_parts(buf_ptr, buf_len)).unwrap() 37 | } 38 | } 39 | } 40 | 41 | impl Serialize for Color { 42 | fn serialize(&self, serializer: S) -> Result 43 | where 44 | S: Serializer, 45 | { 46 | let mut buf = MaybeUninit::uninit(); 47 | serializer.serialize_str(self.as_str(&mut buf)) 48 | } 49 | 
} 50 | 51 | impl<'de> Deserialize<'de> for Color { 52 | fn deserialize(deserializer: D) -> Result 53 | where 54 | D: Deserializer<'de>, 55 | { 56 | struct Visitor; 57 | 58 | impl<'de> de::Visitor<'de> for Visitor { 59 | type Value = Color; 60 | 61 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 62 | formatter.write_str("color string") 63 | } 64 | 65 | fn visit_str(self, value: &str) -> Result 66 | where 67 | E: de::Error, 68 | { 69 | match u32::from_str_radix(value, 16) { 70 | Ok(hex) => Ok(Color(hex)), 71 | Err(_) => Err(E::invalid_value(Unexpected::Str(value), &self)), 72 | } 73 | } 74 | } 75 | 76 | deserializer.deserialize_str(Visitor) 77 | } 78 | } 79 | 80 | #[test] 81 | fn test_color() { 82 | let mut buf = MaybeUninit::uninit(); 83 | let string = Color(0xA0A0A0).as_str(&mut buf); 84 | assert_eq!(string, "A0A0A0"); 85 | } 86 | -------------------------------------------------------------------------------- /sonic-simd/src/bits.rs: -------------------------------------------------------------------------------- 1 | use super::traits::BitMask; 2 | 3 | macro_rules! 
impl_bits { 4 | () => {}; 5 | ($($ty:ty)*) => { 6 | $( 7 | impl BitMask for $ty { 8 | const LEN: usize = std::mem::size_of::<$ty>() * 8; 9 | 10 | #[inline] 11 | fn before(&self, rhs: &Self) -> bool { 12 | (self.as_little_endian() & rhs.as_little_endian().wrapping_sub(1)) != 0 13 | } 14 | 15 | #[inline] 16 | fn first_offset(&self) -> usize { 17 | self.as_little_endian().trailing_zeros() as usize 18 | } 19 | 20 | #[inline] 21 | fn as_little_endian(&self) -> Self { 22 | #[cfg(target_endian = "little")] 23 | { 24 | self.clone() 25 | } 26 | #[cfg(target_endian = "big")] 27 | { 28 | self.swap_bytes() 29 | } 30 | } 31 | 32 | #[inline] 33 | fn all_zero(&self) -> bool { 34 | *self == 0 35 | } 36 | 37 | #[inline] 38 | fn clear_high_bits(&self, n: usize) -> Self { 39 | debug_assert!(n <= Self::LEN); 40 | *self & ((u64::MAX as $ty) >> n) 41 | } 42 | } 43 | )* 44 | }; 45 | } 46 | 47 | impl_bits!(u16 u32 u64); 48 | 49 | /// Use u64 representation the bitmask of Neon vector. 50 | /// (low) 51 | /// Vector: 00-ff-ff-ff-ff-00-00-00 52 | /// Mask : 0000-1111-1111-1111-1111-0000-0000-0000 53 | /// 54 | /// first_offset() = 1 55 | /// clear_high_bits(4) = Mask(0000-1111-1111-1111-[0000]-0000-0000-0000) 56 | /// 57 | /// reference: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon 58 | pub struct NeonBits(u64); 59 | 60 | impl NeonBits { 61 | #[inline] 62 | pub fn new(u: u64) -> Self { 63 | Self(u) 64 | } 65 | } 66 | 67 | impl BitMask for NeonBits { 68 | const LEN: usize = 16; 69 | 70 | #[inline] 71 | fn first_offset(&self) -> usize { 72 | (self.as_little_endian().0.trailing_zeros() as usize) >> 2 73 | } 74 | 75 | #[inline] 76 | fn before(&self, rhs: &Self) -> bool { 77 | (self.as_little_endian().0 & rhs.as_little_endian().0.wrapping_sub(1)) != 0 78 | } 79 | 80 | #[inline] 81 | fn as_little_endian(&self) -> Self { 82 | #[cfg(target_endian = "little")] 83 | { 84 | Self::new(self.0) 85 | } 86 | 
#[cfg(target_endian = "big")] 87 | { 88 | Self::new(self.0.swap_bytes()) 89 | } 90 | } 91 | 92 | #[inline] 93 | fn all_zero(&self) -> bool { 94 | self.0 == 0 95 | } 96 | 97 | #[inline] 98 | fn clear_high_bits(&self, n: usize) -> Self { 99 | debug_assert!(n <= Self::LEN); 100 | Self(self.0 & u64::MAX >> (n * 4)) 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: 'CI' 2 | on: pull_request 3 | 4 | env: 5 | RUST_BACKTRACE: 1 6 | CARGO_TERM_COLOR: always 7 | 8 | jobs: 9 | test-stable-hosted: 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | os: 14 | - [self-hosted, Linux, amd64] 15 | - [self-hosted, Linux, aarch64] 16 | name: Rust stable 17 | runs-on: ${{matrix.os}} 18 | timeout-minutes: 45 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: dtolnay/rust-toolchain@stable 22 | - run: ./scripts/test.sh 23 | 24 | test-stable-wasm: 25 | runs-on: [self-hosted, Linux, amd64] 26 | env: 27 | WASMTIME_BACKTRACE_DETAILS: 1 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - uses: dtolnay/rust-toolchain@stable 32 | with: 33 | targets: wasm32-wasip1 34 | 35 | - name: Install wasmtime 36 | run: cargo install wasmtime-cli 37 | 38 | - name: Run tests 39 | run: | 40 | cargo build --tests --target-dir target --target wasm32-wasip1 41 | wasmtime run target/wasm32-wasip1/debug/deps/sonic_rs*.wasm 42 | 43 | 44 | test-nightly-hosted: 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | os: 49 | - [self-hosted, Linux, amd64] 50 | - [self-hosted, Linux, aarch64] 51 | name: Rust nightly 52 | runs-on: ${{matrix.os}} 53 | timeout-minutes: 45 54 | steps: 55 | - uses: actions/checkout@v4 56 | - uses: dtolnay/rust-toolchain@nightly 57 | - run: ./scripts/test.sh 58 | 59 | clippy_lint: 60 | name: Format check 61 | runs-on: [self-hosted, Linux, amd64] 62 | timeout-minutes: 45 63 | steps: 64 | - uses: 
actions/checkout@v4 65 | - uses: dtolnay/rust-toolchain@nightly 66 | with: 67 | components: rustfmt, clippy 68 | - name: Format check 69 | run: | 70 | cargo clippy --all-targets --all-features -- -D warnings 71 | cargo fmt -- --check 72 | 73 | sanitize: 74 | strategy: 75 | fail-fast: false 76 | matrix: 77 | san: ["address,leak"] 78 | feature: ["", "arbitrary_precision", "sort_keys", "utf8_lossy", "non_trailing_zero"] 79 | name: Sanitize ${{matrix.san}} feature ${{matrix.feature}} 80 | runs-on: [self-hosted, Linux, amd64] 81 | steps: 82 | - uses: actions/checkout@v4 83 | - uses: dtolnay/rust-toolchain@nightly 84 | with: 85 | components: rustfmt, clippy 86 | - uses: actions-rs/clippy-check@v1 87 | with: 88 | token: ${{ secrets.GITHUB_TOKEN }} 89 | - name: Sanitize 90 | run: ./scripts/sanitize.sh ${{matrix.san}} ${{matrix.feature}} 91 | 92 | fuzz: 93 | runs-on: [self-hosted, Linux, amd64] 94 | steps: 95 | - uses: actions/checkout@v4 96 | - uses: dtolnay/rust-toolchain@nightly 97 | with: 98 | components: rustfmt, clippy 99 | - uses: actions-rs/clippy-check@v1 100 | with: 101 | token: ${{ secrets.GITHUB_TOKEN }} 102 | - name: Fuzz 103 | run: ./scripts/fuzz.sh 104 | -------------------------------------------------------------------------------- /src/lazyvalue/de.rs: -------------------------------------------------------------------------------- 1 | use std::{marker::PhantomData, mem::MaybeUninit}; 2 | 3 | use ::serde::{de, de::Visitor, Deserialize, Deserializer}; 4 | use faststr::FastStr; 5 | 6 | use super::{owned::OwnedLazyValue, value::LazyValue}; 7 | use crate::lazyvalue::value::HasEsc; 8 | 9 | impl<'de: 'a, 'a> Deserialize<'de> for LazyValue<'a> { 10 | fn deserialize(deserializer: D) -> Result 11 | where 12 | D: Deserializer<'de>, 13 | { 14 | struct LazyValueVisitor<'a> { 15 | _marker: PhantomData<&'a ()>, 16 | } 17 | 18 | impl<'de: 'a, 'a> Visitor<'de> for LazyValueVisitor<'a> { 19 | type Value = LazyValue<'a>; 20 | 21 | fn expecting(&self, formatter: &mut 
std::fmt::Formatter) -> std::fmt::Result { 22 | write!(formatter, "any valid JSON value") 23 | } 24 | 25 | // NOTE: only used for visit the str that has escaped chars 26 | fn visit_str(self, v: &str) -> Result 27 | where 28 | E: de::Error, 29 | { 30 | Ok(LazyValue::new(FastStr::new(v).into(), HasEsc::Yes)) 31 | } 32 | 33 | fn visit_borrowed_str(self, v: &'de str) -> Result 34 | where 35 | E: de::Error, 36 | { 37 | Ok(LazyValue::new(v.as_bytes().into(), HasEsc::None)) 38 | } 39 | } 40 | 41 | let visit = LazyValueVisitor { 42 | _marker: PhantomData, 43 | }; 44 | deserializer.deserialize_newtype_struct(super::TOKEN, visit) 45 | } 46 | } 47 | 48 | impl<'de> Deserialize<'de> for OwnedLazyValue { 49 | fn deserialize(deserializer: D) -> Result 50 | where 51 | D: Deserializer<'de>, 52 | { 53 | struct OwnedVisitor; 54 | 55 | let visit = OwnedVisitor; 56 | 57 | impl<'de> Visitor<'de> for OwnedVisitor { 58 | type Value = OwnedLazyValue; 59 | 60 | fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 61 | formatter.write_str("expect a valid json") 62 | } 63 | 64 | fn visit_bytes(self, value_binary: &[u8]) -> std::result::Result 65 | where 66 | E: de::Error, 67 | { 68 | // we pass the value from value_binary 69 | unsafe { 70 | assert!( 71 | value_binary.len() == std::mem::size_of::(), 72 | "invalid value size {}", 73 | value_binary.len() 74 | ); 75 | let mut dom: MaybeUninit = MaybeUninit::zeroed(); 76 | std::ptr::copy_nonoverlapping( 77 | value_binary.as_ptr() as *const Self::Value, 78 | dom.as_mut_ptr(), 79 | 1, 80 | ); 81 | Ok(dom.assume_init()) 82 | } 83 | } 84 | } 85 | 86 | deserializer.deserialize_newtype_struct(super::OWNED_LAZY_VALUE_TOKEN, visit) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/util/arch/aarch64.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2018-2019 The simdjson authors 2 | 3 | // Licensed under the Apache 
License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // This file may have been modified by ByteDance authors. All ByteDance 16 | // Modifications are Copyright 2022 ByteDance Authors. 17 | 18 | use std::arch::aarch64::*; 19 | 20 | // Not use PMULL instructions, but it is apparently slow. 21 | // This is copied from simdjson. 22 | pub unsafe fn prefix_xor(bitmask: u64) -> u64 { 23 | let mut bitmask = bitmask; 24 | bitmask ^= bitmask << 1; 25 | bitmask ^= bitmask << 2; 26 | bitmask ^= bitmask << 4; 27 | bitmask ^= bitmask << 8; 28 | bitmask ^= bitmask << 16; 29 | bitmask ^= bitmask << 32; 30 | bitmask 31 | } 32 | 33 | // We compute whitespace and op separately. If the code later only use one or the 34 | // other, given the fact that all functions are aggressively inlined, we can 35 | // hope that useless computations will be omitted. This is namely case when 36 | // minifying (we only need whitespace). *However* if we only need spaces, 37 | // it is likely that we will still compute 'v' above with two lookup_16: one 38 | // could do it a bit cheaper. This is in contrast with the x64 implementations 39 | // where we can, efficiently, do the white space and structural matching 40 | // separately. 
One reason for this difference is that on ARM NEON, the table 41 | // lookups either zero or leave unchanged the characters exceeding 0xF whereas 42 | // on x64, the equivalent instruction (pshufb) automatically applies a mask, 43 | // ignoring the 4 most significant bits. Thus the x64 implementation is 44 | // optimized differently. This being said, if you use this code strictly 45 | // just for minification (or just to identify the structural characters), 46 | // there is a small untaken optimization opportunity here. We deliberately 47 | // do not pick it up. 48 | #[inline(always)] 49 | pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 { 50 | // return super::fallback::get_nonspace_bits(data); 51 | #[inline(always)] 52 | unsafe fn chunk_nonspace_bits(input: uint8x16_t) -> uint8x16_t { 53 | const LOW_TAB: uint8x16_t = 54 | unsafe { std::mem::transmute([16u8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0]) }; 55 | 56 | const HIGH_TAB: uint8x16_t = 57 | unsafe { std::mem::transmute([8u8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0]) }; 58 | 59 | let white_mask = vmovq_n_u8(0x18); 60 | let lo4 = vandq_u8(input, vmovq_n_u8(0xf)); 61 | let hi4 = vshrq_n_u8(input, 4); 62 | 63 | let lo4_sf = vqtbl1q_u8(LOW_TAB, lo4); 64 | let hi4_sf = vqtbl1q_u8(HIGH_TAB, hi4); 65 | 66 | let v = vandq_u8(lo4_sf, hi4_sf); 67 | 68 | vtstq_u8(v, white_mask) 69 | } 70 | 71 | !sonic_simd::neon::to_bitmask64( 72 | chunk_nonspace_bits(vld1q_u8(data.as_ptr())), 73 | chunk_nonspace_bits(vld1q_u8(data.as_ptr().offset(16))), 74 | chunk_nonspace_bits(vld1q_u8(data.as_ptr().offset(32))), 75 | chunk_nonspace_bits(vld1q_u8(data.as_ptr().offset(48))), 76 | ) 77 | } 78 | -------------------------------------------------------------------------------- /sonic-simd/src/avx512.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | arch::x86_64::*, 3 | ops::{BitAnd, BitOr, BitOrAssign}, 4 | }; 5 | 6 | use super::{Mask, Simd}; 7 | 8 | #[derive(Debug)] 9 | 
#[repr(transparent)] 10 | pub struct Simd512u(__m512i); 11 | 12 | #[derive(Debug)] 13 | #[repr(transparent)] 14 | pub struct Simd512i(__m512i); 15 | 16 | #[derive(Debug, Clone, Copy)] 17 | #[repr(transparent)] 18 | pub struct Mask512(__mmask64); 19 | 20 | impl Mask for Mask512 { 21 | type BitMask = u64; 22 | type Element = u8; 23 | 24 | #[inline(always)] 25 | fn bitmask(self) -> Self::BitMask { 26 | self.0 27 | } 28 | 29 | #[inline(always)] 30 | fn splat(b: bool) -> Self { 31 | if b { 32 | Mask512(u64::MAX) 33 | } else { 34 | Mask512(0) 35 | } 36 | } 37 | } 38 | 39 | impl BitOr for Mask512 { 40 | type Output = Self; 41 | 42 | #[inline(always)] 43 | fn bitor(self, rhs: Self) -> Self::Output { 44 | Mask512(self.0 | rhs.0) 45 | } 46 | } 47 | 48 | impl BitOrAssign for Mask512 { 49 | #[inline(always)] 50 | fn bitor_assign(&mut self, rhs: Self) { 51 | self.0 |= rhs.0; 52 | } 53 | } 54 | 55 | impl BitAnd for Mask512 { 56 | type Output = Self; 57 | 58 | #[inline(always)] 59 | fn bitand(self, rhs: Mask512) -> Self::Output { 60 | Mask512(self.0 & rhs.0) 61 | } 62 | } 63 | 64 | impl Simd for Simd512u { 65 | const LANES: usize = 64; 66 | type Element = u8; 67 | type Mask = Mask512; 68 | 69 | #[inline(always)] 70 | unsafe fn loadu(ptr: *const u8) -> Self { 71 | unsafe { Simd512u(_mm512_loadu_si512(ptr as *const __m512i)) } 72 | } 73 | 74 | #[inline(always)] 75 | unsafe fn storeu(&self, ptr: *mut u8) { 76 | unsafe { _mm512_storeu_si512(ptr as *mut __m512i, self.0) } 77 | } 78 | 79 | #[inline(always)] 80 | fn eq(&self, rhs: &Self) -> Self::Mask { 81 | unsafe { Mask512(_mm512_cmpeq_epi8_mask(self.0, rhs.0)) } 82 | } 83 | 84 | #[inline(always)] 85 | fn splat(ch: u8) -> Self { 86 | unsafe { Simd512u(_mm512_set1_epi8(ch as i8)) } 87 | } 88 | 89 | #[inline(always)] 90 | fn le(&self, rhs: &Self) -> Self::Mask { 91 | unsafe { Mask512(_mm512_cmple_epu8_mask(self.0, rhs.0)) } 92 | } 93 | 94 | #[inline(always)] 95 | fn gt(&self, rhs: &Self) -> Self::Mask { 96 | unsafe { 
Mask512(_mm512_cmpgt_epu8_mask(self.0, rhs.0)) } 97 | } 98 | } 99 | 100 | impl Simd for Simd512i { 101 | const LANES: usize = 64; 102 | type Element = i8; 103 | type Mask = Mask512; 104 | 105 | #[inline(always)] 106 | unsafe fn loadu(ptr: *const u8) -> Self { 107 | unsafe { Simd512i(_mm512_loadu_si512(ptr as *const __m512i)) } 108 | } 109 | 110 | #[inline(always)] 111 | unsafe fn storeu(&self, ptr: *mut u8) { 112 | unsafe { _mm512_storeu_si512(ptr as *mut __m512i, self.0) } 113 | } 114 | 115 | #[inline(always)] 116 | fn eq(&self, rhs: &Self) -> Self::Mask { 117 | unsafe { Mask512(_mm512_cmpeq_epi8_mask(self.0, rhs.0)) } 118 | } 119 | 120 | #[inline(always)] 121 | fn splat(elem: i8) -> Self { 122 | unsafe { Simd512i(_mm512_set1_epi8(elem)) } 123 | } 124 | 125 | #[inline(always)] 126 | fn le(&self, rhs: &Self) -> Self::Mask { 127 | unsafe { Mask512(_mm512_cmple_epi8_mask(self.0, rhs.0)) } 128 | } 129 | 130 | #[inline(always)] 131 | fn gt(&self, rhs: &Self) -> Self::Mask { 132 | unsafe { Mask512(_mm512_cmpgt_epi8_mask(self.0, rhs.0)) } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /sonic-simd/src/sse2.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | arch::x86_64::*, 3 | ops::{BitAnd, BitOr, BitOrAssign}, 4 | }; 5 | 6 | use super::{Mask, Simd}; 7 | 8 | #[derive(Debug)] 9 | #[repr(transparent)] 10 | pub struct Simd128i(__m128i); 11 | 12 | #[derive(Debug)] 13 | #[repr(transparent)] 14 | pub struct Simd128u(__m128i); 15 | 16 | impl Simd for Simd128i { 17 | const LANES: usize = 16; 18 | type Mask = Mask128; 19 | type Element = i8; 20 | 21 | #[inline(always)] 22 | unsafe fn loadu(ptr: *const u8) -> Self { 23 | Self(_mm_loadu_si128(ptr as *const __m128i)) 24 | } 25 | 26 | #[inline(always)] 27 | unsafe fn storeu(&self, ptr: *mut u8) { 28 | _mm_storeu_si128(ptr as *mut __m128i, self.0) 29 | } 30 | 31 | #[inline(always)] 32 | fn eq(&self, rhs: &Self) -> Self::Mask { 
33 | let eq = unsafe { _mm_cmpeq_epi8(self.0, rhs.0) }; 34 | Mask128(eq) 35 | } 36 | 37 | #[inline(always)] 38 | fn splat(elem: i8) -> Self { 39 | unsafe { Self(_mm_set1_epi8(elem)) } 40 | } 41 | 42 | #[inline(always)] 43 | fn le(&self, rhs: &Self) -> Self::Mask { 44 | // self <= rhs equal as rhs >= self 45 | rhs.gt(self) | rhs.eq(self) 46 | } 47 | 48 | #[inline(always)] 49 | fn gt(&self, rhs: &Self) -> Self::Mask { 50 | unsafe { Mask128(_mm_cmpgt_epi8(self.0, rhs.0)) } 51 | } 52 | } 53 | 54 | #[derive(Debug)] 55 | #[repr(transparent)] 56 | pub struct Mask128(__m128i); 57 | 58 | impl Mask for Mask128 { 59 | type BitMask = u16; 60 | type Element = u8; 61 | 62 | #[inline(always)] 63 | fn bitmask(self) -> Self::BitMask { 64 | unsafe { _mm_movemask_epi8(self.0) as u16 } 65 | } 66 | 67 | #[inline(always)] 68 | fn splat(b: bool) -> Self { 69 | let v: i8 = if b { -1 } else { 0 }; 70 | unsafe { Mask128(_mm_set1_epi8(v)) } 71 | } 72 | } 73 | 74 | impl BitAnd for Mask128 { 75 | type Output = Self; 76 | 77 | #[inline(always)] 78 | fn bitand(self, rhs: Mask128) -> Self::Output { 79 | unsafe { Mask128(_mm_and_si128(self.0, rhs.0)) } 80 | } 81 | } 82 | 83 | impl BitOr for Mask128 { 84 | type Output = Self; 85 | 86 | #[inline(always)] 87 | fn bitor(self, rhs: Mask128) -> Self::Output { 88 | unsafe { Mask128(_mm_or_si128(self.0, rhs.0)) } 89 | } 90 | } 91 | 92 | impl BitOrAssign for Mask128 { 93 | #[inline(always)] 94 | fn bitor_assign(&mut self, rhs: Mask128) { 95 | self.0 = unsafe { _mm_or_si128(self.0, rhs.0) }; 96 | } 97 | } 98 | 99 | impl Simd for Simd128u { 100 | const LANES: usize = 16; 101 | type Mask = Mask128; 102 | type Element = u8; 103 | 104 | #[inline(always)] 105 | unsafe fn loadu(ptr: *const u8) -> Self { 106 | Simd128u(_mm_loadu_si128(ptr as *const __m128i)) 107 | } 108 | 109 | #[inline(always)] 110 | unsafe fn storeu(&self, ptr: *mut u8) { 111 | _mm_storeu_si128(ptr as *mut __m128i, self.0) 112 | } 113 | 114 | #[inline(always)] 115 | fn eq(&self, rhs: &Self) -> 
Self::Mask { 116 | let eq = unsafe { _mm_cmpeq_epi8(self.0, rhs.0) }; 117 | Mask128(eq) 118 | } 119 | 120 | #[inline(always)] 121 | fn splat(ch: u8) -> Self { 122 | Simd128u(unsafe { _mm_set1_epi8(ch as i8) }) 123 | } 124 | 125 | #[inline(always)] 126 | fn le(&self, rhs: &Self) -> Self::Mask { 127 | unsafe { 128 | let max = _mm_max_epu8(self.0, rhs.0); 129 | let eq = _mm_cmpeq_epi8(max, rhs.0); 130 | Mask128(eq) 131 | } 132 | } 133 | 134 | #[inline(always)] 135 | fn gt(&self, _rhs: &Self) -> Self::Mask { 136 | todo!() 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /sonic-number/src/arch/x86_64.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64::*; 2 | 3 | macro_rules! packadd_1 { 4 | ($v:ident) => { 5 | let delta = _mm_set1_epi64x(0x010A010A010A010A); 6 | $v = _mm_maddubs_epi16($v, delta); 7 | }; 8 | } 9 | 10 | macro_rules! packadd_2 { 11 | ($v:ident) => { 12 | let delta = _mm_set1_epi64x(0x0001006400010064); 13 | $v = _mm_madd_epi16($v, delta); 14 | }; 15 | } 16 | 17 | macro_rules! packadd_4 { 18 | ($v:ident) => { 19 | $v = _mm_packus_epi32($v, $v); 20 | let delta = _mm_set_epi16(0, 0, 0, 0, 1, 10000, 1, 10000); 21 | $v = _mm_madd_epi16($v, delta); 22 | }; 23 | } 24 | 25 | // simd add for 5 ~ 8 digits 26 | macro_rules! simd_add_5_8 { 27 | ($v:ident, $and:literal) => {{ 28 | $v = _mm_slli_si128($v, 16 - $and); 29 | packadd_1!($v); 30 | packadd_2!($v); 31 | (_mm_extract_epi32($v, 2) as u64) * 10000 + (_mm_extract_epi32($v, 3) as u64) 32 | }}; 33 | } 34 | 35 | // simd add for 9 ~ 15 digits 36 | macro_rules! simd_add_9_15 { 37 | ($v:ident, $and:literal) => {{ 38 | $v = _mm_slli_si128($v, 16 - $and); 39 | packadd_1!($v); 40 | packadd_2!($v); 41 | packadd_4!($v); 42 | (_mm_extract_epi32($v, 0) as u64) * 100000000 + (_mm_extract_epi32($v, 1) as u64) 43 | }}; 44 | } 45 | 46 | macro_rules! 
simd_add_16 { 47 | ($v:ident) => {{ 48 | packadd_1!($v); 49 | packadd_2!($v); 50 | packadd_4!($v); 51 | (_mm_extract_epi32($v, 0) as u64) * 100000000 + (_mm_extract_epi32($v, 1) as u64) 52 | }}; 53 | } 54 | #[inline(always)] 55 | pub unsafe fn simd_str2int(c: &[u8], need: usize) -> (u64, usize) { 56 | debug_assert!(need <= 16); 57 | let data = _mm_loadu_si128(c.as_ptr() as *const __m128i); 58 | let zero = _mm_setzero_si128(); 59 | let nine = _mm_set1_epi8(9); 60 | let zero_c = _mm_set1_epi8(b'0' as i8); 61 | 62 | let mut data = _mm_sub_epi8(data, zero_c); 63 | let lt_zero = _mm_cmpgt_epi8(zero, data); 64 | let gt_nine = _mm_cmpgt_epi8(data, nine); 65 | 66 | let is_num_end = _mm_or_si128(lt_zero, gt_nine); 67 | let is_num_end_int = _mm_movemask_epi8(is_num_end); 68 | 69 | // get the real parsed count 70 | let mut count = need; 71 | if is_num_end_int != 0 { 72 | let digits = is_num_end_int.trailing_zeros() as usize; 73 | if digits < need { 74 | count = digits; 75 | } 76 | } 77 | 78 | let sum = match count { 79 | 1 => _mm_extract_epi8(data, 0) as u64, 80 | 2 => (_mm_extract_epi8(data, 0) * 10 + _mm_extract_epi8(data, 1)) as u64, 81 | 3 => { 82 | // shift to clear the non-digit ascii in vector 83 | data = _mm_slli_si128(data, 16 - 3); 84 | packadd_1!(data); 85 | // add the highest two lanes 86 | (_mm_extract_epi16(data, 6) * 100 + _mm_extract_epi16(data, 7)) as u64 87 | } 88 | 4 => { 89 | data = _mm_slli_si128(data, 16 - 4); 90 | packadd_1!(data); 91 | (_mm_extract_epi16(data, 6) * 100 + _mm_extract_epi16(data, 7)) as u64 92 | } 93 | 5 => simd_add_5_8!(data, 5), 94 | 6 => simd_add_5_8!(data, 6), 95 | 7 => simd_add_5_8!(data, 7), 96 | 8 => simd_add_5_8!(data, 8), 97 | 9 => simd_add_9_15!(data, 9), 98 | 10 => simd_add_9_15!(data, 10), 99 | 11 => simd_add_9_15!(data, 11), 100 | 12 => simd_add_9_15!(data, 12), 101 | 13 => simd_add_9_15!(data, 13), 102 | 14 => simd_add_9_15!(data, 14), 103 | 15 => simd_add_9_15!(data, 15), 104 | 16 => simd_add_16!(data), 105 | _ => 
unreachable!(), 106 | }; 107 | (sum, count) 108 | } 109 | -------------------------------------------------------------------------------- /sonic-simd/src/avx2.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | arch::x86_64::*, 3 | ops::{BitAnd, BitOr, BitOrAssign}, 4 | }; 5 | 6 | use super::{Mask, Simd}; 7 | 8 | #[derive(Debug)] 9 | #[repr(transparent)] 10 | pub struct Simd256u(__m256i); 11 | 12 | #[derive(Debug)] 13 | #[repr(transparent)] 14 | pub struct Simd256i(__m256i); 15 | 16 | impl Simd for Simd256i { 17 | const LANES: usize = 32; 18 | type Mask = Mask256; 19 | type Element = i8; 20 | 21 | #[inline(always)] 22 | unsafe fn loadu(ptr: *const u8) -> Self { 23 | unsafe { Self(_mm256_loadu_si256(ptr as *const __m256i)) } 24 | } 25 | 26 | #[inline(always)] 27 | unsafe fn storeu(&self, ptr: *mut u8) { 28 | unsafe { _mm256_storeu_si256(ptr as *mut __m256i, self.0) } 29 | } 30 | 31 | #[inline(always)] 32 | fn eq(&self, rhs: &Self) -> Self::Mask { 33 | unsafe { Mask256(_mm256_cmpeq_epi8(self.0, rhs.0)) } 34 | } 35 | 36 | #[inline(always)] 37 | fn splat(elem: i8) -> Self { 38 | unsafe { Self(_mm256_set1_epi8(elem)) } 39 | } 40 | 41 | #[inline(always)] 42 | fn le(&self, rhs: &Self) -> Self::Mask { 43 | // self <= rhs equal as rhs >= self 44 | rhs.gt(self) | rhs.eq(self) 45 | } 46 | 47 | #[inline(always)] 48 | fn gt(&self, rhs: &Self) -> Self::Mask { 49 | unsafe { Mask256(_mm256_cmpgt_epi8(self.0, rhs.0)) } 50 | } 51 | } 52 | 53 | #[derive(Debug)] 54 | #[repr(transparent)] 55 | pub struct Mask256(__m256i); 56 | 57 | impl Mask for Mask256 { 58 | type BitMask = u32; 59 | type Element = u8; 60 | 61 | #[inline(always)] 62 | fn bitmask(self) -> Self::BitMask { 63 | unsafe { _mm256_movemask_epi8(self.0) as u32 } 64 | } 65 | 66 | #[inline(always)] 67 | fn splat(b: bool) -> Self { 68 | let v: i8 = if b { -1 } else { 0 }; 69 | unsafe { Mask256(_mm256_set1_epi8(v)) } 70 | } 71 | } 72 | 73 | impl BitAnd for Mask256 { 74 | type 
Output = Self; 75 | 76 | #[inline(always)] 77 | fn bitand(self, rhs: Mask256) -> Self::Output { 78 | unsafe { Mask256(_mm256_and_si256(self.0, rhs.0)) } 79 | } 80 | } 81 | 82 | impl BitOr for Mask256 { 83 | type Output = Self; 84 | 85 | #[inline(always)] 86 | fn bitor(self, rhs: Mask256) -> Self::Output { 87 | unsafe { Mask256(_mm256_or_si256(self.0, rhs.0)) } 88 | } 89 | } 90 | 91 | impl BitOrAssign for Mask256 { 92 | #[inline(always)] 93 | fn bitor_assign(&mut self, rhs: Mask256) { 94 | unsafe { self.0 = _mm256_or_si256(self.0, rhs.0) } 95 | } 96 | } 97 | 98 | impl Simd for Simd256u { 99 | const LANES: usize = 32; 100 | type Mask = Mask256; 101 | type Element = u8; 102 | 103 | #[inline(always)] 104 | unsafe fn loadu(ptr: *const u8) -> Self { 105 | unsafe { Simd256u(_mm256_loadu_si256(ptr as *const __m256i)) } 106 | } 107 | 108 | #[inline(always)] 109 | unsafe fn storeu(&self, ptr: *mut u8) { 110 | unsafe { _mm256_storeu_si256(ptr as *mut __m256i, self.0) } 111 | } 112 | 113 | #[inline(always)] 114 | fn eq(&self, rhs: &Self) -> Self::Mask { 115 | unsafe { 116 | let eq = _mm256_cmpeq_epi8(self.0, rhs.0); 117 | Mask256(eq) 118 | } 119 | } 120 | 121 | #[inline(always)] 122 | fn splat(ch: u8) -> Self { 123 | unsafe { Simd256u(_mm256_set1_epi8(ch as i8)) } 124 | } 125 | 126 | #[inline(always)] 127 | fn le(&self, rhs: &Self) -> Self::Mask { 128 | unsafe { 129 | let max = _mm256_max_epu8(self.0, rhs.0); 130 | let eq = _mm256_cmpeq_epi8(max, rhs.0); 131 | Mask256(eq) 132 | } 133 | } 134 | 135 | #[inline(always)] 136 | fn gt(&self, _rhs: &Self) -> Self::Mask { 137 | todo!() 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/lazyvalue/ser.rs: -------------------------------------------------------------------------------- 1 | use serde::ser::SerializeStruct; 2 | 3 | use super::value::LazyValue; 4 | 5 | impl<'a> serde::ser::Serialize for LazyValue<'a> { 6 | fn serialize(&self, serializer: S) -> std::result::Result 7 | 
where 8 | S: serde::Serializer, 9 | { 10 | let raw = self.as_raw_str(); 11 | let mut s = serializer.serialize_struct(super::TOKEN, 1)?; 12 | // will directly write raw in `LazyValueStrEmitter::seriazlie_str` 13 | s.serialize_field(super::TOKEN, raw)?; 14 | s.end() 15 | } 16 | } 17 | 18 | #[cfg(test)] 19 | mod test { 20 | use serde::{Deserialize, Serialize}; 21 | 22 | use crate::{from_str, to_string, LazyValue, OwnedLazyValue, Result}; 23 | 24 | #[test] 25 | fn test_lazyvalue_serde() { 26 | let json = r#"{ 27 | "a": 1, 28 | "b": "2", 29 | "c": [3, 4, 5], 30 | "d": { 31 | "e": 6, 32 | "f": "7", 33 | "g": [8, 9, 10] 34 | } 35 | }"#; 36 | let value = crate::from_str::(json).unwrap(); 37 | let json2 = crate::to_string(&value).unwrap(); 38 | assert_eq!(json, json2); 39 | } 40 | 41 | #[derive(Debug, Deserialize, Serialize)] 42 | struct TestLazyValue<'a> { 43 | #[serde(borrow)] 44 | borrowed_lv: LazyValue<'a>, 45 | owned_lv: OwnedLazyValue, 46 | } 47 | 48 | #[test] 49 | fn test_raw_value_ok() { 50 | fn test_json_ok(json: &str) { 51 | let data = TestLazyValue { 52 | borrowed_lv: from_str(json).expect(json), 53 | owned_lv: from_str(json).expect(json), 54 | }; 55 | 56 | // test long json for SIMD 57 | let json2 = json.to_string() + &" ".repeat(1000); 58 | let data2 = TestLazyValue { 59 | borrowed_lv: from_str(json).expect(&json2), 60 | owned_lv: from_str(json).expect(&json2), 61 | }; 62 | assert_eq!(to_string(&data).unwrap(), to_string(&data2).unwrap()); 63 | let json = json.trim(); 64 | let expect: String = format!("{{\"borrowed_lv\":{json},\"owned_lv\":{json}}}"); 65 | let serialized = to_string(&data).expect(json); 66 | assert_eq!(expect, serialized); 67 | assert_eq!(serialized, to_string(&data).unwrap()); 68 | } 69 | test_json_ok(r#""""#); 70 | test_json_ok(r#""raw value""#); 71 | test_json_ok(r#""哈哈哈☺""#); 72 | test_json_ok(r#"true"#); 73 | test_json_ok(r#"false"#); 74 | test_json_ok(r#"0"#); 75 | test_json_ok(r#"-1"#); 76 | test_json_ok(r#"-1e+1111111111111"#); 77 | 
test_json_ok(r#"-1e-1111111111111"#); 78 | test_json_ok(r#"{}"#); 79 | test_json_ok(r#"[]"#); 80 | test_json_ok(r#"{"":[], "": ["", "", []]}"#); 81 | test_json_ok(r#"{"":[], "": ["", "", []]}"#); 82 | } 83 | 84 | #[test] 85 | fn test_raw_value_failed() { 86 | fn test_json_failed(json: &str) { 87 | let ret: Result> = from_str(json); 88 | assert!(ret.is_err(), "invalid json is {json}"); 89 | } 90 | test_json_failed(r#"""#); 91 | test_json_failed(r#""raw " value""#); 92 | test_json_failed(r#"哈哈哈""#); 93 | test_json_failed(r#""\x""#); 94 | test_json_failed("\"\x00\""); 95 | test_json_failed(r#"tru"#); 96 | test_json_failed(r#"fals"#); 97 | test_json_failed(r#"0."#); 98 | test_json_failed(r#"-"#); 99 | test_json_failed(r#"-1e"#); 100 | test_json_failed(r#"-1e-"#); 101 | test_json_failed(r#"-1e-1.111"#); 102 | test_json_failed(r#"-1e-1,"#); 103 | test_json_failed(r#"{"#); 104 | test_json_failed(r#" ]"#); 105 | test_json_failed(r#"{"":[], ["", "", []]}"#); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /bindings/ffi/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::{ffi::c_char, mem::ManuallyDrop, os::raw::c_void}; 2 | 3 | use sonic_rs::Value; 4 | 5 | /// A string allocated in Rust, ending with `\0`. Used for serialize output and error message. 6 | #[derive(Debug)] 7 | #[repr(C)] 8 | pub struct SonicCString { 9 | buf: *const c_void, 10 | len: usize, 11 | } 12 | 13 | impl Default for SonicCString { 14 | fn default() -> Self { 15 | SonicCString { 16 | buf: std::ptr::null(), 17 | len: 0, 18 | } 19 | } 20 | } 21 | 22 | #[derive(Debug)] 23 | #[repr(C)] 24 | pub struct SonicDeserializeRet { 25 | value: *const c_void, 26 | err: SonicCString, 27 | } 28 | 29 | pub const SONIC_RS_DESERIALIZE_USE_RAWNUMBER: u64 = 2; 30 | pub const SONIC_RS_DESERIALIZE_UTF8_LOSSY: u64 = 4; 31 | 32 | /// # Safety 33 | /// 34 | /// The caller should drop the returned `value` or `err`. 
35 | #[no_mangle] 36 | pub unsafe extern "C" fn sonic_rs_deserialize_value( 37 | json: *const c_char, 38 | len: usize, 39 | cfg: u64, 40 | ) -> SonicDeserializeRet { 41 | let json = std::slice::from_raw_parts(json as *const u8, len); 42 | let mut de = sonic_rs::serde::Deserializer::from_slice(json); 43 | 44 | if cfg & SONIC_RS_DESERIALIZE_USE_RAWNUMBER != 0 { 45 | de = de.use_rawnumber(); 46 | } 47 | 48 | if cfg & SONIC_RS_DESERIALIZE_UTF8_LOSSY != 0 { 49 | de = de.utf8_lossy(); 50 | } 51 | 52 | match de.deserialize::() { 53 | Ok(value) => SonicDeserializeRet { 54 | value: Box::into_raw(Box::new(value)) as *const _, 55 | err: SonicCString::default(), 56 | }, 57 | Err(e) => { 58 | // messega always end with '\0' 59 | let msg = ManuallyDrop::new(format!("{}\0", e)); 60 | let err = SonicCString { 61 | buf: msg.as_ptr() as *const c_void, 62 | len: msg.len(), 63 | }; 64 | SonicDeserializeRet { 65 | value: std::ptr::null_mut(), 66 | err, 67 | } 68 | } 69 | } 70 | } 71 | 72 | #[derive(Debug)] 73 | #[repr(C)] 74 | pub struct SonicSerializeRet { 75 | json: SonicCString, 76 | err: SonicCString, 77 | } 78 | 79 | pub const SONIC_RS_SERIALIZE_PRETTY: u64 = 1; 80 | 81 | /// # Safety 82 | /// 83 | /// The caller should drop the returned `json` or `err`. 
84 | #[no_mangle] 85 | pub unsafe extern "C" fn sonic_rs_serialize_value( 86 | value: *const c_void, 87 | cfg: u64, 88 | ) -> SonicSerializeRet { 89 | let value = unsafe { &*(value as *const Value) }; 90 | let ret = if cfg & SONIC_RS_SERIALIZE_PRETTY != 0 { 91 | sonic_rs::to_string_pretty(value) 92 | } else { 93 | sonic_rs::to_string(value) 94 | }; 95 | 96 | match ret { 97 | Ok(json) => { 98 | let json = ManuallyDrop::new(json); 99 | let json = SonicCString { 100 | buf: json.as_ptr() as *const c_void, 101 | len: json.len(), 102 | }; 103 | SonicSerializeRet { 104 | json, 105 | err: SonicCString::default(), 106 | } 107 | } 108 | Err(e) => { 109 | // NOTE: should be dropped manually in the foreign caller 110 | let msg = ManuallyDrop::new(format!("{}\0", e)); 111 | let err = SonicCString { 112 | buf: msg.as_ptr() as *const c_void, 113 | len: msg.len(), 114 | }; 115 | SonicSerializeRet { 116 | json: SonicCString::default(), 117 | err, 118 | } 119 | } 120 | } 121 | } 122 | 123 | /// # Safety 124 | #[no_mangle] 125 | pub unsafe extern "C" fn sonic_rs_drop_value(value: *mut c_void) { 126 | std::mem::drop(Box::from_raw(value as *mut Value)); 127 | } 128 | 129 | /// # Safety 130 | #[no_mangle] 131 | pub unsafe extern "C" fn sonic_rs_drop_string(buf: *mut u8, len: u64) { 132 | let buf = Vec::::from_raw_parts(buf, len as usize, len as usize); 133 | std::mem::drop(buf); 134 | } 135 | -------------------------------------------------------------------------------- /benchmarks/benches/serialize_struct.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | use std::{fs::File, io::Read}; 4 | 5 | use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; 6 | use schema::{canada::Canada, citm_catalog::CitmCatalog, twitter::Twitter}; 7 | 8 | #[cfg(not(target_env = "msvc"))] 9 | #[global_allocator] 10 | static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; 11 | 12 | fn 
// Generates a Criterion benchmark function named `$name` that reads
// `benches/testdata/<name>.json`, sanity-checks sonic_rs against serde_json (skipped for
// "canada"), and then benchmarks `to_string` for sonic_rs, simd_json and serde_json on a
// value of type `$structure`.
macro_rules! bench_file {
    (json: $name:ident, structure: $structure:ty) => {
        fn $name(c: &mut Criterion) {
            // Pin the benchmark thread to the first reported core.
            let core_ids = core_affinity::get_core_ids().unwrap();
            core_affinity::set_for_current(core_ids[0]);

            let mut data = Vec::new();
            let root = env!("CARGO_MANIFEST_DIR").to_owned();
            File::open(root + concat!("/benches/testdata/", stringify!($name), ".json"))
                .unwrap()
                .read_to_end(&mut data)
                .unwrap();

            // verify sonic-rs parse
            if stringify!($name) != "canada" {
                let serde_val: $structure = serde_json::from_slice(&data).unwrap();
                let serde_out = serde_json::to_string_pretty(&serde_val).unwrap();

                let value: $structure = sonic_rs::from_slice(&data).unwrap();
                let out = sonic_rs::to_string_pretty(&value).unwrap();
                assert!(
                    diff_json(&out, &serde_out),
                    "sonic_rs failed in {}",
                    stringify!($name)
                );

                // simd_json parses in place, so it gets its own copy. Its output is only
                // produced here, not compared against serde_json.
                let mut data = data.clone();
                let value: $structure = simd_json::from_slice(&mut data).unwrap();
                let _out = simd_json::to_string_pretty(&value).unwrap();
            }

            let mut group = c.benchmark_group(stringify!($name));
            group.sampling_mode(SamplingMode::Flat);
            // Throughput only applies to benchmarks registered after it is set, so it has
            // to be configured before the `bench_with_input` calls below.
            group.throughput(Throughput::Bytes(data.len() as u64));

            let val: $structure = sonic_rs::from_slice(&data).unwrap();
            group.bench_with_input("sonic_rs::to_string", &val, |b, data| {
                b.iter_batched(
                    || data,
                    |val| sonic_rs::to_string(&val).unwrap(),
                    BatchSize::SmallInput,
                )
            });

            let mut data2 = data.clone();
            let val: $structure = simd_json::from_slice(&mut data2).unwrap();
            group.bench_with_input("simd_json::to_string", &val, |b, data| {
                b.iter_batched(
                    || data.clone(),
                    |val| simd_json::to_string(&val).unwrap(),
                    BatchSize::SmallInput,
                )
            });

            let val: $structure = serde_json::from_slice(&data).unwrap();
            group.bench_with_input("serde_json::to_string", &val, |b, data| {
                b.iter_batched(
                    || data,
                    |val| serde_json::to_string(&val).unwrap(),
                    BatchSize::SmallInput,
                )
            });

            group.finish();
        }
    };
}
24 | } 25 | 26 | unsafe fn storeu(&self, ptr: *mut u8) { 27 | let data = std::mem::transmute::<&[i8], &[u8]>(&self.0); 28 | std::ptr::copy_nonoverlapping(data.as_ptr(), ptr, Self::LANES); 29 | } 30 | 31 | fn eq(&self, rhs: &Self) -> Self::Mask { 32 | let mut mask = [0u8; 16]; 33 | for i in 0..Self::LANES { 34 | mask[i] = if self.0[i] == rhs.0[i] { 1 } else { 0 }; 35 | } 36 | Mask128(mask) 37 | } 38 | 39 | fn splat(value: i8) -> Self { 40 | Self([value as i8; Self::LANES]) 41 | } 42 | 43 | fn le(&self, rhs: &Self) -> Self::Mask { 44 | let mut mask = [0u8; 16]; 45 | for i in 0..Self::LANES { 46 | mask[i] = if self.0[i] <= rhs.0[i] { 1 } else { 0 }; 47 | } 48 | Mask128(mask) 49 | } 50 | 51 | fn gt(&self, rhs: &Self) -> Self::Mask { 52 | let mut mask = [0u8; 16]; 53 | for i in 0..Self::LANES { 54 | mask[i] = if self.0[i] > rhs.0[i] { 1 } else { 0 }; 55 | } 56 | Mask128(mask) 57 | } 58 | } 59 | 60 | impl Simd for Simd128u { 61 | type Element = u8; 62 | const LANES: usize = 16; 63 | type Mask = Mask128; 64 | 65 | unsafe fn loadu(ptr: *const u8) -> Self { 66 | let v = std::slice::from_raw_parts(ptr, Self::LANES); 67 | let mut res = [0u8; 16]; 68 | res.copy_from_slice(v); 69 | Self(res) 70 | } 71 | 72 | unsafe fn storeu(&self, ptr: *mut u8) { 73 | let data = &self.0; 74 | std::ptr::copy_nonoverlapping(data.as_ptr(), ptr, Self::LANES); 75 | } 76 | 77 | fn eq(&self, rhs: &Self) -> Self::Mask { 78 | let mut mask = [0u8; 16]; 79 | for i in 0..Self::LANES { 80 | mask[i] = if self.0[i] == rhs.0[i] { 1 } else { 0 }; 81 | } 82 | Mask128(mask) 83 | } 84 | 85 | fn splat(value: u8) -> Self { 86 | Self([value; Self::LANES]) 87 | } 88 | 89 | fn le(&self, rhs: &Self) -> Self::Mask { 90 | let mut mask = [0u8; 16]; 91 | for i in 0..Self::LANES { 92 | mask[i] = if self.0[i] <= rhs.0[i] { 1 } else { 0 }; 93 | } 94 | Mask128(mask) 95 | } 96 | 97 | fn gt(&self, rhs: &Self) -> Self::Mask { 98 | let mut mask = [0u8; 16]; 99 | for i in 0..Self::LANES { 100 | mask[i] = if self.0[i] > rhs.0[i] { 1 
} else { 0 }; 101 | } 102 | Mask128(mask) 103 | } 104 | } 105 | 106 | impl Mask for Mask128 { 107 | type BitMask = u16; 108 | type Element = u8; 109 | 110 | fn bitmask(self) -> Self::BitMask { 111 | #[cfg(target_endian = "little")] 112 | { 113 | self.0 114 | .iter() 115 | .enumerate() 116 | .fold(0, |acc, (i, &b)| acc | ((b as u16) << i)) 117 | } 118 | #[cfg(target_endian = "big")] 119 | { 120 | self.0 121 | .iter() 122 | .enumerate() 123 | .fold(0, |acc, (i, &b)| acc | ((b as u16) << (15 - i))) 124 | } 125 | } 126 | 127 | fn splat(b: bool) -> Self { 128 | Mask128([b as u8; 16]) 129 | } 130 | } 131 | 132 | impl BitAnd for Mask128 { 133 | type Output = Self; 134 | 135 | fn bitand(self, rhs: Self) -> Self::Output { 136 | let mut result = [0u8; 16]; 137 | for i in 0..16 { 138 | result[i] = self.0[i] & rhs.0[i]; 139 | } 140 | Mask128(result) 141 | } 142 | } 143 | 144 | impl BitOr for Mask128 { 145 | type Output = Self; 146 | 147 | fn bitor(self, rhs: Self) -> Self::Output { 148 | let mut result = [0u8; 16]; 149 | for i in 0..16 { 150 | result[i] = self.0[i] | rhs.0[i]; 151 | } 152 | Mask128(result) 153 | } 154 | } 155 | 156 | impl BitOrAssign for Mask128 { 157 | fn bitor_assign(&mut self, rhs: Self) { 158 | for i in 0..16 { 159 | self.0[i] |= rhs.0[i]; 160 | } 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /sonic-number/src/slow.rs: -------------------------------------------------------------------------------- 1 | //! Slow, fallback algorithm for cases the Eisel-Lemire algorithm cannot round. 2 | 3 | // The code is cloned from [rust-lang](https://github.com/rust-lang/rust) and modified necessary parts. 4 | 5 | use super::{ 6 | common::BiasedFp, 7 | decimal::{parse_decimal, Decimal}, 8 | float::RawFloat, 9 | }; 10 | 11 | /// Parse the significant digits and biased, binary exponent of a float. 
12 | /// 13 | /// This is a fallback algorithm that uses a big-integer representation 14 | /// of the float, and therefore is considerably slower than faster 15 | /// approximations. However, it will always determine how to round 16 | /// the significant digits to the nearest machine float, allowing 17 | /// use to handle near half-way cases. 18 | /// 19 | /// Near half-way cases are halfway between two consecutive machine floats. 20 | /// For example, the float `16777217.0` has a bitwise representation of 21 | /// `100000000000000000000000 1`. Rounding to a single-precision float, 22 | /// the trailing `1` is truncated. Using round-nearest, tie-even, any 23 | /// value above `16777217.0` must be rounded up to `16777218.0`, while 24 | /// any value before or equal to `16777217.0` must be rounded down 25 | /// to `16777216.0`. These near-halfway conversions therefore may require 26 | /// a large number of digits to unambiguously determine how to round. 27 | /// 28 | /// The algorithms described here are based on "Processing Long Numbers Quickly", 29 | /// available here: . 30 | pub(crate) fn parse_long_mantissa(s: &[u8]) -> BiasedFp { 31 | const MAX_SHIFT: usize = 60; 32 | const NUM_POWERS: usize = 19; 33 | const POWERS: [u8; 19] = [ 34 | 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, 35 | ]; 36 | 37 | let get_shift = |n| { 38 | if n < NUM_POWERS { 39 | POWERS[n] as usize 40 | } else { 41 | MAX_SHIFT 42 | } 43 | }; 44 | 45 | let fp_zero = BiasedFp::zero_pow2(0); 46 | let fp_inf = BiasedFp::zero_pow2(F::INFINITE_POWER); 47 | 48 | let mut d = parse_decimal(s); 49 | 50 | // Short-circuit if the value can only be a literal 0 or infinity. 51 | if d.num_digits == 0 || d.decimal_point < -324 { 52 | return fp_zero; 53 | } else if d.decimal_point >= 310 { 54 | return fp_inf; 55 | } 56 | let mut exp2 = 0_i32; 57 | // Shift right toward (1/2 ... 1]. 
58 | while d.decimal_point > 0 { 59 | let n = d.decimal_point as usize; 60 | let shift = get_shift(n); 61 | d.right_shift(shift); 62 | if d.decimal_point < -Decimal::DECIMAL_POINT_RANGE { 63 | return fp_zero; 64 | } 65 | exp2 += shift as i32; 66 | } 67 | // Shift left toward (1/2 ... 1]. 68 | while d.decimal_point <= 0 { 69 | let shift = if d.decimal_point == 0 { 70 | match d.digits[0] { 71 | digit if digit >= 5 => break, 72 | 0 | 1 => 2, 73 | _ => 1, 74 | } 75 | } else { 76 | get_shift((-d.decimal_point) as _) 77 | }; 78 | d.left_shift(shift); 79 | if d.decimal_point > Decimal::DECIMAL_POINT_RANGE { 80 | return fp_inf; 81 | } 82 | exp2 -= shift as i32; 83 | } 84 | // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. 85 | exp2 -= 1; 86 | while (F::MINIMUM_EXPONENT + 1) > exp2 { 87 | let mut n = ((F::MINIMUM_EXPONENT + 1) - exp2) as usize; 88 | if n > MAX_SHIFT { 89 | n = MAX_SHIFT; 90 | } 91 | d.right_shift(n); 92 | exp2 += n as i32; 93 | } 94 | if (exp2 - F::MINIMUM_EXPONENT) >= F::INFINITE_POWER { 95 | return fp_inf; 96 | } 97 | // Shift the decimal to the hidden bit, and then round the value 98 | // to get the high mantissa+1 bits. 99 | d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1); 100 | let mut mantissa = d.round(); 101 | if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) { 102 | // Rounding up overflowed to the carry bit, need to 103 | // shift back to the hidden bit. 104 | d.right_shift(1); 105 | exp2 += 1; 106 | mantissa = d.round(); 107 | if (exp2 - F::MINIMUM_EXPONENT) >= F::INFINITE_POWER { 108 | return fp_inf; 109 | } 110 | } 111 | let mut power2 = exp2 - F::MINIMUM_EXPONENT; 112 | if mantissa < (1_u64 << F::MANTISSA_EXPLICIT_BITS) { 113 | power2 -= 1; 114 | } 115 | // Zero out all the bits above the explicit mantissa bits. 
116 | mantissa &= (1_u64 << F::MANTISSA_EXPLICIT_BITS) - 1; 117 | BiasedFp { 118 | f: mantissa, 119 | e: power2, 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /benchmarks/benches/serialize_value.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use std::{fs::File, io::Read}; 5 | 6 | use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; 7 | 8 | include!("./common.rs"); 9 | 10 | #[cfg(not(target_env = "msvc"))] 11 | #[global_allocator] 12 | static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; 13 | 14 | fn simdjson_to_string(val: &simd_json::value::borrowed::Value) { 15 | let _ = simd_json::to_string(val).unwrap(); 16 | } 17 | 18 | fn serde_to_string(val: &serde_json::Value) { 19 | let _ = serde_json::to_string(val).unwrap(); 20 | } 21 | 22 | fn sonic_rs_to_string(val: &sonic_rs::Value) { 23 | let _ = sonic_rs::to_string(val).unwrap(); 24 | } 25 | 26 | fn diff_json(got: &str, expect: &str) -> bool { 27 | let value1: serde_json::Value = serde_json::from_str(got).unwrap(); 28 | let value2: serde_json::Value = serde_json::from_str(expect).unwrap(); 29 | 30 | fn write_to(file: &str, data: &str) -> std::io::Result<()> { 31 | use std::io::Write; 32 | let mut file = std::fs::File::create(file)?; 33 | file.write_all(data.as_bytes())?; 34 | Ok(()) 35 | } 36 | 37 | if value1 != value2 { 38 | write_to("got.json", got).unwrap(); 39 | write_to("expect.json", expect).unwrap(); 40 | false 41 | } else { 42 | true 43 | } 44 | } 45 | 46 | macro_rules! 
bench_file { 47 | ($name:ident) => { 48 | #[allow(unused)] 49 | fn $name(c: &mut Criterion) { 50 | let core_ids = core_affinity::get_core_ids().unwrap(); 51 | core_affinity::set_for_current(core_ids[0]); 52 | 53 | let mut data = Vec::new(); 54 | let root = env!("CARGO_MANIFEST_DIR").to_owned(); 55 | File::open(root + concat!("/benches/testdata/", stringify!($name), ".json")) 56 | .unwrap() 57 | .read_to_end(&mut data) 58 | .unwrap(); 59 | 60 | // verify sonic-rs parse 61 | if stringify!($name) != "canada" { 62 | let serde_out: serde_json::Value = serde_json::from_slice(&data).unwrap(); 63 | let expect = serde_json::to_string(&serde_out).unwrap(); 64 | 65 | let value: sonic_rs::Value = sonic_rs::from_slice(&data).unwrap(); 66 | let got = sonic_rs::to_string(&value).unwrap(); 67 | assert!( 68 | diff_json(&got, &expect), 69 | concat!("/benches/testdata/", stringify!($name)) 70 | ); 71 | } 72 | 73 | let mut group = c.benchmark_group(stringify!($name)); 74 | group.sampling_mode(SamplingMode::Flat); 75 | 76 | let value: sonic_rs::Value = do_sonic_rs_from_slice(&data, SONIC_DEFAULT_CFG).unwrap(); 77 | group.bench_with_input("sonic_rs::to_string", &value, |b, data| { 78 | b.iter_batched( 79 | || data, 80 | |val| sonic_rs_to_string(&val), 81 | BatchSize::SmallInput, 82 | ) 83 | }); 84 | 85 | let value: sonic_rs::Value = 86 | do_sonic_rs_from_slice(&data, SONIC_USE_RAWNUM_CFG).unwrap(); 87 | group.bench_with_input("sonic_rs::to_string_use_rawnum", &value, |b, data| { 88 | b.iter_batched( 89 | || data, 90 | |val| sonic_rs_to_string(&val), 91 | BatchSize::SmallInput, 92 | ) 93 | }); 94 | 95 | let value: serde_json::Value = serde_json::from_slice(&data).unwrap(); 96 | group.bench_with_input("serde_json::to_string", &value, |b, data| { 97 | b.iter_batched(|| data, |val| serde_to_string(&val), BatchSize::SmallInput) 98 | }); 99 | 100 | let mut copy = data.clone(); 101 | let value = simd_json::to_borrowed_value(&mut copy).unwrap(); 102 | 
group.bench_with_input("simd_json::to_string", &value, |b, data| { 103 | b.iter_batched( 104 | || data.clone(), 105 | |val| simdjson_to_string(&val), 106 | BatchSize::SmallInput, 107 | ) 108 | }); 109 | 110 | group.throughput(Throughput::Bytes(data.len() as u64)); 111 | } 112 | }; 113 | } 114 | 115 | bench_file!(book); 116 | bench_file!(canada); 117 | bench_file!(citm_catalog); 118 | bench_file!(twitter); 119 | bench_file!(github_events); 120 | 121 | // criterion_group!(benches, canada, otfcc, citm_catalog, twitter, lottie, github_events, 122 | // twitterescaped, book, poet, fgo); 123 | criterion_group!(benches, twitter, citm_catalog, canada); 124 | criterion_main!(benches); 125 | -------------------------------------------------------------------------------- /src/input.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use faststr::FastStr; 3 | 4 | use crate::{parser::as_str, util::private::Sealed}; 5 | 6 | /// JsonSlice is a wrapper for different json input. 7 | #[doc(hidden)] 8 | #[non_exhaustive] 9 | #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] 10 | pub enum JsonSlice<'de> { 11 | Raw(&'de [u8]), 12 | FastStr(FastStr), // note: FastStr maybe inlined and in the stack. 
13 | } 14 | 15 | impl<'de> JsonSlice<'de> { 16 | #[inline(always)] 17 | pub(crate) unsafe fn as_faststr(&self) -> FastStr { 18 | match self { 19 | JsonSlice::Raw(sub) => FastStr::new(as_str(sub)), 20 | JsonSlice::FastStr(f) => f.clone(), 21 | } 22 | } 23 | } 24 | 25 | impl Default for JsonSlice<'_> { 26 | fn default() -> Self { 27 | JsonSlice::Raw(&b"null"[..]) 28 | } 29 | } 30 | 31 | impl<'de> From for JsonSlice<'de> { 32 | fn from(value: FastStr) -> Self { 33 | JsonSlice::FastStr(value) 34 | } 35 | } 36 | 37 | impl<'de> From for JsonSlice<'de> { 38 | fn from(value: Bytes) -> Self { 39 | JsonSlice::FastStr(unsafe { FastStr::from_bytes_unchecked(value) }) 40 | } 41 | } 42 | 43 | impl<'de> From<&'de [u8]> for JsonSlice<'de> { 44 | fn from(value: &'de [u8]) -> Self { 45 | JsonSlice::Raw(value) 46 | } 47 | } 48 | 49 | impl<'de> From<&'de str> for JsonSlice<'de> { 50 | fn from(value: &'de str) -> Self { 51 | JsonSlice::Raw(value.as_bytes()) 52 | } 53 | } 54 | 55 | impl<'de> From<&'de String> for JsonSlice<'de> { 56 | fn from(value: &'de String) -> Self { 57 | JsonSlice::Raw(value.as_bytes()) 58 | } 59 | } 60 | 61 | impl From for JsonSlice<'_> { 62 | fn from(value: String) -> Self { 63 | JsonSlice::FastStr(FastStr::new(value)) 64 | } 65 | } 66 | 67 | impl<'de> AsRef<[u8]> for JsonSlice<'de> { 68 | fn as_ref(&self) -> &[u8] { 69 | match self { 70 | Self::Raw(r) => r, 71 | Self::FastStr(s) => s.as_bytes(), 72 | } 73 | } 74 | } 75 | 76 | /// A trait for string/bytes-like types that can be parsed into JSON. 
pub trait JsonInput<'de>: Sealed {
    /// Reports whether the input still needs UTF-8 validation. The implementations in this
    /// file return `true` for byte inputs (`&[u8]`, `&Bytes`) and `false` for string inputs
    /// (`&str`, `&String`, `&FastStr`).
    fn need_utf8_valid(&self) -> bool;
    /// Returns the whole input as a [`JsonSlice`].
    fn to_json_slice(&self) -> JsonSlice<'de>;
    /// Wraps `sub` in a [`JsonSlice`]. The `Bytes` and `FastStr` implementations call
    /// `slice_ref` on the input, so `sub` is presumably expected to lie inside the input's
    /// own buffer — verify against callers.
    #[allow(clippy::wrong_self_convention)]
    fn from_subset(&self, sub: &'de [u8]) -> JsonSlice<'de>;
    /// Returns the input's bytes, borrowed for `'de`.
    fn to_u8_slice(&self) -> &'de [u8];
}
157 | 158 | impl<'de> JsonInput<'de> for &'de String { 159 | fn need_utf8_valid(&self) -> bool { 160 | false 161 | } 162 | 163 | fn to_json_slice(&self) -> JsonSlice<'de> { 164 | JsonSlice::Raw(self.as_bytes()) 165 | } 166 | 167 | fn from_subset(&self, sub: &'de [u8]) -> JsonSlice<'de> { 168 | sub.into() 169 | } 170 | 171 | fn to_u8_slice(&self) -> &'de [u8] { 172 | (*self).as_bytes() 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /sonic-simd/src/v256.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{BitAnd, BitOr, BitOrAssign}; 2 | 3 | use super::{Mask, Mask128, Simd, Simd128i, Simd128u}; 4 | 5 | #[derive(Debug)] 6 | #[repr(transparent)] 7 | pub struct Simd256u((Simd128u, Simd128u)); 8 | 9 | #[derive(Debug)] 10 | #[repr(transparent)] 11 | pub struct Simd256i((Simd128i, Simd128i)); 12 | 13 | #[derive(Debug)] 14 | #[repr(transparent)] 15 | pub struct Mask256(pub(crate) (Mask128, Mask128)); 16 | 17 | impl Mask for Mask256 { 18 | type BitMask = u32; 19 | type Element = u8; 20 | 21 | #[inline(always)] 22 | fn bitmask(self) -> Self::BitMask { 23 | cfg_if::cfg_if! 
{ 24 | if #[cfg(all(target_feature="neon", target_arch="aarch64"))] { 25 | let(v0, v1) = self.0; 26 | unsafe { super::neon::to_bitmask32(v0.0, v1.0) } 27 | } else { 28 | fn combine_u16(lo: u16, hi: u16) -> u32 { 29 | #[cfg(target_endian = "little")] 30 | { 31 | (lo as u32) | ((hi as u32) << 16) 32 | } 33 | #[cfg(target_endian = "big")] 34 | { 35 | (hi as u32) | ((lo as u32) << 16) 36 | } 37 | } 38 | combine_u16(self.0 .0.bitmask(), self.0 .1.bitmask()) 39 | } 40 | } 41 | } 42 | 43 | #[inline(always)] 44 | fn splat(b: bool) -> Self { 45 | Mask256((Mask128::splat(b), Mask128::splat(b))) 46 | } 47 | } 48 | 49 | impl BitOr for Mask256 { 50 | type Output = Self; 51 | 52 | #[inline(always)] 53 | fn bitor(self, rhs: Self) -> Self::Output { 54 | let lo = self.0 .0 | rhs.0 .0; 55 | let hi = self.0 .1 | rhs.0 .1; 56 | Mask256((lo, hi)) 57 | } 58 | } 59 | 60 | impl BitOrAssign for Mask256 { 61 | #[inline(always)] 62 | fn bitor_assign(&mut self, rhs: Self) { 63 | self.0 .0 |= rhs.0 .0; 64 | self.0 .1 |= rhs.0 .1; 65 | } 66 | } 67 | 68 | impl BitAnd for Mask256 { 69 | type Output = Self; 70 | 71 | #[inline(always)] 72 | fn bitand(self, rhs: Mask256) -> Self::Output { 73 | let lo = self.0 .0 & rhs.0 .0; 74 | let hi = self.0 .1 & rhs.0 .1; 75 | Mask256((lo, hi)) 76 | } 77 | } 78 | 79 | impl Simd for Simd256u { 80 | const LANES: usize = 32; 81 | 82 | type Mask = Mask256; 83 | type Element = u8; 84 | 85 | #[inline(always)] 86 | unsafe fn loadu(ptr: *const u8) -> Self { 87 | let lo = Simd128u::loadu(ptr); 88 | let hi = Simd128u::loadu(ptr.add(Simd128u::LANES)); 89 | Simd256u((lo, hi)) 90 | } 91 | 92 | #[inline(always)] 93 | unsafe fn storeu(&self, ptr: *mut u8) { 94 | Simd128u::storeu(&self.0 .0, ptr); 95 | Simd128u::storeu(&self.0 .1, ptr.add(Simd128u::LANES)); 96 | } 97 | 98 | #[inline(always)] 99 | fn eq(&self, rhs: &Self) -> Self::Mask { 100 | let lo = self.0 .0.eq(&rhs.0 .0); 101 | let hi = self.0 .1.eq(&rhs.0 .1); 102 | Mask256((lo, hi)) 103 | } 104 | 105 | #[inline(always)] 
106 | fn splat(elem: u8) -> Self { 107 | Simd256u((Simd128u::splat(elem), Simd128u::splat(elem))) 108 | } 109 | 110 | #[inline(always)] 111 | fn le(&self, rhs: &Self) -> Self::Mask { 112 | let lo = self.0 .0.le(&rhs.0 .0); 113 | let hi = self.0 .1.le(&rhs.0 .1); 114 | Mask256((lo, hi)) 115 | } 116 | 117 | #[inline(always)] 118 | fn gt(&self, rhs: &Self) -> Self::Mask { 119 | let lo = self.0 .0.gt(&rhs.0 .0); 120 | let hi = self.0 .1.gt(&rhs.0 .1); 121 | Mask256((lo, hi)) 122 | } 123 | } 124 | 125 | impl Simd for Simd256i { 126 | const LANES: usize = 32; 127 | 128 | type Mask = Mask256; 129 | type Element = i8; 130 | 131 | #[inline(always)] 132 | unsafe fn loadu(ptr: *const u8) -> Self { 133 | let lo = Simd128i::loadu(ptr); 134 | let hi = Simd128i::loadu(ptr.add(Simd128i::LANES)); 135 | Simd256i((lo, hi)) 136 | } 137 | 138 | #[inline(always)] 139 | unsafe fn storeu(&self, ptr: *mut u8) { 140 | Simd128i::storeu(&self.0 .0, ptr); 141 | Simd128i::storeu(&self.0 .1, ptr.add(Simd128i::LANES)); 142 | } 143 | 144 | #[inline(always)] 145 | fn eq(&self, rhs: &Self) -> Self::Mask { 146 | let lo = self.0 .0.eq(&rhs.0 .0); 147 | let hi = self.0 .1.eq(&rhs.0 .1); 148 | Mask256((lo, hi)) 149 | } 150 | 151 | #[inline(always)] 152 | fn splat(elem: i8) -> Self { 153 | Simd256i((Simd128i::splat(elem), Simd128i::splat(elem))) 154 | } 155 | 156 | #[inline(always)] 157 | fn le(&self, rhs: &Self) -> Self::Mask { 158 | let lo = self.0 .0.le(&rhs.0 .0); 159 | let hi = self.0 .1.le(&rhs.0 .1); 160 | Mask256((lo, hi)) 161 | } 162 | 163 | #[inline(always)] 164 | fn gt(&self, rhs: &Self) -> Self::Mask { 165 | let lo = self.0 .0.gt(&rhs.0 .0); 166 | let hi = self.0 .1.gt(&rhs.0 .1); 167 | Mask256((lo, hi)) 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/serde/rawnumber.rs: -------------------------------------------------------------------------------- 1 | use ::serde::{ 2 | de, de::Visitor, ser::SerializeStruct, Deserialize, 
/// Represents a JSON number with arbitrary precision. The number is kept as its original
/// text, similar to Golang's `json.Number`.
///
/// Equality and hashing are derived from the stored text, so two different spellings of the
/// same numeric value are not equal.
///
/// # Examples
///
/// ```
/// use sonic_rs::RawNumber;
///
/// use crate::sonic_rs::JsonNumberTrait;
///
/// // RawNumber can be parsed from a JSON number text.
/// let num: RawNumber = sonic_rs::from_str("123").unwrap();
/// assert_eq!(num.as_i64(), Some(123));
/// assert_eq!(num.as_str(), "123");
///
/// // RawNumber can be parsed from a JSON string text that contains a number.
/// let num: RawNumber =
///     sonic_rs::from_str("\"1.2333333333333333333333333333333333333333\"").unwrap();
/// assert_eq!(num.as_f64(), Some(1.2333333333333334));
/// assert_eq!(num.as_str(), "1.2333333333333333333333333333333333333333");
/// ```
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct RawNumber {
    // The number's text, as returned by `as_str`.
    n: FastStr,
}
46 | pub fn as_str(&self) -> &str { 47 | self.n.as_str() 48 | } 49 | } 50 | 51 | pub(crate) const TOKEN: &str = "$sonic_rs::private::JsonNumber"; 52 | 53 | impl<'de> Deserialize<'de> for RawNumber { 54 | #[inline] 55 | fn deserialize(deserializer: D) -> Result 56 | where 57 | D: Deserializer<'de>, 58 | { 59 | struct JsonNumberVisitor; 60 | 61 | impl<'de> Visitor<'de> for JsonNumberVisitor { 62 | type Value = RawNumber; 63 | 64 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 65 | formatter.write_str("a JSON number") 66 | } 67 | 68 | fn visit_borrowed_str(self, raw: &'de str) -> Result 69 | where 70 | E: de::Error, 71 | { 72 | Ok(RawNumber::new(raw)) 73 | } 74 | } 75 | 76 | deserializer.deserialize_newtype_struct(TOKEN, JsonNumberVisitor) 77 | } 78 | } 79 | 80 | impl Serialize for RawNumber { 81 | fn serialize(&self, serializer: S) -> Result 82 | where 83 | S: Serializer, 84 | { 85 | let mut s = serializer.serialize_struct(TOKEN, 1)?; 86 | s.serialize_field(TOKEN, &self.n)?; 87 | s.end() 88 | } 89 | } 90 | 91 | impl Sealed for RawNumber {} 92 | 93 | impl JsonNumberTrait for RawNumber { 94 | /// Returns true if the `Number` is an integer between `i64::MIN` and 95 | /// `i64::MAX`. 96 | /// 97 | /// For any Number on which `is_i64` returns true, `as_i64` is guaranteed to 98 | /// return the integer value. 99 | #[inline] 100 | fn is_i64(&self) -> bool { 101 | self.as_i64().is_some() 102 | } 103 | 104 | /// Returns true if the `Number` is an integer between zero and `u64::MAX`. 105 | /// 106 | /// For any Number on which `is_u64` returns true, `as_u64` is guaranteed to 107 | /// return the integer value. 108 | #[inline] 109 | fn is_u64(&self) -> bool { 110 | self.as_u64().is_some() 111 | } 112 | 113 | /// Returns true if the `Number` can be represented by f64. 114 | /// 115 | /// For any Number on which `is_f64` returns true, `as_f64` is guaranteed to 116 | /// return the floating point value. 
117 | /// 118 | /// Currently this function returns true if and only if both `is_i64` and 119 | /// `is_u64` return false but this is not a guarantee in the future. 120 | #[inline] 121 | fn is_f64(&self) -> bool { 122 | self.as_f64().is_some() 123 | } 124 | 125 | /// If the `Number` is an integer, represent it as i64 if possible. Returns 126 | /// None otherwise. 127 | #[inline] 128 | fn as_i64(&self) -> Option { 129 | self.n.parse().ok() 130 | } 131 | 132 | /// If the `Number` is an integer, represent it as u64 if possible. Returns 133 | /// None otherwise. 134 | #[inline] 135 | fn as_u64(&self) -> Option { 136 | self.n.parse().ok() 137 | } 138 | 139 | /// Represents the number as finite f64 if possible. Returns None otherwise. 140 | #[inline] 141 | fn as_f64(&self) -> Option { 142 | self.n.parse::().ok().filter(|float| float.is_finite()) 143 | } 144 | } 145 | 146 | impl TryFrom for Number { 147 | type Error = Error; 148 | 149 | fn try_from(value: RawNumber) -> Result { 150 | let num: Number = crate::from_str(value.n.as_str())?; 151 | Ok(num) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/pointer/tree.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use faststr::FastStr; 4 | 5 | use crate::index::Index; 6 | 7 | /// PointerTree is designed for [`get_many`][`crate::get_many`] and 8 | /// [`get_many_unchecked`][`crate::get_many_unchecked`]. 9 | /// 10 | /// It is recommended to use `get_many` when you need to get multiple values from json. Instead of 11 | /// using `get` multiple times. 
12 | /// 13 | /// # Examples 14 | /// 15 | /// ``` 16 | /// # use sonic_rs::pointer; 17 | /// # use sonic_rs::PointerTree; 18 | /// 19 | /// let json = r#" 20 | /// {"u": 123, "a": {"b" : {"c": [null, "found"]}}}"#; 21 | /// 22 | /// // build a pointer tree, representing multiple json path 23 | /// let mut tree = PointerTree::new(); 24 | /// 25 | /// tree.add_path(&["u"]); 26 | /// tree.add_path(&["unknown_key"]); 27 | /// tree.add_path(&pointer!["a", "b", "c", 1]); 28 | /// 29 | /// let nodes = unsafe { sonic_rs::get_many_unchecked(json, &tree) }; 30 | /// 31 | /// match nodes { 32 | /// Ok(vals) => { 33 | /// assert_eq!(vals[0].as_ref().unwrap().as_raw_str(), "123"); 34 | /// assert!(vals[1].is_none()); 35 | /// assert_eq!(vals[2].as_ref().unwrap().as_raw_str(), "\"found\""); 36 | /// for val in vals { 37 | /// match val { 38 | /// Some(_) => println!("{}", val.as_ref().unwrap().as_raw_str()), 39 | /// None => println!("None"), 40 | /// }; 41 | /// } 42 | /// } 43 | /// Err(e) => { 44 | /// println!("err: {:?}", e) 45 | /// } 46 | /// } 47 | /// ``` 48 | 49 | #[derive(Debug, Default)] 50 | pub struct PointerTree { 51 | // the count of path 52 | size: usize, 53 | // the root of tree 54 | pub(crate) root: PointerTreeNode, 55 | } 56 | 57 | impl PointerTree { 58 | /// Creat a empty tree. If `get_many` from empty tree, it will return the whole json. 59 | pub fn new() -> Self { 60 | Self::default() 61 | } 62 | 63 | /// we build tree and return value according by the order of path. 64 | /// Allow the repeated path. 
65 | pub fn add_path(&mut self, path: Path) 66 | where 67 | Path::Item: Index, 68 | { 69 | self.root.add_path(path, self.size); 70 | self.size += 1; 71 | } 72 | 73 | /// the count of nodes 74 | pub fn size(&self) -> usize { 75 | self.size 76 | } 77 | } 78 | 79 | #[derive(Debug, Default)] 80 | pub(crate) enum PointerTreeInner { 81 | #[default] 82 | Empty, 83 | Key(MultiKey), 84 | Index(MultiIndex), 85 | } 86 | 87 | // Note: support the repeat path 88 | #[derive(Debug, Default)] 89 | pub(crate) struct PointerTreeNode { 90 | pub(crate) order: Vec, 91 | pub(crate) children: PointerTreeInner, 92 | } 93 | 94 | impl PointerTreeNode { 95 | pub fn add_path(&mut self, path: Path, order: usize) 96 | where 97 | Path::Item: Index, 98 | { 99 | let mut cur = self; 100 | let iter = path.into_iter(); 101 | for p in iter { 102 | if let Some(key) = p.as_key() { 103 | if matches!(cur.children, PointerTreeInner::Empty) { 104 | cur.children = PointerTreeInner::Key(HashMap::new()); 105 | } 106 | cur = cur.insert_key(key) 107 | } else if let Some(index) = p.as_index() { 108 | if matches!(cur.children, PointerTreeInner::Empty) { 109 | cur.children = PointerTreeInner::Index(HashMap::new()); 110 | } 111 | cur = cur.insert_index(index) 112 | } 113 | } 114 | cur.order.push(order); 115 | } 116 | 117 | fn insert_key(&mut self, key: &str) -> &mut Self { 118 | if let PointerTreeInner::Key(mkey) = &mut self.children { 119 | mkey.entry(FastStr::new(key)).or_insert(Self::default()) 120 | } else { 121 | unreachable!() 122 | } 123 | } 124 | 125 | fn insert_index(&mut self, idx: usize) -> &mut Self { 126 | if let PointerTreeInner::Index(midx) = &mut self.children { 127 | midx.entry(idx).or_insert(Self::default()) 128 | } else { 129 | unreachable!() 130 | } 131 | } 132 | } 133 | 134 | #[allow(clippy::mutable_key_type)] 135 | pub(crate) type MultiKey = HashMap; 136 | 137 | pub(crate) type MultiIndex = HashMap; 138 | 139 | #[cfg(test)] 140 | mod test { 141 | use super::*; 142 | use crate::pointer; 143 | 
144 | #[test] 145 | fn test_tree() { 146 | let mut tree = PointerTree::default(); 147 | tree.add_path(["a", "a_b", "a_b_c"].iter()); 148 | tree.add_path(["a", "a_b"].iter()); 149 | tree.add_path(pointer!["a", "a_a", 1].iter()); 150 | tree.add_path(pointer!["a"].iter()); 151 | tree.add_path(pointer!["a"].iter()); 152 | tree.add_path(pointer!["b", 2].iter()); 153 | tree.add_path(pointer![].iter()); 154 | assert_eq!(tree.size(), 7); 155 | println!("tree is {tree:#?}"); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /sonic-simd/src/v512.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{BitAnd, BitOr, BitOrAssign}; 2 | 3 | use super::{Mask, Mask256, Simd, Simd256i, Simd256u}; 4 | 5 | #[derive(Debug)] 6 | #[repr(transparent)] 7 | pub struct Simd512u((Simd256u, Simd256u)); 8 | 9 | #[derive(Debug)] 10 | #[repr(transparent)] 11 | pub struct Simd512i((Simd256i, Simd256i)); 12 | 13 | #[derive(Debug)] 14 | #[repr(transparent)] 15 | pub struct Mask512((Mask256, Mask256)); 16 | 17 | impl Mask for Mask512 { 18 | type BitMask = u64; 19 | type Element = u8; 20 | 21 | #[inline(always)] 22 | fn bitmask(self) -> Self::BitMask { 23 | cfg_if::cfg_if! 
{ 24 | if #[cfg(all(target_feature="neon", target_arch="aarch64"))] { 25 | let (v0, v1) = self.0; 26 | let (m0, m1) = v0.0; 27 | let (m2, m3) = v1.0; 28 | unsafe { super::neon::to_bitmask64(m0.0, m1.0, m2.0, m3.0) } 29 | } else { 30 | fn combine_u32(lo: u32, hi: u32) -> u64 { 31 | #[cfg(target_endian = "little")] 32 | { 33 | (lo as u64) | ((hi as u64) << 32) 34 | } 35 | #[cfg(target_endian = "big")] 36 | { 37 | (hi as u64) | ((lo as u64) << 32) 38 | } 39 | } 40 | combine_u32(self.0 .0.bitmask(), self.0 .1.bitmask()) 41 | } 42 | } 43 | } 44 | 45 | #[inline(always)] 46 | fn splat(b: bool) -> Self { 47 | Mask512((Mask256::splat(b), Mask256::splat(b))) 48 | } 49 | } 50 | 51 | impl BitOr for Mask512 { 52 | type Output = Self; 53 | 54 | #[inline(always)] 55 | fn bitor(self, rhs: Self) -> Self::Output { 56 | let lo = self.0 .0 | rhs.0 .0; 57 | let hi = self.0 .1 | rhs.0 .1; 58 | Mask512((lo, hi)) 59 | } 60 | } 61 | 62 | impl BitOrAssign for Mask512 { 63 | #[inline(always)] 64 | fn bitor_assign(&mut self, rhs: Self) { 65 | self.0 .0 |= rhs.0 .0; 66 | self.0 .1 |= rhs.0 .1; 67 | } 68 | } 69 | 70 | impl BitAnd for Mask512 { 71 | type Output = Self; 72 | 73 | #[inline(always)] 74 | fn bitand(self, rhs: Mask512) -> Self::Output { 75 | let lo = self.0 .0 & rhs.0 .0; 76 | let hi = self.0 .1 & rhs.0 .1; 77 | Mask512((lo, hi)) 78 | } 79 | } 80 | 81 | impl Simd for Simd512u { 82 | const LANES: usize = 64; 83 | type Element = u8; 84 | type Mask = Mask512; 85 | 86 | #[inline(always)] 87 | unsafe fn loadu(ptr: *const u8) -> Self { 88 | let lo = Simd256u::loadu(ptr); 89 | let hi = Simd256u::loadu(ptr.add(Simd256u::LANES)); 90 | Simd512u((lo, hi)) 91 | } 92 | 93 | #[inline(always)] 94 | unsafe fn storeu(&self, ptr: *mut u8) { 95 | Simd256u::storeu(&self.0 .0, ptr); 96 | Simd256u::storeu(&self.0 .1, ptr.add(Simd256u::LANES)); 97 | } 98 | 99 | #[inline(always)] 100 | fn eq(&self, rhs: &Self) -> Self::Mask { 101 | let lo = self.0 .0.eq(&rhs.0 .0); 102 | let hi = self.0 .1.eq(&rhs.0 .1); 
103 | Mask512((lo, hi)) 104 | } 105 | 106 | #[inline(always)] 107 | fn splat(ch: u8) -> Self { 108 | Simd512u((Simd256u::splat(ch), Simd256u::splat(ch))) 109 | } 110 | 111 | #[inline(always)] 112 | fn le(&self, rhs: &Self) -> Self::Mask { 113 | let lo = self.0 .0.le(&rhs.0 .0); 114 | let hi = self.0 .1.le(&rhs.0 .1); 115 | Mask512((lo, hi)) 116 | } 117 | 118 | #[inline(always)] 119 | fn gt(&self, rhs: &Self) -> Self::Mask { 120 | let lo = self.0 .0.gt(&rhs.0 .0); 121 | let hi = self.0 .1.gt(&rhs.0 .1); 122 | Mask512((lo, hi)) 123 | } 124 | } 125 | 126 | impl Simd for Simd512i { 127 | const LANES: usize = 64; 128 | type Element = i8; 129 | 130 | type Mask = Mask512; 131 | 132 | #[inline(always)] 133 | unsafe fn loadu(ptr: *const u8) -> Self { 134 | let lo = Simd256i::loadu(ptr); 135 | let hi = Simd256i::loadu(ptr.add(Simd256i::LANES)); 136 | Simd512i((lo, hi)) 137 | } 138 | 139 | #[inline(always)] 140 | unsafe fn storeu(&self, ptr: *mut u8) { 141 | Simd256i::storeu(&self.0 .0, ptr); 142 | Simd256i::storeu(&self.0 .1, ptr.add(Simd256i::LANES)); 143 | } 144 | 145 | #[inline(always)] 146 | fn eq(&self, rhs: &Self) -> Self::Mask { 147 | let lo = self.0 .0.eq(&rhs.0 .0); 148 | let hi = self.0 .1.eq(&rhs.0 .1); 149 | Mask512((lo, hi)) 150 | } 151 | 152 | #[inline(always)] 153 | fn splat(elem: i8) -> Self { 154 | Simd512i((Simd256i::splat(elem), Simd256i::splat(elem))) 155 | } 156 | 157 | #[inline(always)] 158 | fn le(&self, rhs: &Self) -> Self::Mask { 159 | let lo = self.0 .0.le(&rhs.0 .0); 160 | let hi = self.0 .1.le(&rhs.0 .1); 161 | Mask512((lo, hi)) 162 | } 163 | 164 | #[inline(always)] 165 | fn gt(&self, rhs: &Self) -> Self::Mask { 166 | let lo = self.0 .0.gt(&rhs.0 .0); 167 | let hi = self.0 .1.gt(&rhs.0 .1); 168 | Mask512((lo, hi)) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /sonic-simd/src/neon.rs: -------------------------------------------------------------------------------- 1 | use 
std::arch::aarch64::*; 2 | 3 | use super::{bits::NeonBits, Mask, Simd}; 4 | 5 | #[derive(Debug)] 6 | #[repr(transparent)] 7 | pub struct Simd128u(uint8x16_t); 8 | 9 | #[derive(Debug)] 10 | #[repr(transparent)] 11 | pub struct Simd128i(int8x16_t); 12 | 13 | impl Simd for Simd128u { 14 | const LANES: usize = 16; 15 | type Mask = Mask128; 16 | type Element = u8; 17 | 18 | #[inline(always)] 19 | unsafe fn loadu(ptr: *const u8) -> Self { 20 | Self(vld1q_u8(ptr)) 21 | } 22 | 23 | #[inline(always)] 24 | unsafe fn storeu(&self, ptr: *mut u8) { 25 | vst1q_u8(ptr, self.0); 26 | } 27 | 28 | #[inline(always)] 29 | fn eq(&self, lhs: &Self) -> Self::Mask { 30 | unsafe { Mask128(vceqq_u8(self.0, lhs.0)) } 31 | } 32 | 33 | #[inline(always)] 34 | fn splat(ch: u8) -> Self { 35 | unsafe { Self(vdupq_n_u8(ch)) } 36 | } 37 | 38 | // less or equal 39 | #[inline(always)] 40 | fn le(&self, lhs: &Self) -> Self::Mask { 41 | unsafe { Mask128(vcleq_u8(self.0, lhs.0)) } 42 | } 43 | 44 | // greater than 45 | #[inline(always)] 46 | fn gt(&self, lhs: &Self) -> Self::Mask { 47 | unsafe { Mask128(vcgtq_u8(self.0, lhs.0)) } 48 | } 49 | } 50 | 51 | impl Simd for Simd128i { 52 | const LANES: usize = 16; 53 | type Mask = Mask128; 54 | type Element = i8; 55 | 56 | #[inline(always)] 57 | unsafe fn loadu(ptr: *const u8) -> Self { 58 | Self(vld1q_s8(ptr as *const i8)) 59 | } 60 | 61 | #[inline(always)] 62 | unsafe fn storeu(&self, ptr: *mut u8) { 63 | vst1q_s8(ptr as *mut i8, self.0); 64 | } 65 | 66 | #[inline(always)] 67 | fn eq(&self, lhs: &Self) -> Self::Mask { 68 | unsafe { Mask128(vceqq_s8(self.0, lhs.0)) } 69 | } 70 | 71 | #[inline(always)] 72 | fn splat(elem: i8) -> Self { 73 | unsafe { Self(vdupq_n_s8(elem)) } 74 | } 75 | 76 | // less or equal 77 | #[inline(always)] 78 | fn le(&self, lhs: &Self) -> Self::Mask { 79 | unsafe { Mask128(vcleq_s8(self.0, lhs.0)) } 80 | } 81 | 82 | // greater than 83 | #[inline(always)] 84 | fn gt(&self, lhs: &Self) -> Self::Mask { 85 | unsafe { Mask128(vcgtq_s8(self.0, 
lhs.0)) } 86 | } 87 | } 88 | 89 | pub(crate) const BIT_MASK_TAB: [u8; 16] = [ 90 | 0x01u8, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 91 | ]; 92 | 93 | #[derive(Debug)] 94 | #[repr(transparent)] 95 | pub struct Mask128(pub(crate) uint8x16_t); 96 | 97 | impl Mask for Mask128 { 98 | type BitMask = NeonBits; 99 | type Element = u8; 100 | 101 | /// Convert Mask Vector 0x00-ff-ff to Bits 0b0000-1111-1111 102 | /// Reference: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon 103 | #[inline(always)] 104 | fn bitmask(self) -> Self::BitMask { 105 | unsafe { 106 | let v16 = vreinterpretq_u16_u8(self.0); 107 | let sr4 = vshrn_n_u16(v16, 4); 108 | let v64 = vreinterpret_u64_u8(sr4); 109 | NeonBits::new(vget_lane_u64(v64, 0)) 110 | } 111 | } 112 | 113 | #[inline(always)] 114 | fn splat(b: bool) -> Self { 115 | let v: i8 = if b { -1 } else { 0 }; 116 | unsafe { Self(vdupq_n_u8(v as u8)) } 117 | } 118 | } 119 | 120 | // Bitwise AND for Mask128 121 | impl std::ops::BitAnd for Mask128 { 122 | type Output = Self; 123 | 124 | #[inline(always)] 125 | fn bitand(self, rhs: Mask128) -> Self::Output { 126 | unsafe { Self(vandq_u8(self.0, rhs.0)) } 127 | } 128 | } 129 | 130 | // Bitwise OR for Mask128 131 | impl std::ops::BitOr for Mask128 { 132 | type Output = Self; 133 | 134 | #[inline(always)] 135 | fn bitor(self, rhs: Mask128) -> Self::Output { 136 | unsafe { Self(vorrq_u8(self.0, rhs.0)) } 137 | } 138 | } 139 | 140 | // Bitwise OR assignment for Mask128 141 | impl std::ops::BitOrAssign for Mask128 { 142 | #[inline(always)] 143 | fn bitor_assign(&mut self, rhs: Mask128) { 144 | unsafe { 145 | self.0 = vorrq_u8(self.0, rhs.0); 146 | } 147 | } 148 | } 149 | 150 | #[inline(always)] 151 | pub unsafe fn to_bitmask64(v0: uint8x16_t, v1: uint8x16_t, v2: uint8x16_t, v3: uint8x16_t) -> u64 { 152 | let bit_mask = std::mem::transmute::<[u8; 16], 
uint8x16_t>(BIT_MASK_TAB); 153 | 154 | let t0 = vandq_u8(v0, bit_mask); 155 | let t1 = vandq_u8(v1, bit_mask); 156 | let t2 = vandq_u8(v2, bit_mask); 157 | let t3 = vandq_u8(v3, bit_mask); 158 | 159 | let pair0 = vpaddq_u8(t0, t1); 160 | let pair1 = vpaddq_u8(t2, t3); 161 | let quad = vpaddq_u8(pair0, pair1); 162 | let octa = vpaddq_u8(quad, quad); 163 | 164 | vgetq_lane_u64(vreinterpretq_u64_u8(octa), 0) 165 | } 166 | 167 | #[inline(always)] 168 | pub(crate) unsafe fn to_bitmask32(v0: uint8x16_t, v1: uint8x16_t) -> u32 { 169 | let bit_mask = std::mem::transmute::<[u8; 16], uint8x16_t>(BIT_MASK_TAB); 170 | 171 | let t0 = vandq_u8(v0, bit_mask); 172 | let t1 = vandq_u8(v1, bit_mask); 173 | 174 | let pair = vpaddq_u8(t0, t1); 175 | let quad = vpaddq_u8(pair, pair); 176 | let octa = vpaddq_u8(quad, quad); 177 | 178 | vgetq_lane_u32(vreinterpretq_u32_u8(octa), 0) 179 | } 180 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | conduct@cloudwego.io. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
129 | -------------------------------------------------------------------------------- /benchmarks/benches/deserialize_struct.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use std::{fs::File, io::Read, str::from_utf8_unchecked}; 5 | 6 | use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; 7 | use schema::{canada::Canada, citm_catalog::CitmCatalog, twitter::Twitter}; 8 | 9 | #[cfg(not(target_env = "msvc"))] 10 | #[global_allocator] 11 | static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; 12 | 13 | fn serde_json_parse_struct<'de, T>(data: &'de [u8]) -> serde_json::Result 14 | where 15 | T: serde::Deserialize<'de>, 16 | { 17 | serde_json::from_slice::(data) 18 | } 19 | 20 | fn serde_json_parse_struct_from_str<'de, T>(data: &'de [u8]) -> serde_json::Result 21 | where 22 | T: serde::Deserialize<'de>, 23 | { 24 | let data = unsafe { from_utf8_unchecked(data) }; 25 | serde_json::from_str::(data) 26 | } 27 | 28 | fn sonic_rs_parse_struct<'de, T>(data: &'de [u8]) -> sonic_rs::Result 29 | where 30 | T: serde::Deserialize<'de>, 31 | { 32 | sonic_rs::from_slice::(data) 33 | } 34 | 35 | fn sonic_rs_parse_struct_unchecked<'de, T>(data: &'de [u8]) -> sonic_rs::Result 36 | where 37 | T: serde::Deserialize<'de>, 38 | { 39 | unsafe { sonic_rs::from_slice_unchecked::(data) } 40 | } 41 | 42 | fn simd_json_parse_struct<'de, T>(data: &'de mut [u8]) -> simd_json::Result 43 | where 44 | T: serde::Deserialize<'de>, 45 | { 46 | simd_json::serde::from_slice::(data) 47 | } 48 | 49 | fn diff_json(got: &str, expect: &str) -> bool { 50 | let value1: serde_json::Value = serde_json::from_str(got).unwrap(); 51 | let value2: serde_json::Value = serde_json::from_str(expect).unwrap(); 52 | 53 | fn write_to(file: &str, data: &str) -> std::io::Result<()> { 54 | use std::io::Write; 55 | let mut file = std::fs::File::create(file)?; 56 | file.write_all(data.as_bytes())?; 57 | 
Ok(()) 58 | } 59 | 60 | if value1 != value2 { 61 | write_to("got.json", got).unwrap(); 62 | write_to("expect.json", expect).unwrap(); 63 | false 64 | } else { 65 | true 66 | } 67 | } 68 | // Generates `bench_<json>_<structure>`: loads benches/testdata/<json>.json, checks that sonic_rs and serde_json produce equal JSON, then benchmarks each parser on the same bytes. 69 | macro_rules! bench_file { 70 | (json: $name:ident, structure: $structure:ty) => { 71 | paste::item! { 72 | #[allow(non_snake_case)] 73 | fn [< bench_ $name _ $structure >](c: &mut Criterion) { 74 | let core_ids = core_affinity::get_core_ids().unwrap(); 75 | core_affinity::set_for_current(core_ids[0]); 76 | 77 | let mut vec = Vec::new(); 78 | let root = env!("CARGO_MANIFEST_DIR").to_owned(); 79 | File::open(root + concat!("/benches/testdata/", stringify!($name), ".json")) 80 | .unwrap() 81 | .read_to_end(&mut vec) 82 | .unwrap(); 83 | 84 | // verify sonic-rs parse 85 | let serde_val: $structure = serde_json::from_slice(&vec).unwrap(); 86 | let serde_out = serde_json::to_string_pretty(&serde_val).unwrap(); 87 | 88 | let value : $structure = sonic_rs::from_slice(&vec).unwrap(); 89 | let out = sonic_rs::to_string_pretty(&value).unwrap(); 90 | assert!(diff_json(&out, &serde_out)); 91 | 92 | let mut group = c.benchmark_group(stringify!($name)); 93 | group.sampling_mode(SamplingMode::Flat); 94 | group.throughput(Throughput::Bytes(vec.len() as u64)); // must precede the bench_with_input calls: criterion applies it only to benchmarks registered afterwards 95 | group.bench_with_input("sonic_rs::from_slice_unchecked", &vec, |b, data| { 96 | b.iter_batched( 97 | || data, 98 | |bytes| sonic_rs_parse_struct_unchecked::<$structure>(&bytes), 99 | BatchSize::SmallInput, 100 | ) 101 | }); 102 | 103 | group.bench_with_input("sonic_rs::from_slice", &vec, |b, data| { 104 | b.iter_batched( 105 | || data, 106 | |bytes| sonic_rs_parse_struct::<$structure>(&bytes), 107 | BatchSize::SmallInput, 108 | ) 109 | }); 110 | 111 | group.bench_with_input("simd_json::from_slice", &vec, |b, data| { 112 | b.iter_batched( 113 | || data.clone(), 114 | |mut bytes| simd_json_parse_struct::<$structure>(&mut bytes), 115 | BatchSize::SmallInput, 116 | ) 117 | }); 118 | 119 | group.bench_with_input("serde_json::from_slice", &vec, |b, data| { 120 | b.iter_batched( 121 | || data,
122 | |bytes| serde_json_parse_struct::<$structure>(&bytes), 123 | BatchSize::SmallInput, 124 | ) 125 | }); 126 | 127 | group.bench_with_input("serde_json::from_str", &vec, |b, data| { 128 | b.iter_batched( 129 | || data, 130 | |bytes| serde_json_parse_struct_from_str::<$structure>(&bytes), 131 | BatchSize::SmallInput, 132 | ) 133 | }); 134 | 135 | 136 | } 137 | } 138 | }; 139 | } 140 | 141 | bench_file!( 142 | json: twitter, 143 | structure: Twitter 144 | ); 145 | bench_file!( 146 | json: canada, 147 | structure: Canada 148 | ); 149 | bench_file!( 150 | json: citm_catalog, 151 | structure: CitmCatalog 152 | ); 153 | 154 | criterion_group!( 155 | benches, 156 | bench_twitter_Twitter, 157 | bench_citm_catalog_CitmCatalog, 158 | bench_canada_Canada, 159 | ); 160 | criterion_main!(benches); 161 | -------------------------------------------------------------------------------- /src/writer.rs: -------------------------------------------------------------------------------- 1 | //! Extend trait from io::Write for JSON serializing. 2 | 3 | use std::{io, io::BufWriter as IoBufWriter, mem::MaybeUninit, slice::from_raw_parts_mut}; 4 | 5 | use bytes::{buf::Writer, BytesMut}; 6 | 7 | /// The trait is an extension to [`io::Write`] with a reserved capacity. 8 | pub trait WriteExt: io::Write { 9 | /// Reserve with `additional` capacity and returns the remaining spare capacity of the write as 10 | /// a slice of `MaybeUninit`. 11 | /// 12 | /// The returned slice will be used to write new data before marking the data as initialized 13 | /// using the [`WriteExt::flush_len`] method. 14 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]>; 15 | 16 | /// Flush the `additional` length to the output stream, ensuring that the `additional` 17 | /// intermediately buffered bytes reach their destination.
18 | /// 19 | /// # Safety 20 | /// 21 | /// Must be used after `reserve_with` 22 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()>; 23 | } 24 | 25 | /// Wrapper around generic I/O streams implementing [`WriteExt`] 26 | /// 27 | /// It internally maintains a buffer for fast operations which it then flushes 28 | /// to the underlying I/O stream when requested. 29 | pub struct BufferedWriter { 30 | inner: W, 31 | buffer: Vec, 32 | } 33 | 34 | impl BufferedWriter { 35 | /// Construct a new buffered writer 36 | pub fn new(inner: W) -> Self { 37 | Self { 38 | inner, 39 | buffer: Vec::new(), 40 | } 41 | } 42 | } 43 | 44 | impl io::Write for BufferedWriter 45 | where 46 | W: io::Write, 47 | { 48 | #[inline(always)] 49 | fn write(&mut self, buf: &[u8]) -> io::Result { 50 | self.inner.write(buf) 51 | } 52 | 53 | #[inline(always)] 54 | fn flush(&mut self) -> io::Result<()> { 55 | self.inner.flush() 56 | } 57 | } 58 | 59 | impl WriteExt for BufferedWriter 60 | where 61 | W: io::Write, 62 | { 63 | #[inline(always)] 64 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 65 | self.buffer.reserve_with(additional) 66 | } 67 | 68 | #[inline(always)] 69 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 70 | self.buffer.flush_len(additional)?; 71 | self.inner.write_all(&self.buffer)?; 72 | self.buffer.clear(); 73 | 74 | Ok(()) 75 | } 76 | } 77 | 78 | impl WriteExt for Vec { 79 | #[inline(always)] 80 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 81 | self.reserve(additional); 82 | unsafe { 83 | let ptr = self.as_mut_ptr().add(self.len()) as *mut MaybeUninit; 84 | Ok(from_raw_parts_mut(ptr, additional)) 85 | } 86 | } 87 | 88 | #[inline(always)] 89 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 90 | unsafe { 91 | let new_len = self.len() + additional; 92 | self.set_len(new_len); 93 | } 94 | 95 | Ok(()) 96 | } 97 | } 98 | 99 | impl WriteExt for Writer { 
100 | #[inline(always)] 101 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 102 | let new_len = self.get_ref().len() + additional; 103 | self.get_mut().set_len(new_len); 104 | Ok(()) 105 | } 106 | 107 | #[inline(always)] 108 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 109 | self.get_mut().reserve(additional); 110 | unsafe { 111 | let ptr = self.get_mut().as_mut_ptr().add(self.get_ref().len()) as *mut MaybeUninit; 112 | Ok(from_raw_parts_mut(ptr, additional)) 113 | } 114 | } 115 | } 116 | 117 | impl WriteExt for Writer<&mut BytesMut> { 118 | #[inline(always)] 119 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 120 | let new_len = self.get_ref().len() + additional; 121 | self.get_mut().set_len(new_len); 122 | Ok(()) 123 | } 124 | 125 | #[inline(always)] 126 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 127 | self.get_mut().reserve(additional); 128 | unsafe { 129 | let ptr = self.get_mut().as_mut_ptr().add(self.get_ref().len()) as *mut MaybeUninit; 130 | Ok(from_raw_parts_mut(ptr, additional)) 131 | } 132 | } 133 | } 134 | 135 | impl WriteExt for IoBufWriter { 136 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 137 | self.get_mut().reserve_with(additional) 138 | } 139 | 140 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 141 | self.get_mut().flush_len(additional) 142 | } 143 | } 144 | 145 | impl WriteExt for &mut W { 146 | #[inline(always)] 147 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 148 | (*self).flush_len(additional) 149 | } 150 | 151 | #[inline(always)] 152 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 153 | (*self).reserve_with(additional) 154 | } 155 | } 156 | 157 | impl WriteExt for Box { 158 | #[inline(always)] 159 | unsafe fn flush_len(&mut self, additional: usize) -> io::Result<()> { 160 | 
(**self).flush_len(additional) 161 | } 162 | 163 | #[inline(always)] 164 | fn reserve_with(&mut self, additional: usize) -> io::Result<&mut [MaybeUninit]> { 165 | (**self).reserve_with(additional) 166 | } 167 | } 168 | 169 | #[cfg(test)] 170 | mod test { 171 | use std::io::Write; 172 | 173 | use bytes::{BufMut, BytesMut}; 174 | 175 | use crate::writer::WriteExt; 176 | 177 | #[test] 178 | fn test_writer() { 179 | let buffer = BytesMut::new(); 180 | let writer = &mut buffer.writer(); 181 | 182 | let buf = writer.reserve_with(20).unwrap_or_default(); 183 | assert_eq!(buf.len(), 20); 184 | assert_eq!(writer.get_ref().capacity(), 20); 185 | 186 | let data = b"Hello, World!"; 187 | writer.write_all(&data[..]).unwrap(); 188 | assert_eq!(writer.get_ref().capacity(), 20); 189 | assert_eq!(writer.get_ref().as_ref(), &data[..]); 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /benchmarks/benches/schema/src/twitter.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | use crate::{color::Color, empty, enum_str, prim_str::PrimStr}; 4 | 5 | #[derive(Serialize, Deserialize)] 6 | #[serde(deny_unknown_fields)] 7 | pub struct Twitter { 8 | pub statuses: Vec, 9 | pub search_metadata: SearchMetadata, 10 | } 11 | 12 | pub type LongId = u64; 13 | pub type ShortId = u32; 14 | pub type LongIdStr = PrimStr; 15 | pub type ShortIdStr = PrimStr; 16 | 17 | #[derive(Serialize, Deserialize)] 18 | #[serde(deny_unknown_fields)] 19 | pub struct Status { 20 | pub metadata: Metadata, 21 | pub created_at: String, 22 | pub id: LongId, 23 | pub id_str: LongIdStr, 24 | pub text: String, 25 | pub source: String, 26 | pub truncated: bool, 27 | pub in_reply_to_status_id: Option, 28 | pub in_reply_to_status_id_str: Option, 29 | pub in_reply_to_user_id: Option, 30 | pub in_reply_to_user_id_str: Option, 31 | pub in_reply_to_screen_name: Option, 32 | pub user: User, 33 | pub geo: 
(), 34 | pub coordinates: (), 35 | pub place: (), 36 | pub contributors: (), 37 | pub retweeted_status: Option>, 38 | pub retweet_count: u32, 39 | pub favorite_count: u32, 40 | pub entities: StatusEntities, 41 | pub favorited: bool, 42 | pub retweeted: bool, 43 | pub possibly_sensitive: Option, 44 | pub lang: LanguageCode, 45 | } 46 | 47 | #[derive(Serialize, Deserialize)] 48 | #[serde(deny_unknown_fields)] 49 | pub struct Metadata { 50 | pub result_type: ResultType, 51 | pub iso_language_code: LanguageCode, 52 | } 53 | 54 | #[derive(Serialize, Deserialize)] 55 | #[serde(deny_unknown_fields)] 56 | pub struct User { 57 | pub id: ShortId, 58 | pub id_str: ShortIdStr, 59 | pub name: String, 60 | pub screen_name: String, 61 | pub location: String, 62 | pub description: String, 63 | pub url: Option, 64 | pub entities: UserEntities, 65 | pub protected: bool, 66 | pub followers_count: u32, 67 | pub friends_count: u32, 68 | pub listed_count: u32, 69 | pub created_at: String, 70 | pub favourites_count: u32, 71 | pub utc_offset: Option, 72 | pub time_zone: Option, 73 | pub geo_enabled: bool, 74 | pub verified: bool, 75 | pub statuses_count: u32, 76 | pub lang: LanguageCode, 77 | pub contributors_enabled: bool, 78 | pub is_translator: bool, 79 | pub is_translation_enabled: bool, 80 | pub profile_background_color: Color, 81 | pub profile_background_image_url: String, 82 | pub profile_background_image_url_https: String, 83 | pub profile_background_tile: bool, 84 | pub profile_image_url: String, 85 | pub profile_image_url_https: String, 86 | pub profile_banner_url: Option, 87 | pub profile_link_color: Color, 88 | pub profile_sidebar_border_color: Color, 89 | pub profile_sidebar_fill_color: Color, 90 | pub profile_text_color: Color, 91 | pub profile_use_background_image: bool, 92 | pub default_profile: bool, 93 | pub default_profile_image: bool, 94 | pub following: bool, 95 | pub follow_request_sent: bool, 96 | pub notifications: bool, 97 | } 98 | 99 | #[derive(Serialize, 
Deserialize)] 100 | #[serde(deny_unknown_fields)] 101 | pub struct UserEntities { 102 | pub url: Option, 103 | pub description: UserEntitiesDescription, 104 | } 105 | 106 | #[derive(Serialize, Deserialize)] 107 | #[serde(deny_unknown_fields)] 108 | pub struct UserUrl { 109 | pub urls: Vec, 110 | } 111 | 112 | #[derive(Serialize, Deserialize)] 113 | #[serde(deny_unknown_fields)] 114 | pub struct Url { 115 | pub url: String, 116 | pub expanded_url: String, 117 | pub display_url: String, 118 | pub indices: Indices, 119 | } 120 | 121 | #[derive(Serialize, Deserialize)] 122 | #[serde(deny_unknown_fields)] 123 | pub struct UserEntitiesDescription { 124 | pub urls: Vec, 125 | } 126 | 127 | #[derive(Serialize, Deserialize)] 128 | #[serde(deny_unknown_fields)] 129 | pub struct StatusEntities { 130 | pub hashtags: Vec, 131 | pub symbols: empty::Array, 132 | pub urls: Vec, 133 | pub user_mentions: Vec, 134 | pub media: Option>, 135 | } 136 | 137 | #[derive(Serialize, Deserialize)] 138 | #[serde(deny_unknown_fields)] 139 | pub struct Hashtag { 140 | pub text: String, 141 | pub indices: Indices, 142 | } 143 | 144 | #[derive(Serialize, Deserialize)] 145 | #[serde(deny_unknown_fields)] 146 | pub struct UserMention { 147 | pub screen_name: String, 148 | pub name: String, 149 | pub id: ShortId, 150 | pub id_str: ShortIdStr, 151 | pub indices: Indices, 152 | } 153 | 154 | #[derive(Serialize, Deserialize)] 155 | #[serde(deny_unknown_fields)] 156 | pub struct Media { 157 | pub id: LongId, 158 | pub id_str: LongIdStr, 159 | pub indices: Indices, 160 | pub media_url: String, 161 | pub media_url_https: String, 162 | pub url: String, 163 | pub display_url: String, 164 | pub expanded_url: String, 165 | #[serde(rename = "type")] 166 | pub media_type: String, 167 | pub sizes: Sizes, 168 | pub source_status_id: Option, 169 | pub source_status_id_str: Option, 170 | } 171 | 172 | #[derive(Serialize, Deserialize)] 173 | #[serde(deny_unknown_fields)] 174 | pub struct Sizes { 175 | pub medium: 
Size, 176 | pub small: Size, 177 | pub thumb: Size, 178 | pub large: Size, 179 | } 180 | 181 | #[derive(Serialize, Deserialize)] 182 | #[serde(deny_unknown_fields)] 183 | pub struct Size { 184 | pub w: u16, 185 | pub h: u16, 186 | pub resize: Resize, 187 | } 188 | 189 | pub type Indices = (u8, u8); 190 | 191 | #[derive(Serialize, Deserialize)] 192 | #[serde(deny_unknown_fields)] 193 | pub struct SearchMetadata { 194 | pub completed_in: f32, 195 | pub max_id: LongId, 196 | pub max_id_str: LongIdStr, 197 | pub next_results: String, 198 | pub query: String, 199 | pub refresh_url: String, 200 | pub count: u8, 201 | pub since_id: LongId, 202 | pub since_id_str: LongIdStr, 203 | } 204 | 205 | enum_str!(Resize { 206 | Fit("fit"), 207 | Crop("crop"), 208 | }); 209 | 210 | enum_str!(LanguageCode { 211 | Cn("zh-cn"), 212 | En("en"), 213 | Es("es"), 214 | It("it"), 215 | Ja("ja"), 216 | Zh("zh"), 217 | }); 218 | 219 | enum_str!(ResultType { 220 | Recent("recent"), 221 | }); 222 | -------------------------------------------------------------------------------- /docs/benchmark_aarch64.md: -------------------------------------------------------------------------------- 1 | ## Benchmark on Apple M1 Pro 2 | 3 | ### Deserialize Struct 4 | 5 | ``` 6 | twitter/sonic_rs::from_slice_unchecked 7 | time: [436.54 µs 437.34 µs 438.22 µs] 8 | twitter/sonic_rs::from_slice 9 | time: [457.72 µs 459.11 µs 460.80 µs] 10 | twitter/simd_json::from_slice 11 | time: [424.34 µs 425.05 µs 425.92 µs] 12 | twitter/serde_json::from_slice 13 | time: [831.10 µs 832.50 µs 834.16 µs] 14 | twitter/serde_json::from_str 15 | time: [524.50 µs 525.55 µs 526.74 µs] 16 | 17 | citm_catalog/sonic_rs::from_slice_unchecked 18 | time: [854.49 µs 855.71 µs 857.15 µs] 19 | citm_catalog/sonic_rs::from_slice 20 | time: [892.97 µs 898.45 µs 904.43 µs] 21 | citm_catalog/simd_json::from_slice 22 | time: [831.27 µs 837.38 µs 843.78 µs] 23 | citm_catalog/serde_json::from_slice 24 | time: [1.3759 ms 1.3815 ms 1.3876 ms] 25 |
citm_catalog/serde_json::from_str 26 | time: [1.1859 ms 1.1875 ms 1.1894 ms] 27 | 28 | canada/sonic_rs::from_slice_unchecked 29 | time: [3.1438 ms 3.1660 ms 3.1886 ms] 30 | canada/sonic_rs::from_slice 31 | time: [3.1151 ms 3.1357 ms 3.1566 ms] 32 | canada/simd_json::from_slice 33 | time: [3.2259 ms 3.2330 ms 3.2407 ms] 34 | canada/serde_json::from_slice 35 | time: [4.9878 ms 5.0213 ms 5.0568 ms] 36 | canada/serde_json::from_str 37 | time: [5.3256 ms 5.3714 ms 5.4191 ms] 38 | ``` 39 | 40 | ### Deserialize Untyped 41 | 42 | `cargo bench --bench deserialize_value -- --quiet "twitter|canada|citm_catalog"` 43 | 44 | ``` 45 | canada/sonic_rs_dom::from_slice 46 | time: [2.4394 ms 2.4495 ms 2.4606 ms] 47 | canada/sonic_rs_dom::from_slice_unchecked 48 | time: [2.3656 ms 2.3697 ms 2.3744 ms] 49 | canada/serde_json::from_slice 50 | time: [6.8682 ms 6.8864 ms 6.9067 ms] 51 | canada/serde_json::from_str 52 | time: [6.9604 ms 6.9907 ms 7.0223 ms] 53 | canada/simd_json::slice_to_owned_value 54 | time: [5.0212 ms 5.0402 ms 5.0602 ms] 55 | canada/simd_json::slice_to_borrowed_value 56 | time: [5.0442 ms 5.0661 ms 5.0885 ms] 57 | 58 | citm_catalog/sonic_rs_dom::from_slice 59 | time: [825.96 µs 827.98 µs 830.61 µs] 60 | citm_catalog/sonic_rs_dom::from_slice_unchecked 61 | time: [805.69 µs 807.07 µs 808.59 µs] 62 | citm_catalog/serde_json::from_slice 63 | time: [2.6804 ms 2.6872 ms 2.6942 ms] 64 | citm_catalog/serde_json::from_str 65 | time: [2.4323 ms 2.4372 ms 2.4423 ms] 66 | citm_catalog/simd_json::slice_to_owned_value 67 | time: [1.8281 ms 1.8348 ms 1.8418 ms] 68 | citm_catalog/simd_json::slice_to_borrowed_value 69 | time: [1.3757 ms 1.3796 ms 1.3848 ms] 70 | 71 | twitter/sonic_rs_dom::from_slice 72 | time: [380.30 µs 381.16 µs 382.14 µs] 73 | twitter/sonic_rs_dom::from_slice_unchecked 74 | time: [357.51 µs 358.07 µs 358.70 µs] 75 | twitter/serde_json::from_slice 76 | time: [1.5932 ms 1.5957 ms 1.5984 ms] 77 | twitter/serde_json::from_str 78 | time: [1.2584 ms 1.2636 ms 1.2689 ms] 
79 | twitter/simd_json::slice_to_owned_value 80 | time: [892.94 µs 896.75 µs 900.67 µs] 81 | twitter/simd_json::slice_to_borrowed_value 82 | time: [622.22 µs 622.47 µs 622.73 µs] 83 | ``` 84 | 85 | ### Serialize Struct 86 | 87 | `cargo bench --bench serialize_struct -- --quiet` 88 | 89 | ``` 90 | twitter/sonic_rs::to_string 91 | time: [212.16 µs 213.44 µs 215.07 µs] 92 | twitter/simd_json::to_string 93 | time: [300.20 µs 303.13 µs 306.55 µs] 94 | twitter/serde_json::to_string 95 | time: [341.77 µs 343.50 µs 345.85 µs] 96 | 97 | canada/sonic_rs::to_string 98 | time: [2.3674 ms 2.3730 ms 2.3785 ms] 99 | canada/simd_json::to_string 100 | time: [2.9695 ms 2.9778 ms 2.9865 ms] 101 | canada/serde_json::to_string 102 | time: [2.3422 ms 2.3555 ms 2.3706 ms] 103 | 104 | citm_catalog/sonic_rs::to_string 105 | time: [325.60 µs 326.13 µs 326.71 µs] 106 | citm_catalog/simd_json::to_string 107 | time: [374.37 µs 374.97 µs 375.66 µs] 108 | citm_catalog/serde_json::to_string 109 | time: [431.37 µs 432.92 µs 434.81 µs] 110 | 111 | ``` 112 | 113 | ### Serialize Untyped 114 | 115 | `cargo bench --bench serialize_value -- --quiet` 116 | 117 | ``` 118 | twitter/sonic_rs::to_string 119 | time: [168.74 µs 168.98 µs 169.24 µs] 120 | twitter/serde_json::to_string 121 | time: [358.03 µs 358.89 µs 359.93 µs] 122 | twitter/simd_json::to_string 123 | time: [382.20 µs 383.01 µs 383.97 µs] 124 | 125 | citm_catalog/sonic_rs::to_string 126 | time: [336.69 µs 337.15 µs 337.66 µs] 127 | citm_catalog/serde_json::to_string 128 | time: [588.08 µs 594.31 µs 601.53 µs] 129 | citm_catalog/simd_json::to_string 130 | time: [814.63 µs 815.93 µs 817.37 µs] 131 | 132 | canada/sonic_rs::to_string 133 | time: [2.8751 ms 2.8912 ms 2.9102 ms] 134 | canada/serde_json::to_string 135 | time: [2.8237 ms 2.8298 ms 2.8357 ms] 136 | canada/simd_json::to_string 137 | time: [3.4206 ms 3.4268 ms 3.4335 ms] 138 | ``` 139 | 140 | ### Get from JSON 141 | 142 | `cargo bench --bench get_from -- --quiet` 143 | 144 | The benchmark 
is getting a specific field from the twitter JSON. 145 | 146 | - sonic-rs::get_unchecked_from_str: without validation 147 | - sonic-rs::get_from_str: with validation 148 | - gjson::get_from_str: without validation 149 | 150 | Sonic-rs utilizes SIMD to quickly skip unnecessary fields in the unchecked case, thus enhancing the performance. 151 | 152 | ``` 153 | twitter/sonic-rs::get_unchecked_from_str 154 | time: [51.211 µs 51.285 µs 51.363 µs] 155 | twitter/sonic-rs::get_from_str 156 | time: [374.21 µs 376.41 µs 379.08 µs] 157 | twitter/gjson::get_from_str 158 | time: [159.11 µs 159.39 µs 159.69 µs] 159 | ``` 160 | 161 | -------------------------------------------------------------------------------- /benchmarks/benches/deserialize_value.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use std::{fs::File, io::Read, str::from_utf8_unchecked}; 5 | 6 | use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; 7 | 8 | include!("./common.rs"); 9 | 10 | #[cfg(not(target_env = "msvc"))] 11 | #[global_allocator] 12 | static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; 13 | 14 | fn simdjson_to_borrowed_value(data: &mut [u8]) { 15 | let _ = simd_json::to_borrowed_value(data).unwrap(); 16 | } 17 | 18 | fn simdjson_to_owned_value(data: &mut [u8]) { 19 | let _ = simd_json::to_owned_value(data).unwrap(); 20 | } 21 | 22 | fn serde_from_slice(data: &[u8]) { 23 | let _: serde_json::Value = serde_json::from_slice(data).unwrap(); 24 | } 25 | 26 | fn serde_from_str(data: &[u8]) { 27 | let data = unsafe { from_utf8_unchecked(data) }; 28 | let _: serde_json::Value = serde_json::from_str(data).unwrap(); 29 | } 30 | 31 | fn sonic_rs_from_slice(data: &[u8], cfg: SonicConfig) { 32 | let _: sonic_rs::Value = do_sonic_rs_from_slice(data, cfg).unwrap(); 33 | } 34 | 35 | fn sonic_rs_from_slice_unchecked(data: &[u8]) { 36 | let _: sonic_rs::Value = unsafe {
sonic_rs::from_slice_unchecked(data).unwrap() }; 37 | } 38 | 39 | // fn sonic_rs_skip_one(data: &[u8]) { 40 | // unsafe { 41 | // let data = from_utf8_unchecked(data); 42 | // let empty: &[&str] = &[]; 43 | // let _ = sonic_rs::get_unchecked(data, empty).unwrap(); 44 | // } 45 | // } 46 | 47 | fn sonic_rs_to_serdejson_value(data: &[u8]) { 48 | let _: serde_json::Value = sonic_rs::from_slice(data).unwrap(); 49 | } 50 | 51 | fn sonic_rs_to_simdjson_value(data: &[u8]) { 52 | let _: simd_json::OwnedValue = sonic_rs::from_slice(data).unwrap(); 53 | } 54 | 55 | macro_rules! bench_file { 56 | ($name:ident) => { 57 | #[allow(unused)] 58 | fn $name(c: &mut Criterion) { 59 | let core_ids = core_affinity::get_core_ids().unwrap(); 60 | core_affinity::set_for_current(core_ids[0]); 61 | 62 | let mut vec = Vec::new(); 63 | let root = env!("CARGO_MANIFEST_DIR").to_owned(); 64 | File::open(root + concat!("/benches/testdata/", stringify!($name), ".json")) 65 | .unwrap() 66 | .read_to_end(&mut vec) 67 | .unwrap(); 68 | 69 | // verify sonic-rs parse 70 | let serde_out: serde_json::Value = serde_json::from_slice(&vec).unwrap(); 71 | 72 | let value: sonic_rs::Value = sonic_rs::from_slice(&vec).unwrap(); 73 | let out = sonic_rs::to_string(&value).unwrap(); 74 | let rs_out1: serde_json::Value = serde_json::from_str(&out).unwrap(); 75 | assert_eq!(rs_out1, serde_out); 76 | 77 | let mut group = c.benchmark_group(stringify!($name)); 78 | group.sampling_mode(SamplingMode::Flat); 79 | 80 | group.bench_with_input("sonic_rs_dom::from_slice", &vec, |b, data| { 81 | b.iter_batched( 82 | || data, 83 | |bytes| sonic_rs_from_slice(&bytes, SONIC_DEFAULT_CFG), 84 | BatchSize::SmallInput, 85 | ) 86 | }); 87 | 88 | group.bench_with_input("sonic_rs_dom::from_slice_use_rawnum", &vec, |b, data| { 89 | b.iter_batched( 90 | || data, 91 | |bytes| sonic_rs_from_slice(&bytes, SONIC_USE_RAWNUM_CFG), 92 | BatchSize::SmallInput, 93 | ) 94 | }); 95 | 96 | group.bench_with_input("sonic_rs_dom::from_slice_unchecked", 
&vec, |b, data| { 97 | b.iter_batched( 98 | || data, 99 | |bytes| sonic_rs_from_slice_unchecked(&bytes), 100 | BatchSize::SmallInput, 101 | ) 102 | }); 103 | 104 | group.bench_with_input( 105 | "sonic_rs_to_serde_json_value::from_slice_unchecked", 106 | &vec, 107 | |b, data| { 108 | b.iter_batched( 109 | || data, 110 | |bytes| sonic_rs_to_serdejson_value(&bytes), 111 | BatchSize::SmallInput, 112 | ) 113 | }, 114 | ); 115 | 116 | group.bench_with_input( 117 | "sonic_rs_to_simd_json_value::from_slice_unchecked", 118 | &vec, 119 | |b, data| { 120 | b.iter_batched( 121 | || data, 122 | |bytes| sonic_rs_to_simdjson_value(&bytes), 123 | BatchSize::SmallInput, 124 | ) 125 | }, 126 | ); 127 | 128 | // group.bench_with_input("sonic_rs::skip_one", &vec, |b, data| { 129 | // b.iter_batched( 130 | // || data, 131 | // |bytes| sonic_rs_skip_one(&bytes), 132 | // BatchSize::SmallInput, 133 | // ) 134 | // }); 135 | 136 | // group.bench_with_input("sonic_rs::to_serdejson_value", &vec, |b, data| { 137 | // b.iter_batched( 138 | // || data, 139 | // |bytes| sonic_rs_to_serdejson_value(&bytes), 140 | // BatchSize::SmallInput, 141 | // ) 142 | // }); 143 | 144 | group.bench_with_input("serde_json::from_slice", &vec, |b, data| { 145 | b.iter_batched( 146 | || data, 147 | |bytes| serde_from_slice(&bytes), 148 | BatchSize::SmallInput, 149 | ) 150 | }); 151 | 152 | group.bench_with_input("serde_json::from_str", &vec, |b, data| { 153 | b.iter_batched( 154 | || data, 155 | |bytes| serde_from_str(&bytes), 156 | BatchSize::SmallInput, 157 | ) 158 | }); 159 | 160 | group.bench_with_input("simd_json::slice_to_owned_value", &vec, |b, data| { 161 | b.iter_batched( 162 | || data.clone(), 163 | |mut bytes| simdjson_to_owned_value(&mut bytes), 164 | BatchSize::SmallInput, 165 | ) 166 | }); 167 | 168 | group.bench_with_input("simd_json::slice_to_borrowed_value", &vec, |b, data| { 169 | b.iter_batched( 170 | || data.clone(), 171 | |mut bytes| simdjson_to_borrowed_value(&mut bytes), 172 | 
BatchSize::SmallInput, 173 | ) 174 | }); 175 | group.throughput(Throughput::Bytes(vec.len() as u64)); 176 | } 177 | }; 178 | } 179 | 180 | bench_file!(book); 181 | bench_file!(canada); 182 | bench_file!(citm_catalog); 183 | bench_file!(twitter); 184 | bench_file!(github_events); 185 | 186 | criterion_group!(benches, canada, citm_catalog, twitter, github_events, book); 187 | criterion_main!(benches); 188 | -------------------------------------------------------------------------------- /docs/performance_zh.md: -------------------------------------------------------------------------------- 1 | # Sonic-rs 优化细节 2 | 3 | 下面介绍一些sonic-rs的性能优化细节,其中代码版本是commit `631411b`. 4 | 5 | ## 按需解析 6 | 7 | 如何实现一个性能更好的按需解析算法。按需解析的性能关键在于跳过不需要的字段,其中难点在于如何跳过 JSON container, 包括 JSON Object 和 JSON array,因为我们需要注意 JSON 字符串中的括号,例如 `{ "key": "value {}"}`。 我们利用了 simd 指令计算字符串的bitmap,然后通过计算括号的数量来跳过整个JSON container。参考论文 [JSONSki](https://dl.acm.org/doi/10.1145/3503222.3507719). 8 | 9 | 整体算法如下: 10 | 11 | ```rs 12 | #[inline(always)] 13 | fn skip_container_loop( 14 | input: &[u8; 64], 15 | prev_instring: &mut u64, 16 | prev_escaped: &mut u64, 17 | lbrace_num: &mut usize, 18 | rbrace_num: &mut usize, 19 | left: u8, 20 | right: u8, 21 | ) -> Option<NonZeroU8> { 22 | 23 | let instring = get_string_bits(input, prev_instring, prev_escaped); 24 | // #Safety 25 | // the input is 64 bytes, so the v will be always valid.
26 | let v = unsafe { u8x64::from_slice_unaligned_unchecked(input) }; 27 | let last_lbrace_num = *lbrace_num; 28 | let mut rbrace = (v.eq(u8x64::splat(right))).bitmask() & !instring; 29 | let lbrace = (v.eq(u8x64::splat(left))).bitmask() & !instring; 30 | while rbrace != 0 { 31 | *rbrace_num += 1; 32 | *lbrace_num = last_lbrace_num + (lbrace & (rbrace - 1)).count_ones() as usize; 33 | let is_closed = lbrace_num < rbrace_num; 34 | if is_closed { 35 | debug_assert_eq!(*rbrace_num, *lbrace_num + 1); 36 | let cnt = rbrace.trailing_zeros() + 1; 37 | return unsafe { Some(NonZeroU8::new_unchecked(cnt as u8)) }; 38 | } 39 | rbrace &= rbrace - 1; 40 | } 41 | *lbrace_num = last_lbrace_num + lbrace.count_ones() as usize; 42 | None 43 | } 44 | ``` 45 | 46 | 主要的算法步骤如下: 47 | 1. 计算 JSON 字符串的 bitmap `instring`。 48 | 49 | 对于在字符串中的字节,我们将bitmap中对应位置的bit标记为1。 这里面需要注意 JSON 字符串中可能包含 escaped 字符 ('"', '\'). 例如: 50 | ``` 51 | JSON text : "\\hel{}lo\"" 52 | instring bitmap: 0111111111110 53 | ``` 54 | 55 | 这里利用了 simdjson 的无分支的 SIMD 算法,代码在 `get_escaped_branchless_u64`。 56 | 57 | 2. 如何通过匹配括号数量来跳过 Object 或 array? 58 | 59 | 我们得到 `instring` 之后,再将其取反,并与 `[]` 或 `{}` 的 bitmap 做按位与操作(即 `& !instring`),就可以得到真正的括号bitmap。然后以此来进行括号匹配操作。每当发现有右括号存在时,这时候有可能我们就需要进行括号匹配, 因为右括号有可能是Object或array 结束位置。 60 | 在括号匹配操作里面,我们挨个判断右括号的数量是否大于之前的左括号数量,如果超过了,说明该 Object 或 Array 已经结束。 61 | 62 | ## Skip Space using SIMD 63 | 64 | JSON 规范中的空格字符有: ` `, `\n`, `\r`, `\t`. 利用 SIMD 指令跳过空格,至少有两种实现方式。 65 | 一种方式是直接使用 compeq 向量指令得到各个空格字符的 bitmap,然后进行汇总得到空格的bitmap。还有一种方式是直接利用 shuffle SIMD 指令,这个 idea 来源于 simdjson。这里面有两种方式的[实现测试](https://github.com/liuq19/simdstr/blob/main/examples/shuffle/bm_shuffle.cpp).
66 | 67 | 我们发现JSON的格式有紧凑的和pretty的,空格之间相隔并不远。而且在常见的 pretty 格式下,Object 的':' 和value 中间往往只隔一个空格。例如: 68 | ``` 69 | { 70 | "statuses": [ 71 | { 72 | "metadata": { 73 | "result_type": "recent", 74 | "iso_language_code": "ja" 75 | }, 76 | ``` 77 | (json 片段来自 twitter.json) 78 | 79 | 因此,我们在每次跳过空格时,将计算得到的非空格字符的 bitmap保存下来,后面跳过空格时,查询这个bitmap这样能够节省后续很多不必要的 simd 计算。可以参考 `skip_space` 函数中的下列代码: 80 | ```rs 81 | // fast path 2: reuse the bitmap for short key or numbers 82 | let nospace_offset = (reader.index() as isize) - self.nospace_start; 83 | if nospace_offset < 64 { 84 | let bitmap = { 85 | let mask = !((1 << nospace_offset) - 1); 86 | self.nospace_bits & mask 87 | }; 88 | if bitmap != 0 { 89 | let cnt = bitmap.trailing_zeros() as usize; 90 | let ch = reader.at(self.nospace_start as usize + cnt); 91 | reader.set_index(self.nospace_start as usize + cnt + 1); 92 | 93 | return Some(ch); 94 | } else { 95 | // we can still fast skip the marked space in here. 96 | reader.set_index(self.nospace_start as usize + 64); 97 | } 98 | } 99 | ``` 100 | 101 | 另外,我们还针对紧凑 JSON 和只有一个空格的情况,使用了fastpath。例如, 在 `skip_space` 函数中: 102 | ```rs 103 | // fast path 1: for nospace or single space 104 | // most JSON is like ` "name": "balabala" ` 105 | if let Some(ch) = reader.next() { 106 | if !is_whitespace(ch) { 107 | return Some(ch); 108 | } 109 | } 110 | if let Some(ch) = reader.next() { 111 | if !is_whitespace(ch) { 112 | return Some(ch); 113 | } 114 | } 115 | ``` 116 | 117 | ## 使用 SIMD 解析浮点数 118 | 119 | 浮点数解析是 JSON 解析中的一个非常耗时的操作。在很多浮点数中,往往有比较长的尾数,例如 `canada.json` 中,浮点数尾数部分是15位: 120 | ``` 121 | [[[-65.613616999999977,43.420273000000009],[-65.619720000000029,43.418052999999986],[-65.625,43.421379000000059],[-65.636123999999882,43.449714999999969],[-65.633056999999951,43.474709000000132],[-65.611389000000031,43.513054000000068],[-65.605835000000013,43.516105999999979],[-65.598343,43.515830999999935],[-65. 
122 | ``` 123 | 124 | 125 | 对于长度为16的数字字符串,是可以直接使用 SIMD 指令进行解析,读取 ascii 数字字符并且逐步累加的。 具体算法可以参考[simd_str2int](https://github.com/cloudwego/sonic-rs/blob/main/src/util/arch/x86_64.rs#L115)。这个算法来源于 [sonic-cpp](https://github.com/bytedance/sonic-cpp/blob/master/include/sonic/internal/arch/sse/str2int.h). 在解析浮点数时,按照 IEEE754 规范,对于64 位浮点数,我们只需要关注17位有效数字。因此,在这个函数里面使用了一个 switch table 来减少不必要的 SIMD 指令。 126 | 127 | 128 | ## 使用 SIMD 序列化 JSON string 129 | 130 | 在序列化JSON字符串时, 如果JSON字符串比较长,非常适合使用SIMD。sonic-rs 使用了 `copy and find` 的算法。 131 | 132 | ```rs 133 | while nb >= LANS { 134 | // copy from the JSON string 135 | let v = { 136 | let raw = std::slice::from_raw_parts(sptr, LANS); 137 | u8x32::from_slice_unaligned_unchecked(raw) 138 | }; 139 | v.write_to_slice_unaligned_unchecked(std::slice::from_raw_parts_mut(dptr, LANS)); 140 | // if find the escaped character, then deal with it 141 | let mask = escaped_mask(v); 142 | if mask == 0 { 143 | nb -= LANS; 144 | dptr = dptr.add(LANS); 145 | sptr = sptr.add(LANS); 146 | } else { 147 | let cn = mask.trailing_zeros() as usize; 148 | nb -= cn; 149 | dptr = dptr.add(cn); 150 | sptr = sptr.add(cn); 151 | escape_unchecked(&mut sptr, &mut nb, &mut dptr); 152 | } 153 | } 154 | ``` 155 | 156 | ## 内存池分配器 157 | 158 | 我们之前在 sonic-cpp 中发现,在将 JSON 解析到 document时,document 中对每个节点内存的分配,是一个性能热点,同时,在 c++ JSON 库`rapidjson` 使用了memory pool allocator 来统一预分配 document的内存。因此,我们在sonic-rs中也使用 `bump` crate来对 整个document 进行预分配内存。Arena 机制能够减少内存分配开销,同时让缓存变得更加友好,因为 document 的各个节点的内存位置是邻近的。 159 | 160 | 这里面有一个有趣的细节是,我们发现在解析 JSON array 或object时,我们事先不知道该节点里面有多少children 节点。因此,在解析的过程中,往往需要一个vector先存储中间节点,等到array或object解析完成之后,最后才能在document上面创建该 object 或 array节点。 161 | 162 | 为了节省这一块性能开销,我们在解析JSON前,预分配了一个长度为 JSON length/2 + 2 个节点的vector作为中间存储。因此,在后续解析过程中,我们无需对该vector 进行扩容。因为当需要的节点数量超过 vector 长度时,此时的 JSON 必定是不合法的。 163 | 164 | ```rs 165 | // optimize: use a pre-allocated vec. 
166 | // If json is valid, the max number of value nodes should be 167 | // half of the valid json length + 2. such as [1,2,3,1,2,3...] 168 | // if the capacity is not enough, we will return an error. 169 | let nodes = Vec::with_capacity((json.len() / 2) + 2); 170 | let parent = 0; 171 | let mut visitor = DocumentVisitor { 172 | alloc, 173 | nodes, 174 | parent, 175 | }; 176 | ``` 177 | 178 | --------------------------------------------------------------------------------