├── .circleci └── config.yml ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── Cross.toml ├── LICENSE ├── README.md ├── README.zh-CN.md ├── crates ├── base32-simd │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── build.rs │ ├── src │ │ ├── alsw.rs │ │ ├── check.rs │ │ ├── decode.rs │ │ ├── encode.rs │ │ ├── error.rs │ │ ├── heap.rs │ │ ├── lib.rs │ │ └── multiversion.rs │ └── tests │ │ └── it.rs ├── base64-simd │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── build.rs │ ├── src │ │ ├── alsw.rs │ │ ├── ascii.rs │ │ ├── check.rs │ │ ├── decode.rs │ │ ├── encode.rs │ │ ├── error.rs │ │ ├── forgiving.rs │ │ ├── heap.rs │ │ ├── lib.rs │ │ ├── multiversion.rs │ │ └── parallel.rs │ └── tests │ │ └── it.rs ├── hex-simd │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── build.rs │ ├── src │ │ ├── check.rs │ │ ├── decode.rs │ │ ├── encode.rs │ │ ├── error.rs │ │ ├── heap.rs │ │ ├── lib.rs │ │ └── multiversion.rs │ └── tests │ │ └── it.rs ├── unicode-simd │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── build.rs │ ├── src │ │ ├── ascii.rs │ │ ├── lib.rs │ │ ├── multiversion.rs │ │ ├── utf16.rs │ │ └── utf32.rs │ └── tests │ │ └── it.rs ├── uuid-simd │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── build.rs │ ├── src │ │ ├── error.rs │ │ ├── ext.rs │ │ ├── format.rs │ │ ├── lib.rs │ │ ├── multiversion.rs │ │ ├── parse.rs │ │ └── spec.rs │ └── tests │ │ └── it.rs └── vsimd │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── src │ ├── alsw.rs │ ├── ascii.rs │ ├── bswap.rs │ ├── hex.rs │ ├── isa.rs │ ├── lib.rs │ ├── macros.rs │ ├── mask.rs │ ├── native.rs │ ├── pod.rs │ ├── scalable.rs │ ├── simd128.rs │ ├── simd256.rs │ ├── simd64.rs │ ├── simulation.rs │ ├── table.rs │ ├── tools.rs │ ├── unified.rs │ ├── unstable.rs │ └── vector.rs │ └── tests │ └── it.rs ├── justfile ├── rustfmt.toml └── scripts ├── base64.js ├── dump-symbols.py └── testgen.py /.circleci/config.yml: 
-------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | python: circleci/python@2.1.1 4 | 5 | jobs: 6 | test-aarch64: 7 | machine: 8 | image: ubuntu-2004:current 9 | resource_class: arm.large 10 | steps: 11 | - checkout 12 | - run: | 13 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly -y 14 | - run: | 15 | rustc -V -v 16 | python3 -V 17 | - run: | 18 | python3 ./scripts/testgen.py --target aarch64-unknown-linux-gnu | bash -ex 19 | # bench-aarch64: 20 | # parameters: 21 | # dispatch: 22 | # type: string 23 | # machine: 24 | # image: ubuntu-2004:current 25 | # resource_class: arm.large 26 | # steps: 27 | # - checkout 28 | # - run: | 29 | # curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly -y 30 | # - run: | 31 | # rustc -V -v 32 | # python3 -V 33 | # - run: | 34 | # cargo install cargo-criterion 35 | # pip3 install tabulate 36 | # - run: | 37 | # DISPATCH=<< parameters.dispatch >> ./scripts/bench.sh --benches --plotting-backend disabled -- --warm-up-time 1 --measurement-time 1 38 | # - run: | 39 | # COMMIT_HASH=`git rev-parse --short HEAD` 40 | # NAME=target/simd-benches/$COMMIT_HASH-<< parameters.dispatch >> 41 | # mv $NAME.md result.md 42 | # cat result.md 43 | # - store_artifacts: 44 | # path: result.md 45 | 46 | workflows: 47 | test: 48 | jobs: 49 | - test-aarch64: 50 | filters: 51 | branches: 52 | only: main 53 | # bench: 54 | # jobs: 55 | # - bench-aarch64: 56 | # filters: 57 | # branches: 58 | # only: benchmark 59 | # matrix: 60 | # parameters: 61 | # dispatch: 62 | # - static-unstable 63 | # - dynamic 64 | # - fallback 65 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package 
ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | branches: 7 | - main 8 | schedule: # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#onschedule 9 | - cron: '0 0 * * 0' # at midnight of each sunday 10 | workflow_dispatch: 11 | 12 | name: CI 13 | 14 | jobs: 15 | develop: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: dtolnay/rust-toolchain@nightly 20 | with: 21 | components: rustfmt, clippy 22 | - run: cargo fmt --all -- --check 23 | - run: cargo clippy -- -D warnings 24 | - run: cargo build --release --workspace --tests 25 | 26 | rust-version-test: 27 | runs-on: ubuntu-latest 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | toolchain: 32 | - stable 33 | - 1.70.0 # MSRV 34 | steps: 35 | - uses: actions/checkout@v4 36 | - uses: dtolnay/rust-toolchain@master 37 | with: 38 | toolchain: ${{ matrix.toolchain }} 39 | - run: | 40 | if [ "${{ matrix.toolchain }}" = "stable" ]; then 41 | cargo test --release --workspace --exclude simd-benches 42 | else 43 | cargo build --release --workspace --exclude simd-benches 44 | fi 45 | 46 | test: 47 | runs-on: ubuntu-latest 48 | strategy: 49 | fail-fast: false 50 | matrix: 51 | target: 52 | - x86_64-unknown-linux-gnu 53 | - i686-unknown-linux-gnu 54 | - aarch64-unknown-linux-gnu 55 | # - armv7-unknown-linux-gnueabihf # FIXME 
56 | - wasm32-unknown-unknown 57 | # - mips-unknown-linux-gnu # missing toolchain 58 | steps: 59 | - uses: actions/checkout@v4 60 | - uses: dtolnay/rust-toolchain@nightly 61 | with: 62 | targets: ${{ matrix.target }} 63 | - uses: taiki-e/install-action@v2 64 | with: 65 | tool: cross 66 | - run: | 67 | if [ "${{ matrix.target }}" == "wasm32-unknown-unknown" ]; then 68 | curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | bash 69 | fi 70 | - run: | 71 | python3 ./scripts/testgen.py --target "${{ matrix.target }}" | bash -ex 72 | 73 | miri: 74 | runs-on: ubuntu-latest 75 | steps: 76 | - uses: actions/checkout@v4 77 | - uses: dtolnay/rust-toolchain@nightly 78 | with: 79 | components: miri 80 | - run: | 81 | cargo miri test --workspace --exclude simd-benches 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /target 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["crates/*"] 3 | resolver = "2" 4 | 5 | [profile.test] 6 | opt-level = 3 7 | 8 | [profile.bench] 9 | lto = "fat" 10 | codegen-units = 1 11 | -------------------------------------------------------------------------------- /Cross.toml: -------------------------------------------------------------------------------- 1 | [build.env] 2 | passthrough = ["RUST_BACKTRACE", "RUST_LOG", "RUSTFLAGS"] 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Nugine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | 
in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # simd 2 | 3 | [![MIT licensed][mit-badge]][mit-url] [![CI][CI-badge]][CI-url] 4 | [English](./README.md) | [中文](./README.zh-CN.md) 5 | 6 | [CI-badge]: https://github.com/Nugine/simd/actions/workflows/ci.yml/badge.svg 7 | [CI-url]: https://github.com/Nugine/simd/actions/workflows/ci.yml 8 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 9 | [mit-url]: ./LICENSE 10 | 11 | SIMD-accelerated operations 12 | 13 | | crate | version | docs | 14 | | :----------------------------------: | :---------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------: | 15 | | [base64-simd](./crates/base64-simd/) | [![Crates.io](https://img.shields.io/crates/v/base64-simd.svg)](https://crates.io/crates/base64-simd) | 
[![Docs](https://docs.rs/base64-simd/badge.svg)](https://docs.rs/base64-simd/) | 16 | | [hex-simd](./crates/hex-simd/) | [![Crates.io](https://img.shields.io/crates/v/hex-simd.svg)](https://crates.io/crates/hex-simd) | [![Docs](https://docs.rs/hex-simd/badge.svg)](https://docs.rs/hex-simd/) | 17 | | [uuid-simd](./crates/uuid-simd/) | [![Crates.io](https://img.shields.io/crates/v/uuid-simd.svg)](https://crates.io/crates/uuid-simd) | [![Docs](https://docs.rs/uuid-simd/badge.svg)](https://docs.rs/uuid-simd/) | 18 | 19 | The crates automatically select SIMD functions when available and provide fast fallback implementations. Benchmark results are available in [simd-benches](https://github.com/Nugine/simd-benches). 20 | 21 | ## Goals 22 | 23 | + Performance: To be the fastest 24 | + Productivity: Efficient SIMD abstractions 25 | + Ergonomics: Easy to use 26 | 27 | ## Safety 28 | 29 | This project relies heavily on unsafe code. We encourage everyone to review the code and report any issues. 30 | 31 | Memory safety bugs and unsoundness issues are classified as critical bugs. They will be fixed as soon as possible. 32 | 33 | ## References 34 | 35 | This project contains multiple algorithms and implementations. Some of them are not original. We list the references here. 36 | 37 | base64: 38 | 39 | + 40 | + 41 | 42 | hex: 43 | 44 | + 45 | 46 | unicode: 47 | 48 | + 49 | 50 | ## Sponsor 51 | 52 | If my open-source work has been helpful to you, please [sponsor me](https://github.com/Nugine#sponsor). 53 | 54 | Every little bit helps. Thank you! 
55 | -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # simd 2 | 3 | [![MIT licensed][mit-badge]][mit-url] [English](./README.md) | [中文](./README.zh-CN.md) 4 | 5 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 6 | [mit-url]: ./LICENSE 7 | 8 | SIMD 加速操作 9 | 10 | | crate | version | docs | 11 | | :----------------------------------: | :---------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------: | 12 | | [base64-simd](./crates/base64-simd/) | [![Crates.io](https://img.shields.io/crates/v/base64-simd.svg)](https://crates.io/crates/base64-simd) | [![Docs](https://docs.rs/base64-simd/badge.svg)](https://docs.rs/base64-simd/) | 13 | | [hex-simd](./crates/hex-simd/) | [![Crates.io](https://img.shields.io/crates/v/hex-simd.svg)](https://crates.io/crates/hex-simd) | [![Docs](https://docs.rs/hex-simd/badge.svg)](https://docs.rs/hex-simd/) | 14 | | [uuid-simd](./crates/uuid-simd/) | [![Crates.io](https://img.shields.io/crates/v/uuid-simd.svg)](https://crates.io/crates/uuid-simd) | [![Docs](https://docs.rs/uuid-simd/badge.svg)](https://docs.rs/uuid-simd/) | 15 | 16 | 这些 crate 自动选择可用的 SIMD 函数并提供快速的回退实现。基准测试结果可在 [simd-benches](https://github.com/Nugine/simd-benches) 查看。 17 | 18 | ## 目标 19 | 20 | + 性能:做到最快 21 | + 生产力:高效的 SIMD 抽象 22 | + 人体工程学:易于使用 23 | 24 | ## 安全性 25 | 26 | 本项目高度依赖不安全的代码。我们鼓励每个人审查代码并报告任何问题。 27 | 28 | 内存安全错误和健全性问题被归类为致命错误。它们将被尽快修复。 29 | 30 | ## 语言 31 | 32 | 本项目接受中文或英文。所有代码、文档、PR 和议题都应该使用中文或英文编写。 33 | 34 | ## 参考资料 35 | 36 | 本项目包含多种算法和实现。其中一些不是原创的。我们在这里列出参考资料。 37 | 38 | base64: 39 | 40 | + 41 | + 42 | 43 | hex: 44 | 45 | + 46 | 47 | unicode: 48 | 49 | + 50 | 51 | ## 赞助 52 | 53 | 如果我的开源工作对您有帮助,请[赞助我](https://github.com/Nugine#sponsor)。 54 | 55 | 每一点点都有帮助。非常感谢! 
56 | -------------------------------------------------------------------------------- /crates/base32-simd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "base32-simd" 3 | version = "0.9.0-dev" 4 | edition = "2021" 5 | description = "SIMD-accelerated base32 encoding and decoding" 6 | license = "MIT" 7 | repository = "https://github.com/Nugine/simd" 8 | keywords = ["base32", "simd"] 9 | categories = ["no-std", "parser-implementations", "encoding"] 10 | readme = "README.md" 11 | rust-version = "1.63" 12 | 13 | [package.metadata.docs.rs] 14 | all-features = true 15 | rustdoc-args = ["--cfg", "docsrs"] 16 | 17 | [features] 18 | default = ["std", "detect"] 19 | alloc = ["vsimd/alloc"] 20 | std = ["alloc", "vsimd/std"] 21 | detect = ["vsimd/detect"] 22 | unstable = ["vsimd/unstable"] 23 | 24 | [dependencies] 25 | outref = "0.5.1" 26 | vsimd = { path = "../vsimd", version = "0.9.0-dev" } 27 | 28 | [dev-dependencies] 29 | rand = "0.8.5" 30 | 31 | [target.'cfg(target_arch="wasm32")'.dev-dependencies] 32 | getrandom = { version = "0.2.8", features = ["js"] } 33 | wasm-bindgen-test = "0.3.34" 34 | -------------------------------------------------------------------------------- /crates/base32-simd/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /crates/base32-simd/README.md: -------------------------------------------------------------------------------- 1 | # base32-simd 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/base32-simd.svg)](https://crates.io/crates/base32-simd) 4 | [![Docs](https://docs.rs/base32-simd/badge.svg)](https://docs.rs/base32-simd/) 5 | [![MIT licensed][mit-badge]][mit-url] 6 | 7 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 8 | [mit-url]: ../../LICENSE 9 | 10 | SIMD-accelerated base32 encoding and decoding. 
11 | 12 | Documentation: <https://docs.rs/base32-simd> 13 | 14 | Repository: <https://github.com/Nugine/simd> 15 | -------------------------------------------------------------------------------- /crates/base32-simd/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-check-cfg=cfg(vsimd_dump_symbols)") 3 | } 4 | -------------------------------------------------------------------------------- /crates/base32-simd/src/alsw.rs: -------------------------------------------------------------------------------- 1 | use vsimd::alsw::AlswLut; 2 | use vsimd::vector::{V128, V256}; 3 | 4 | struct Base32Alsw; 5 | 6 | impl Base32Alsw { 7 | #[inline] 8 | const fn decode(c: u8) -> u8 { 9 | match c { 10 | b'A'..=b'Z' => c - b'A', 11 | b'2'..=b'7' => c - b'2' + 26, 12 | _ => 0xff, 13 | } 14 | } 15 | 16 | #[inline] 17 | const fn check_hash(i: u8) -> u8 { 18 | match i { 19 | 0x0 => 1, 20 | 0x1 => 1, 21 | 0x2..=0x7 => 6, 22 | 0x8..=0xA => 1, 23 | 0xB..=0xF => 7, 24 | _ => unreachable!(), 25 | } 26 | } 27 | 28 | #[inline] 29 | const fn decode_hash(i: u8) -> u8 { 30 | Self::check_hash(i) 31 | } 32 | } 33 | 34 | vsimd::impl_alsw!(Base32Alsw); 35 | 36 | struct Base32HexAlsw; 37 | 38 | impl Base32HexAlsw { 39 | #[inline] 40 | const fn decode(c: u8) -> u8 { 41 | match c { 42 | b'0'..=b'9' => c - b'0', 43 | b'A'..=b'V' => c - b'A' + 10, 44 | _ => 0xff, 45 | } 46 | } 47 | 48 | #[inline] 49 | const fn check_hash(i: u8) -> u8 { 50 | match i { 51 | 0 => 1, 52 | 1..=6 => 1, 53 | 7..=9 => 7, 54 | 0xA..=0xF => 2, 55 | _ => unreachable!(), 56 | } 57 | } 58 | 59 | #[inline] 60 | const fn decode_hash(i: u8) -> u8 { 61 | Self::check_hash(i) 62 | } 63 | } 64 | 65 | vsimd::impl_alsw!(Base32HexAlsw); 66 | 67 | pub const BASE32_ALSW_CHECK_X2: AlswLut<V256> = Base32Alsw::check_lut().x2(); 68 | pub const BASE32_ALSW_DECODE_X2: AlswLut<V256> = Base32Alsw::decode_lut().x2(); 69 | 70 | pub const BASE32HEX_ALSW_CHECK_X2: AlswLut<V256> = Base32HexAlsw::check_lut().x2(); 71 | pub const BASE32HEX_ALSW_DECODE_X2: 
AlswLut<V256> = Base32HexAlsw::decode_lut().x2(); 72 | 73 | #[cfg(test)] 74 | mod algorithm { 75 | use super::*; 76 | 77 | #[cfg_attr( 78 | any(miri, not(all(target_arch = "x86_64", target_os = "linux", target_env = "gnu"))), 79 | ignore 80 | )] 81 | #[test] 82 | fn base32_alsw() { 83 | Base32Alsw::test_check(); 84 | Base32Alsw::test_decode(); 85 | } 86 | 87 | #[cfg_attr( 88 | any(miri, not(all(target_arch = "x86_64", target_os = "linux", target_env = "gnu"))), 89 | ignore 90 | )] 91 | #[test] 92 | fn base32hex_alsw() { 93 | Base32HexAlsw::test_check(); 94 | Base32HexAlsw::test_decode(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /crates/base32-simd/src/check.rs: -------------------------------------------------------------------------------- 1 | use crate::alsw::{BASE32HEX_ALSW_CHECK_X2, BASE32_ALSW_CHECK_X2}; 2 | use crate::decode::{decode_bits, decode_extra}; 3 | use crate::decode::{BASE32HEX_TABLE, BASE32_TABLE}; 4 | use crate::Error; 5 | use crate::Kind; 6 | 7 | use vsimd::alsw::AlswLut; 8 | use vsimd::vector::V256; 9 | use vsimd::SIMD256; 10 | 11 | use core::ptr::null_mut; 12 | 13 | #[inline(always)] 14 | pub(crate) unsafe fn check_fallback(mut src: *const u8, mut len: usize, kind: Kind) -> Result<(), Error> { 15 | let table = match kind { 16 | Kind::Base32 => BASE32_TABLE.as_ptr(), 17 | Kind::Base32Hex => BASE32HEX_TABLE.as_ptr(), 18 | }; 19 | 20 | let end = src.add(len / 8 * 8); 21 | while src < end { 22 | let (_, flag) = decode_bits::<8>(src, table); 23 | ensure!(flag != 0xff); 24 | src = src.add(8); 25 | } 26 | len %= 8; 27 | 28 | decode_extra::(src, len, null_mut(), table) 29 | } 30 | 31 | #[inline(always)] 32 | pub(crate) unsafe fn check_simd<S: SIMD256>(s: S, mut src: *const u8, mut len: usize, kind: Kind) -> Result<(), Error> { 33 | let check_lut = match kind { 34 | Kind::Base32 => BASE32_ALSW_CHECK_X2, 35 | Kind::Base32Hex => BASE32HEX_ALSW_CHECK_X2, 36 | }; 37 | 38 | let end = src.add(len / 32 * 32); 39 | while 
src < end { 40 | let x = s.v256_load_unaligned(src); 41 | 42 | let is_valid = check_ascii32(s, x, check_lut); 43 | ensure!(is_valid); 44 | 45 | src = src.add(32); 46 | } 47 | len %= 32; 48 | 49 | check_fallback(src, len, kind) 50 | } 51 | 52 | #[inline(always)] 53 | fn check_ascii32<S: SIMD256>(s: S, x: V256, check: AlswLut<V256>) -> bool { 54 | vsimd::alsw::check_ascii_xn(s, x, check) 55 | } 56 | -------------------------------------------------------------------------------- /crates/base32-simd/src/error.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | /// Base32 Error 4 | pub struct Error(()); 5 | 6 | impl Error { 7 | #[inline(always)] 8 | pub(crate) const fn new() -> Self { 9 | Error(()) 10 | } 11 | } 12 | 13 | impl fmt::Debug for Error { 14 | #[inline] 15 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 16 | <str as fmt::Debug>::fmt("Base32Error", f) 17 | } 18 | } 19 | 20 | impl fmt::Display for Error { 21 | #[inline] 22 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 23 | <str as fmt::Display>::fmt("Base32Error", f) 24 | } 25 | } 26 | 27 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] 28 | #[cfg(feature = "std")] 29 | impl std::error::Error for Error {} 30 | 31 | macro_rules! ensure { 32 | ($cond:expr) => { 33 | if !$cond { 34 | return Err($crate::error::Error::new()); 35 | } 36 | }; 37 | } 38 | 39 | #[allow(unused_macros)] 40 | macro_rules! 
try_ { 41 | ($result:expr) => { 42 | match $result { 43 | Ok(value) => value, 44 | Err(_) => return Err(Error::new()), 45 | } 46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /crates/base32-simd/src/heap.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::decoded_length; 2 | use crate::encode::encoded_length_unchecked; 3 | use crate::{AppendBase32Decode, AppendBase32Encode, Base32, Error, FromBase32Decode, FromBase32Encode}; 4 | 5 | use vsimd::tools::{alloc_uninit_bytes, assume_init, boxed_str, slice_parts}; 6 | 7 | #[cfg(not(any(test, feature = "std")))] 8 | use alloc::boxed::Box; 9 | #[cfg(not(any(test, feature = "std")))] 10 | use alloc::string::String; 11 | #[cfg(not(any(test, feature = "std")))] 12 | use alloc::vec::Vec; 13 | 14 | #[inline] 15 | fn decode_to_boxed_bytes(base32: &Base32, src: &[u8]) -> Result<Box<[u8]>, Error> { 16 | if src.is_empty() { 17 | return Ok(Box::from([])); 18 | } 19 | 20 | unsafe { 21 | let (n, m) = decoded_length(src, base32.padding)?; 22 | 23 | // safety: 0 < m < isize::MAX 24 | let mut buf = alloc_uninit_bytes(m); 25 | 26 | { 27 | let src: *const u8 = src.as_ptr(); 28 | let dst: *mut u8 = buf.as_mut_ptr().cast(); 29 | crate::multiversion::decode::auto(src, n, dst, base32.kind)?; 30 | } 31 | 32 | Ok(assume_init(buf)) 33 | } 34 | } 35 | 36 | #[inline] 37 | fn decode_append_vec(base32: &Base32, src: &[u8], buf: &mut Vec<u8>) -> Result<(), Error> { 38 | if src.is_empty() { 39 | return Ok(()); 40 | } 41 | 42 | let (n, m) = decoded_length(src, base32.padding)?; 43 | 44 | buf.reserve_exact(m); 45 | let prev_len = buf.len(); 46 | 47 | unsafe { 48 | let dst: *mut u8 = buf.as_mut_ptr().add(prev_len); 49 | let src: *const u8 = src.as_ptr(); 50 | crate::multiversion::decode::auto(src, n, dst, base32.kind)?; 51 | 52 | buf.set_len(prev_len + m); 53 | Ok(()) 54 | } 55 | } 56 | 57 | #[inline] 58 | fn encode_to_boxed_str(base32: &Base32, src: &[u8]) -> 
Box<str> { 59 | if src.is_empty() { 60 | return Box::from(""); 61 | } 62 | 63 | unsafe { 64 | let m = encoded_length_unchecked(src.len(), base32.padding); 65 | assert!(m <= usize::MAX / 2); 66 | 67 | let mut buf = alloc_uninit_bytes(m); 68 | 69 | { 70 | let (src, len) = slice_parts(src); 71 | let dst: *mut u8 = buf.as_mut_ptr().cast(); 72 | crate::multiversion::encode::auto(src, len, dst, base32.kind, base32.padding); 73 | } 74 | 75 | boxed_str(assume_init(buf)) 76 | } 77 | } 78 | 79 | #[inline] 80 | fn encode_append_vec(base32: &Base32, src: &[u8], buf: &mut Vec<u8>) { 81 | if src.is_empty() { 82 | return; 83 | } 84 | 85 | let m = encoded_length_unchecked(src.len(), base32.padding); 86 | assert!(m <= usize::MAX / 2); 87 | 88 | buf.reserve_exact(m); 89 | let prev_len = buf.len(); 90 | 91 | unsafe { 92 | let (src, len) = slice_parts(src); 93 | let dst = buf.as_mut_ptr().add(prev_len); 94 | crate::multiversion::encode::auto(src, len, dst, base32.kind, base32.padding); 95 | 96 | buf.set_len(prev_len + m); 97 | } 98 | } 99 | 100 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 101 | impl FromBase32Decode for Box<[u8]> { 102 | #[inline] 103 | fn from_base32_decode(base32: &Base32, data: &[u8]) -> Result<Self, Error> { 104 | decode_to_boxed_bytes(base32, data) 105 | } 106 | } 107 | 108 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 109 | impl FromBase32Decode for Vec<u8> { 110 | #[inline] 111 | fn from_base32_decode(base32: &Base32, data: &[u8]) -> Result<Self, Error> { 112 | let ans = <Box<[u8]> as FromBase32Decode>::from_base32_decode(base32, data)?; 113 | Ok(Vec::from(ans)) 114 | } 115 | } 116 | 117 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 118 | impl FromBase32Encode for Box<[u8]> { 119 | #[inline] 120 | fn from_base32_encode(base32: &Base32, data: &[u8]) -> Self { 121 | let ans = encode_to_boxed_str(base32, data); 122 | ans.into_boxed_bytes() 123 | } 124 | } 125 | 126 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 127 | impl FromBase32Encode for Box<str> { 128 | #[inline] 129 | fn 
from_base32_encode(base32: &Base32, data: &[u8]) -> Self { 130 | encode_to_boxed_str(base32, data) 131 | } 132 | } 133 | 134 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 135 | impl FromBase32Encode for Vec<u8> { 136 | #[inline] 137 | fn from_base32_encode(base32: &Base32, data: &[u8]) -> Self { 138 | let ans = encode_to_boxed_str(base32, data); 139 | Vec::from(ans.into_boxed_bytes()) 140 | } 141 | } 142 | 143 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 144 | impl FromBase32Encode for String { 145 | #[inline] 146 | fn from_base32_encode(base32: &Base32, data: &[u8]) -> Self { 147 | let ans = encode_to_boxed_str(base32, data); 148 | String::from(ans) 149 | } 150 | } 151 | 152 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 153 | impl AppendBase32Encode for Vec<u8> { 154 | #[inline] 155 | fn append_base32_encode(base32: &Base32, src: &[u8], dst: &mut Self) { 156 | encode_append_vec(base32, src, dst); 157 | } 158 | } 159 | 160 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 161 | impl AppendBase32Encode for String { 162 | #[inline] 163 | fn append_base32_encode(base32: &Base32, src: &[u8], dst: &mut Self) { 164 | unsafe { encode_append_vec(base32, src, dst.as_mut_vec()) }; 165 | } 166 | } 167 | 168 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 169 | impl AppendBase32Decode for Vec<u8> { 170 | #[inline] 171 | fn append_base32_decode(base32: &Base32, src: &[u8], dst: &mut Self) -> Result<(), Error> { 172 | decode_append_vec(base32, src, dst) 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /crates/base32-simd/src/multiversion.rs: -------------------------------------------------------------------------------- 1 | use crate::{Error, Kind}; 2 | 3 | vsimd::dispatch!( 4 | name = {check}, 5 | signature = {pub(crate) unsafe fn(src: *const u8, len: usize, kind: Kind) -> Result<(), Error>}, 6 | fallback = {crate::check::check_fallback}, 7 | simd = {crate::check::check_simd}, 8 | targets = {"avx2", "ssse3", "neon", 
"simd128"}, 9 | fastest = {"avx2", "neon", "simd128"}, 10 | ); 11 | 12 | vsimd::dispatch!( 13 | name = {decode}, 14 | signature = {pub(crate) unsafe fn(src: *const u8, len: usize, dst: *mut u8, kind: Kind) -> Result<(), Error>}, 15 | fallback = {crate::decode::decode_fallback}, 16 | simd = {crate::decode::decode_simd}, 17 | targets = {"avx2", "sse4.1", "neon", "simd128"}, 18 | fastest = {"avx2", "neon", "simd128"}, 19 | ); 20 | 21 | vsimd::dispatch!( 22 | name = {encode}, 23 | signature = {pub(crate) unsafe fn(src: *const u8, len: usize, dst: *mut u8, kind: Kind, padding: bool) -> ()}, 24 | fallback = {crate::encode::encode_fallback}, 25 | simd = {crate::encode::encode_simd}, 26 | targets = {"avx2", "sse4.1", "neon", "simd128"}, 27 | fastest = {"avx2", "neon", "simd128"}, 28 | ); 29 | -------------------------------------------------------------------------------- /crates/base32-simd/tests/it.rs: -------------------------------------------------------------------------------- 1 | use base32_simd::{AsOut, Base32}; 2 | use base32_simd::{BASE32, BASE32HEX, BASE32HEX_NO_PAD, BASE32_NO_PAD}; 3 | 4 | fn rand_bytes(n: usize) -> Vec { 5 | use rand::RngCore; 6 | let mut bytes = vec![0u8; n]; 7 | rand::thread_rng().fill_bytes(&mut bytes); 8 | bytes 9 | } 10 | 11 | #[cfg(miri)] 12 | use std::io::Write as _; 13 | 14 | macro_rules! 
dbgmsg { 15 | ($($fmt:tt)*) => { 16 | // println!($($fmt)*); 17 | // #[cfg(miri)] 18 | // std::io::stdout().flush().unwrap(); 19 | }; 20 | } 21 | 22 | #[cfg_attr(not(target_arch = "wasm32"), test)] 23 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 24 | fn basic() { 25 | let cases: &[(Base32, &str, &str)] = &[ 26 | (BASE32, "", ""), 27 | (BASE32, "f", "MY======"), 28 | (BASE32, "fo", "MZXQ===="), 29 | (BASE32, "foo", "MZXW6==="), 30 | (BASE32, "foob", "MZXW6YQ="), 31 | (BASE32, "fooba", "MZXW6YTB"), 32 | (BASE32, "foobar", "MZXW6YTBOI======"), 33 | (BASE32HEX, "", ""), 34 | (BASE32HEX, "f", "CO======"), 35 | (BASE32HEX, "fo", "CPNG===="), 36 | (BASE32HEX, "foo", "CPNMU==="), 37 | (BASE32HEX, "foob", "CPNMUOG="), 38 | (BASE32HEX, "fooba", "CPNMUOJ1"), 39 | (BASE32HEX, "foobar", "CPNMUOJ1E8======"), 40 | ]; 41 | 42 | let mut buf: Vec = Vec::new(); 43 | for &(ref base32, input, output) in cases { 44 | dbgmsg!("base32 = {base32:?}, input = {input:?}, output = {output:?}"); 45 | 46 | buf.clear(); 47 | buf.resize(base32.encoded_length(input.len()), 0); 48 | 49 | let ans = base32.encode_as_str(input.as_bytes(), buf.as_out()).unwrap(); 50 | assert_eq!(ans, output); 51 | 52 | buf.clear(); 53 | buf.resize(base32.decoded_length(output.as_bytes()).unwrap(), 0); 54 | 55 | let ans = base32.decode(output.as_bytes(), buf.as_out()).unwrap(); 56 | assert_eq!(ans, input.as_bytes()); 57 | } 58 | } 59 | 60 | #[cfg_attr(not(target_arch = "wasm32"), test)] 61 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 62 | fn special() { 63 | // failed random cases 64 | let inputs: &[&[u8]] = &[ 65 | &[ 66 | 0xC5, 0xB2, 0xFF, 0x01, 0xEA, 0xA1, 0xCE, 0x92, // 67 | 0x3F, 0xB5, 0x08, 0xD8, 0xBB, 0xE2, 0x80, 0xD9, // 68 | 0xC9, 0x8C, 0x5C, 0x18, 0x75, 0x3F, 0x12, 0xAE, // 69 | 0xD7, 0xA5, // 70 | ], 71 | &[ 72 | 0x06, 0x3A, 0x87, 0x48, 0xAB, 0xD7, 0xAB, 0xF0, // 73 | 0xAD, 0x85, 0x39, 0x50, 0x32, 0x23, 0x43, 0xEE, // 74 | 0x3B, 0x79, 0xF6, 0x95, 
0xC9, 0x9B, 0x63, 0xE2, // 75 | 0xAD, 0x66, 0x68, 0xB5, 0xE0, 0x2B, 0x5A, 0x81, // 76 | 0x5F, 0x46, 0xC2, 0x3B, // 77 | ], 78 | ]; 79 | 80 | let base32 = BASE32; 81 | 82 | for &input in inputs { 83 | let mut buf: Vec = vec![0; base32.encoded_length(input.len())]; 84 | 85 | let ans = base32.encode(input, buf.as_out()).unwrap(); 86 | assert!(base32.check(ans).is_ok()); 87 | 88 | let ans = base32.decode_inplace(&mut buf).unwrap(); 89 | assert_eq!(ans, input); 90 | } 91 | } 92 | 93 | #[cfg(feature = "alloc")] 94 | #[cfg_attr(not(target_arch = "wasm32"), test)] 95 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 96 | fn allocation() { 97 | let src = "helloworld"; 98 | let prefix = "data:;base32,"; 99 | 100 | let mut encode_buf = prefix.to_owned(); 101 | BASE32.encode_append(src, &mut encode_buf); 102 | 103 | assert_eq!(encode_buf, format!("{prefix}NBSWY3DPO5XXE3DE")); 104 | 105 | let mut decode_buf = b"123".to_vec(); 106 | let src = &encode_buf[prefix.len()..]; 107 | BASE32.decode_append(src, &mut decode_buf).unwrap(); 108 | 109 | assert_eq!(decode_buf, b"123helloworld"); 110 | } 111 | 112 | #[cfg_attr(not(target_arch = "wasm32"), test)] 113 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 114 | fn random() { 115 | dbgmsg!(); 116 | for n in 0..128 { 117 | dbgmsg!("n = {}", n); 118 | let bytes = rand_bytes(n); 119 | 120 | let test_config = [ 121 | BASE32, // 122 | BASE32HEX, // 123 | BASE32_NO_PAD, // 124 | BASE32HEX_NO_PAD, // 125 | ]; 126 | 127 | for base32 in test_config { 128 | dbgmsg!("base32 = {:?}", base32); 129 | 130 | let mut buf = vec![0u8; base32.encoded_length(n)]; 131 | let encoded = base32.encode(&bytes, buf.as_out()).unwrap(); 132 | assert!(base32.check(encoded).is_ok()); 133 | 134 | let mut buf = encoded.to_owned(); 135 | let ans = base32.decode_inplace(&mut buf).unwrap(); 136 | assert_eq!(ans, bytes); 137 | 138 | let mut buf = vec![0u8; base32.decoded_length(encoded).unwrap()]; 139 | let ans = 
base32.decode(encoded, buf.as_out()).unwrap(); 140 | assert_eq!(ans, bytes); 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /crates/base64-simd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "base64-simd" 3 | version = "0.9.0-dev" 4 | edition = "2021" 5 | description = "SIMD-accelerated base64 encoding and decoding" 6 | license = "MIT" 7 | repository = "https://github.com/Nugine/simd" 8 | keywords = ["base64", "simd"] 9 | categories = ["no-std", "parser-implementations", "encoding"] 10 | readme = "README.md" 11 | rust-version = "1.63" 12 | 13 | [package.metadata.docs.rs] 14 | all-features = true 15 | rustdoc-args = ["--cfg", "docsrs"] 16 | 17 | [features] 18 | default = ["std", "detect"] 19 | alloc = ["vsimd/alloc"] 20 | std = ["alloc", "vsimd/std"] 21 | detect = ["vsimd/detect"] 22 | unstable = ["vsimd/unstable"] 23 | parallel = ["unstable", "dep:rayon"] 24 | 25 | [dependencies] 26 | outref = "0.5.1" 27 | vsimd = { path = "../vsimd", version = "0.9.0-dev" } 28 | rayon = { version = "1.6.1", optional = true } 29 | 30 | [dev-dependencies] 31 | base64 = "0.21.0" 32 | rand = "0.8.5" 33 | const-str = "0.5.3" 34 | 35 | [target.'cfg(target_arch="wasm32")'.dev-dependencies] 36 | getrandom = { version = "0.2.8", features = ["js"] } 37 | wasm-bindgen-test = "0.3.34" 38 | -------------------------------------------------------------------------------- /crates/base64-simd/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /crates/base64-simd/README.md: -------------------------------------------------------------------------------- 1 | # base64-simd 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/base64-simd.svg)](https://crates.io/crates/base64-simd) 4 | 
[![Docs](https://docs.rs/base64-simd/badge.svg)](https://docs.rs/base64-simd/) 5 | [![MIT licensed][mit-badge]][mit-url] 6 | 7 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 8 | [mit-url]: ../../LICENSE 9 | 10 | SIMD-accelerated base64 encoding and decoding. 11 | 12 | Documentation: <https://docs.rs/base64-simd> 13 | 14 | Repository: <https://github.com/Nugine/simd> 15 | -------------------------------------------------------------------------------- /crates/base64-simd/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-check-cfg=cfg(vsimd_dump_symbols)") 3 | } 4 | -------------------------------------------------------------------------------- /crates/base64-simd/src/alsw.rs: -------------------------------------------------------------------------------- 1 | use vsimd::alsw::AlswLut; 2 | use vsimd::vector::{V128, V256}; 3 | 4 | struct StandardAlsw; 5 | 6 | impl StandardAlsw { 7 | #[inline] 8 | const fn decode(c: u8) -> u8 { 9 | match c { 10 | b'A'..=b'Z' => c - b'A', 11 | b'a'..=b'z' => c - b'a' + 26, 12 | b'0'..=b'9' => c - b'0' + 52, 13 | b'+' => 62, 14 | b'/' => 63, 15 | _ => 0xff, 16 | } 17 | } 18 | 19 | #[inline] 20 | const fn check_hash(i: u8) -> u8 { 21 | match i { 22 | 0 => 5, 23 | 1..=9 => 2, 24 | 0xA => 4, 25 | 0xB => 6, 26 | 0xC..=0xE => 8, 27 | 0xF => 6, 28 | _ => unreachable!(), 29 | } 30 | } 31 | 32 | #[inline] 33 | const fn decode_hash(i: u8) -> u8 { 34 | match i { 35 | 0xB => 0x07, 36 | 0xF => 0x08, 37 | _ => 0x01, 38 | } 39 | } 40 | } 41 | 42 | vsimd::impl_alsw!(StandardAlsw); 43 | 44 | struct UrlSafeAlsw; 45 | 46 | impl UrlSafeAlsw { 47 | #[inline] 48 | const fn decode(c: u8) -> u8 { 49 | match c { 50 | b'A'..=b'Z' => c - b'A', 51 | b'a'..=b'z' => c - b'a' + 26, 52 | b'0'..=b'9' => c - b'0' + 52, 53 | b'-' => 62, 54 | b'_' => 63, 55 | _ => 0xff, 56 | } 57 | } 58 | 59 | #[inline] 60 | const fn check_hash(i: u8) -> u8 { 61 | match i { 62 | 0 => 7, 63 | 1..=9 => 2, 64 | 0xA => 4, 65 | 0xB | 0xC => 6, 66 | 0xD => 
8, 67 | 0xE => 6, 68 | 0xF => 6, 69 | _ => unreachable!(), 70 | } 71 | } 72 | 73 | #[inline] 74 | const fn decode_hash(i: u8) -> u8 { 75 | match i { 76 | 0xD => 0x01, 77 | 0xF => 0x05, 78 | _ => 0x01, 79 | } 80 | } 81 | } 82 | 83 | vsimd::impl_alsw!(UrlSafeAlsw); 84 | 85 | pub const STANDARD_ALSW_CHECK_X2: AlswLut = StandardAlsw::check_lut().x2(); 86 | pub const STANDARD_ALSW_DECODE_X2: AlswLut = StandardAlsw::decode_lut().x2(); 87 | 88 | pub const URL_SAFE_ALSW_CHECK_X2: AlswLut = UrlSafeAlsw::check_lut().x2(); 89 | pub const URL_SAFE_ALSW_DECODE_X2: AlswLut = UrlSafeAlsw::decode_lut().x2(); 90 | 91 | #[cfg(test)] 92 | mod algorithm { 93 | use super::*; 94 | 95 | #[cfg_attr( 96 | any(miri, not(all(target_arch = "x86_64", target_os = "linux", target_env = "gnu"))), 97 | ignore 98 | )] 99 | #[test] 100 | fn standard_alsw() { 101 | StandardAlsw::test_check(); 102 | StandardAlsw::test_decode(); 103 | } 104 | 105 | #[cfg_attr( 106 | any(miri, not(all(target_arch = "x86_64", target_os = "linux", target_env = "gnu"))), 107 | ignore 108 | )] 109 | #[test] 110 | fn url_safe_alsw() { 111 | UrlSafeAlsw::test_check(); 112 | UrlSafeAlsw::test_decode(); 113 | } 114 | 115 | #[cfg(feature = "std")] 116 | #[test] 117 | #[ignore] 118 | fn debug_standard_alsw_check() { 119 | let hash = &StandardAlsw::CHECK_HASH; 120 | let offset = &StandardAlsw::CHECK_OFFSET; 121 | let is_primary = |c: u8| StandardAlsw::decode(c) != 0xff; 122 | 123 | vsimd::tools::print_fn_table(is_primary, |c: u8| vsimd::alsw::hash(hash, c)); 124 | vsimd::tools::print_fn_table(is_primary, |c: u8| vsimd::alsw::check(hash, offset, c)); 125 | } 126 | 127 | #[cfg(feature = "std")] 128 | #[test] 129 | #[ignore] 130 | fn debug_standard_alsw_decode() { 131 | let hash = &StandardAlsw::DECODE_HASH; 132 | let offset = &StandardAlsw::DECODE_OFFSET; 133 | let is_primary = |c: u8| StandardAlsw::decode(c) != 0xff; 134 | 135 | vsimd::tools::print_fn_table(is_primary, |c: u8| vsimd::alsw::hash(hash, c)); 136 | 
vsimd::tools::print_fn_table(is_primary, |c: u8| vsimd::alsw::decode(hash, offset, c)); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /crates/base64-simd/src/ascii.rs: -------------------------------------------------------------------------------- 1 | use vsimd::isa::AVX2; 2 | use vsimd::tools::slice_parts; 3 | use vsimd::{matches_isa, Scalable, POD, SIMD256}; 4 | 5 | use core::ops::Not; 6 | 7 | #[inline(always)] 8 | #[must_use] 9 | fn lookup_ascii_whitespace(c: u8) -> u8 { 10 | const TABLE: &[u8; 256] = &{ 11 | let mut ans = [0; 256]; 12 | let mut i: u8 = 0; 13 | loop { 14 | ans[i as usize] = if i.is_ascii_whitespace() { 0xff } else { 0 }; 15 | if i == 255 { 16 | break; 17 | } 18 | i += 1; 19 | } 20 | ans 21 | }; 22 | unsafe { *TABLE.get_unchecked(c as usize) } 23 | } 24 | 25 | #[inline(always)] 26 | fn has_ascii_whitespace, V: POD>(s: S, x: V) -> bool { 27 | // ASCII whitespaces 28 | // TAB 0x09 00001001 29 | // LF 0x0a 00001010 30 | // FF 0x0c 00001100 31 | // CR 0x0d 00001101 32 | // SPACE 0x20 00100000 33 | // 34 | 35 | // m1 = {{byte in 0x09..=0x0d}}x32 36 | let m1 = s.i8xn_lt(s.u8xn_sub(x, s.u8xn_splat(0x89)), s.i8xn_splat(-128 + 5)); 37 | 38 | // m2 = {{byte == 0x0b}} 39 | let m2 = s.u8xn_eq(x, s.u8xn_splat(0x0b)); 40 | 41 | // m3 = {{byte is SPACE}} 42 | let m3 = s.u8xn_eq(x, s.u8xn_splat(0x20)); 43 | 44 | // any((m1 & !m2) | m3) 45 | s.mask8xn_any(s.or(s.andnot(m1, m2), m3)) 46 | } 47 | 48 | #[inline(always)] 49 | unsafe fn find_non_ascii_whitespace_short(mut src: *const u8, len: usize) -> usize { 50 | let base = src; 51 | let end = base.add(len); 52 | while src < end { 53 | if lookup_ascii_whitespace(src.read()) != 0 { 54 | break; 55 | } 56 | src = src.add(1); 57 | } 58 | 59 | src.offset_from(base) as usize 60 | } 61 | 62 | #[inline(always)] 63 | pub unsafe fn find_non_ascii_whitespace_fallback(src: *const u8, len: usize) -> usize { 64 | find_non_ascii_whitespace_short(src, len) 65 | } 66 | 67 | 
#[inline(always)] 68 | pub unsafe fn find_non_ascii_whitespace_simd(s: S, mut src: *const u8, len: usize) -> usize { 69 | let base = src; 70 | 71 | if matches_isa!(S, AVX2) { 72 | let end = src.add(len / 32 * 32); 73 | while src < end { 74 | let x = s.v256_load_unaligned(src); 75 | if has_ascii_whitespace(s, x) { 76 | break; 77 | } 78 | src = src.add(32); 79 | } 80 | if (len % 32) >= 16 { 81 | let x = s.v128_load_unaligned(src); 82 | if has_ascii_whitespace(s, x).not() { 83 | src = src.add(16); 84 | } 85 | } 86 | } else { 87 | let end = src.add(len / 16 * 16); 88 | while src < end { 89 | let x = s.v128_load_unaligned(src); 90 | if has_ascii_whitespace(s, x) { 91 | break; 92 | } 93 | src = src.add(16); 94 | } 95 | } 96 | 97 | let checked_len = src.offset_from(base) as usize; 98 | let pos = find_non_ascii_whitespace_short(src, len - checked_len); 99 | checked_len + pos 100 | } 101 | 102 | #[inline(always)] 103 | #[must_use] 104 | pub fn find_non_ascii_whitespace(data: &[u8]) -> usize { 105 | let (src, len) = slice_parts(data); 106 | unsafe { crate::multiversion::find_non_ascii_whitespace::auto(src, len) } 107 | } 108 | 109 | #[inline(always)] 110 | #[must_use] 111 | pub unsafe fn remove_ascii_whitespace_fallback(mut src: *const u8, len: usize, mut dst: *mut u8) -> usize { 112 | let dst_base = dst; 113 | 114 | let end = src.add(len); 115 | while src < end { 116 | let x = src.read(); 117 | if lookup_ascii_whitespace(x) == 0 { 118 | dst.write(x); 119 | dst = dst.add(1); 120 | } 121 | src = src.add(1); 122 | } 123 | 124 | dst.offset_from(dst_base) as usize 125 | } 126 | 127 | #[inline(always)] 128 | #[must_use] 129 | pub fn remove_ascii_whitespace_inplace(data: &mut [u8]) -> &mut [u8] { 130 | let pos = find_non_ascii_whitespace(data); 131 | debug_assert!(pos <= data.len()); 132 | 133 | if pos == data.len() { 134 | return data; 135 | } 136 | 137 | unsafe { 138 | let len = data.len() - pos; 139 | let dst = data.as_mut_ptr().add(pos); 140 | let src = dst; 141 | 142 | let 
rem = remove_ascii_whitespace_fallback(src, len, dst); 143 | debug_assert!(rem <= len); 144 | 145 | data.get_unchecked_mut(..(pos + rem)) 146 | } 147 | } 148 | 149 | #[cfg(test)] 150 | mod tests { 151 | use super::*; 152 | 153 | #[cfg_attr(not(target_arch = "wasm32"), test)] 154 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 155 | fn test_remove_ascii_whitespace() { 156 | let cases = [ 157 | "\0\0\0\0", 158 | "abcd", 159 | "ab\tcd", 160 | "ab\ncd", 161 | "ab\x0Ccd", 162 | "ab\rcd", 163 | "ab cd", 164 | "ab\t\n\x0C\r cd", 165 | "ab\t\n\x0C\r =\t\n\x0C\r =\t\n\x0C\r ", 166 | ]; 167 | 168 | let check = |case: &str, repeat: usize| { 169 | let mut buf = case.repeat(repeat).into_bytes(); 170 | let expected = { 171 | let mut v = buf.clone(); 172 | v.retain(|c| !c.is_ascii_whitespace()); 173 | v 174 | }; 175 | let ans = remove_ascii_whitespace_inplace(&mut buf); 176 | assert_eq!(ans, &*expected, "case = {case:?}"); 177 | }; 178 | 179 | for case in cases { 180 | check(case, 1); 181 | 182 | if cfg!(not(miri)) { 183 | check(case, 10); 184 | } 185 | } 186 | } 187 | } 188 | 189 | #[cfg(test)] 190 | mod algorithm { 191 | #[cfg_attr( 192 | any(miri, not(all(target_arch = "x86_64", target_os = "linux", target_env = "gnu"))), 193 | ignore 194 | )] 195 | #[test] 196 | fn is_ascii_whitespace() { 197 | for x in 0..=255u8 { 198 | let m1 = (x.wrapping_sub(0x89) as i8) < (-128 + 5); 199 | let m2 = x == 0x0b; 200 | let m3 = x == 0x20; 201 | let ans = (m1 && !m2) || m3; 202 | assert_eq!(ans, x.is_ascii_whitespace()); 203 | } 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /crates/base64-simd/src/check.rs: -------------------------------------------------------------------------------- 1 | use crate::alsw::{STANDARD_ALSW_CHECK_X2, URL_SAFE_ALSW_CHECK_X2}; 2 | use crate::decode::{decode_ascii4, decode_ascii8, decode_extra}; 3 | use crate::decode::{STANDARD_DECODE_TABLE, URL_SAFE_DECODE_TABLE}; 4 | use 
crate::{Config, Error, Kind}; 5 | 6 | use vsimd::alsw::AlswLut; 7 | use vsimd::vector::V256; 8 | use vsimd::SIMD256; 9 | 10 | use core::ptr::null_mut; 11 | 12 | #[inline] 13 | pub(crate) unsafe fn check_fallback(mut src: *const u8, mut n: usize, config: Config) -> Result<(), Error> { 14 | let kind = config.kind; 15 | let forgiving = config.extra.forgiving(); 16 | 17 | let table = match kind { 18 | Kind::Standard => STANDARD_DECODE_TABLE.as_ptr(), 19 | Kind::UrlSafe => URL_SAFE_DECODE_TABLE.as_ptr(), 20 | }; 21 | 22 | unsafe { 23 | // n*3/4 >= 6+2 24 | while n >= 11 { 25 | decode_ascii8::(src, null_mut(), table)?; 26 | src = src.add(8); 27 | n -= 8; 28 | } 29 | 30 | while n >= 4 { 31 | decode_ascii4::(src, null_mut(), table)?; 32 | src = src.add(4); 33 | n -= 4; 34 | } 35 | 36 | decode_extra::(n, src, null_mut(), table, forgiving) 37 | } 38 | } 39 | 40 | #[inline(always)] 41 | pub(crate) unsafe fn check_simd( 42 | s: S, 43 | mut src: *const u8, 44 | mut n: usize, 45 | config: Config, 46 | ) -> Result<(), Error> { 47 | let kind = config.kind; 48 | 49 | let check_lut = match kind { 50 | Kind::Standard => STANDARD_ALSW_CHECK_X2, 51 | Kind::UrlSafe => URL_SAFE_ALSW_CHECK_X2, 52 | }; 53 | 54 | unsafe { 55 | // n*3/4 >= 24+4 56 | while n >= 38 { 57 | let x = s.v256_load_unaligned(src); 58 | let is_valid = check_ascii32(s, x, check_lut); 59 | ensure!(is_valid); 60 | src = src.add(32); 61 | n -= 32; 62 | } 63 | 64 | check_fallback(src, n, config) 65 | } 66 | } 67 | 68 | #[inline(always)] 69 | fn check_ascii32(s: S, x: V256, check: AlswLut) -> bool { 70 | vsimd::alsw::check_ascii_xn(s, x, check) 71 | } 72 | -------------------------------------------------------------------------------- /crates/base64-simd/src/decode.rs: -------------------------------------------------------------------------------- 1 | use crate::alsw::{STANDARD_ALSW_CHECK_X2, URL_SAFE_ALSW_CHECK_X2}; 2 | use crate::alsw::{STANDARD_ALSW_DECODE_X2, URL_SAFE_ALSW_DECODE_X2}; 3 | use crate::{Config, Error, 
Extra, Kind}; 4 | use crate::{STANDARD_CHARSET, URL_SAFE_CHARSET}; 5 | 6 | use vsimd::alsw::AlswLut; 7 | use vsimd::isa::{NEON, SSSE3, WASM128}; 8 | use vsimd::mask::u8x32_highbit_any; 9 | use vsimd::matches_isa; 10 | use vsimd::tools::{read, write}; 11 | use vsimd::vector::V256; 12 | use vsimd::SIMD256; 13 | 14 | use core::ops::Not; 15 | 16 | const fn decode_table(charset: &'static [u8; 64]) -> [u8; 256] { 17 | let mut table = [0xff; 256]; 18 | let mut i = 0; 19 | while i < charset.len() { 20 | table[charset[i] as usize] = i as u8; 21 | i += 1; 22 | } 23 | table 24 | } 25 | 26 | pub const STANDARD_DECODE_TABLE: &[u8; 256] = &decode_table(STANDARD_CHARSET); 27 | pub const URL_SAFE_DECODE_TABLE: &[u8; 256] = &decode_table(URL_SAFE_CHARSET); 28 | 29 | #[inline(always)] 30 | pub(crate) fn decoded_length(src: &[u8], config: Config) -> Result<(usize, usize), Error> { 31 | if src.is_empty() { 32 | return Ok((0, 0)); 33 | } 34 | 35 | let n = unsafe { 36 | let len = src.len(); 37 | 38 | let count_pad = || { 39 | let last1 = *src.get_unchecked(len - 1); 40 | let last2 = *src.get_unchecked(len - 2); 41 | if last1 == b'=' { 42 | if last2 == b'=' { 43 | 2 44 | } else { 45 | 1 46 | } 47 | } else { 48 | 0 49 | } 50 | }; 51 | 52 | match config.extra { 53 | Extra::Pad => { 54 | ensure!(len % 4 == 0); 55 | len - count_pad() 56 | } 57 | Extra::NoPad => len, 58 | Extra::Forgiving => { 59 | if len % 4 == 0 { 60 | len - count_pad() 61 | } else { 62 | len 63 | } 64 | } 65 | } 66 | }; 67 | 68 | let m = match n % 4 { 69 | 0 => n / 4 * 3, 70 | 1 => return Err(Error::new()), 71 | 2 => n / 4 * 3 + 1, 72 | 3 => n / 4 * 3 + 2, 73 | _ => unsafe { core::hint::unreachable_unchecked() }, 74 | }; 75 | 76 | Ok((n, m)) 77 | } 78 | 79 | #[inline(always)] 80 | pub unsafe fn decode_ascii8(src: *const u8, dst: *mut u8, table: *const u8) -> Result<(), Error> { 81 | let mut y: u64 = 0; 82 | let mut flag = 0; 83 | 84 | let mut i = 0; 85 | while i < 8 { 86 | let x = read(src, i); 87 | let bits = read(table, 
x as usize); 88 | flag |= bits; 89 | 90 | if WRITE { 91 | y |= (bits as u64) << (58 - i * 6); 92 | } 93 | 94 | i += 1; 95 | } 96 | 97 | if WRITE { 98 | dst.cast::().write_unaligned(y.to_be()); 99 | } 100 | 101 | ensure!(flag != 0xff); 102 | Ok(()) 103 | } 104 | 105 | #[inline(always)] 106 | pub unsafe fn decode_ascii4(src: *const u8, dst: *mut u8, table: *const u8) -> Result<(), Error> { 107 | let mut y: u32 = 0; 108 | let mut flag = 0; 109 | 110 | let mut i = 0; 111 | while i < 4 { 112 | let x = read(src, i); 113 | let bits = read(table, x as usize); 114 | flag |= bits; 115 | 116 | if WRITE { 117 | y |= (bits as u32) << (18 - i * 6); 118 | } 119 | 120 | i += 1; 121 | } 122 | 123 | if WRITE { 124 | let y = y.to_be_bytes(); 125 | write(dst, 0, y[1]); 126 | write(dst, 1, y[2]); 127 | write(dst, 2, y[3]); 128 | } 129 | 130 | ensure!(flag != 0xff); 131 | Ok(()) 132 | } 133 | 134 | #[inline(always)] 135 | pub unsafe fn decode_extra( 136 | extra: usize, 137 | src: *const u8, 138 | dst: *mut u8, 139 | table: *const u8, 140 | forgiving: bool, 141 | ) -> Result<(), Error> { 142 | match extra { 143 | 0 => {} 144 | 1 => core::hint::unreachable_unchecked(), 145 | 2 => { 146 | let [x1, x2] = src.cast::<[u8; 2]>().read(); 147 | 148 | let y1 = read(table, x1 as usize); 149 | let y2 = read(table, x2 as usize); 150 | ensure!((y1 | y2) != 0xff && (forgiving || (y2 & 0x0f) == 0)); 151 | 152 | if WRITE { 153 | write(dst, 0, (y1 << 2) | (y2 >> 4)); 154 | } 155 | } 156 | 3 => { 157 | let [x1, x2, x3] = src.cast::<[u8; 3]>().read(); 158 | 159 | let y1 = read(table, x1 as usize); 160 | let y2 = read(table, x2 as usize); 161 | let y3 = read(table, x3 as usize); 162 | ensure!((y1 | y2 | y3) != 0xff && (forgiving || (y3 & 0x03) == 0)); 163 | 164 | if WRITE { 165 | write(dst, 0, (y1 << 2) | (y2 >> 4)); 166 | write(dst, 1, (y2 << 4) | (y3 >> 2)); 167 | } 168 | } 169 | _ => core::hint::unreachable_unchecked(), 170 | } 171 | Ok(()) 172 | } 173 | 174 | #[inline] 175 | pub(crate) unsafe fn 
decode_fallback( 176 | mut src: *const u8, 177 | mut dst: *mut u8, 178 | mut n: usize, 179 | config: Config, 180 | ) -> Result<(), Error> { 181 | let kind = config.kind; 182 | let forgiving = config.extra.forgiving(); 183 | 184 | let table = match kind { 185 | Kind::Standard => STANDARD_DECODE_TABLE.as_ptr(), 186 | Kind::UrlSafe => URL_SAFE_DECODE_TABLE.as_ptr(), 187 | }; 188 | 189 | // n*3/4 >= 6+2 190 | while n >= 11 { 191 | decode_ascii8::(src, dst, table)?; 192 | src = src.add(8); 193 | dst = dst.add(6); 194 | n -= 8; 195 | } 196 | 197 | let end = src.add(n / 4 * 4); 198 | while src < end { 199 | decode_ascii4::(src, dst, table)?; 200 | src = src.add(4); 201 | dst = dst.add(3); 202 | } 203 | n %= 4; 204 | 205 | decode_extra::(n, src, dst, table, forgiving) 206 | } 207 | 208 | #[inline(always)] 209 | pub(crate) unsafe fn decode_simd( 210 | s: S, 211 | mut src: *const u8, 212 | mut dst: *mut u8, 213 | mut n: usize, 214 | config: Config, 215 | ) -> Result<(), Error> { 216 | let kind = config.kind; 217 | 218 | let (check_lut, decode_lut) = match kind { 219 | Kind::Standard => (STANDARD_ALSW_CHECK_X2, STANDARD_ALSW_DECODE_X2), 220 | Kind::UrlSafe => (URL_SAFE_ALSW_CHECK_X2, URL_SAFE_ALSW_DECODE_X2), 221 | }; 222 | 223 | // n*3/4 >= 24+4 224 | while n >= 38 { 225 | let x = s.v256_load_unaligned(src); 226 | let y = try_!(decode_ascii32(s, x, check_lut, decode_lut)); 227 | 228 | let (y1, y2) = y.to_v128x2(); 229 | s.v128_store_unaligned(dst, y1); 230 | s.v128_store_unaligned(dst.add(12), y2); 231 | 232 | src = src.add(32); 233 | dst = dst.add(24); 234 | n -= 32; 235 | } 236 | 237 | decode_fallback(src, dst, n, config) 238 | } 239 | 240 | #[inline(always)] 241 | fn merge_bits_x2(s: S, x: V256) -> V256 { 242 | // x : {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8 243 | 244 | let y = if matches_isa!(S, SSSE3) { 245 | let m1 = s.u16x16_splat(u16::from_le_bytes([0x40, 0x01])); 246 | let x1 = s.i16x16_maddubs(x, m1); 247 | // x1: {aabbbbbb|0000aaaa|ccdddddd|0000cccc} x8 248 | 249 
| let m2 = s.u32x8_splat(u32::from_le_bytes([0x00, 0x10, 0x01, 0x00])); 250 | s.i16x16_madd(x1, m2) 251 | // {ccdddddd|bbbbcccc|aaaaaabb|00000000} x8 252 | } else if matches_isa!(S, NEON | WASM128) { 253 | let m1 = s.u32x8_splat(u32::from_le_bytes([0x3f, 0x00, 0x3f, 0x00])); 254 | let x1 = s.v256_and(x, m1); 255 | // x1: {00aaaaaa|00000000|00cccccc|00000000} x8 256 | 257 | let m2 = s.u32x8_splat(u32::from_le_bytes([0x00, 0x3f, 0x00, 0x3f])); 258 | let x2 = s.v256_and(x, m2); 259 | // x2: {00000000|00bbbbbb|00000000|00dddddd} x8 260 | 261 | let x3 = s.v256_or(s.u32x8_shl::<18>(x1), s.u32x8_shr::<10>(x1)); 262 | // x3: {cc000000|0000cccc|aaaaaa00|00000000} x8 263 | 264 | let x4 = s.v256_or(s.u32x8_shl::<4>(x2), s.u32x8_shr::<24>(x2)); 265 | // x4: {00dddddd|bbbb0000|000000bb|dddd0000} 266 | 267 | let mask = s.u32x8_splat(u32::from_le_bytes([0xff, 0xff, 0xff, 0x00])); 268 | s.v256_and(s.v256_or(x3, x4), mask) 269 | // {ccdddddd|bbbbcccc|aaaaaabb|00000000} x8 270 | } else { 271 | unreachable!() 272 | }; 273 | 274 | const SHUFFLE: V256 = V256::double_bytes([ 275 | 0x02, 0x01, 0x00, 0x06, 0x05, 0x04, 0x0a, 0x09, // 276 | 0x08, 0x0e, 0x0d, 0x0c, 0x80, 0x80, 0x80, 0x80, // 277 | ]); 278 | s.u8x16x2_swizzle(y, SHUFFLE) 279 | // {AAAB|BBCC|CDDD|0000|EEEF|FFGG|GHHH|0000} 280 | } 281 | 282 | #[inline(always)] 283 | fn decode_ascii32(s: S, x: V256, check: AlswLut, decode: AlswLut) -> Result { 284 | let (c1, c2) = vsimd::alsw::decode_ascii_xn(s, x, check, decode); 285 | let y = merge_bits_x2(s, c2); 286 | ensure!(u8x32_highbit_any(s, c1).not()); 287 | Ok(y) 288 | } 289 | -------------------------------------------------------------------------------- /crates/base64-simd/src/error.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | /// Base64 Error 4 | pub struct Error(()); 5 | 6 | impl Error { 7 | #[inline(always)] 8 | pub(crate) const fn new() -> Self { 9 | Error(()) 10 | } 11 | } 12 | 13 | impl fmt::Debug for Error { 
14 | #[inline] 15 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 16 | ::fmt("Base64Error", f) 17 | } 18 | } 19 | 20 | impl fmt::Display for Error { 21 | #[inline] 22 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 23 | ::fmt("Base64Error", f) 24 | } 25 | } 26 | 27 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] 28 | #[cfg(feature = "std")] 29 | impl std::error::Error for Error {} 30 | 31 | macro_rules! ensure { 32 | ($cond:expr) => { 33 | if !$cond { 34 | return Err($crate::error::Error::new()); 35 | } 36 | }; 37 | } 38 | 39 | #[allow(unused_macros)] 40 | macro_rules! try_ { 41 | ($result:expr) => { 42 | match $result { 43 | Ok(value) => value, 44 | Err(_) => return Err(Error::new()), 45 | } 46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /crates/base64-simd/src/forgiving.rs: -------------------------------------------------------------------------------- 1 | use crate::ascii::*; 2 | use crate::STANDARD_FORGIVING; 3 | use crate::{Error, Out}; 4 | 5 | use vsimd::tools::slice_mut; 6 | 7 | use core::ptr::copy_nonoverlapping; 8 | 9 | #[cfg(all(feature = "alloc", not(any(test, feature = "std"))))] 10 | use alloc::vec::Vec; 11 | 12 | /// Forgiving decodes a base64 string to bytes and writes inplace. 13 | /// 14 | /// This function uses the standard charset. 15 | /// 16 | /// See 17 | /// 18 | /// # Errors 19 | /// This function returns `Err` if the content of `data` is invalid. 20 | #[inline] 21 | pub fn forgiving_decode_inplace(data: &mut [u8]) -> Result<&mut [u8], Error> { 22 | let data = remove_ascii_whitespace_inplace(data); 23 | STANDARD_FORGIVING.decode_inplace(data) 24 | } 25 | 26 | /// Forgiving decodes a base64 string to bytes. 27 | /// 28 | /// This function uses the standard charset. 
29 | /// 30 | /// See <https://infra.spec.whatwg.org/#forgiving-base64> 31 | /// 32 | /// # Errors 33 | /// This function returns `Err` if 34 | /// + `src.len() > dst.len()` 35 | /// + the content of `src` is invalid 36 | /// 37 | #[inline] 38 | pub fn forgiving_decode<'d>(src: &[u8], mut dst: Out<'d, [u8]>) -> Result<&'d mut [u8], Error> { 39 | ensure!(src.len() <= dst.len()); 40 | 41 | let pos = find_non_ascii_whitespace(src); 42 | debug_assert!(pos <= src.len()); 43 | 44 | if pos == src.len() { 45 | return STANDARD_FORGIVING.decode(src, dst); 46 | } 47 | 48 | unsafe { 49 | let len = src.len(); 50 | let src = src.as_ptr(); 51 | let dst = dst.as_mut_ptr(); 52 | 53 | copy_nonoverlapping(src, dst, pos); 54 | 55 | let rem = remove_ascii_whitespace_fallback(src.add(pos), len - pos, dst.add(pos)); 56 | debug_assert!(rem <= len - pos); 57 | 58 | let data = slice_mut(dst, pos + rem); 59 | STANDARD_FORGIVING.decode_inplace(data) 60 | } 61 | } 62 | 63 | /// Forgiving decodes a base64 string to bytes and returns a new [`Vec`](Vec). 64 | /// 65 | /// This function uses the standard charset. 66 | /// 67 | /// See <https://infra.spec.whatwg.org/#forgiving-base64> 68 | /// 69 | /// # Errors 70 | /// This function returns `Err` if the content of `data` is invalid. 
71 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 72 | #[cfg(feature = "alloc")] 73 | #[inline] 74 | pub fn forgiving_decode_to_vec(data: &[u8]) -> Result, Error> { 75 | let pos = find_non_ascii_whitespace(data); 76 | debug_assert!(pos <= data.len()); 77 | 78 | if pos == data.len() { 79 | return STANDARD_FORGIVING.decode_type::>(data); 80 | } 81 | 82 | let mut vec = Vec::with_capacity(data.len()); 83 | 84 | unsafe { 85 | let len = data.len(); 86 | let src = data.as_ptr(); 87 | let dst = vec.as_mut_ptr(); 88 | 89 | copy_nonoverlapping(src, dst, pos); 90 | 91 | let rem = remove_ascii_whitespace_fallback(src.add(pos), len - pos, dst.add(pos)); 92 | debug_assert!(rem <= len - pos); 93 | 94 | let data = slice_mut(dst, pos + rem); 95 | let ans_len = STANDARD_FORGIVING.decode_inplace(data)?.len(); 96 | 97 | vec.set_len(ans_len); 98 | }; 99 | 100 | Ok(vec) 101 | } 102 | 103 | #[cfg(test)] 104 | mod tests { 105 | use super::*; 106 | 107 | use crate::AsOut; 108 | 109 | #[cfg_attr(not(target_arch = "wasm32"), test)] 110 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 111 | fn test_forgiving() { 112 | use const_str::hex; 113 | 114 | let mut inputs: Vec<&str> = Vec::new(); 115 | let mut outputs: Vec<&[u8]> = Vec::new(); 116 | 117 | { 118 | let mut i = |i| inputs.push(i); 119 | let mut o = |o| outputs.push(o); 120 | 121 | i("ab"); 122 | o(&[0x69]); 123 | 124 | i("abc"); 125 | o(&[0x69, 0xB7]); 126 | 127 | i("abcd"); 128 | o(&[0x69, 0xB7, 0x1D]); 129 | 130 | i("helloworld"); 131 | o(&hex!("85 E9 65 A3 0A 2B 95")); 132 | 133 | i(" h e l l o w o r\nl\rd\t"); 134 | o(&hex!("85 E9 65 A3 0A 2B 95")); 135 | } 136 | 137 | for i in 0..inputs.len() { 138 | let (src, expected) = (inputs[i], outputs[i]); 139 | 140 | let mut buf = src.to_owned().into_bytes(); 141 | 142 | let ans = forgiving_decode_inplace(&mut buf).unwrap(); 143 | assert_eq!(ans, expected); 144 | 145 | let ans = crate::forgiving_decode(src.as_bytes(), buf.as_out()).unwrap(); 146 | 
assert_eq!(ans, expected); 147 | 148 | #[cfg(feature = "alloc")] 149 | { 150 | let ans = crate::forgiving_decode_to_vec(src.as_bytes()).unwrap(); 151 | assert_eq!(ans, expected); 152 | } 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /crates/base64-simd/src/heap.rs: -------------------------------------------------------------------------------- 1 | use crate::decode::decoded_length; 2 | use crate::encode::encoded_length_unchecked; 3 | use crate::{AppendBase64Decode, AppendBase64Encode}; 4 | use crate::{Base64, Error}; 5 | use crate::{FromBase64Decode, FromBase64Encode}; 6 | 7 | use vsimd::tools::{alloc_uninit_bytes, assume_init, boxed_str, slice_parts}; 8 | 9 | #[cfg(not(any(test, feature = "std")))] 10 | use alloc::boxed::Box; 11 | #[cfg(not(any(test, feature = "std")))] 12 | use alloc::string::String; 13 | #[cfg(not(any(test, feature = "std")))] 14 | use alloc::vec::Vec; 15 | 16 | #[inline] 17 | fn encode_to_boxed_str(base64: &Base64, data: &[u8]) -> Box { 18 | if data.is_empty() { 19 | return Box::from(""); 20 | } 21 | 22 | unsafe { 23 | let m = encoded_length_unchecked(data.len(), base64.config); 24 | assert!(m <= usize::MAX / 2); 25 | 26 | let mut buf = alloc_uninit_bytes(m); 27 | 28 | { 29 | let (src, len) = slice_parts(data); 30 | let dst: *mut u8 = buf.as_mut_ptr().cast(); 31 | crate::multiversion::encode::auto(src, len, dst, base64.config); 32 | } 33 | 34 | boxed_str(assume_init(buf)) 35 | } 36 | } 37 | 38 | #[inline] 39 | fn encode_append_vec(base64: &Base64, src: &[u8], buf: &mut Vec) { 40 | if src.is_empty() { 41 | return; 42 | } 43 | 44 | unsafe { 45 | let m = encoded_length_unchecked(src.len(), base64.config); 46 | assert!(m <= usize::MAX / 2); 47 | 48 | buf.reserve_exact(m); 49 | let prev_len = buf.len(); 50 | 51 | { 52 | let (src, len) = slice_parts(src); 53 | let dst = buf.as_mut_ptr().add(prev_len); 54 | crate::multiversion::encode::auto(src, len, dst, base64.config); 55 | } 56 | 57 
| buf.set_len(prev_len + m); 58 | } 59 | } 60 | 61 | #[inline] 62 | fn decode_to_boxed_bytes(base64: &Base64, data: &[u8]) -> Result, Error> { 63 | if data.is_empty() { 64 | return Ok(Box::from([])); 65 | } 66 | 67 | unsafe { 68 | let (n, m) = decoded_length(data, base64.config)?; 69 | 70 | // safety: 0 < m < isize::MAX 71 | let mut buf = alloc_uninit_bytes(m); 72 | 73 | { 74 | let dst = buf.as_mut_ptr().cast(); 75 | let src = data.as_ptr(); 76 | crate::multiversion::decode::auto(src, dst, n, base64.config)?; 77 | } 78 | 79 | Ok(assume_init(buf)) 80 | } 81 | } 82 | 83 | #[inline] 84 | fn decode_append_vec(base64: &Base64, src: &[u8], buf: &mut Vec) -> Result<(), Error> { 85 | if src.is_empty() { 86 | return Ok(()); 87 | } 88 | 89 | unsafe { 90 | let (n, m) = decoded_length(src, base64.config)?; 91 | 92 | buf.reserve_exact(m); 93 | let prev_len = buf.len(); 94 | 95 | let dst = buf.as_mut_ptr().add(prev_len); 96 | let src = src.as_ptr(); 97 | crate::multiversion::decode::auto(src, dst, n, base64.config)?; 98 | 99 | buf.set_len(prev_len + m); 100 | Ok(()) 101 | } 102 | } 103 | 104 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 105 | impl FromBase64Decode for Box<[u8]> { 106 | #[inline] 107 | fn from_base64_decode(base64: &Base64, data: &[u8]) -> Result { 108 | decode_to_boxed_bytes(base64, data) 109 | } 110 | } 111 | 112 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 113 | impl FromBase64Decode for Vec { 114 | #[inline] 115 | fn from_base64_decode(base64: &Base64, data: &[u8]) -> Result { 116 | let ans = decode_to_boxed_bytes(base64, data)?; 117 | Ok(Vec::from(ans)) 118 | } 119 | } 120 | 121 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 122 | impl FromBase64Encode for Box<[u8]> { 123 | #[inline] 124 | fn from_base64_encode(base64: &Base64, data: &[u8]) -> Self { 125 | let ans = encode_to_boxed_str(base64, data); 126 | ans.into_boxed_bytes() 127 | } 128 | } 129 | 130 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 131 | impl FromBase64Encode for Box { 
132 | #[inline] 133 | fn from_base64_encode(base64: &Base64, data: &[u8]) -> Self { 134 | encode_to_boxed_str(base64, data) 135 | } 136 | } 137 | 138 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 139 | impl FromBase64Encode for Vec { 140 | #[inline] 141 | fn from_base64_encode(base64: &Base64, data: &[u8]) -> Self { 142 | let ans = encode_to_boxed_str(base64, data); 143 | Vec::from(ans.into_boxed_bytes()) 144 | } 145 | } 146 | 147 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 148 | impl FromBase64Encode for String { 149 | #[inline] 150 | fn from_base64_encode(base64: &Base64, data: &[u8]) -> Self { 151 | let ans = encode_to_boxed_str(base64, data); 152 | String::from(ans) 153 | } 154 | } 155 | 156 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 157 | impl AppendBase64Encode for Vec { 158 | #[inline] 159 | fn append_base64_encode(base64: &Base64, src: &[u8], dst: &mut Self) { 160 | encode_append_vec(base64, src, dst); 161 | } 162 | } 163 | 164 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 165 | impl AppendBase64Encode for String { 166 | #[inline] 167 | fn append_base64_encode(base64: &Base64, src: &[u8], dst: &mut Self) { 168 | unsafe { encode_append_vec(base64, src, dst.as_mut_vec()) }; 169 | } 170 | } 171 | 172 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 173 | impl AppendBase64Decode for Vec { 174 | #[inline] 175 | fn append_base64_decode(base64: &Base64, src: &[u8], dst: &mut Self) -> Result<(), Error> { 176 | decode_append_vec(base64, src, dst) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /crates/base64-simd/src/multiversion.rs: -------------------------------------------------------------------------------- 1 | use crate::{Config, Error}; 2 | 3 | vsimd::dispatch!( 4 | name = {encode}, 5 | signature = {pub(crate) unsafe fn(src: *const u8, len: usize, dst: *mut u8, config: Config) -> ()}, 6 | fallback = {crate::encode::encode_fallback}, 7 | simd = {crate::encode::encode_simd}, 8 | 
targets = {"avx2", "ssse3", "neon", "simd128"}, 9 | fastest = {"avx2", "neon", "simd128"}, 10 | ); 11 | 12 | vsimd::dispatch!( 13 | name = {decode}, 14 | signature = {pub(crate) unsafe fn(src: *const u8, dst: *mut u8, n: usize, config: Config) -> Result<(), Error>}, 15 | fallback = {crate::decode::decode_fallback}, 16 | simd = {crate::decode::decode_simd}, 17 | targets = {"avx2", "ssse3", "neon", "simd128"}, 18 | fastest = {"avx2", "neon", "simd128"}, 19 | ); 20 | 21 | vsimd::dispatch!( 22 | name = {check}, 23 | signature = {pub(crate) unsafe fn(src: *const u8, n: usize, config: Config) -> Result<(), Error>}, 24 | fallback = {crate::check::check_fallback}, 25 | simd = {crate::check::check_simd}, 26 | targets = {"avx2", "ssse3", "neon", "simd128"}, 27 | fastest = {"avx2", "neon", "simd128"}, 28 | ); 29 | 30 | vsimd::dispatch!( 31 | name = {find_non_ascii_whitespace}, 32 | signature = {pub unsafe fn(src: *const u8, len: usize) -> usize}, 33 | fallback = {crate::ascii::find_non_ascii_whitespace_fallback}, 34 | simd = {crate::ascii::find_non_ascii_whitespace_simd}, 35 | targets = {"avx2", "sse2", "neon", "simd128"}, 36 | fastest = {"avx2", "neon", "simd128"}, 37 | ); 38 | -------------------------------------------------------------------------------- /crates/base64-simd/src/parallel.rs: -------------------------------------------------------------------------------- 1 | use crate::encode::encoded_length_unchecked; 2 | use crate::{Base64, Config, Error, Extra, Out}; 3 | 4 | use rayon::prelude::{IndexedParallelIterator, ParallelIterator}; 5 | use rayon::slice::{ParallelSlice, ParallelSliceMut}; 6 | use vsimd::tools::slice_mut; 7 | 8 | impl Base64 { 9 | /// **EXPERIMENTAL**: 10 | /// Encodes bytes to a base64 string in parallel. 11 | /// 12 | /// # Errors 13 | /// This function returns `Err` if the length of `dst` is not enough. 
14 | #[cfg_attr(docsrs, doc(cfg(feature = "parallel")))] 15 | #[inline] 16 | pub fn par_encode<'d>(&self, src: &[u8], dst: Out<'d, [u8]>) -> Result<&'d mut [u8], Error> { 17 | let p = rayon::current_num_threads(); 18 | let b = src.len() / 3; 19 | if src.len() < p * 4096 || p < 2 || b < p { 20 | return self.encode(src, dst); 21 | } 22 | 23 | let encoded_len = encoded_length_unchecked(src.len(), self.config); 24 | let dst = unsafe { dst.into_uninit_slice() }; 25 | let dst = &mut dst[..encoded_len]; // panic? 26 | 27 | let chunks = (b + p) / p; 28 | 29 | let src_chunks = src.par_chunks(chunks * 3); 30 | let dst_chunks = dst.par_chunks_mut(chunks * 4); 31 | 32 | if self.config.extra.padding() { 33 | let no_pad = Config { 34 | kind: self.config.kind, 35 | extra: Extra::NoPad, 36 | }; 37 | src_chunks.zip(dst_chunks).for_each(|(s, d)| unsafe { 38 | let len = s.len(); 39 | let sp = s.as_ptr(); 40 | let dp = d.as_mut_ptr().cast::(); 41 | if len % 3 == 0 { 42 | crate::multiversion::encode::auto(sp, len, dp, no_pad); 43 | } else { 44 | crate::multiversion::encode::auto(sp, len, dp, self.config); 45 | } 46 | }); 47 | } else { 48 | src_chunks.zip(dst_chunks).for_each(|(s, d)| unsafe { 49 | let len = s.len(); 50 | let sp = s.as_ptr(); 51 | let dp = d.as_mut_ptr().cast::(); 52 | crate::multiversion::encode::auto(sp, len, dp, self.config); 53 | }); 54 | } 55 | 56 | unsafe { 57 | let len = dst.len(); 58 | let ptr = dst.as_mut_ptr().cast::(); 59 | Ok(slice_mut(ptr, len)) 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /crates/hex-simd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hex-simd" 3 | version = "0.9.0-dev" 4 | edition = "2021" 5 | description = "SIMD-accelerated hex encoding and decoding" 6 | license = "MIT" 7 | repository = "https://github.com/Nugine/simd" 8 | keywords = ["hex", "simd"] 9 | categories = ["no-std", "parser-implementations", 
"encoding"] 10 | readme = "README.md" 11 | rust-version = "1.63" 12 | 13 | [package.metadata.docs.rs] 14 | all-features = true 15 | rustdoc-args = ["--cfg", "docsrs"] 16 | 17 | [features] 18 | default = ["std", "detect"] 19 | alloc = ["vsimd/alloc"] 20 | std = ["alloc", "vsimd/std"] 21 | detect = ["vsimd/detect"] 22 | unstable = ["vsimd/unstable"] 23 | 24 | [dependencies] 25 | outref = "0.5.1" 26 | vsimd = { path = "../vsimd", version = "0.9.0-dev" } 27 | 28 | [dev-dependencies] 29 | rand = "0.8.5" 30 | 31 | [target.'cfg(target_arch="wasm32")'.dev-dependencies] 32 | getrandom = { version = "0.2.8", features = ["js"] } 33 | wasm-bindgen-test = "0.3.34" 34 | -------------------------------------------------------------------------------- /crates/hex-simd/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /crates/hex-simd/README.md: -------------------------------------------------------------------------------- 1 | # hex-simd 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/hex-simd.svg)](https://crates.io/crates/hex-simd) 4 | [![Docs](https://docs.rs/hex-simd/badge.svg)](https://docs.rs/hex-simd/) 5 | [![MIT licensed][mit-badge]][mit-url] 6 | 7 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 8 | [mit-url]: ../../LICENSE 9 | 10 | SIMD-accelerated hex encoding and decoding. 
11 | 12 | Documentation: <https://docs.rs/hex-simd> 13 | 14 | Repository: <https://github.com/Nugine/simd> 15 | -------------------------------------------------------------------------------- /crates/hex-simd/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-check-cfg=cfg(vsimd_dump_symbols)") 3 | } 4 | -------------------------------------------------------------------------------- /crates/hex-simd/src/check.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | 3 | use vsimd::hex::unhex; 4 | use vsimd::isa::{AVX2, WASM128}; 5 | use vsimd::{matches_isa, SIMD256}; 6 | 7 | #[inline(always)] 8 | unsafe fn check_short(mut src: *const u8, len: usize) -> Result<(), Error> { 9 | let mut flag = 0; 10 | let end = src.add(len); 11 | while src < end { 12 | let x = src.read(); 13 | flag |= unhex(x); 14 | src = src.add(1); 15 | } 16 | ensure!(flag != 0xff); 17 | Ok(()) 18 | } 19 | 20 | /// FIXME: work around for suboptimal auto-vectorization (AVX2, WASM128) 21 | #[inline(always)] 22 | unsafe fn check_short_sc(mut src: *const u8, len: usize) -> Result<(), Error> { 23 | let end = src.add(len); 24 | while src < end { 25 | let x = src.read(); 26 | ensure!(unhex(x) != 0xff); 27 | src = src.add(1); 28 | } 29 | Ok(()) 30 | } 31 | 32 | #[inline(always)] 33 | pub unsafe fn check_fallback(src: *const u8, len: usize) -> Result<(), Error> { 34 | check_short(src, len) 35 | } 36 | 37 | #[inline(always)] 38 | pub unsafe fn check_simd(s: S, mut src: *const u8, mut len: usize) -> Result<(), Error> { 39 | if matches_isa!(S, AVX2) { 40 | if len == 16 { 41 | let x = s.v128_load_unaligned(src); 42 | ensure!(vsimd::hex::check_xn(s, x)); 43 | return Ok(()); 44 | } 45 | 46 | let end = src.add(len / 32 * 32); 47 | while src < end { 48 | let x = s.v256_load_unaligned(src); 49 | ensure!(vsimd::hex::check_xn(s, x)); 50 | src = src.add(32); 51 | } 52 | len %= 32; 53 | 54 | if len == 0 { 55 | return Ok(()); 56 | } 57 | 58 | if
len >= 16 { 59 | let x = s.v128_load_unaligned(src); 60 | ensure!(vsimd::hex::check_xn(s, x)); 61 | len -= 16; 62 | src = src.add(16); 63 | } 64 | } else { 65 | let end = src.add(len / 16 * 16); 66 | while src < end { 67 | let x = s.v128_load_unaligned(src); 68 | ensure!(vsimd::hex::check_xn(s, x)); 69 | src = src.add(16); 70 | } 71 | len %= 16; 72 | 73 | if len == 0 { 74 | return Ok(()); 75 | } 76 | } 77 | 78 | if matches_isa!(S, AVX2 | WASM128) { 79 | check_short_sc(src, len) 80 | } else { 81 | check_short(src, len) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /crates/hex-simd/src/decode.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | 3 | use vsimd::hex::unhex; 4 | use vsimd::is_isa_type; 5 | use vsimd::isa::{Fallback, InstructionSet, AVX2, SSE2, WASM128}; 6 | use vsimd::matches_isa; 7 | use vsimd::tools::read; 8 | use vsimd::vector::V64; 9 | use vsimd::{SIMD128, SIMD256}; 10 | 11 | #[inline(always)] 12 | fn shl4(x: u8) -> u8 { 13 | x.wrapping_shl(4) 14 | } 15 | 16 | #[inline(always)] 17 | unsafe fn decode_bits(src: *const u8, dst: *mut u8) -> u8 { 18 | let y1 = unhex(read(src, 0)); 19 | let y2 = unhex(read(src, 1)); 20 | let z = shl4(y1) | y2; 21 | dst.write(z); 22 | y1 | y2 23 | } 24 | 25 | #[inline(always)] 26 | unsafe fn decode_short(mut src: *const u8, len: usize, mut dst: *mut u8) -> Result<(), Error> 27 | where 28 | S: InstructionSet, 29 | { 30 | // FIXME: work around for suboptimal auto-vectorization (AVX2, WASM128) 31 | if matches_isa!(S, AVX2 | WASM128) { 32 | let end = src.add(len); 33 | while src < end { 34 | let flag = decode_bits(src, dst); 35 | ensure!(flag != 0xff); 36 | src = src.add(2); 37 | dst = dst.add(1); 38 | } 39 | Ok(()) 40 | } else { 41 | let end = src.add(len); 42 | let mut flag = 0; 43 | while src < end { 44 | flag |= decode_bits(src, dst); 45 | src = src.add(2); 46 | dst = dst.add(1); 47 | } 48 | ensure!(flag != 0xff); 
49 | Ok(()) 50 | } 51 | } 52 | 53 | #[inline(always)] 54 | unsafe fn decode_long(mut src: *const u8, len: usize, mut dst: *mut u8) -> Result<(), Error> { 55 | let end = src.add(len / 16 * 16); 56 | while src < end { 57 | let mut flag = 0; 58 | let mut i = 0; 59 | while i < 8 { 60 | flag |= decode_bits(src, dst); 61 | src = src.add(2); 62 | dst = dst.add(1); 63 | i += 1; 64 | } 65 | ensure!(flag != 0xff); 66 | } 67 | decode_short::(src, len % 16, dst) 68 | } 69 | 70 | #[inline(always)] 71 | pub unsafe fn decode_fallback(src: *const u8, len: usize, dst: *mut u8) -> Result<(), Error> { 72 | decode_long(src, len, dst) 73 | } 74 | 75 | #[inline(always)] 76 | unsafe fn decode16(s: S, src: *const u8, dst: *mut u8) -> Result<(), Error> { 77 | let x = s.v128_load_unaligned(src); 78 | let y = try_!(vsimd::hex::decode_ascii16(s, x)); 79 | dst.cast::().write_unaligned(y); 80 | Ok(()) 81 | } 82 | 83 | #[inline(always)] 84 | unsafe fn decode32(s: S, src: *const u8, dst: *mut u8) -> Result<(), Error> { 85 | let x = s.v256_load_unaligned(src); 86 | let y = try_!(vsimd::hex::decode_ascii32(s, x)); 87 | s.v128_store_unaligned(dst, y); 88 | Ok(()) 89 | } 90 | 91 | #[inline(always)] 92 | pub unsafe fn decode_simd(s: S, src: *const u8, len: usize, dst: *mut u8) -> Result<(), Error> { 93 | if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { 94 | if is_isa_type!(S, SSE2) { 95 | return decode_simd_sse2(SSE2::new(), src, len, dst); 96 | } 97 | if matches_isa!(S, AVX2) { 98 | return decode_simd_v256(s, src, len, dst); 99 | } 100 | } 101 | decode_simd_v128(s, src, len, dst) 102 | } 103 | 104 | #[inline(always)] 105 | pub unsafe fn decode_simd_v256( 106 | s: S, 107 | mut src: *const u8, 108 | mut len: usize, 109 | mut dst: *mut u8, 110 | ) -> Result<(), Error> { 111 | if len == 16 { 112 | return decode16(s, src, dst); 113 | } 114 | 115 | if len == 32 { 116 | return decode32(s, src, dst); 117 | } 118 | 119 | let end = src.add(len / 64 * 64); 120 | while src < end { 121 | let x0 = 
s.v256_load_unaligned(src); 122 | src = src.add(32); 123 | 124 | let x1 = s.v256_load_unaligned(src); 125 | src = src.add(32); 126 | 127 | let x = (x0, x1); 128 | let y = try_!(vsimd::hex::decode_ascii32x2(s, x)); 129 | s.v256_store_unaligned(dst, y); 130 | dst = dst.add(32); 131 | } 132 | len %= 64; 133 | 134 | if len == 0 { 135 | return Ok(()); 136 | } 137 | 138 | if len >= 32 { 139 | decode32(s, src, dst)?; 140 | src = src.add(32); 141 | dst = dst.add(16); 142 | len -= 32; 143 | } 144 | 145 | if len >= 16 { 146 | decode16(s, src, dst)?; 147 | src = src.add(16); 148 | dst = dst.add(8); 149 | len -= 16; 150 | } 151 | 152 | decode_short::(src, len, dst) 153 | } 154 | 155 | #[inline(always)] 156 | pub unsafe fn decode_simd_v128( 157 | s: S, 158 | mut src: *const u8, 159 | mut len: usize, 160 | mut dst: *mut u8, 161 | ) -> Result<(), Error> { 162 | let end = src.add(len / 32 * 32); 163 | while src < end { 164 | decode32(s, src, dst)?; 165 | src = src.add(32); 166 | dst = dst.add(16); 167 | } 168 | len %= 32; 169 | 170 | if len == 0 { 171 | return Ok(()); 172 | } 173 | if len >= 16 { 174 | decode16(s, src, dst)?; 175 | src = src.add(16); 176 | dst = dst.add(8); 177 | len -= 16; 178 | } 179 | decode_short::(src, len, dst) 180 | } 181 | 182 | #[inline(always)] 183 | pub unsafe fn decode_simd_sse2(s: SSE2, mut src: *const u8, mut len: usize, mut dst: *mut u8) -> Result<(), Error> { 184 | let end = src.add(len / 16 * 16); 185 | while src < end { 186 | let x = s.v128_load_unaligned(src); 187 | 188 | let (nibbles, flag) = vsimd::hex::sse2::decode_nibbles(s, x); 189 | ensure!(s.u8x16_bitmask(flag) == 0); 190 | 191 | let ans = vsimd::hex::sse2::merge_bits(s, nibbles); 192 | dst.cast::().write_unaligned(ans); 193 | 194 | src = src.add(16); 195 | dst = dst.add(8); 196 | } 197 | len %= 16; 198 | 199 | if len == 0 { 200 | return Ok(()); 201 | } 202 | 203 | decode_short::(src, len, dst) 204 | } 205 | -------------------------------------------------------------------------------- 
/crates/hex-simd/src/encode.rs: -------------------------------------------------------------------------------- 1 | use vsimd::ascii::AsciiCase; 2 | use vsimd::is_isa_type; 3 | use vsimd::isa::{InstructionSet, AVX2, SSE2}; 4 | use vsimd::matches_isa; 5 | use vsimd::tools::{read, write}; 6 | use vsimd::{SIMD128, SIMD256}; 7 | 8 | #[inline(always)] 9 | fn charset(case: AsciiCase) -> &'static [u8; 16] { 10 | match case { 11 | AsciiCase::Lower => vsimd::hex::LOWER_CHARSET, 12 | AsciiCase::Upper => vsimd::hex::UPPER_CHARSET, 13 | } 14 | } 15 | 16 | #[inline(always)] 17 | unsafe fn encode_bits(src: *const u8, dst: *mut u8, charset: *const u8) { 18 | let x = src.read(); 19 | let hi = read(charset, (x >> 4) as usize); 20 | let lo = read(charset, (x & 0x0f) as usize); 21 | write(dst, 0, hi); 22 | write(dst, 1, lo); 23 | } 24 | 25 | #[inline(always)] 26 | unsafe fn encode_short(mut src: *const u8, len: usize, mut dst: *mut u8, charset: *const u8) { 27 | let end = src.add(len); 28 | while src < end { 29 | encode_bits(src, dst, charset); 30 | src = src.add(1); 31 | dst = dst.add(2); 32 | } 33 | } 34 | 35 | #[inline(always)] 36 | unsafe fn encode_long(mut src: *const u8, len: usize, mut dst: *mut u8, case: AsciiCase) { 37 | let charset = charset(case).as_ptr(); 38 | 39 | let end = src.add(len / 8 * 8); 40 | while src < end { 41 | let mut i = 0; 42 | while i < 8 { 43 | encode_bits(src, dst, charset); 44 | src = src.add(1); 45 | dst = dst.add(2); 46 | i += 1; 47 | } 48 | } 49 | encode_short(src, len % 8, dst, charset); 50 | } 51 | 52 | #[inline(always)] 53 | pub unsafe fn encode_fallback(src: *const u8, len: usize, dst: *mut u8, case: AsciiCase) { 54 | encode_long(src, len, dst, case); 55 | } 56 | 57 | #[inline(always)] 58 | pub unsafe fn encode_simd(s: S, src: *const u8, len: usize, dst: *mut u8, case: AsciiCase) { 59 | if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { 60 | if is_isa_type!(S, SSE2) { 61 | return encode_simd_sse2(SSE2::new(), src, len, dst, case); 62 | 
} 63 | if matches_isa!(S, AVX2) { 64 | return encode_simd_v256(s, src, len, dst, case); 65 | } 66 | } 67 | encode_simd_v128(s, src, len, dst, case); 68 | } 69 | 70 | #[inline(always)] 71 | pub unsafe fn encode_simd_v256( 72 | s: S, 73 | mut src: *const u8, 74 | mut len: usize, 75 | mut dst: *mut u8, 76 | case: AsciiCase, 77 | ) { 78 | let lut = match case { 79 | AsciiCase::Lower => vsimd::hex::ENCODE_LOWER_LUT, 80 | AsciiCase::Upper => vsimd::hex::ENCODE_UPPER_LUT, 81 | }; 82 | 83 | if len == 16 { 84 | let x = s.v128_load_unaligned(src); 85 | let y = vsimd::hex::encode_bytes16(s, x, lut); 86 | s.v256_store_unaligned(dst, y); 87 | return; 88 | } 89 | 90 | let end = src.add(len / 32 * 32); 91 | while src < end { 92 | let x = s.v256_load_unaligned(src); 93 | let (y1, y2) = vsimd::hex::encode_bytes32(s, x, lut); 94 | 95 | s.v256_store_unaligned(dst, y1); 96 | dst = dst.add(32); 97 | 98 | s.v256_store_unaligned(dst, y2); 99 | dst = dst.add(32); 100 | 101 | src = src.add(32); 102 | } 103 | len %= 32; 104 | 105 | if len == 0 { 106 | return; 107 | } 108 | 109 | if len >= 16 { 110 | let x = s.v128_load_unaligned(src); 111 | let y = vsimd::hex::encode_bytes16(s, x, lut); 112 | s.v256_store_unaligned(dst, y); 113 | dst = dst.add(32); 114 | src = src.add(16); 115 | len -= 16; 116 | } 117 | 118 | if len > 0 { 119 | let charset = charset(case).as_ptr(); 120 | encode_short(src, len, dst, charset); 121 | } 122 | } 123 | 124 | #[inline(always)] 125 | pub unsafe fn encode_simd_v128( 126 | s: S, 127 | mut src: *const u8, 128 | mut len: usize, 129 | mut dst: *mut u8, 130 | case: AsciiCase, 131 | ) { 132 | let lut = match case { 133 | AsciiCase::Lower => vsimd::hex::ENCODE_LOWER_LUT, 134 | AsciiCase::Upper => vsimd::hex::ENCODE_UPPER_LUT, 135 | }; 136 | 137 | let end = src.add(len / 16 * 16); 138 | while src < end { 139 | let x = s.v128_load_unaligned(src); 140 | let y = vsimd::hex::encode_bytes16(s, x, lut); 141 | s.v256_store_unaligned(dst, y); 142 | dst = dst.add(32); 143 | src = 
src.add(16); 144 | } 145 | len %= 16; 146 | 147 | if len == 0 { 148 | return; 149 | } 150 | 151 | let charset = charset(case).as_ptr(); 152 | encode_short(src, len, dst, charset); 153 | } 154 | 155 | #[inline(always)] 156 | pub unsafe fn encode_simd_sse2(s: SSE2, mut src: *const u8, mut len: usize, mut dst: *mut u8, case: AsciiCase) { 157 | let offset = match case { 158 | AsciiCase::Lower => vsimd::hex::sse2::LOWER_OFFSET, 159 | AsciiCase::Upper => vsimd::hex::sse2::UPPER_OFFSET, 160 | }; 161 | 162 | let end = src.add(len / 16 * 16); 163 | while src < end { 164 | let x = s.v128_load_unaligned(src); 165 | src = src.add(16); 166 | 167 | let (y1, y2) = vsimd::hex::sse2::encode16(s, x, offset); 168 | 169 | s.v128_store_unaligned(dst, y1); 170 | dst = dst.add(16); 171 | 172 | s.v128_store_unaligned(dst, y2); 173 | dst = dst.add(16); 174 | } 175 | len %= 16; 176 | 177 | if len == 0 { 178 | return; 179 | } 180 | 181 | let charset = charset(case).as_ptr(); 182 | encode_short(src, len, dst, charset); 183 | } 184 | -------------------------------------------------------------------------------- /crates/hex-simd/src/error.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | /// Hex Error 4 | pub struct Error(()); 5 | 6 | impl Error { 7 | #[inline(always)] 8 | pub(crate) const fn new() -> Self { 9 | Error(()) 10 | } 11 | } 12 | 13 | impl fmt::Debug for Error { 14 | #[inline] 15 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 16 | ::fmt("HexError", f) 17 | } 18 | } 19 | 20 | impl fmt::Display for Error { 21 | #[inline] 22 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 23 | ::fmt("HexError", f) 24 | } 25 | } 26 | 27 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] 28 | #[cfg(feature = "std")] 29 | impl std::error::Error for Error {} 30 | 31 | macro_rules! 
ensure { 32 | ($cond:expr) => { 33 | if !$cond { 34 | return Err($crate::error::Error::new()); 35 | } 36 | }; 37 | } 38 | 39 | #[allow(unused_macros)] 40 | macro_rules! try_ { 41 | ($result:expr) => { 42 | match $result { 43 | Ok(value) => value, 44 | Err(()) => return Err(Error::new()), 45 | } 46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /crates/hex-simd/src/heap.rs: -------------------------------------------------------------------------------- 1 | use crate::{AppendHexDecode, AppendHexEncode, AsciiCase, Error, FromHexDecode, FromHexEncode}; 2 | 3 | use vsimd::tools::{alloc_uninit_bytes, assume_init, boxed_str, slice_parts}; 4 | 5 | #[cfg(not(any(test, feature = "std")))] 6 | use alloc::boxed::Box; 7 | #[cfg(not(any(test, feature = "std")))] 8 | use alloc::string::String; 9 | #[cfg(not(any(test, feature = "std")))] 10 | use alloc::vec::Vec; 11 | 12 | #[inline] 13 | fn decode_to_boxed_bytes(src: &[u8]) -> Result, Error> { 14 | if src.is_empty() { 15 | return Ok(Box::from([])); 16 | } 17 | 18 | ensure!(src.len() % 2 == 0); 19 | 20 | unsafe { 21 | let mut buf = alloc_uninit_bytes(src.len() / 2); 22 | 23 | { 24 | let (src, len) = slice_parts(src); 25 | let dst: *mut u8 = buf.as_mut_ptr().cast(); 26 | crate::multiversion::decode::auto(src, len, dst)?; 27 | } 28 | 29 | Ok(assume_init(buf)) 30 | } 31 | } 32 | 33 | #[inline] 34 | fn decode_append_vec(src: &[u8], buf: &mut Vec) -> Result<(), Error> { 35 | if src.is_empty() { 36 | return Ok(()); 37 | } 38 | 39 | ensure!(src.len() % 2 == 0); 40 | let m = src.len() / 2; 41 | 42 | buf.reserve_exact(m); 43 | let prev_len = buf.len(); 44 | 45 | unsafe { 46 | let (src, len) = slice_parts(src); 47 | let dst = buf.as_mut_ptr().add(prev_len); 48 | crate::multiversion::decode::auto(src, len, dst)?; 49 | 50 | buf.set_len(prev_len + m); 51 | Ok(()) 52 | } 53 | } 54 | 55 | #[inline] 56 | fn encode_to_boxed_str(src: &[u8], case: AsciiCase) -> Box { 57 | if src.is_empty() { 58 | 
return Box::from(""); 59 | } 60 | 61 | unsafe { 62 | let m = src.len() * 2; 63 | assert!(m <= usize::MAX / 2); 64 | 65 | let mut buf = alloc_uninit_bytes(m); 66 | 67 | { 68 | let (src, len) = slice_parts(src); 69 | let dst: *mut u8 = buf.as_mut_ptr().cast(); 70 | crate::multiversion::encode::auto(src, len, dst, case); 71 | } 72 | 73 | boxed_str(assume_init(buf)) 74 | } 75 | } 76 | 77 | #[inline] 78 | fn encode_append_vec(src: &[u8], buf: &mut Vec, case: AsciiCase) { 79 | if src.is_empty() { 80 | return; 81 | } 82 | 83 | unsafe { 84 | let m = src.len() * 2; 85 | assert!(m <= usize::MAX / 2); 86 | 87 | buf.reserve_exact(m); 88 | let prev_len = buf.len(); 89 | 90 | let (src, len) = slice_parts(src); 91 | let dst = buf.as_mut_ptr().add(prev_len); 92 | crate::multiversion::encode::auto(src, len, dst, case); 93 | 94 | buf.set_len(prev_len + m); 95 | } 96 | } 97 | 98 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 99 | impl FromHexDecode for Box<[u8]> { 100 | #[inline] 101 | fn from_hex_decode(data: &[u8]) -> Result { 102 | decode_to_boxed_bytes(data) 103 | } 104 | } 105 | 106 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 107 | impl FromHexDecode for Vec { 108 | #[inline] 109 | fn from_hex_decode(data: &[u8]) -> Result { 110 | let ans = decode_to_boxed_bytes(data)?; 111 | Ok(Vec::from(ans)) 112 | } 113 | } 114 | 115 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 116 | impl FromHexEncode for Box<[u8]> { 117 | #[inline] 118 | fn from_hex_encode(data: &[u8], case: AsciiCase) -> Self { 119 | let ans = encode_to_boxed_str(data, case); 120 | ans.into_boxed_bytes() 121 | } 122 | } 123 | 124 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 125 | impl FromHexEncode for Box { 126 | #[inline] 127 | fn from_hex_encode(data: &[u8], case: AsciiCase) -> Self { 128 | encode_to_boxed_str(data, case) 129 | } 130 | } 131 | 132 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 133 | impl FromHexEncode for Vec { 134 | #[inline] 135 | fn from_hex_encode(data: &[u8], case: 
AsciiCase) -> Self { 136 | let ans = encode_to_boxed_str(data, case); 137 | Vec::from(ans.into_boxed_bytes()) 138 | } 139 | } 140 | 141 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 142 | impl FromHexEncode for String { 143 | #[inline] 144 | fn from_hex_encode(data: &[u8], case: AsciiCase) -> Self { 145 | let ans = encode_to_boxed_str(data, case); 146 | String::from(ans) 147 | } 148 | } 149 | 150 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 151 | impl AppendHexEncode for Vec { 152 | #[inline] 153 | fn append_hex_encode(src: &[u8], dst: &mut Self, case: AsciiCase) { 154 | encode_append_vec(src, dst, case); 155 | } 156 | } 157 | 158 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 159 | impl AppendHexEncode for String { 160 | #[inline] 161 | fn append_hex_encode(src: &[u8], dst: &mut Self, case: AsciiCase) { 162 | unsafe { encode_append_vec(src, dst.as_mut_vec(), case) } 163 | } 164 | } 165 | 166 | #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] 167 | impl AppendHexDecode for Vec { 168 | #[inline] 169 | fn append_hex_decode(src: &[u8], dst: &mut Self) -> Result<(), Error> { 170 | decode_append_vec(src, dst) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /crates/hex-simd/src/multiversion.rs: -------------------------------------------------------------------------------- 1 | #![allow(missing_docs)] 2 | 3 | use crate::{AsciiCase, Error}; 4 | 5 | vsimd::dispatch!( 6 | name = {check}, 7 | signature = {pub unsafe fn(src: *const u8, len: usize) -> Result<(), Error>}, 8 | fallback = {crate::check::check_fallback}, 9 | simd = {crate::check::check_simd}, 10 | targets = {"avx2", "sse2", "neon", "simd128"}, 11 | fastest = {"avx2", "neon", "simd128"}, 12 | ); 13 | 14 | vsimd::dispatch!( 15 | name = {encode}, 16 | signature = {pub unsafe fn(src: *const u8, len: usize, dst: *mut u8, case: AsciiCase) -> () }, 17 | fallback = {crate::encode::encode_fallback}, 18 | simd = {crate::encode::encode_simd}, 19 | targets 
= {"avx2", "ssse3", "sse2", "neon", "simd128"},
    fastest = {"avx2", "neon", "simd128"},
);

vsimd::dispatch!(
    name = {decode},
    signature = {pub unsafe fn(src: *const u8, len: usize, dst: *mut u8) -> Result<(), Error>},
    fallback = {crate::decode::decode_fallback},
    simd = {crate::decode::decode_simd},
    targets = {"avx2", "ssse3", "sse2", "neon", "simd128"},
    fastest = {"avx2", "neon", "simd128"},
);

--------------------------------------------------------------------------------
/crates/hex-simd/tests/it.rs:
--------------------------------------------------------------------------------
use hex_simd::{AsOut, AsciiCase};

use core::mem::MaybeUninit;

/// Returns a vector of `n` bytes filled by the thread-local RNG.
fn rand_bytes(n: usize) -> Vec<u8> {
    use rand::RngCore;
    let mut bytes = vec![0u8; n];
    rand::thread_rng().fill_bytes(&mut bytes);
    bytes
}

#[cfg(miri)]
use std::io::Write as _;

// Tracing hook for the tests below. The body is commented out, so every
// invocation currently expands to nothing; uncomment it to trace progress.
macro_rules! dbgmsg {
    ($($fmt:tt)*) => {
        // println!($($fmt)*);
        // #[cfg(miri)]
        // std::io::stdout().flush().unwrap();
    };
}

/// Encodes `"hello"` into an uninitialized stack buffer and checks the returned string.
#[cfg_attr(not(target_arch = "wasm32"), test)]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
fn as_str() {
    let src = "hello";
    // 5 input bytes -> 10 hex characters.
    let mut buf = [MaybeUninit::<u8>::uninit(); 10];
    let ans = hex_simd::encode_as_str(src.as_bytes(), buf.as_mut_slice().as_out(), AsciiCase::Lower).unwrap();
    assert_eq!(ans, "68656c6c6f");
}

/// Exercises the allocating helpers: `encode_type`/`decode_type` and the
/// `*_append` variants, which must keep the existing buffer contents.
#[cfg(feature = "alloc")]
#[cfg_attr(not(target_arch = "wasm32"), test)]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
fn allocation() {
    {
        let src = "hello";

        let ans: String = hex_simd::encode_type(src, AsciiCase::Lower);
        assert_eq!(&*ans, "68656c6c6f");

        let ans: Vec<u8> = hex_simd::decode_type(ans).unwrap();
        assert_eq!(&*ans, src.as_bytes());
    }

    {
        let src = [1, 2, 3];
        let prefix = "0x";

        let mut encode_buf = prefix.to_owned();
        hex_simd::encode_append(src, &mut encode_buf, AsciiCase::Lower);

        assert_eq!(encode_buf, format!("{prefix}010203"));

        let mut decode_buf = b"123".to_vec();
        let src = &encode_buf[prefix.len()..];
        hex_simd::decode_append(src, &mut decode_buf).unwrap();

        assert_eq!(decode_buf, b"123\x01\x02\x03");
    }
}

/// Round-trip test over hex-digit inputs of length 0..128, a few inputs that
/// contain a NUL byte (expected to be rejected), and random byte strings.
#[cfg_attr(not(target_arch = "wasm32"), test)]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
fn random() {
    // Inputs made only of lowercase hex digits, cycling through 0..=f.
    let ok_cases: Vec<Vec<u8>> = {
        let mut ans = Vec::new();

        for n in 0..128usize {
            dbgmsg!("generating ok case n = {}", n);

            let iter = (0..16).cycle().take(n).map(|x| char::from_digit(x, 16).unwrap() as u8);
            ans.push(iter.collect());
        }

        ans
    };

    // Every case ends with a NUL byte, which is not a hex digit.
    let err_cases: Vec<Vec<u8>> = {
        vec![
            vec![0],
            vec![b'0', 0],
            vec![b'a', b'f', 0],
            vec![b'a', b'0', b'c', 0],
            vec![b'a', b'0', b'c', b'1', 0],
        ]
    };

    // decode -> encode must reproduce the hex text.
    macro_rules! test_decode_encode {
        ($src: expr, $case: expr) => {{
            let mut decode_buf = vec![0; $src.len() / 2];
            let mut encode_buf = vec![0; $src.len()];
            let decode_buf = hex_simd::decode($src, decode_buf.as_out()).unwrap();
            let encode_buf = hex_simd::encode(decode_buf, encode_buf.as_out(), $case).unwrap();
            assert_eq!(encode_buf, $src);
        }};
    }

    // Same as above, but decoding happens in place on a copy of the input.
    macro_rules! test_decode_inplace_encode {
        ($src: expr, $case: expr) => {{
            let mut decode_buf = $src.to_owned();
            let mut encode_buf = vec![0; $src.len()];
            let decode_buf = hex_simd::decode_inplace(&mut decode_buf).unwrap();
            let encode_buf = hex_simd::encode(decode_buf, encode_buf.as_out(), $case).unwrap();
            assert_eq!(encode_buf, $src);
        }};
    }

    // encode -> decode must reproduce the raw bytes.
    macro_rules! test_encode_decode {
        ($src: expr, $case: expr) => {{
            let mut encode_buf = vec![0; $src.len() * 2];
            let mut decode_buf = vec![0; $src.len()];
            let encode_buf = hex_simd::encode($src, encode_buf.as_out(), $case).unwrap();
            let decode_buf = hex_simd::decode(encode_buf, decode_buf.as_out()).unwrap();
            assert_eq!(decode_buf, $src);
        }};
    }

    // Same as above, but the encoded buffer is decoded in place.
    macro_rules! test_encode_decode_inplace {
        ($src: expr, $case: expr) => {{
            let mut encode_buf = vec![0; $src.len() * 2];
            let encode_buf = hex_simd::encode($src, encode_buf.as_out(), $case).unwrap();
            let decode_buf = hex_simd::decode_inplace(encode_buf).unwrap();
            assert_eq!(decode_buf, $src);
        }};
    }

    for src in &ok_cases {
        // for (_, src) in ok_cases.iter().enumerate() {
        // dbgmsg!("ok case {}", i + 1);
        assert!(hex_simd::check(src).is_ok());
        if src.len() % 2 == 0 {
            // Even length: the text itself is decodable hex.
            test_decode_encode!(src, AsciiCase::Lower);
            test_decode_inplace_encode!(src, AsciiCase::Lower);
        } else {
            // Odd length: treat the text as raw bytes instead.
            test_encode_decode!(src, AsciiCase::Upper);
            test_encode_decode_inplace!(src, AsciiCase::Lower);
        }
    }

    for src in &err_cases {
        // for (_, src) in err_cases.iter().enumerate() {
        // dbgmsg!("err case {}", i + 1);
        assert!(hex_simd::check(src).is_err());
        let mut buf = vec![0; src.len() / 2];
        assert!(hex_simd::decode(src, buf.as_out()).is_err(), "src = {src:?}");
    }

    for n in 0..128 {
        dbgmsg!("rand case n = {}", n);
        let bytes = rand_bytes(n);
        let src = bytes.as_slice();
        test_encode_decode!(src, AsciiCase::Lower);
        test_encode_decode_inplace!(src, AsciiCase::Upper);
    }
}

--------------------------------------------------------------------------------
/crates/unicode-simd/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "unicode-simd"
version = "0.9.0-dev"
edition = "2021"
description = "SIMD-accelerated Unicode validation and transcoding"
license = "MIT"
repository = "https://github.com/Nugine/simd"
keywords = ["utf8", "unicode", "string", "validation", "simd"]
categories = ["encoding", "algorithms", "no-std", "text-processing"]
readme = "README.md"
rust-version = "1.63"

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

[features]
default = ["std", "detect"]
alloc = ["vsimd/alloc"]
std = ["alloc", "vsimd/std"]
detect = ["vsimd/detect"]
unstable = ["vsimd/unstable"]

[dependencies]
outref = "0.5.1"
vsimd = { path = "../vsimd", version = "0.9.0-dev" }

[dev-dependencies]
rand = "0.8.5"

[target.'cfg(target_arch="wasm32")'.dev-dependencies]
getrandom = { version = "0.2.8", features = ["js"] }
wasm-bindgen-test = "0.3.34"

--------------------------------------------------------------------------------
/crates/unicode-simd/LICENSE:
--------------------------------------------------------------------------------
../../LICENSE
--------------------------------------------------------------------------------
/crates/unicode-simd/README.md:
--------------------------------------------------------------------------------
# unicode-simd

[![Crates.io](https://img.shields.io/crates/v/unicode-simd.svg)](https://crates.io/crates/unicode-simd)
[![Docs](https://docs.rs/unicode-simd/badge.svg)](https://docs.rs/unicode-simd/)
[![MIT licensed][mit-badge]][mit-url]

[mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg
[mit-url]: ../../LICENSE

SIMD-accelerated Unicode validation and transcoding.
11 | 12 | Documentation: 13 | 14 | Repository: 15 | -------------------------------------------------------------------------------- /crates/unicode-simd/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-check-cfg=cfg(vsimd_dump_symbols)") 3 | } 4 | -------------------------------------------------------------------------------- /crates/unicode-simd/src/ascii.rs: -------------------------------------------------------------------------------- 1 | use vsimd::mask::u8x32_highbit_any; 2 | use vsimd::SIMD256; 3 | 4 | use core::ops::Not; 5 | 6 | #[inline(always)] 7 | pub unsafe fn is_ascii_fallback(mut src: *const u8, len: usize) -> bool { 8 | let mut ans = 0; 9 | let end = src.add(len); 10 | while src < end { 11 | ans |= src.read(); 12 | src = src.add(1); 13 | } 14 | ans < 0x80 15 | } 16 | 17 | #[inline(always)] 18 | pub unsafe fn is_ascii_simd(s: S, src: *const u8, len: usize) -> bool { 19 | #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] 20 | { 21 | use vsimd::isa::SSE2; 22 | use vsimd::matches_isa; 23 | 24 | if matches_isa!(S, SSE2) { 25 | return is_ascii_sse2(src, len); 26 | } 27 | } 28 | 29 | is_ascii_simd_v256(s, src, len) 30 | } 31 | 32 | #[inline(always)] 33 | pub unsafe fn is_ascii_simd_v256(s: S, mut src: *const u8, mut len: usize) -> bool { 34 | let end = src.add(len / 32 * 32); 35 | let mut y = s.v256_create_zero(); 36 | while src < end { 37 | let x = s.v256_load_unaligned(src); 38 | y = s.v256_or(y, x); 39 | src = src.add(32); 40 | } 41 | len %= 32; 42 | 43 | let mut ans = u8x32_highbit_any(s, y).not(); 44 | ans &= is_ascii_fallback(src, len); 45 | ans 46 | } 47 | 48 | #[allow(clippy::too_many_lines)] 49 | #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] 50 | #[inline] 51 | #[must_use] 52 | pub unsafe fn is_ascii_sse2(src: *const u8, len: usize) -> bool { 53 | use core::arch::x86_64::*; 54 | 55 | use vsimd::vector::V128; 56 | 57 | macro_rules! 
ensure { 58 | ($cond:expr) => { 59 | if !$cond { 60 | return false; 61 | } 62 | }; 63 | } 64 | 65 | #[inline(always)] 66 | unsafe fn loadu(p: *const u8) -> T { 67 | p.cast::().read_unaligned() 68 | } 69 | 70 | #[inline(always)] 71 | fn check4(x: u32) -> bool { 72 | (x & 0x8080_8080) == 0 73 | } 74 | 75 | #[inline(always)] 76 | fn check8(x: u64) -> bool { 77 | (x & 0x8080_8080_8080_8080) == 0 78 | } 79 | 80 | #[inline(always)] 81 | unsafe fn check16(x: __m128i) -> bool { 82 | if cfg!(miri) { 83 | let x = core::mem::transmute::<__m128i, V128>(x); 84 | vsimd::simulation::u8x16_bitmask(x) == 0 85 | } else { 86 | _mm_movemask_epi8(x) as u32 as u16 == 0 87 | } 88 | } 89 | 90 | #[inline(always)] 91 | unsafe fn or(a: __m128i, b: __m128i) -> __m128i { 92 | _mm_or_si128(a, b) 93 | } 94 | 95 | /// len in 0..=8 96 | #[inline(always)] 97 | unsafe fn check_tiny(mut src: *const u8, mut len: usize) -> bool { 98 | if len == 8 { 99 | return check8(loadu(src)); 100 | } 101 | if len >= 4 { 102 | ensure!(check4(loadu(src))); 103 | src = src.add(4); 104 | len -= 4; 105 | } 106 | { 107 | let mut acc: u8 = 0; 108 | let end = src.add(len); 109 | for _ in 0..3 { 110 | if src < end { 111 | acc |= src.read(); 112 | src = src.add(1); 113 | } 114 | } 115 | acc < 0x80 116 | } 117 | } 118 | 119 | /// len in 9..=16 120 | #[inline(always)] 121 | unsafe fn check_short(src: *const u8, len: usize) -> bool { 122 | let x1: u64 = loadu(src); 123 | let x2: u64 = loadu(src.add(len - 8)); 124 | check8(x1 | x2) 125 | } 126 | 127 | /// len in 17..64 128 | #[inline(always)] 129 | unsafe fn check_medium(src: *const u8, len: usize) -> bool { 130 | let mut x: __m128i = loadu(src); 131 | if len >= 32 { 132 | x = or(x, loadu(src.add(16))); 133 | } 134 | if len >= 48 { 135 | x = or(x, loadu(src.add(32))); 136 | } 137 | x = or(x, loadu(src.add(len - 16))); 138 | check16(x) 139 | } 140 | 141 | /// len >= 64 142 | #[inline(always)] 143 | unsafe fn check_long(mut src: *const u8, mut len: usize) -> bool { 144 | let end = 
src.add(len / 64 * 64); 145 | while src < end { 146 | let x: [__m128i; 4] = loadu(src); 147 | ensure!(check16(or(or(x[0], x[1]), or(x[2], x[3])))); 148 | src = src.add(64); 149 | } 150 | len %= 64; 151 | if len == 0 { 152 | return true; 153 | } 154 | if len <= 8 { 155 | check_tiny(src, len) 156 | } else if len <= 16 { 157 | check_short(src, len) 158 | } else { 159 | check_medium(src, len) 160 | } 161 | } 162 | 163 | { 164 | if len <= 8 { 165 | check_tiny(src, len) 166 | } else if len <= 16 { 167 | check_short(src, len) 168 | } else if len < 64 { 169 | check_medium(src, len) 170 | } else { 171 | check_long(src, len) 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /crates/unicode-simd/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! SIMD-accelerated Unicode validation and transcoding 2 | //! 3 | #![doc=vsimd::shared_docs!()] 4 | // 5 | #![cfg_attr(not(any(feature = "std", test)), no_std)] 6 | #![cfg_attr(feature = "unstable", feature(arm_target_feature))] 7 | #![cfg_attr(docsrs, feature(doc_cfg))] 8 | #![cfg_attr(test, deny(warnings))] 9 | // 10 | #![deny( 11 | missing_debug_implementations, 12 | missing_docs, 13 | clippy::all, 14 | clippy::pedantic, 15 | clippy::cargo, 16 | clippy::missing_inline_in_public_items 17 | )] 18 | #![warn(clippy::todo)] 19 | #![allow( 20 | clippy::inline_always, 21 | clippy::cast_sign_loss, 22 | clippy::cast_possible_truncation, 23 | clippy::wildcard_imports 24 | )] 25 | 26 | #[cfg(feature = "alloc")] 27 | extern crate alloc; 28 | 29 | mod ascii; 30 | mod utf16; 31 | mod utf32; 32 | 33 | mod multiversion; 34 | 35 | pub use outref::{AsOut, Out}; 36 | 37 | // ------------------------------------------------------------------------------------------------ 38 | 39 | use vsimd::tools::{slice_mut, slice_parts}; 40 | 41 | /// Checks if `data` is a valid ASCII string. 
42 | #[inline] 43 | #[must_use] 44 | pub fn is_ascii(data: &[u8]) -> bool { 45 | let (src, len) = slice_parts(data); 46 | unsafe { crate::multiversion::is_ascii::auto(src, len) } 47 | } 48 | 49 | /// Converts ascii bytes to a string slice. 50 | #[inline] 51 | #[must_use] 52 | pub fn from_ascii(data: &[u8]) -> Option<&str> { 53 | is_ascii(data).then(|| unsafe { core::str::from_utf8_unchecked(data) }) 54 | } 55 | 56 | /// TODO: test, bench 57 | #[inline] 58 | #[must_use] 59 | pub fn is_utf32le(data: &[u32]) -> bool { 60 | let (src, len) = slice_parts(data); 61 | unsafe { crate::multiversion::is_utf32le::auto(src, len) } 62 | } 63 | 64 | /// TODO: test, bench 65 | #[inline] 66 | pub fn utf32_swap_endianness_inplace(data: &mut [u32]) { 67 | let len = data.len(); 68 | let dst = data.as_mut_ptr(); 69 | let src = dst; 70 | unsafe { crate::multiversion::utf32_swap_endianness::auto(src, len, dst) } 71 | } 72 | 73 | /// TODO: test, bench 74 | /// 75 | /// # Panics 76 | /// This function asserts that `src.len() <= dst.len()` 77 | #[inline] 78 | #[must_use] 79 | pub fn utf32_swap_endianness<'d>(src: &[u32], mut dst: Out<'d, [u32]>) -> &'d mut [u32] { 80 | assert!(src.len() <= dst.len()); 81 | let len = src.len(); 82 | let src = src.as_ptr(); 83 | let dst = dst.as_mut_ptr(); 84 | unsafe { crate::multiversion::utf32_swap_endianness::auto(src, len, dst) }; 85 | unsafe { slice_mut(dst, len) } 86 | } 87 | 88 | /// TODO: test, bench 89 | #[inline] 90 | pub fn utf16_swap_endianness_inplace(data: &mut [u16]) { 91 | let len = data.len(); 92 | let dst = data.as_mut_ptr(); 93 | let src = dst; 94 | unsafe { crate::multiversion::utf16_swap_endianness::auto(src, len, dst) } 95 | } 96 | 97 | /// TODO: test, bench 98 | /// 99 | /// # Panics 100 | /// This function asserts that `src.len() <= dst.len()` 101 | #[inline] 102 | #[must_use] 103 | pub fn utf16_swap_endianness<'d>(src: &[u16], mut dst: Out<'d, [u16]>) -> &'d mut [u16] { 104 | assert!(src.len() <= dst.len()); 105 | let len = 
src.len(); 106 | let src = src.as_ptr(); 107 | let dst = dst.as_mut_ptr(); 108 | unsafe { crate::multiversion::utf16_swap_endianness::auto(src, len, dst) }; 109 | unsafe { slice_mut(dst, len) } 110 | } 111 | -------------------------------------------------------------------------------- /crates/unicode-simd/src/multiversion.rs: -------------------------------------------------------------------------------- 1 | #![allow(missing_docs)] 2 | 3 | vsimd::dispatch! ( 4 | name = {is_ascii}, 5 | signature = {pub unsafe fn(src: *const u8, len: usize) -> bool}, 6 | fallback = {crate::ascii::is_ascii_fallback}, 7 | simd = {crate::ascii::is_ascii_simd}, 8 | targets = {"avx2", "sse2", "neon", "simd128"}, 9 | fastest = {"avx2", "neon", "simd128"}, 10 | ); 11 | 12 | vsimd::dispatch!( 13 | name = {is_utf32le}, 14 | signature = {pub unsafe fn(src: *const u32, len: usize) -> bool}, 15 | fallback = {crate::utf32::is_utf32le_fallback}, 16 | simd = {crate::utf32::is_utf32le_simd}, 17 | targets = {"avx2", "sse4.1", "neon", "simd128"}, 18 | fastest = {"avx2", "neon", "simd128"}, 19 | ); 20 | 21 | vsimd::dispatch!( 22 | name = {utf32_swap_endianness}, 23 | signature = {pub unsafe fn(src: *const u32, len: usize, dst: *mut u32) -> ()}, 24 | fallback = {crate::utf32::swap_endianness_fallback}, 25 | simd = {crate::utf32::swap_endianness_simd}, 26 | targets = {"avx2", "ssse3", "neon", "simd128"}, 27 | fastest = {"avx2", "neon", "simd128"}, 28 | ); 29 | 30 | vsimd::dispatch!( 31 | name = {utf16_swap_endianness}, 32 | signature = {pub unsafe fn(src: *const u16, len: usize, dst: *mut u16) -> ()}, 33 | fallback = {crate::utf16::swap_endianness_fallback}, 34 | simd = {crate::utf16::swap_endianness_simd}, 35 | targets = {"avx2", "ssse3", "neon", "simd128"}, 36 | fastest = {"avx2", "neon", "simd128"}, 37 | ); 38 | -------------------------------------------------------------------------------- /crates/unicode-simd/src/utf16.rs: 
-------------------------------------------------------------------------------- 1 | use vsimd::SIMD256; 2 | 3 | #[inline(always)] 4 | pub unsafe fn swap_endianness_fallback(src: *const u16, len: usize, dst: *mut u16) { 5 | vsimd::bswap::bswap_fallback(src, len, dst); 6 | } 7 | 8 | #[inline(always)] 9 | pub unsafe fn swap_endianness_simd(s: S, src: *const u16, len: usize, dst: *mut u16) { 10 | vsimd::bswap::bswap_simd(s, src, len, dst); 11 | } 12 | -------------------------------------------------------------------------------- /crates/unicode-simd/src/utf32.rs: -------------------------------------------------------------------------------- 1 | use vsimd::SIMD256; 2 | 3 | #[inline(always)] 4 | pub unsafe fn swap_endianness_fallback(src: *const u32, len: usize, dst: *mut u32) { 5 | vsimd::bswap::bswap_fallback(src, len, dst); 6 | } 7 | 8 | #[inline(always)] 9 | pub unsafe fn swap_endianness_simd(s: S, src: *const u32, len: usize, dst: *mut u32) { 10 | vsimd::bswap::bswap_simd(s, src, len, dst); 11 | } 12 | 13 | // ---------------------------------------------------------------- 14 | 15 | /// See [`char::from_u32`](core::char::from_u32) 16 | #[inline(always)] 17 | fn is_unicode_scalar_value(x: u32) -> bool { 18 | (x ^ 0xD800).wrapping_sub(0x800) < (0x11_0000 - 0x800) 19 | } 20 | 21 | #[inline] 22 | pub unsafe fn is_utf32le_fallback(mut src: *const u32, len: usize) -> bool { 23 | let mut flag = true; 24 | 25 | let end = src.add(len); 26 | while src < end { 27 | let x = src.read(); 28 | flag &= is_unicode_scalar_value(x); 29 | src = src.add(1); 30 | } 31 | 32 | flag 33 | } 34 | 35 | #[inline(always)] 36 | pub unsafe fn is_utf32le_simd(s: S, mut src: *const u32, mut len: usize) -> bool { 37 | let mut y = s.u32x8_splat(0); 38 | 39 | let end = src.add(len / 8 * 8); 40 | while src < end { 41 | let x = s.v256_load_unaligned(src.cast::()); 42 | let a1 = s.v256_xor(x, s.u32x8_splat(0xD800)); 43 | let a2 = s.u32x8_sub(a1, s.u32x8_splat(0x800)); 44 | y = s.u32x8_max(y, a2); 45 
| 46 | src = src.add(8); 47 | } 48 | len %= 8; 49 | 50 | let m = s.u32x8_splat(0x11_0000 - 0x800 - 1); 51 | let mut ans = s.v256_all_zero(s.u32x8_lt(m, y)); 52 | 53 | ans &= is_utf32le_fallback(src, len); 54 | ans 55 | } 56 | -------------------------------------------------------------------------------- /crates/unicode-simd/tests/it.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Not; 2 | 3 | use rand::Rng; 4 | 5 | fn rand_bytes(n: usize) -> Vec { 6 | use rand::RngCore; 7 | let mut bytes = vec![0u8; n]; 8 | rand::thread_rng().fill_bytes(&mut bytes); 9 | bytes 10 | } 11 | 12 | #[cfg(miri)] 13 | use std::io::Write as _; 14 | 15 | macro_rules! dbgmsg { 16 | ($($fmt:tt)*) => { 17 | println!($($fmt)*); 18 | #[cfg(miri)] 19 | std::io::stdout().flush().unwrap(); 20 | }; 21 | } 22 | 23 | #[cfg_attr(not(target_arch = "wasm32"), test)] 24 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 25 | fn random() { 26 | for n in 0..256 { 27 | dbgmsg!("n = {n}"); 28 | 29 | let mut src = rand_bytes(n); 30 | src.iter_mut().for_each(|x| *x >>= 1); 31 | 32 | assert!(unicode_simd::is_ascii(&src)); 33 | 34 | if n > 0 { 35 | let pos = rand::thread_rng().gen_range(0..n); 36 | src[pos] = 0x80; 37 | assert!(unicode_simd::is_ascii(&src).not()); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /crates/uuid-simd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "uuid-simd" 3 | version = "0.9.0-dev" 4 | edition = "2021" 5 | description = "SIMD-accelerated UUID operations" 6 | license = "MIT" 7 | repository = "https://github.com/Nugine/simd" 8 | keywords = ["uuid", "guid", "simd"] 9 | categories = ["no-std", "parser-implementations"] 10 | readme = "README.md" 11 | rust-version = "1.63" 12 | 13 | [package.metadata.docs.rs] 14 | all-features = true 15 | rustdoc-args = ["--cfg", "docsrs"] 16 | 17 
[features]
default = ["std", "detect", "uuid"]
alloc = ["vsimd/alloc"]
std = ["alloc", "vsimd/std"]
detect = ["vsimd/detect"]
unstable = ["vsimd/unstable", "hex-simd/unstable"]

[dependencies]
outref = "0.5.1"
vsimd = { path = "../vsimd", version = "0.9.0-dev" }
uuid = { version = "1.3.0", optional = true }

[dev-dependencies]
hex-simd = { path = "../hex-simd", version = "0.9.0-dev" }

[target.'cfg(target_arch="wasm32")'.dev-dependencies]
wasm-bindgen-test = "0.3.34"

--------------------------------------------------------------------------------
/crates/uuid-simd/LICENSE:
--------------------------------------------------------------------------------
../../LICENSE
--------------------------------------------------------------------------------
/crates/uuid-simd/README.md:
--------------------------------------------------------------------------------
# uuid-simd

[![Crates.io](https://img.shields.io/crates/v/uuid-simd.svg)](https://crates.io/crates/uuid-simd)
[![Docs](https://docs.rs/uuid-simd/badge.svg)](https://docs.rs/uuid-simd/)
[![MIT licensed][mit-badge]][mit-url]

[mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg
[mit-url]: ../../LICENSE

SIMD-accelerated UUID operations

Documentation: <https://docs.rs/uuid-simd>

Repository: <https://github.com/Nugine/simd>

--------------------------------------------------------------------------------
/crates/uuid-simd/build.rs:
--------------------------------------------------------------------------------
fn main() {
    println!("cargo:rustc-check-cfg=cfg(vsimd_dump_symbols)")
}

--------------------------------------------------------------------------------
/crates/uuid-simd/src/error.rs:
--------------------------------------------------------------------------------
use core::fmt;

/// UUID Error
///
/// An opaque error: it carries no payload and can only be created inside this crate.
pub struct Error(());

impl Error {
    #[inline(always)]
    pub(crate) const fn new() -> Self {
        Error(())
    }
}

impl fmt::Debug for Error {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Debug>::fmt("UUIDError", f)
    }
}

impl fmt::Display for Error {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt("UUIDError", f)
    }
}

#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
#[cfg(feature = "std")]
impl std::error::Error for Error {}

// Returns `Err(Error)` from the enclosing function when `$cond` is false.
macro_rules! ensure {
    ($cond:expr) => {
        if !$cond {
            return Err($crate::error::Error::new());
        }
    };
}

// Unwraps a `Result<_, ()>`, converting `Err(())` into an early `Err(Error)` return.
#[allow(unused_macros)]
macro_rules! try_ {
    ($result:expr) => {
        match $result {
            Ok(value) => value,
            Err(()) => return Err(Error::new()),
        }
    };
}

--------------------------------------------------------------------------------
/crates/uuid-simd/src/ext.rs:
--------------------------------------------------------------------------------
use crate::{AsciiCase, Error, Out, AsOut};

use core::fmt;
use core::mem::MaybeUninit;

use uuid::Uuid;

/// An extension trait for [`uuid::Uuid`]
#[cfg_attr(docsrs, doc(cfg(feature = "uuid")))]
pub trait UuidExt: Sized {
    /// Parses a UUID from arbitrary bytes.
    ///
    /// # Errors
    /// This function returns `Err` if:
    ///
    /// + The length of `src` doesn't match any UUID format variants.
    /// + The content of `src` is invalid.
    ///
    fn parse(src: impl AsRef<[u8]>) -> Result<Self, Error>;

    /// Parses a simple UUID from arbitrary bytes.
    ///
    /// # Errors
    /// This function returns `Err` if:
    ///
    /// + The length of `src` doesn't match the "simple" format.
    /// + The content of `src` is invalid.
    ///
    fn parse_simple(src: impl AsRef<[u8]>) -> Result<Self, Error>;

    /// Parses a hyphenated UUID from arbitrary bytes.
    ///
    /// # Errors
    /// This function returns `Err` if:
    ///
    /// + The length of `src` doesn't match the "hyphenated" format.
    /// + The content of `src` is invalid.
    ///
    fn parse_hyphenated(src: impl AsRef<[u8]>) -> Result<Self, Error>;

    /// Returns a fmt adapter with "simple" format.
    fn format_simple(&self) -> Simple<'_>;

    /// Returns a fmt adapter with "hyphenated" format.
    fn format_hyphenated(&self) -> Hyphenated<'_>;
}

/// Runs the byte-level parser `f` directly into the storage of an
/// uninitialized `Uuid` and returns it on success.
///
/// NOTE(review): this writes 16 bytes through a pointer cast of `*mut Uuid`,
/// so it relies on `Uuid` being layout-compatible with `[u8; 16]` — confirm
/// against the `uuid` crate.
#[allow(clippy::type_complexity)]
#[inline(always)]
unsafe fn parse_uuid(
    src: &[u8],
    f: for<'s, 'd> fn(&'s [u8], Out<'d, [u8; 16]>) -> Result<&'d mut [u8; 16], Error>,
) -> Result<Uuid, Error> {
    let mut uuid = MaybeUninit::<Uuid>::uninit();
    let out = Out::new(uuid.as_mut_ptr().cast());
    f(src, out)?;
    Ok(uuid.assume_init())
}

impl UuidExt for Uuid {
    #[inline]
    fn parse(src: impl AsRef<[u8]>) -> Result<Self, Error> {
        unsafe { parse_uuid(src.as_ref(), crate::parse) }
    }

    #[inline]
    fn parse_simple(src: impl AsRef<[u8]>) -> Result<Self, Error> {
        unsafe { parse_uuid(src.as_ref(), crate::parse_simple) }
    }

    #[inline]
    fn parse_hyphenated(src: impl AsRef<[u8]>) -> Result<Self, Error> {
        unsafe { parse_uuid(src.as_ref(), crate::parse_hyphenated) }
    }

    #[inline]
    fn format_simple(&self) -> Simple<'_> {
        Simple(self)
    }

    #[inline]
    fn format_hyphenated(&self) -> Hyphenated<'_> {
        Hyphenated(self)
    }
}

/// A simple UUID
#[cfg_attr(docsrs, doc(cfg(feature = "uuid")))]
#[derive(Debug)]
pub struct Simple<'a>(&'a Uuid);

/// A hyphenated UUID
#[cfg_attr(docsrs, doc(cfg(feature = "uuid")))]
#[derive(Debug)]
pub struct Hyphenated<'a>(&'a Uuid);

/// Formats `uuid` with `f` into an `N`-byte stack buffer and passes the
/// resulting text to `g`, returning whatever `g` returns.
///
/// The bytes produced by `f` are turned into `&str` without validation.
#[allow(clippy::type_complexity)]
#[inline]
unsafe fn format_uuid<R, const N: usize>(
    uuid: &Uuid,
    case: AsciiCase,
    f: for<'s, 'd> fn(&'s [u8; 16], Out<'d, [u8; N]>, case: AsciiCase) -> &'d mut [u8; N],
    g: impl FnOnce(&str) -> R,
) -> R {
    let mut buf = MaybeUninit::<[u8; N]>::uninit();
    let src = uuid.as_bytes();
    let dst = buf.as_out();
    let ans = f(src, dst, case);
    g(core::str::from_utf8_unchecked(ans))
}

impl fmt::LowerHex for Simple<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let case = AsciiCase::Lower;
        unsafe { format_uuid(self.0, case, crate::format_simple, |s| <str as fmt::Display>::fmt(s, f)) }
    }
}

impl fmt::LowerHex for Hyphenated<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let case = AsciiCase::Lower;
        unsafe {
            format_uuid(self.0, case, crate::format_hyphenated, |s| {
                <str as fmt::Display>::fmt(s, f)
            })
        }
    }
}

impl fmt::UpperHex for Simple<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let case = AsciiCase::Upper;
        unsafe { format_uuid(self.0, case, crate::format_simple, |s| <str as fmt::Display>::fmt(s, f)) }
    }
}

impl fmt::UpperHex for Hyphenated<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let case = AsciiCase::Upper;
        unsafe {
            format_uuid(self.0, case, crate::format_hyphenated, |s| {
                <str as fmt::Display>::fmt(s, f)
            })
        }
    }
}

// `Display` uses the lowercase form.
impl fmt::Display for Simple<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::LowerHex>::fmt(self, f)
    }
}

impl fmt::Display for Hyphenated<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::LowerHex>::fmt(self, f)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[cfg_attr(not(target_arch = "wasm32"), test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    fn test_uuid_ext() {
        let s1 = "67e5504410b1426f9247bb680e5fe0c8";
        let s2 = "67e55044-10b1-426f-9247-bb680e5fe0c8";

        let u = Uuid::parse(s1).unwrap();

        let a1 = u.format_simple().to_string();
        let a2 = format!("{:X}", u.format_hyphenated());

        assert_eq!(a1, s1);
        assert_eq!(a2, s2.to_ascii_uppercase());
    }
}

--------------------------------------------------------------------------------
/crates/uuid-simd/src/format.rs:
--------------------------------------------------------------------------------
use crate::spec::*;

use vsimd::ascii::AsciiCase;
use vsimd::is_isa_type;
use vsimd::isa::{InstructionSet, SSE2};
use vsimd::tools::{read, write};
use vsimd::vector::V256;
use vsimd::{SIMD128, SIMD256};

/// Selects the 16-entry hex digit table for `case`.
#[inline(always)]
const fn char_lut_fallback(case: AsciiCase) -> &'static [u8; 16] {
    match case {
        AsciiCase::Lower => vsimd::hex::LOWER_CHARSET,
        AsciiCase::Upper => vsimd::hex::UPPER_CHARSET,
    }
}

/// Scalar "simple" formatter: reads 16 bytes from `src` and writes 32 hex
/// characters to `dst`, high nibble first.
#[inline(always)]
pub unsafe fn format_simple_fallback(src: *const u8, dst: *mut u8, case: AsciiCase) {
    let lut = char_lut_fallback(case).as_ptr();
    for i in 0..16 {
        let x = read(src, i);
        let hi = read(lut, (x >> 4) as usize);
        let lo = read(lut, (x & 0x0f) as usize);
        write(dst, i * 2, hi);
        write(dst, i * 2 + 1, lo);
    }
}

/// Scalar "hyphenated" formatter: reads 16 bytes from `src` and writes 36
/// characters to `dst` in five hex groups separated by `-`.
#[inline(always)]
pub unsafe fn format_hyphenated_fallback(src: *const u8, dst: *mut u8, case: AsciiCase) {
    let lut = char_lut_fallback(case).as_ptr();
    // (start, end) output offsets of each hex group; `end` is where the dash goes.
    let groups = [(0, 8), (9, 13), (14, 18), (19, 23), (24, 36)];

    let mut g = 0;
    let mut i = 0;
    while g < 5 {
        let (start, end) = groups[g];

        let mut j = start;
        while j < end {
            let x = read(src, i);
            i += 1;

            let hi = read(lut, (x >> 4) as usize);
            let lo = read(lut, (x & 0x0f) as usize);
            write(dst, j, hi);
            write(dst, j + 1, lo);
            j += 2;
        }

        // No dash after the last group.
        if g < 4 {
            write(dst, end, b'-');
        }

        g += 1;
    }
}

/// Selects the vector hex-encoding table for `case`.
#[inline(always)]
const fn char_lut_simd(case: AsciiCase) -> V256 {
    match case {
        AsciiCase::Lower => vsimd::hex::ENCODE_LOWER_LUT,
        AsciiCase::Upper => vsimd::hex::ENCODE_UPPER_LUT,
    }
}

/// SIMD "simple" formatter: 16 input bytes to 32 output characters.
/// An `SSE2` token is routed to [`format_simple_simd_sse2`].
#[inline(always)]
pub unsafe fn format_simple_simd<S: SIMD256>(s: S, src: *const u8, dst: *mut u8, case: AsciiCase) {
    if is_isa_type!(S, SSE2) {
        return format_simple_simd_sse2(SSE2::new(), src, dst, case);
    }
    {
        let lut = char_lut_simd(case);
        let x = s.v128_load_unaligned(src);
        let y = vsimd::hex::encode_bytes16(s, x, lut);
        s.v256_store_unaligned(dst, y);
    }
}

/// SIMD "hyphenated" formatter.
///
/// Hex-encodes the 16 input bytes, rearranges them with `SWIZZLE`, ORs in
/// `DASH` and stores the first 32 output bytes. Two scalar stores then write
/// output bytes 16..18 and 32..36 from lanes of the unshuffled encoding.
#[inline(always)]
pub unsafe fn format_hyphenated_simd<S: SIMD256>(s: S, src: *const u8, dst: *mut u8, case: AsciiCase) {
    const SWIZZLE: V256 = V256::from_bytes([
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, //
        0x80, 0x08, 0x09, 0x0a, 0x0b, 0x80, 0x0c, 0x0d, //
        0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x80, //
        0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, //
    ]);

    // 0x2d is ASCII '-'.
    const DASH: V256 = V256::from_bytes([
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
        0x2d, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, //
        0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x00, 0x2d, //
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
    ]);

    let lut = char_lut_simd(case);
    let a = vsimd::hex::encode_bytes16(s, s.v128_load_unaligned(src), lut);

    let a1 = s.u8x16x2_swizzle(a, SWIZZLE);
    let a2 = s.v256_or(a1, DASH);
    s.v256_store_unaligned(dst, a2);

    let a = a.to_v128x2();
    let bytes_14_15 = i16x8_get_lane7(s, a.0) as u16;
    let bytes_28_31 = i32x4_get_lane3(s, a.1) as u32;
    core::ptr::write_unaligned(dst.add(16).cast(), bytes_14_15);
    core::ptr::write_unaligned(dst.add(32).cast(), bytes_28_31);
}

/// SSE2 "simple" formatter: encodes 16 bytes with `vsimd::hex::sse2::encode16`
/// and stores the two 16-byte halves back to back.
#[inline(always)]
pub unsafe fn format_simple_simd_sse2(s: SSE2, src: *const u8, dst: *mut u8, case: AsciiCase) {
    let offset = match case {
        AsciiCase::Lower => vsimd::hex::sse2::LOWER_OFFSET,
        AsciiCase::Upper => vsimd::hex::sse2::UPPER_OFFSET,
    };

    let x = s.v128_load_unaligned(src);
    let (y1, y2) = vsimd::hex::sse2::encode16(s, x, offset);

    s.v128_store_unaligned(dst, y1);
    s.v128_store_unaligned(dst.add(16), y2);
}

--------------------------------------------------------------------------------
/crates/uuid-simd/src/lib.rs:
--------------------------------------------------------------------------------
//! SIMD-accelerated UUID operations.
//!
//! # Examples
//!
//! ```
//! # #[cfg(feature="uuid")]
//! # {
//! use uuid::Uuid;
//! use uuid_simd::UuidExt;
//!
//! let text = "67e55044-10b1-426f-9247-bb680e5fe0c8";
//! let uuid: Uuid = Uuid::parse(text.as_bytes()).unwrap();
//! println!("{}", uuid.format_simple())
//! # }
//! ```
//!
17 | #![doc=vsimd::shared_docs!()] 18 | // 19 | #![cfg_attr(not(any(feature = "std", test)), no_std)] 20 | #![cfg_attr( 21 | all(feature = "unstable", target_arch = "arm"), 22 | feature(arm_target_feature), 23 | feature(stdarch_arm_feature_detection), 24 | feature(stdarch_arm_neon_intrinsics) 25 | )] 26 | #![cfg_attr(docsrs, feature(doc_cfg))] 27 | #![cfg_attr(test, deny(warnings))] 28 | // 29 | #![deny( 30 | missing_debug_implementations, 31 | missing_docs, 32 | clippy::all, 33 | clippy::pedantic, 34 | clippy::cargo, 35 | clippy::missing_inline_in_public_items 36 | )] 37 | #![warn(clippy::todo)] 38 | #![allow( 39 | clippy::inline_always, 40 | clippy::wildcard_imports, 41 | clippy::module_name_repetitions, 42 | clippy::cast_sign_loss, 43 | clippy::cast_lossless, 44 | clippy::cast_possible_truncation, 45 | clippy::items_after_statements, 46 | clippy::let_underscore_untyped 47 | )] 48 | 49 | #[macro_use] 50 | mod error; 51 | pub use self::error::Error; 52 | 53 | mod spec; 54 | 55 | mod format; 56 | mod parse; 57 | 58 | mod multiversion; 59 | 60 | #[cfg(feature = "uuid")] 61 | vsimd::item_group! { 62 | mod ext; 63 | pub use self::ext::*; 64 | } 65 | 66 | pub use outref::{AsOut, Out}; 67 | pub use vsimd::ascii::AsciiCase; 68 | 69 | // ------------------------------------------------------------------------------------------------- 70 | 71 | use vsimd::tools::read; 72 | 73 | /// Parses an UUID from arbitrary bytes. 74 | /// 75 | /// # Errors 76 | /// This function returns `Err` if: 77 | /// 78 | /// + The length of `src` doesn't match any UUID format variants. 79 | /// + The content of `src` is invalid. 
80 | #[inline] 81 | pub fn parse<'d>(src: &[u8], mut dst: Out<'d, [u8; 16]>) -> Result<&'d mut [u8; 16], Error> { 82 | let n = src.len(); 83 | 84 | if n == 32 { 85 | unsafe { 86 | let src = src.as_ptr(); 87 | let dst = dst.as_mut_ptr().cast::(); 88 | crate::multiversion::parse_simple::auto(src, dst)?; 89 | return Ok(&mut *dst.cast()); 90 | } 91 | } 92 | 93 | unsafe { 94 | let src = match n { 95 | 36 => src.as_ptr(), 96 | // Microsoft GUID 97 | 38 => { 98 | let src = src.as_ptr(); 99 | ensure!(read(src, 0) == b'{' && read(src, 37) == b'}'); 100 | src.add(1) 101 | } 102 | // URN prefixed UUID 103 | 45 => src.strip_prefix(b"urn:uuid:").ok_or_else(Error::new)?.as_ptr(), 104 | _ => return Err(Error::new()), 105 | }; 106 | let dst = dst.as_mut_ptr().cast::(); 107 | crate::multiversion::parse_hyphenated::auto(src, dst)?; 108 | Ok(&mut *dst.cast()) 109 | } 110 | } 111 | 112 | /// Parses a simple UUID from arbitrary bytes. 113 | /// 114 | /// # Errors 115 | /// This function returns `Err` if: 116 | /// 117 | /// + The length of `src` doesn't match the "simple" format. 118 | /// + The content of `src` is invalid. 119 | #[inline] 120 | pub fn parse_simple<'d>(src: &[u8], mut dst: Out<'d, [u8; 16]>) -> Result<&'d mut [u8; 16], Error> { 121 | ensure!(src.len() == 32); 122 | unsafe { 123 | let src = src.as_ptr(); 124 | let dst = dst.as_mut_ptr().cast::(); 125 | crate::multiversion::parse_simple::auto(src, dst)?; 126 | Ok(&mut *dst.cast()) 127 | } 128 | } 129 | 130 | /// Parses a hyphenated UUID from arbitrary bytes. 131 | /// 132 | /// # Errors 133 | /// This function returns `Err` if: 134 | /// 135 | /// + The length of `src` doesn't match the "hyphenated" format. 136 | /// + The content of `src` is invalid. 
137 | #[inline] 138 | pub fn parse_hyphenated<'d>(src: &[u8], mut dst: Out<'d, [u8; 16]>) -> Result<&'d mut [u8; 16], Error> { 139 | ensure!(src.len() == 36); 140 | unsafe { 141 | let src = src.as_ptr(); 142 | let dst = dst.as_mut_ptr().cast::(); 143 | crate::multiversion::parse_hyphenated::auto(src, dst)?; 144 | Ok(&mut *dst.cast()) 145 | } 146 | } 147 | 148 | /// Formats an UUID to a simple UUID string. 149 | #[inline] 150 | #[must_use] 151 | pub fn format_simple<'d>(src: &[u8; 16], mut dst: Out<'d, [u8; 32]>, case: AsciiCase) -> &'d mut [u8; 32] { 152 | unsafe { 153 | let src = src.as_ptr(); 154 | let dst = dst.as_mut_ptr().cast::(); 155 | crate::multiversion::format_simple::auto(src, dst, case); 156 | &mut *dst.cast() 157 | } 158 | } 159 | 160 | /// Formats an UUID to a hyphenated UUID string. 161 | #[inline] 162 | #[must_use] 163 | pub fn format_hyphenated<'d>(src: &[u8; 16], mut dst: Out<'d, [u8; 36]>, case: AsciiCase) -> &'d mut [u8; 36] { 164 | unsafe { 165 | let src = src.as_ptr(); 166 | let dst = dst.as_mut_ptr().cast::(); 167 | crate::multiversion::format_hyphenated::auto(src, dst, case); 168 | &mut *dst.cast() 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /crates/uuid-simd/src/multiversion.rs: -------------------------------------------------------------------------------- 1 | use crate::error::Error; 2 | 3 | use vsimd::ascii::AsciiCase; 4 | 5 | vsimd::dispatch!( 6 | name = {parse_simple}, 7 | signature = {pub unsafe fn(src: *const u8, dst: *mut u8) -> Result<(), Error>}, 8 | fallback = {crate::parse::parse_simple_fallback}, 9 | simd = {crate::parse::parse_simple_simd}, 10 | targets = {"avx2", "ssse3", "sse2", "neon", "simd128"}, 11 | fastest = {"avx2", "neon", "simd128"}, 12 | ); 13 | 14 | vsimd::dispatch!( 15 | name = {parse_hyphenated}, 16 | signature = {pub unsafe fn(src: *const u8, dst: *mut u8) -> Result<(), Error>}, 17 | fallback = {crate::parse::parse_hyphenated_fallback}, 18 | simd = 
{crate::parse::parse_hyphenated_simd}, 19 | targets = {"avx2", "sse4.1", "neon", "simd128"}, 20 | fastest = {"avx2", "neon", "simd128"}, 21 | ); 22 | 23 | vsimd::dispatch!( 24 | name = {format_simple}, 25 | signature = {pub unsafe fn(src: *const u8, dst: *mut u8, case: AsciiCase) -> ()}, 26 | fallback = {crate::format::format_simple_fallback}, 27 | simd = {crate::format::format_simple_simd}, 28 | targets = {"avx2", "ssse3", "sse2", "neon", "simd128"}, 29 | fastest = {"avx2", "neon", "simd128"}, 30 | ); 31 | 32 | vsimd::dispatch!( 33 | name = {format_hyphenated}, 34 | signature = {pub unsafe fn(src: *const u8, dst: *mut u8, case: AsciiCase) -> ()}, 35 | fallback = {crate::format::format_hyphenated_fallback}, 36 | simd = {crate::format::format_hyphenated_simd}, 37 | targets = {"avx2", "sse4.1", "neon", "simd128"}, 38 | fastest = {"avx2", "neon", "simd128"}, 39 | ); 40 | -------------------------------------------------------------------------------- /crates/uuid-simd/src/parse.rs: -------------------------------------------------------------------------------- 1 | use crate::spec::*; 2 | use crate::Error; 3 | 4 | use vsimd::hex::unhex; 5 | use vsimd::is_isa_type; 6 | use vsimd::isa::{InstructionSet, SSE2}; 7 | use vsimd::tools::{read, write}; 8 | use vsimd::vector::{V256, V64}; 9 | use vsimd::{SIMD128, SIMD256}; 10 | 11 | #[inline(always)] 12 | const fn shl4(x: u8) -> u8 { 13 | x.wrapping_shl(4) 14 | } 15 | 16 | #[inline(always)] 17 | pub unsafe fn parse_simple_fallback(src: *const u8, dst: *mut u8) -> Result<(), Error> { 18 | let mut flag = 0; 19 | for i in 0..16 { 20 | let h1 = unhex(read(src, i * 2)); 21 | let h2 = unhex(read(src, i * 2 + 1)); 22 | flag |= h1 | h2; 23 | write(dst, i, shl4(h1) | h2); 24 | } 25 | ensure!(flag != 0xff); 26 | Ok(()) 27 | } 28 | 29 | #[inline(always)] 30 | pub unsafe fn parse_hyphenated_fallback(src: *const u8, dst: *mut u8) -> Result<(), Error> { 31 | match [read(src, 8), read(src, 13), read(src, 18), read(src, 23)] { 32 | [b'-', 
b'-', b'-', b'-'] => {} 33 | _ => return Err(Error::new()), 34 | } 35 | 36 | let mut flag = 0; 37 | let positions: [usize; 8] = [0, 4, 9, 14, 19, 24, 28, 32]; 38 | for (j, i) in positions.iter().copied().enumerate() { 39 | let h1 = unhex(read(src, i)); 40 | let h2 = unhex(read(src, i + 1)); 41 | let h3 = unhex(read(src, i + 2)); 42 | let h4 = unhex(read(src, i + 3)); 43 | flag |= h1 | h2 | h3 | h4; 44 | write(dst, j * 2, shl4(h1) | h2); 45 | write(dst, j * 2 + 1, shl4(h3) | h4); 46 | } 47 | ensure!(flag != 0xff); 48 | Ok(()) 49 | } 50 | 51 | #[inline(always)] 52 | pub unsafe fn parse_simple_simd(s: S, src: *const u8, dst: *mut u8) -> Result<(), Error> { 53 | if is_isa_type!(S, SSE2) { 54 | return parse_simple_simd_sse2(SSE2::new(), src, dst); 55 | } 56 | { 57 | let x = s.v256_load_unaligned(src); 58 | let y = try_!(vsimd::hex::decode_ascii32(s, x)); 59 | s.v128_store_unaligned(dst, y); 60 | Ok(()) 61 | } 62 | } 63 | 64 | #[inline(always)] 65 | pub unsafe fn parse_hyphenated_simd(s: S, src: *const u8, dst: *mut u8) -> Result<(), Error> { 66 | match [read(src, 8), read(src, 13), read(src, 18), read(src, 23)] { 67 | [b'-', b'-', b'-', b'-'] => {} 68 | _ => return Err(Error::new()), 69 | } 70 | 71 | const SWIZZLE: V256 = V256::from_bytes([ 72 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 73 | 0x09, 0x0a, 0x0b, 0x0c, 0x0e, 0x0f, 0x80, 0x80, // 74 | 0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, // 75 | 0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, // 76 | ]); 77 | 78 | let a0 = s.v256_load_unaligned(src); 79 | let a1 = s.u8x16x2_swizzle(a0, SWIZZLE); 80 | 81 | let a2 = i16x16_set_lane7(s, a1, src.add(16).cast::().read_unaligned()); 82 | let a3 = i32x8_set_lane7(s, a2, src.add(32).cast::().read_unaligned()); 83 | 84 | let ans = try_!(vsimd::hex::decode_ascii32(s, a3)); 85 | s.v128_store_unaligned(dst, ans); 86 | 87 | Ok(()) 88 | } 89 | 90 | #[inline(always)] 91 | pub unsafe fn parse_simple_simd_sse2(s: SSE2, src: *const u8, dst: *mut u8) -> Result<(), Error> { 
92 | let x1 = s.v128_load_unaligned(src); 93 | let x2 = s.v128_load_unaligned(src.add(16)); 94 | 95 | let (n1, f1) = vsimd::hex::sse2::decode_nibbles(s, x1); 96 | let (n2, f2) = vsimd::hex::sse2::decode_nibbles(s, x2); 97 | 98 | let flag = s.v128_or(f1, f2); 99 | ensure!(s.u8x16_bitmask(flag) == 0); 100 | 101 | let y1 = vsimd::hex::sse2::merge_bits(s, n1); 102 | let y2 = vsimd::hex::sse2::merge_bits(s, n2); 103 | 104 | dst.cast::<[V64; 2]>().write_unaligned([y1, y2]); 105 | Ok(()) 106 | } 107 | -------------------------------------------------------------------------------- /crates/uuid-simd/src/spec.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::missing_transmute_annotations)] 2 | 3 | use vsimd::vector::{V128, V256}; 4 | use vsimd::{SIMD128, SIMD256}; 5 | 6 | #[cfg(any( 7 | any(target_arch = "x86", target_arch = "x86_64"), 8 | any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"), 9 | target_arch = "wasm32" 10 | ))] 11 | vsimd::item_group! 
{ 12 | use vsimd::matches_isa; 13 | use core::mem::transmute as t; 14 | } 15 | 16 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 17 | use vsimd::isa::{AVX2, SSE2, SSE41}; 18 | 19 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 20 | use vsimd::isa::NEON; 21 | 22 | #[cfg(target_arch = "wasm32")] 23 | use vsimd::isa::WASM128; 24 | 25 | #[cfg(target_arch = "x86")] 26 | use core::arch::x86::*; 27 | 28 | #[cfg(target_arch = "x86_64")] 29 | use core::arch::x86_64::*; 30 | 31 | #[cfg(all(feature = "unstable", target_arch = "arm"))] 32 | use core::arch::arm::*; 33 | 34 | #[cfg(target_arch = "aarch64")] 35 | use core::arch::aarch64::*; 36 | 37 | #[cfg(target_arch = "wasm32")] 38 | use core::arch::wasm32::*; 39 | 40 | #[inline(always)] 41 | pub fn i16x16_set_lane7(s: S, a: V256, x: i16) -> V256 { 42 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 43 | if matches_isa!(S, AVX2) { 44 | return unsafe { t(_mm256_insert_epi16::<7>(t(a), x)) }; 45 | } 46 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 47 | if matches_isa!(S, SSE2) { 48 | let a = a.to_v128x2(); 49 | let a0 = unsafe { t(_mm_insert_epi16::<7>(t(a.0), x as i32)) }; 50 | return V256::from_v128x2((a0, a.1)); 51 | } 52 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 53 | if matches_isa!(S, NEON) { 54 | return unsafe { 55 | let a: uint8x16x2_t = t(a); 56 | let a0 = vsetq_lane_s16::<7>(x, t(a.0)); 57 | t(uint8x16x2_t(t(a0), a.1)) 58 | }; 59 | } 60 | #[cfg(target_arch = "wasm32")] 61 | if matches_isa!(S, WASM128) { 62 | let a = a.to_v128x2(); 63 | let a0 = unsafe { t(i16x8_replace_lane::<7>(t(a.0), x)) }; 64 | return V256::from_v128x2((a0, a.1)); 65 | } 66 | { 67 | let _ = (s, a, x); 68 | unreachable!() 69 | } 70 | } 71 | 72 | #[inline(always)] 73 | pub fn i32x8_set_lane7(s: S, a: V256, x: i32) -> V256 { 74 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 75 | if matches_isa!(S, AVX2) { 76 | return unsafe 
{ t(_mm256_insert_epi32::<7>(t(a), x)) }; 77 | } 78 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 79 | if matches_isa!(S, SSE41) { 80 | let a = a.to_v128x2(); 81 | let a1 = unsafe { t(_mm_insert_epi32::<3>(t(a.1), x)) }; 82 | return V256::from_v128x2((a.0, a1)); 83 | } 84 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 85 | if matches_isa!(S, NEON) { 86 | return unsafe { 87 | let a: uint8x16x2_t = t(a); 88 | let a1 = vsetq_lane_s32::<3>(x, t(a.1)); 89 | t(uint8x16x2_t(a.0, t(a1))) 90 | }; 91 | } 92 | #[cfg(target_arch = "wasm32")] 93 | if matches_isa!(S, WASM128) { 94 | let a = a.to_v128x2(); 95 | let a1 = unsafe { t(i32x4_replace_lane::<3>(t(a.1), x)) }; 96 | return V256::from_v128x2((a.0, a1)); 97 | } 98 | { 99 | let _ = (s, a, x); 100 | unreachable!() 101 | } 102 | } 103 | 104 | #[inline(always)] 105 | pub fn i32x4_get_lane3(s: S, a: V128) -> i32 { 106 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 107 | if matches_isa!(S, SSE41) { 108 | return unsafe { _mm_extract_epi32::<3>(t(a)) }; 109 | } 110 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 111 | if matches_isa!(S, NEON) { 112 | return unsafe { vgetq_lane_s32::<3>(t(a)) }; 113 | } 114 | #[cfg(target_arch = "wasm32")] 115 | if matches_isa!(S, WASM128) { 116 | return unsafe { i32x4_extract_lane::<3>(t(a)) }; 117 | } 118 | { 119 | let _ = (s, a); 120 | unreachable!() 121 | } 122 | } 123 | 124 | #[inline(always)] 125 | pub fn i16x8_get_lane7(s: S, a: V128) -> i16 { 126 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 127 | if matches_isa!(S, SSE2) { 128 | return unsafe { _mm_extract_epi16::<7>(t(a)) as i16 }; 129 | } 130 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 131 | if matches_isa!(S, NEON) { 132 | return unsafe { vgetq_lane_s16::<7>(t(a)) }; 133 | } 134 | #[cfg(target_arch = "wasm32")] 135 | if matches_isa!(S, WASM128) { 136 | return unsafe { 
/// Inputs that must parse successfully.
///
/// Each entry is `(expected, input)`: `expected` is the UUID written as 32
/// plain hex digits and `input` is the text handed to the parser (simple,
/// hyphenated, braced or `urn:uuid:` form).
fn ok_cases() -> &'static [(&'static str, &'static str)] {
    &[
        (
            "67e5504410b1426f9247bb680e5fe0c8",
            "67e55044-10b1-426f-9247-bb680e5fe0c8",
        ),
        (
            "67e5504410b1426f9247bb680e5fe0c8",
            "67e5504410b1426f9247bb680e5fe0c8",
        ),
        (
            "67e5504410b1426f9247bb680e5fe0c8",
            "{67e55044-10b1-426f-9247-bb680e5fe0c8}",
        ),
        (
            "67e5504410b1426f9247bb680e5fe0c8",
            "urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8",
        ),
        (
            "00000000000000000000000000000000",
            "00000000000000000000000000000000",
        ),
        (
            "00000000000000000000000000000000",
            "00000000-0000-0000-0000-000000000000",
        ),
        (
            "01020304111221223132414243444546",
            "01020304-1112-2122-3132-414243444546",
        ),
        (
            "F9168C5ECEB24faaB6BF329BF39FA1E4",
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4",
        ),
        (
            "6d93badebd9f4e1389149474e1e3567b",
            "{6d93bade-bd9f-4e13-8914-9474e1e3567b}",
        ),
    ]
}
"F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4", 52 | "67e5504410b1426%9247bb680e5fe0c8", 53 | "67e550X410b1426f9247bb680e5fe0cd", 54 | "67e550-4105b1426f9247bb680e5fe0c", 55 | "F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4", 56 | ]; 57 | ERR 58 | } 59 | 60 | fn format_cases() -> &'static [(&'static str, &'static str)] { 61 | &[ 62 | ( 63 | "67e5504410b1426f9247bb680e5fe0c8", 64 | "67e55044-10b1-426f-9247-bb680e5fe0c8", 65 | ), 66 | ( 67 | "01020304111221223132414243444546", 68 | "01020304-1112-2122-3132-414243444546", 69 | ), 70 | ( 71 | "00000000000000000000000000000000", 72 | "00000000-0000-0000-0000-000000000000", 73 | ), 74 | ] 75 | } 76 | 77 | #[cfg_attr(not(target_arch = "wasm32"), test)] 78 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 79 | fn basic() { 80 | for &(expected, input) in ok_cases() { 81 | let mut expected_buf = [0u8; 16]; 82 | let expected_bytes = hex_simd::decode(expected.as_bytes(), expected_buf.as_mut_slice().as_out()).unwrap(); 83 | 84 | let mut output_buf = [0; 16]; 85 | let output_bytes = uuid_simd::parse(input.as_bytes(), output_buf.as_out()).unwrap(); 86 | 87 | assert_eq!(output_bytes, expected_bytes); 88 | } 89 | 90 | for &input in err_cases() { 91 | let mut output_buf = [0; 16]; 92 | uuid_simd::parse(input.as_bytes(), output_buf.as_out()).unwrap_err(); 93 | } 94 | 95 | for &(input, expected) in format_cases() { 96 | let mut src = [0; 16]; 97 | hex_simd::decode(input.as_bytes(), src.as_mut_slice().as_out()).unwrap(); 98 | 99 | let mut output_buf = [0; 32]; 100 | let output = uuid_simd::format_simple(&src, output_buf.as_out(), AsciiCase::Upper); 101 | assert_eq!(output.as_slice(), input.to_ascii_uppercase().as_bytes()); 102 | let output = uuid_simd::format_simple(&src, output_buf.as_out(), AsciiCase::Lower); 103 | assert_eq!(output.as_slice(), input.to_ascii_lowercase().as_bytes()); 104 | 105 | let mut output_buf = [0; 36]; 106 | let output = uuid_simd::format_hyphenated(&src, output_buf.as_out(), AsciiCase::Upper); 107 | 
assert_eq!(output.as_slice(), expected.to_ascii_uppercase().as_bytes()); 108 | let output = uuid_simd::format_hyphenated(&src, output_buf.as_out(), AsciiCase::Lower); 109 | assert_eq!(output.as_slice(), expected.to_ascii_lowercase().as_bytes()); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /crates/vsimd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vsimd" 3 | version = "0.9.0-dev" 4 | edition = "2021" 5 | description = "SIMD utilities" 6 | license = "MIT" 7 | repository = "https://github.com/Nugine/simd" 8 | keywords = ["simd"] 9 | categories = ["no-std"] 10 | readme = "README.md" 11 | rust-version = "1.63" 12 | 13 | [package.metadata.docs.rs] 14 | all-features = true 15 | rustdoc-args = ["--cfg", "docsrs"] 16 | 17 | [features] 18 | alloc = [] 19 | std = ["alloc"] 20 | detect = ["std"] 21 | unstable = [] 22 | 23 | [dev-dependencies] 24 | const-str = "0.5.3" 25 | rand = "0.8.5" 26 | 27 | [target.'cfg(target_arch="wasm32")'.dev-dependencies] 28 | getrandom = { version = "0.2.8", features = ["js"] } 29 | wasm-bindgen-test = "0.3.34" 30 | -------------------------------------------------------------------------------- /crates/vsimd/LICENSE: -------------------------------------------------------------------------------- 1 | ../../LICENSE -------------------------------------------------------------------------------- /crates/vsimd/README.md: -------------------------------------------------------------------------------- 1 | # vsimd 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/vsimd.svg)](https://crates.io/crates/vsimd) 4 | [![Docs](https://docs.rs/vsimd/badge.svg)](https://docs.rs/vsimd/) 5 | [![MIT licensed][mit-badge]][mit-url] 6 | 7 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 8 | [mit-url]: ../../LICENSE 9 | 10 | ⚠️ This crate contains shared implementation details. Do not directly depend on it. 
/// Scalar 16-entry table lookup.
///
/// Returns `0` when the top bit of `x` is set; otherwise returns the entry
/// of `lut` selected by the low four bits of `x` (bits 4..=6 are ignored).
#[inline]
#[must_use]
pub const fn lookup(lut: &[u8; 16], x: u8) -> u8 {
    match x >> 7 {
        0 => lut[(x % 16) as usize],
        _ => 0,
    }
}
#[inline(always)] 77 | pub fn decode_ascii_xn, V: POD>(s: S, x: V, check: AlswLut, decode: AlswLut) -> (V, V) { 78 | let shr3 = s.u32xn_shr::<3>(x); 79 | 80 | let h1 = u8xn_avgr(s, shr3, u8x16xn_lookup(s, check.hash, x)); 81 | let h2 = u8xn_avgr(s, shr3, u8x16xn_lookup(s, decode.hash, x)); 82 | 83 | let o1 = u8x16xn_lookup(s, check.offset, h1); 84 | let o2 = u8x16xn_lookup(s, decode.offset, h2); 85 | 86 | let c1 = s.i8xn_add_sat(x, o1); 87 | let c2 = s.u8xn_add(x, o2); 88 | 89 | (c1, c2) 90 | } 91 | 92 | // FIXME: https://github.com/rust-lang/rust/issues/124216 93 | // TODO: workaround for SSE2 94 | #[inline(always)] 95 | fn u8xn_avgr, V: POD>(s: S, a: V, b: V) -> V { 96 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 97 | { 98 | use crate::isa::AVX2; 99 | use crate::tools::transmute_copy as tc; 100 | 101 | use core::arch::asm; 102 | 103 | #[cfg(target_arch = "x86")] 104 | use core::arch::x86::*; 105 | 106 | #[cfg(target_arch = "x86_64")] 107 | use core::arch::x86_64::*; 108 | 109 | if matches_isa!(S, AVX2) && is_pod_type!(V, V256) { 110 | return unsafe { tc(&vpavgb(tc(&a), tc(&b))) }; 111 | } 112 | 113 | #[target_feature(enable = "avx")] 114 | unsafe fn vpavgb(a: __m256i, mut b: __m256i) -> __m256i { 115 | asm!( 116 | "vpavgb {b}, {a}, {b}", 117 | options(pure, nomem, nostack), 118 | a = in(ymm_reg) a, 119 | b = inout(ymm_reg) b, 120 | ); 121 | b 122 | } 123 | } 124 | 125 | s.u8xn_avgr(a, b) 126 | } 127 | 128 | #[macro_export] 129 | macro_rules! 
impl_alsw { 130 | ($spec:ty) => { 131 | impl $spec { 132 | const CHECK_HASH: [u8; 16] = { 133 | let mut arr = [0; 16]; 134 | let mut i = 0; 135 | while i < 16 { 136 | let x: u8 = Self::check_hash(i as u8); 137 | arr[i] = (x << 1) - 1; 138 | i += 1; 139 | } 140 | arr 141 | }; 142 | 143 | const CHECK_OFFSET: [u8; 16] = { 144 | let mut arr = [0x80; 16]; 145 | let mut c: u8 = 255; 146 | loop { 147 | if Self::decode(c) != 0xff { 148 | let h = $crate::alsw::hash(&Self::CHECK_HASH, c); 149 | arr[(h & 0x0f) as usize] = 0u8.wrapping_sub(c); 150 | } 151 | if c == 0 { 152 | break; 153 | } 154 | c -= 1; 155 | } 156 | arr 157 | }; 158 | 159 | const DECODE_HASH: [u8; 16] = { 160 | let mut arr = [0; 16]; 161 | let mut i = 0; 162 | while i < 16 { 163 | let x: u8 = Self::decode_hash(i as u8); 164 | arr[i] = (x << 1) - 1; 165 | i += 1; 166 | } 167 | arr 168 | }; 169 | 170 | const DECODE_OFFSET: [u8; 16] = { 171 | let mut arr = [0x80; 16]; 172 | let mut c: u8 = 255; 173 | loop { 174 | let idx = Self::decode(c); 175 | if idx != 0xff { 176 | let h = $crate::alsw::hash(&Self::DECODE_HASH, c); 177 | arr[(h & 0x0f) as usize] = idx.wrapping_sub(c); 178 | } 179 | if c == 0 { 180 | break; 181 | } 182 | c -= 1; 183 | } 184 | arr 185 | }; 186 | 187 | #[inline] 188 | #[must_use] 189 | const fn check_lut() -> AlswLut { 190 | AlswLut { 191 | hash: V128::from_bytes(Self::CHECK_HASH), 192 | offset: V128::from_bytes(Self::CHECK_OFFSET), 193 | } 194 | } 195 | 196 | #[inline] 197 | #[must_use] 198 | const fn decode_lut() -> AlswLut { 199 | AlswLut { 200 | hash: V128::from_bytes(Self::DECODE_HASH), 201 | offset: V128::from_bytes(Self::DECODE_OFFSET), 202 | } 203 | } 204 | 205 | #[cfg(test)] 206 | fn test_check() { 207 | let hash = &Self::CHECK_HASH; 208 | let offset = &Self::CHECK_OFFSET; 209 | 210 | let check = |c: u8| $crate::alsw::check(hash, offset, c); 211 | 212 | for c in 0..=255u8 { 213 | assert_eq!(check(c) < 0x80, Self::decode(c) != 0xff); 214 | } 215 | } 216 | 217 | #[cfg(test)] 218 | fn 
test_decode() { 219 | let hash = &Self::DECODE_HASH; 220 | let offset = &Self::DECODE_OFFSET; 221 | 222 | let decode = |c: u8| $crate::alsw::decode(hash, offset, c); 223 | 224 | for c in 0..=255u8 { 225 | let idx = Self::decode(c); 226 | if idx != 0xff { 227 | assert_eq!(decode(c), idx); 228 | } 229 | } 230 | } 231 | } 232 | }; 233 | } 234 | -------------------------------------------------------------------------------- /crates/vsimd/src/ascii.rs: -------------------------------------------------------------------------------- 1 | use crate::pod::POD; 2 | use crate::Scalable; 3 | 4 | /// An enum type which represents the case of ascii letters. 5 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 6 | pub enum AsciiCase { 7 | /// a-z are lower case letters. 8 | Lower, 9 | /// A-Z are upper case letters. 10 | Upper, 11 | } 12 | 13 | #[inline(always)] 14 | fn convert_ascii_case, V: POD, const C: u8>(s: S, x: V) -> V { 15 | assert!(matches!(C, b'A' | b'a')); 16 | let x1 = s.u8xn_sub(x, s.u8xn_splat(C + 0x80)); 17 | let x2 = s.i8xn_lt(x1, s.i8xn_splat(-0x80 + 26)); 18 | let x3 = s.and(x2, s.u8xn_splat(0x20)); 19 | s.xor(x, x3) 20 | } 21 | 22 | #[inline(always)] 23 | pub fn to_ascii_lowercase, V: POD>(s: S, x: V) -> V { 24 | convert_ascii_case::(s, x) 25 | } 26 | 27 | #[inline(always)] 28 | pub fn to_ascii_uppercase, V: POD>(s: S, x: V) -> V { 29 | convert_ascii_case::(s, x) 30 | } 31 | 32 | #[cfg(test)] 33 | mod algorithm { 34 | #[cfg(feature = "std")] 35 | fn i8_lt(a: i8, b: i8) -> u8 { 36 | if a < b { 37 | 0xff 38 | } else { 39 | 0x00 40 | } 41 | } 42 | 43 | #[cfg(feature = "std")] 44 | #[test] 45 | #[ignore] 46 | fn convert_case() { 47 | let convert = |c: u8, shift: u8| { 48 | let x1 = c.wrapping_sub(shift + 0x80); 49 | let x2 = i8_lt(x1 as i8, -0x80 + 26); 50 | let x3 = x2 & 0x20; 51 | c ^ x3 52 | }; 53 | let to_upper = |c: u8| convert(c, b'a'); 54 | let to_lower = |c: u8| convert(c, b'A'); 55 | 56 | crate::tools::print_fn_table(|c| c.is_ascii_lowercase(), to_upper); 57 
| crate::tools::print_fn_table(|c| c.is_ascii_uppercase(), to_lower); 58 | 59 | for c in 0..=255u8 { 60 | assert_eq!(to_upper(c), c.to_ascii_uppercase()); 61 | assert_eq!(to_lower(c), c.to_ascii_lowercase()); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /crates/vsimd/src/bswap.rs: -------------------------------------------------------------------------------- 1 | use crate::pod::POD; 2 | use crate::vector::{V128, V256}; 3 | use crate::SIMD256; 4 | 5 | pub(crate) const SHUFFLE_U16X8: V128 = V128::from_bytes([ 6 | 0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06, // 7 | 0x09, 0x08, 0x0b, 0x0a, 0x0d, 0x0c, 0x0f, 0x0e, // 8 | ]); 9 | 10 | pub(crate) const SHUFFLE_U32X4: V128 = V128::from_bytes([ 11 | 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, // 12 | 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c, // 13 | ]); 14 | 15 | pub(crate) const SHUFFLE_U64X2: V128 = V128::from_bytes([ 16 | 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, // 17 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, // 18 | ]); 19 | 20 | pub(crate) const SHUFFLE_U16X16: V256 = SHUFFLE_U16X8.x2(); 21 | 22 | pub(crate) const SHUFFLE_U32X8: V256 = SHUFFLE_U32X4.x2(); 23 | 24 | pub(crate) const SHUFFLE_U64X4: V256 = SHUFFLE_U64X2.x2(); 25 | 26 | pub unsafe trait BSwap: POD { 27 | const LANES: usize; 28 | fn swap_single(x: Self) -> Self; 29 | fn swap_simd(s: S, a: V256) -> V256; 30 | } 31 | 32 | unsafe impl BSwap for u16 { 33 | const LANES: usize = 16; 34 | 35 | #[inline(always)] 36 | fn swap_single(x: Self) -> Self { 37 | x.swap_bytes() 38 | } 39 | 40 | #[inline(always)] 41 | fn swap_simd(s: S, a: V256) -> V256 { 42 | s.u16x16_bswap(a) 43 | } 44 | } 45 | 46 | unsafe impl BSwap for u32 { 47 | const LANES: usize = 8; 48 | 49 | #[inline(always)] 50 | fn swap_single(x: Self) -> Self { 51 | x.swap_bytes() 52 | } 53 | 54 | #[inline(always)] 55 | fn swap_simd(s: S, a: V256) -> V256 { 56 | s.u32x8_bswap(a) 57 | } 58 | } 59 | 60 | unsafe impl BSwap 
for u64 { 61 | const LANES: usize = 4; 62 | 63 | #[inline(always)] 64 | fn swap_single(x: Self) -> Self { 65 | x.swap_bytes() 66 | } 67 | 68 | #[inline(always)] 69 | fn swap_simd(s: S, a: V256) -> V256 { 70 | s.u64x4_bswap(a) 71 | } 72 | } 73 | 74 | #[inline(always)] 75 | pub unsafe fn bswap_fallback(mut src: *const T, len: usize, mut dst: *mut T) 76 | where 77 | T: BSwap, 78 | { 79 | let end = src.add(len); 80 | while src < end { 81 | let x = src.read(); 82 | let y = ::swap_single(x); 83 | dst.write(y); 84 | src = src.add(1); 85 | dst = dst.add(1); 86 | } 87 | } 88 | 89 | #[inline(always)] 90 | pub unsafe fn bswap_simd(s: S, mut src: *const T, mut len: usize, mut dst: *mut T) 91 | where 92 | T: BSwap, 93 | { 94 | let end = src.add(len / T::LANES * T::LANES); 95 | while src < end { 96 | let x = s.v256_load_unaligned(src.cast()); 97 | let y = ::swap_simd(s, x); 98 | s.v256_store_unaligned(dst.cast(), y); 99 | src = src.add(T::LANES); 100 | dst = dst.add(T::LANES); 101 | } 102 | len %= T::LANES; 103 | 104 | bswap_fallback(src, len, dst); 105 | } 106 | -------------------------------------------------------------------------------- /crates/vsimd/src/isa.rs: -------------------------------------------------------------------------------- 1 | use crate::{SIMD128, SIMD256, SIMD64}; 2 | 3 | pub unsafe trait InstructionSet: Copy + 'static { 4 | const ID: InstructionSetTypeId; 5 | const ARCH: bool; 6 | 7 | unsafe fn new() -> Self; 8 | 9 | fn is_enabled() -> bool; 10 | } 11 | 12 | #[inline(always)] 13 | #[must_use] 14 | pub fn detect() -> Option { 15 | S::is_enabled().then(|| unsafe { S::new() }) 16 | } 17 | 18 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 19 | pub enum InstructionSetTypeId { 20 | Fallback, 21 | SSE2, 22 | SSSE3, 23 | SSE41, 24 | AVX2, 25 | NEON, 26 | WASM128, 27 | } 28 | 29 | #[doc(hidden)] 30 | #[inline] 31 | #[must_use] 32 | pub const fn matches_isa_impl() -> bool 33 | where 34 | S: InstructionSet, 35 | U: InstructionSet, 36 | { 37 | 
/// Evaluates to `true` when the instruction set type `$self` has exactly the
/// same `InstructionSet::ID` as `$isa`.
///
/// This is an identity check on the ID only: it does not consider which
/// instruction sets include which, and it does not look at
/// `InstructionSet::ARCH`.
#[macro_export]
macro_rules! is_isa_type {
    ($self:ident, $isa:ident) => {{
        // The second argument of `matches!` is a pattern, so `$isa`'s ID is
        // used as a constant pattern rather than compared with `==`.
        matches!(
            <$self as $crate::isa::InstructionSet>::ID,
            <$isa as $crate::isa::InstructionSet>::ID
        )
    }};
}
is_feature_detected { 97 | ($feature:tt) => {{ 98 | #[cfg(target_feature = $feature)] 99 | { 100 | true 101 | } 102 | #[cfg(not(target_feature = $feature))] 103 | { 104 | #[cfg(feature = "detect")] 105 | { 106 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 107 | { 108 | std::arch::is_x86_feature_detected!($feature) 109 | } 110 | #[cfg(target_arch = "arm")] 111 | { 112 | std::arch::is_arm_feature_detected!($feature) 113 | } 114 | #[cfg(target_arch = "aarch64")] 115 | { 116 | std::arch::is_aarch64_feature_detected!($feature) 117 | } 118 | #[cfg(not(any( 119 | target_arch = "x86", 120 | target_arch = "x86_64", 121 | target_arch = "arm", 122 | target_arch = "aarch64" 123 | )))] 124 | { 125 | false 126 | } 127 | } 128 | #[cfg(not(feature = "detect"))] 129 | { 130 | false 131 | } 132 | } 133 | }}; 134 | } 135 | 136 | macro_rules! x86_is_enabled { 137 | ($feature:tt) => {{ 138 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 139 | { 140 | is_feature_detected!($feature) 141 | } 142 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] 143 | { 144 | false 145 | } 146 | }}; 147 | } 148 | 149 | #[derive(Debug, Clone, Copy)] 150 | pub struct SSE2(()); 151 | 152 | unsafe impl InstructionSet for SSE2 { 153 | const ID: InstructionSetTypeId = InstructionSetTypeId::SSE2; 154 | const ARCH: bool = cfg!(any(target_arch = "x86", target_arch = "x86_64")); 155 | 156 | #[inline(always)] 157 | unsafe fn new() -> Self { 158 | Self(()) 159 | } 160 | 161 | #[inline(always)] 162 | fn is_enabled() -> bool { 163 | x86_is_enabled!("sse2") 164 | } 165 | } 166 | 167 | unsafe impl SIMD64 for SSE2 {} 168 | unsafe impl SIMD128 for SSE2 {} 169 | unsafe impl SIMD256 for SSE2 {} 170 | 171 | #[derive(Debug, Clone, Copy)] 172 | pub struct SSSE3(()); 173 | 174 | unsafe impl InstructionSet for SSSE3 { 175 | const ID: InstructionSetTypeId = InstructionSetTypeId::SSSE3; 176 | const ARCH: bool = cfg!(any(target_arch = "x86", target_arch = "x86_64")); 177 | 178 | #[inline(always)] 
179 | unsafe fn new() -> Self { 180 | Self(()) 181 | } 182 | 183 | #[inline(always)] 184 | fn is_enabled() -> bool { 185 | x86_is_enabled!("ssse3") 186 | } 187 | } 188 | 189 | unsafe impl SIMD64 for SSSE3 {} 190 | unsafe impl SIMD128 for SSSE3 {} 191 | unsafe impl SIMD256 for SSSE3 {} 192 | 193 | #[derive(Debug, Clone, Copy)] 194 | pub struct SSE41(()); 195 | 196 | unsafe impl InstructionSet for SSE41 { 197 | const ID: InstructionSetTypeId = InstructionSetTypeId::SSE41; 198 | const ARCH: bool = cfg!(any(target_arch = "x86", target_arch = "x86_64")); 199 | 200 | #[inline(always)] 201 | unsafe fn new() -> Self { 202 | Self(()) 203 | } 204 | 205 | #[inline(always)] 206 | fn is_enabled() -> bool { 207 | x86_is_enabled!("sse4.1") 208 | } 209 | } 210 | 211 | unsafe impl SIMD64 for SSE41 {} 212 | unsafe impl SIMD128 for SSE41 {} 213 | unsafe impl SIMD256 for SSE41 {} 214 | 215 | #[derive(Debug, Clone, Copy)] 216 | pub struct AVX2(()); 217 | 218 | unsafe impl InstructionSet for AVX2 { 219 | const ID: InstructionSetTypeId = InstructionSetTypeId::AVX2; 220 | const ARCH: bool = cfg!(any(target_arch = "x86", target_arch = "x86_64")); 221 | 222 | #[inline(always)] 223 | unsafe fn new() -> Self { 224 | Self(()) 225 | } 226 | 227 | #[inline(always)] 228 | fn is_enabled() -> bool { 229 | x86_is_enabled!("avx2") 230 | } 231 | } 232 | 233 | unsafe impl SIMD64 for AVX2 {} 234 | unsafe impl SIMD128 for AVX2 {} 235 | unsafe impl SIMD256 for AVX2 {} 236 | 237 | #[allow(clippy::upper_case_acronyms)] 238 | #[derive(Debug, Clone, Copy)] 239 | pub struct NEON(()); 240 | 241 | unsafe impl InstructionSet for NEON { 242 | const ID: InstructionSetTypeId = InstructionSetTypeId::NEON; 243 | const ARCH: bool = cfg!(any(target_arch = "arm", target_arch = "aarch64")); 244 | 245 | #[inline(always)] 246 | unsafe fn new() -> Self { 247 | Self(()) 248 | } 249 | 250 | #[inline(always)] 251 | fn is_enabled() -> bool { 252 | #[cfg(target_arch = "arm")] 253 | { 254 | #[cfg(feature = "unstable")] 255 | { 256 
| is_feature_detected!("neon") 257 | } 258 | #[cfg(not(feature = "unstable"))] 259 | { 260 | false 261 | } 262 | } 263 | #[cfg(target_arch = "aarch64")] 264 | { 265 | is_feature_detected!("neon") 266 | } 267 | #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] 268 | { 269 | false 270 | } 271 | } 272 | } 273 | 274 | unsafe impl SIMD64 for NEON {} 275 | unsafe impl SIMD128 for NEON {} 276 | unsafe impl SIMD256 for NEON {} 277 | 278 | #[derive(Debug, Clone, Copy)] 279 | pub struct WASM128(()); 280 | 281 | unsafe impl InstructionSet for WASM128 { 282 | const ID: InstructionSetTypeId = InstructionSetTypeId::WASM128; 283 | const ARCH: bool = cfg!(target_arch = "wasm32"); 284 | 285 | #[inline(always)] 286 | unsafe fn new() -> Self { 287 | Self(()) 288 | } 289 | 290 | #[inline(always)] 291 | fn is_enabled() -> bool { 292 | #[cfg(target_arch = "wasm32")] 293 | { 294 | is_feature_detected!("simd128") 295 | } 296 | #[cfg(not(target_arch = "wasm32"))] 297 | { 298 | false 299 | } 300 | } 301 | } 302 | 303 | unsafe impl SIMD64 for WASM128 {} 304 | unsafe impl SIMD128 for WASM128 {} 305 | unsafe impl SIMD256 for WASM128 {} 306 | -------------------------------------------------------------------------------- /crates/vsimd/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! ⚠️ This crate contains shared implementation details. Do not directly depend on it. 
2 | #![cfg_attr(not(any(test, feature = "std")), no_std)] 3 | #![cfg_attr(feature = "unstable", feature(portable_simd), feature(array_chunks))] 4 | #![cfg_attr( 5 | all(feature = "unstable", target_arch = "arm"), 6 | feature(arm_target_feature), 7 | feature(stdarch_arm_feature_detection), 8 | feature(stdarch_arm_neon_intrinsics) 9 | )] 10 | #![cfg_attr(docsrs, feature(doc_cfg))] 11 | #![cfg_attr(test, deny(warnings))] 12 | // 13 | #![deny( 14 | missing_debug_implementations, 15 | missing_docs, 16 | clippy::all, 17 | clippy::pedantic, 18 | clippy::cargo, 19 | clippy::missing_inline_in_public_items 20 | )] 21 | #![warn(clippy::todo)] 22 | #![allow( 23 | clippy::inline_always, 24 | missing_docs, 25 | clippy::missing_safety_doc, 26 | clippy::missing_errors_doc, 27 | clippy::missing_panics_doc, 28 | clippy::cast_possible_truncation, 29 | clippy::cast_sign_loss, 30 | clippy::cast_possible_wrap, 31 | clippy::cast_lossless, 32 | clippy::verbose_bit_mask, 33 | clippy::module_name_repetitions, 34 | clippy::wildcard_imports, 35 | clippy::items_after_statements, 36 | clippy::match_same_arms, 37 | clippy::many_single_char_names, 38 | clippy::let_underscore_untyped 39 | )] 40 | 41 | #[cfg(feature = "alloc")] 42 | extern crate alloc; 43 | 44 | #[macro_use] 45 | mod macros; 46 | 47 | #[macro_use] 48 | pub mod isa; 49 | 50 | pub mod vector; 51 | 52 | #[macro_use] 53 | pub mod pod; 54 | pub use self::pod::POD; 55 | 56 | pub mod simulation; 57 | mod unified; 58 | 59 | mod simd64; 60 | pub use self::simd64::SIMD64; 61 | 62 | mod simd128; 63 | pub use self::simd128::SIMD128; 64 | 65 | #[macro_use] 66 | mod simd256; 67 | pub use self::simd256::SIMD256; 68 | 69 | mod scalable; 70 | pub use self::scalable::Scalable; 71 | 72 | pub mod tools; 73 | 74 | #[macro_use] 75 | pub mod alsw; 76 | 77 | pub mod ascii; 78 | pub mod bswap; 79 | pub mod hex; 80 | pub mod mask; 81 | pub mod native; 82 | pub mod table; 83 | 84 | #[cfg(feature = "unstable")] 85 | pub mod unstable; 86 | 
--------------------------------------------------------------------------------
/crates/vsimd/src/mask.rs:
--------------------------------------------------------------------------------
use crate::isa::{AVX2, NEON, SSE2, WASM128};
use crate::vector::{V128, V256};
use crate::{SIMD128, SIMD256};

use core::ops::Not;

// NOTE(review): the `<S: ...>` parameter lists in this file were missing from
// the extracted text. They are reconstructed from the trait methods each
// function calls (128-bit helpers need `SIMD128`, 256-bit helpers need
// `SIMD256`) -- confirm against the upstream file.

/// Returns `true` when every byte lane of the 128-bit mask `x` is set.
///
/// The SSE2/WASM128 branch compares the byte bitmask with `u16::MAX`, while the
/// NEON branches check that no byte is zero. These agree only when each lane is
/// either zero or has its top bit set -- presumably callers pass comparison
/// results (0x00 / 0xFF per lane); verify against callers.
///
/// Reaches `unreachable!()` when `S` matches none of SSE2, WASM128 or NEON.
#[inline(always)]
pub fn mask8x16_all<S: SIMD128>(s: S, x: V128) -> bool {
    if matches_isa!(S, SSE2 | WASM128) {
        return s.u8x16_bitmask(x) == u16::MAX;
    }
    if matches_isa!(S, NEON) {
        // 32-bit ARM and AArch64 use different helpers for the same question.
        if cfg!(target_arch = "arm") {
            return s.u8x16_any_zero(x).not();
        }
        if cfg!(target_arch = "aarch64") {
            return s.u8x16_reduce_min(x) != 0;
        }
    }
    unreachable!()
}

/// 256-bit counterpart of [`mask8x16_all`].
///
/// AVX2 compares the 32-bit byte bitmask with `u32::MAX`; every other supported
/// ISA ANDs the two 128-bit halves together and defers to [`mask8x16_all`].
#[inline(always)]
pub fn mask8x32_all<S: SIMD256>(s: S, x: V256) -> bool {
    if matches_isa!(S, AVX2) {
        return s.u8x32_bitmask(x) == u32::MAX;
    }
    if matches_isa!(S, SSE2 | WASM128 | NEON) {
        let x = x.to_v128x2();
        let x = s.v128_and(x.0, x.1);
        return mask8x16_all(s, x);
    }
    unreachable!()
}

/// Returns `true` when at least one byte lane of the 128-bit mask `x` is set.
///
/// SSE2/WASM128 test for a non-zero byte bitmask; NEON tests that the vector is
/// not all zero. The same "lanes are 0x00 or 0xFF" assumption as
/// [`mask8x16_all`] presumably applies -- confirm.
#[inline(always)]
pub fn mask8x16_any<S: SIMD128>(s: S, x: V128) -> bool {
    if matches_isa!(S, SSE2 | WASM128) {
        return s.u8x16_bitmask(x) != 0;
    }
    if matches_isa!(S, NEON) {
        return s.v128_all_zero(x).not();
    }
    unreachable!()
}

/// 256-bit counterpart of [`mask8x16_any`].
///
/// AVX2 tests the 32-bit byte bitmask directly; every other supported ISA ORs
/// the two 128-bit halves together and defers to [`mask8x16_any`].
#[inline(always)]
pub fn mask8x32_any<S: SIMD256>(s: S, x: V256) -> bool {
    if matches_isa!(S, AVX2) {
        return s.u8x32_bitmask(x) != 0;
    }
    if matches_isa!(S, SSE2 | WASM128 | NEON) {
        let x = x.to_v128x2();
        let x = s.v128_or(x.0, x.1);
        return mask8x16_any(s, x);
    }
    unreachable!()
}

/// Returns `true` when the highest bit of every byte in `x` is set.
///
/// Unlike [`mask8x16_all`], the NEON branches test the top bit itself: on
/// 32-bit ARM through a signed `< 0` comparison, on AArch64 by requiring the
/// minimum byte to be at least `0x80`.
#[inline(always)]
pub fn u8x16_highbit_all<S: SIMD128>(s: S, x: V128) -> bool {
    if matches_isa!(S, SSE2 | WASM128) {
        return s.u8x16_bitmask(x) == u16::MAX;
    }
    if matches_isa!(S, NEON) {
        if cfg!(target_arch = "arm") {
            return mask8x16_all(s, s.i8x16_lt(x, s.v128_create_zero()));
        }
        if cfg!(target_arch = "aarch64") {
            return s.u8x16_reduce_min(x) >= 0x80;
        }
    }
    unreachable!()
}

/// 256-bit counterpart of [`u8x16_highbit_all`].
///
/// On non-AVX2 ISAs the halves are ANDed first: a byte of the AND has its top
/// bit set only if both source bytes do.
#[inline(always)]
pub fn u8x32_highbit_all<S: SIMD256>(s: S, x: V256) -> bool {
    if matches_isa!(S, AVX2) {
        return s.u8x32_bitmask(x) == u32::MAX;
    }
    if matches_isa!(S, SSE2 | WASM128 | NEON) {
        let x = x.to_v128x2();
        let x = s.v128_and(x.0, x.1);
        return u8x16_highbit_all(s, x);
    }
    unreachable!()
}

/// Returns `true` when the highest bit of at least one byte in `x` is set.
///
/// On AArch64 this is "maximum byte >= 0x80"; on 32-bit ARM it is a signed
/// `< 0` comparison followed by [`mask8x16_any`].
#[inline(always)]
pub fn u8x16_highbit_any<S: SIMD128>(s: S, x: V128) -> bool {
    if matches_isa!(S, SSE2 | WASM128) {
        return s.u8x16_bitmask(x) != 0;
    }
    if matches_isa!(S, NEON) {
        if cfg!(target_arch = "arm") {
            return mask8x16_any(s, s.i8x16_lt(x, s.v128_create_zero()));
        }
        if cfg!(target_arch = "aarch64") {
            return s.u8x16_reduce_max(x) >= 0x80;
        }
    }
    unreachable!()
}

/// 256-bit counterpart of [`u8x16_highbit_any`].
///
/// On non-AVX2 ISAs the halves are ORed first: a byte of the OR has its top bit
/// set if either source byte does.
#[inline(always)]
pub fn u8x32_highbit_any<S: SIMD256>(s: S, x: V256) -> bool {
    if matches_isa!(S, AVX2) {
        return s.u8x32_bitmask(x) != 0;
    }
    if matches_isa!(S, SSE2 | WASM128 | NEON) {
        let x = x.to_v128x2();
        let x = s.v128_or(x.0, x.1);
        return u8x16_highbit_any(s, x);
    }
    unreachable!()
}

--------------------------------------------------------------------------------
/crates/vsimd/src/native.rs:
--------------------------------------------------------------------------------
/// Copyable handle wrapping the private [`Arch`] level chosen for this build/CPU.
#[derive(Debug, Clone, Copy)]
pub struct Native(Arch);

/// Instruction-set levels `Native` can hold.
///
/// Each SIMD variant exists only on the architectures that can provide it;
/// `Fallback` is always available.
#[derive(Debug, Clone, Copy)]
enum Arch {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    Avx2,

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    Sse41,

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    Sse2,

    // 32-bit ARM NEON is only offered behind the `unstable` cargo feature.
    #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
    Neon,

    #[cfg(target_arch = "wasm32")]
    Simd128,

    Fallback,
}

impl Native { 25 | #[inline] 26 | #[must_use] 27 | pub fn detect() -> Self { 28 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 29 | { 30 | if is_feature_detected!("avx2") { 31 | return Self(Arch::Avx2); 32 | } 33 | 34 | if is_feature_detected!("sse4.1") { 35 | return Self(Arch::Sse41); 36 | } 37 | 38 | if is_feature_detected!("sse2") { 39 | return Self(Arch::Sse2); 40 | } 41 | } 42 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 43 | { 44 | if is_feature_detected!("neon") { 45 | return Self(Arch::Neon); 46 | } 47 | } 48 | #[cfg(target_arch = "wasm32")] 49 | { 50 | if is_feature_detected!("simd128") { 51 | return Self(Arch::Simd128); 52 | } 53 | } 54 | Self(Arch::Fallback) 55 | } 56 | 57 | #[inline] 58 | pub fn exec(self, f: F) -> O 59 | where 60 | F: FnOnce() -> O, 61 | { 62 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 63 | { 64 | match self.0 { 65 | Arch::Avx2 => unsafe { x86::avx2(f) }, 66 | Arch::Sse41 => unsafe { x86::sse41(f) }, 67 | Arch::Sse2 => unsafe { x86::sse2(f) }, 68 | Arch::Fallback => f(), 69 | } 70 | } 71 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 72 | { 73 | match self.0 { 74 | Arch::Neon => unsafe { arm::neon(f) }, 75 | Arch::Fallback => f(), 76 | } 77 | } 78 | #[cfg(target_arch = "wasm32")] 79 | { 80 | match self.0 { 81 | Arch::Simd128 => unsafe { wasm::simd128(f) }, 82 | Arch::Fallback => f(), 83 | } 84 | } 85 | #[cfg(not(any( // 86 | any(target_arch = "x86", target_arch = "x86_64"), // 87 | any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"), // 88 | target_arch = "wasm32" // 89 | )))] 90 | { 91 | f() 92 | } 93 | } 94 | } 95 | 96 | #[allow(unused_macros)] 97 | macro_rules! 
generic_dispatch { 98 | ($name: ident, $feature: tt) => { 99 | #[inline] 100 | #[target_feature(enable = $feature)] 101 | pub unsafe fn $name(f: F) -> O 102 | where 103 | F: FnOnce() -> O, 104 | { 105 | f() 106 | } 107 | }; 108 | } 109 | 110 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 111 | mod x86 { 112 | generic_dispatch!(avx2, "avx2"); 113 | generic_dispatch!(sse41, "sse4.1"); 114 | generic_dispatch!(sse2, "sse2"); 115 | } 116 | 117 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 118 | mod arm { 119 | generic_dispatch!(neon, "neon"); 120 | } 121 | 122 | #[cfg(target_arch = "wasm32")] 123 | mod wasm { 124 | generic_dispatch!(simd128, "simd128"); 125 | } 126 | -------------------------------------------------------------------------------- /crates/vsimd/src/pod.rs: -------------------------------------------------------------------------------- 1 | use crate::vector::{V128, V256, V512, V64}; 2 | 3 | pub unsafe trait POD: Copy + 'static { 4 | const ID: PodTypeId; 5 | } 6 | 7 | macro_rules! mark_pod { 8 | ($($ty:ident),*) => { 9 | $( 10 | unsafe impl POD for $ty { 11 | const ID: PodTypeId = PodTypeId::$ty; 12 | } 13 | )* 14 | }; 15 | } 16 | 17 | mark_pod!(u8, u16, u32, u64, u128, usize); 18 | mark_pod!(i8, i16, i32, i64, i128, isize); 19 | mark_pod!(f32, f64); 20 | mark_pod!(V64, V128, V256, V512); 21 | 22 | #[inline(always)] 23 | pub fn align(slice: &[T]) -> (&[T], &[U], &[T]) { 24 | unsafe { slice.align_to() } 25 | } 26 | 27 | #[allow(non_camel_case_types)] 28 | #[derive(Debug, Clone, Copy)] 29 | pub enum PodTypeId { 30 | u8, 31 | u16, 32 | u32, 33 | u64, 34 | u128, 35 | usize, 36 | 37 | i8, 38 | i16, 39 | i32, 40 | i64, 41 | i128, 42 | isize, 43 | 44 | f32, 45 | f64, 46 | 47 | V64, 48 | V128, 49 | V256, 50 | V512, 51 | } 52 | 53 | #[macro_export] 54 | macro_rules! 
is_pod_type { 55 | ($self:ident, $x:ident $(| $xs:ident)*) => {{ 56 | // TODO: inline const 57 | use $crate::pod::POD; 58 | struct IsPodType(T); 59 | impl IsPodType { 60 | const VALUE: bool = { matches!(::ID, $crate::pod::PodTypeId::$x $(| $crate::pod::PodTypeId::$xs)*) }; 61 | } 62 | IsPodType::<$self>::VALUE 63 | }}; 64 | } 65 | -------------------------------------------------------------------------------- /crates/vsimd/src/scalable.rs: -------------------------------------------------------------------------------- 1 | use crate::isa::InstructionSet; 2 | use crate::pod::POD; 3 | use crate::vector::{V128, V256}; 4 | use crate::{mask::*, unified}; 5 | use crate::{SIMD128, SIMD256}; 6 | 7 | pub unsafe trait Scalable: InstructionSet { 8 | #[inline(always)] 9 | fn and(self, a: V, b: V) -> V { 10 | unified::and(self, a, b) 11 | } 12 | 13 | #[inline(always)] 14 | fn or(self, a: V, b: V) -> V { 15 | unified::or(self, a, b) 16 | } 17 | 18 | #[inline(always)] 19 | fn xor(self, a: V, b: V) -> V { 20 | unified::xor(self, a, b) 21 | } 22 | 23 | #[inline(always)] 24 | fn andnot(self, a: V, b: V) -> V { 25 | unified::andnot(self, a, b) 26 | } 27 | 28 | #[inline(always)] 29 | fn u8xn_splat(self, x: u8) -> V { 30 | unified::splat::<_, u8, _>(self, x) 31 | } 32 | 33 | #[inline(always)] 34 | fn i8xn_splat(self, x: i8) -> V { 35 | unified::splat::<_, i8, _>(self, x) 36 | } 37 | 38 | #[inline(always)] 39 | fn u32xn_splat(self, x: u32) -> V { 40 | unified::splat::<_, u32, _>(self, x) 41 | } 42 | 43 | #[inline(always)] 44 | fn u8xn_add(self, a: V, b: V) -> V { 45 | unified::add::<_, u8, _>(self, a, b) 46 | } 47 | 48 | #[inline(always)] 49 | fn u8xn_sub(self, a: V, b: V) -> V { 50 | unified::sub::<_, u8, _>(self, a, b) 51 | } 52 | 53 | #[inline(always)] 54 | fn u32xn_sub(self, a: V, b: V) -> V { 55 | unified::sub::<_, u32, _>(self, a, b) 56 | } 57 | 58 | #[inline(always)] 59 | fn u8xn_add_sat(self, a: V, b: V) -> V { 60 | unified::add_sat::<_, u8, _>(self, a, b) 61 | } 62 | 63 | 
#[inline(always)] 64 | fn i8xn_add_sat(self, a: V, b: V) -> V { 65 | unified::add_sat::<_, i8, _>(self, a, b) 66 | } 67 | 68 | #[inline(always)] 69 | fn u8xn_sub_sat(self, a: V, b: V) -> V { 70 | unified::sub_sat::<_, u8, _>(self, a, b) 71 | } 72 | 73 | #[inline(always)] 74 | fn u8xn_eq(self, a: V, b: V) -> V { 75 | unified::eq::<_, u8, _>(self, a, b) 76 | } 77 | 78 | #[inline(always)] 79 | fn i8xn_lt(self, a: V, b: V) -> V { 80 | unified::lt::<_, i8, _>(self, a, b) 81 | } 82 | 83 | #[inline(always)] 84 | fn u32xn_lt(self, a: V, b: V) -> V { 85 | unified::lt::<_, u32, _>(self, a, b) 86 | } 87 | 88 | #[inline(always)] 89 | fn u32xn_max(self, a: V, b: V) -> V { 90 | unified::max::<_, u32, _>(self, a, b) 91 | } 92 | 93 | fn u16xn_shl(self, a: V) -> V; 94 | 95 | fn u16xn_shr(self, a: V) -> V; 96 | fn u32xn_shr(self, a: V) -> V; 97 | 98 | fn u8xn_avgr(self, a: V, b: V) -> V; 99 | 100 | fn u8x16xn_swizzle(self, a: V, b: V) -> V; 101 | 102 | fn all_zero(self, a: V) -> bool; 103 | 104 | fn mask8xn_all(self, a: V) -> bool; 105 | fn mask8xn_any(self, a: V) -> bool; 106 | 107 | fn u8xn_highbit_all(self, a: V) -> bool; 108 | fn u8xn_highbit_any(self, a: V) -> bool; 109 | 110 | fn u16xn_bswap(self, a: V) -> V; 111 | fn u32xn_bswap(self, a: V) -> V; 112 | fn u64xn_bswap(self, a: V) -> V; 113 | } 114 | 115 | unsafe impl Scalable for S 116 | where 117 | S: SIMD128, 118 | { 119 | #[inline(always)] 120 | fn u16xn_shl(self, a: V128) -> V128 { 121 | self.u16x8_shl::(a) 122 | } 123 | 124 | #[inline(always)] 125 | fn u16xn_shr(self, a: V128) -> V128 { 126 | self.u16x8_shr::(a) 127 | } 128 | 129 | #[inline(always)] 130 | fn u32xn_shr(self, a: V128) -> V128 { 131 | self.u32x4_shr::(a) 132 | } 133 | 134 | #[inline(always)] 135 | fn u8xn_avgr(self, a: V128, b: V128) -> V128 { 136 | self.u8x16_avgr(a, b) 137 | } 138 | 139 | #[inline(always)] 140 | fn u8x16xn_swizzle(self, a: V128, b: V128) -> V128 { 141 | self.u8x16_swizzle(a, b) 142 | } 143 | 144 | #[inline(always)] 145 | fn all_zero(self, 
a: V128) -> bool { 146 | self.v128_all_zero(a) 147 | } 148 | 149 | #[inline(always)] 150 | fn mask8xn_all(self, a: V128) -> bool { 151 | mask8x16_all(self, a) 152 | } 153 | 154 | #[inline(always)] 155 | fn mask8xn_any(self, a: V128) -> bool { 156 | mask8x16_any(self, a) 157 | } 158 | 159 | #[inline(always)] 160 | fn u8xn_highbit_all(self, a: V128) -> bool { 161 | u8x16_highbit_all(self, a) 162 | } 163 | 164 | #[inline(always)] 165 | fn u8xn_highbit_any(self, a: V128) -> bool { 166 | u8x16_highbit_any(self, a) 167 | } 168 | 169 | #[inline(always)] 170 | fn u16xn_bswap(self, a: V128) -> V128 { 171 | self.u16x8_bswap(a) 172 | } 173 | 174 | #[inline(always)] 175 | fn u32xn_bswap(self, a: V128) -> V128 { 176 | self.u32x4_bswap(a) 177 | } 178 | 179 | #[inline(always)] 180 | fn u64xn_bswap(self, a: V128) -> V128 { 181 | self.u64x2_bswap(a) 182 | } 183 | } 184 | 185 | unsafe impl Scalable for S 186 | where 187 | S: SIMD256, 188 | { 189 | #[inline(always)] 190 | fn u16xn_shl(self, a: V256) -> V256 { 191 | self.u16x16_shl::(a) 192 | } 193 | 194 | #[inline(always)] 195 | fn u16xn_shr(self, a: V256) -> V256 { 196 | self.u16x16_shr::(a) 197 | } 198 | 199 | #[inline(always)] 200 | fn u32xn_shr(self, a: V256) -> V256 { 201 | self.u32x8_shr::(a) 202 | } 203 | 204 | #[inline(always)] 205 | fn u8xn_avgr(self, a: V256, b: V256) -> V256 { 206 | self.u8x32_avgr(a, b) 207 | } 208 | 209 | #[inline(always)] 210 | fn u8x16xn_swizzle(self, a: V256, b: V256) -> V256 { 211 | self.u8x16x2_swizzle(a, b) 212 | } 213 | 214 | #[inline(always)] 215 | fn all_zero(self, a: V256) -> bool { 216 | self.v256_all_zero(a) 217 | } 218 | 219 | #[inline(always)] 220 | fn mask8xn_all(self, a: V256) -> bool { 221 | mask8x32_all(self, a) 222 | } 223 | 224 | #[inline(always)] 225 | fn mask8xn_any(self, a: V256) -> bool { 226 | mask8x32_any(self, a) 227 | } 228 | 229 | #[inline(always)] 230 | fn u8xn_highbit_all(self, a: V256) -> bool { 231 | u8x32_highbit_all(self, a) 232 | } 233 | 234 | #[inline(always)] 235 | 
fn u8xn_highbit_any(self, a: V256) -> bool { 236 | u8x32_highbit_any(self, a) 237 | } 238 | 239 | #[inline(always)] 240 | fn u16xn_bswap(self, a: V256) -> V256 { 241 | self.u16x16_bswap(a) 242 | } 243 | 244 | #[inline(always)] 245 | fn u32xn_bswap(self, a: V256) -> V256 { 246 | self.u32x8_bswap(a) 247 | } 248 | 249 | #[inline(always)] 250 | fn u64xn_bswap(self, a: V256) -> V256 { 251 | self.u64x4_bswap(a) 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /crates/vsimd/src/simd64.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::missing_transmute_annotations)] 2 | 3 | use crate::isa::InstructionSet; 4 | use crate::vector::V64; 5 | 6 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 7 | use crate::isa::NEON; 8 | 9 | #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))] 10 | use core::mem::transmute as t; 11 | 12 | #[cfg(all(feature = "unstable", target_arch = "arm"))] 13 | use core::arch::arm::*; 14 | 15 | #[cfg(target_arch = "aarch64")] 16 | use core::arch::aarch64::*; 17 | 18 | pub unsafe trait SIMD64: InstructionSet { 19 | #[inline(always)] 20 | #[must_use] 21 | fn u8x8_unzip_even(self, a: V64, b: V64) -> V64 { 22 | #[cfg(all(feature = "unstable", target_arch = "arm"))] 23 | if matches_isa!(Self, NEON) { 24 | return unsafe { t(vuzp_u8(t(a), t(b)).0) }; 25 | } 26 | #[cfg(target_arch = "aarch64")] 27 | if matches_isa!(Self, NEON) { 28 | return unsafe { t(vuzp1_u8(t(a), t(b))) }; 29 | } 30 | { 31 | let _ = (a, b); 32 | unreachable!() 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /crates/vsimd/src/simulation.rs: -------------------------------------------------------------------------------- 1 | use crate::vector::V128; 2 | 3 | use core::mem::transmute as t; 4 | 5 | #[cfg(miri)] 6 | use core::cmp::{max, min}; 7 | 8 | // TODO: waiting 
for MIRI's support 9 | 10 | #[cfg(miri)] 11 | #[inline(always)] 12 | pub fn u8x16_max(a: V128, b: V128) -> V128 { 13 | let (a, b) = (a.as_bytes(), b.as_bytes()); 14 | let mut c = [0; 16]; 15 | for i in 0..16 { 16 | c[i] = max(a[i], b[i]); 17 | } 18 | V128::from_bytes(c) 19 | } 20 | 21 | #[cfg(miri)] 22 | #[inline(always)] 23 | pub fn u8x16_min(a: V128, b: V128) -> V128 { 24 | let (a, b) = (a.as_bytes(), b.as_bytes()); 25 | let mut c = [0; 16]; 26 | for i in 0..16 { 27 | c[i] = min(a[i], b[i]); 28 | } 29 | V128::from_bytes(c) 30 | } 31 | 32 | #[allow(clippy::needless_range_loop)] 33 | #[inline(always)] 34 | #[must_use] 35 | pub fn u8x16_bitmask(a: V128) -> u16 { 36 | // FIXME: is it defined behavior? 37 | // https://github.com/rust-lang/miri/issues/2617 38 | // https://github.com/rust-lang/stdarch/issues/1347 39 | 40 | let a = a.as_bytes(); 41 | let mut m: u16 = 0; 42 | for i in 0..16 { 43 | m |= ((a[i] >> 7) as u16) << i; 44 | } 45 | m 46 | } 47 | 48 | #[allow(clippy::needless_range_loop)] 49 | #[inline(always)] 50 | #[must_use] 51 | pub fn u16x8_shr(a: V128, imm8: u8) -> V128 { 52 | let mut a: [u16; 8] = unsafe { t(a) }; 53 | for i in 0..8 { 54 | a[i] >>= imm8; 55 | } 56 | unsafe { t(a) } 57 | } 58 | 59 | #[allow(clippy::needless_range_loop)] 60 | #[inline(always)] 61 | #[must_use] 62 | pub fn u16x8_shl(a: V128, imm8: u8) -> V128 { 63 | let mut a: [u16; 8] = unsafe { t(a) }; 64 | for i in 0..8 { 65 | a[i] <<= imm8; 66 | } 67 | unsafe { t(a) } 68 | } 69 | 70 | #[inline(always)] 71 | #[must_use] 72 | pub fn i16x8_packus(a: V128, b: V128) -> V128 { 73 | let a: [i16; 8] = unsafe { t(a) }; 74 | let b: [i16; 8] = unsafe { t(b) }; 75 | let sat_u8 = |x: i16| { 76 | if x < 0 { 77 | 0 78 | } else if x > 255 { 79 | 255 80 | } else { 81 | x as u8 82 | } 83 | }; 84 | let mut c: [u8; 16] = [0; 16]; 85 | for i in 0..8 { 86 | c[i] = sat_u8(a[i]); 87 | c[i + 8] = sat_u8(b[i]); 88 | } 89 | V128::from_bytes(c) 90 | } 91 | 
-------------------------------------------------------------------------------- /crates/vsimd/src/table.rs: -------------------------------------------------------------------------------- 1 | use crate::isa::{NEON, SSSE3, WASM128}; 2 | use crate::pod::POD; 3 | use crate::Scalable; 4 | 5 | #[inline(always)] 6 | pub fn u8x16xn_lookup(s: S, lut: V, x: V) -> V 7 | where 8 | S: Scalable, 9 | V: POD, 10 | { 11 | if matches_isa!(S, SSSE3) { 12 | return s.u8x16xn_swizzle(lut, x); 13 | } 14 | 15 | if matches_isa!(S, NEON | WASM128) { 16 | let idx = s.and(x, s.u8xn_splat(0x8f)); 17 | return s.u8x16xn_swizzle(lut, idx); 18 | } 19 | 20 | unreachable!() 21 | } 22 | -------------------------------------------------------------------------------- /crates/vsimd/src/tools.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "alloc")] 2 | item_group! { 3 | use core::mem::MaybeUninit; 4 | use alloc::boxed::Box; 5 | } 6 | 7 | /// Allocates uninit bytes 8 | /// 9 | /// # Safety 10 | /// This function requires: 11 | /// 12 | /// + `len > 0` 13 | /// + `len <= isize::MAX` 14 | /// 15 | #[cfg(feature = "alloc")] 16 | #[inline] 17 | #[must_use] 18 | pub unsafe fn alloc_uninit_bytes(len: usize) -> Box<[MaybeUninit]> { 19 | #[allow(clippy::checked_conversions)] 20 | #[cfg(any(debug_assertions, miri))] 21 | { 22 | assert!(len > 0 && len <= (isize::MAX as usize)); 23 | } 24 | use alloc::alloc::{alloc, handle_alloc_error, Layout}; 25 | let layout = Layout::from_size_align_unchecked(len, 1); 26 | let p = alloc(layout); 27 | if p.is_null() { 28 | handle_alloc_error(layout) 29 | } 30 | let ptr = p.cast(); 31 | Box::from_raw(core::ptr::slice_from_raw_parts_mut(ptr, len)) 32 | } 33 | 34 | #[cfg(feature = "alloc")] 35 | #[inline] 36 | #[must_use] 37 | pub unsafe fn assume_init(b: Box<[MaybeUninit]>) -> Box<[u8]> { 38 | let len = b.len(); 39 | let ptr = Box::into_raw(b).cast::(); 40 | Box::from_raw(core::ptr::slice_from_raw_parts_mut(ptr, len)) 41 
| } 42 | 43 | #[inline(always)] 44 | pub unsafe fn read(base: *const T, offset: usize) -> T { 45 | base.add(offset).read() 46 | } 47 | 48 | #[inline(always)] 49 | pub unsafe fn write(base: *mut T, offset: usize, value: T) { 50 | base.add(offset).write(value); 51 | } 52 | 53 | #[inline(always)] 54 | pub unsafe fn slice<'a, T>(data: *const T, len: usize) -> &'a [T] { 55 | core::slice::from_raw_parts(data, len) 56 | } 57 | 58 | #[inline(always)] 59 | pub unsafe fn slice_mut<'a, T>(data: *mut T, len: usize) -> &'a mut [T] { 60 | core::slice::from_raw_parts_mut(data, len) 61 | } 62 | 63 | #[inline(always)] 64 | pub fn unroll(slice: &[T], chunk_size: usize, mut f: impl FnMut(&T)) { 65 | let mut iter = slice.chunks_exact(chunk_size); 66 | for chunk in &mut iter { 67 | chunk.iter().for_each(&mut f); 68 | } 69 | iter.remainder().iter().for_each(&mut f); 70 | } 71 | 72 | #[inline(always)] 73 | #[must_use] 74 | pub fn is_same_type() -> bool 75 | where 76 | A: 'static, 77 | B: 'static, 78 | { 79 | use core::any::TypeId; 80 | TypeId::of::() == TypeId::of::() 81 | } 82 | 83 | #[inline(always)] 84 | pub fn slice_parts(slice: &[T]) -> (*const T, usize) { 85 | let len = slice.len(); 86 | let ptr = slice.as_ptr(); 87 | (ptr, len) 88 | } 89 | 90 | #[cfg(feature = "alloc")] 91 | #[inline(always)] 92 | #[must_use] 93 | pub unsafe fn boxed_str(b: Box<[u8]>) -> Box { 94 | let ptr = Box::into_raw(b); 95 | Box::from_raw(core::str::from_utf8_unchecked_mut(&mut *ptr)) 96 | } 97 | 98 | #[allow(clippy::ptr_as_ptr)] 99 | #[inline(always)] 100 | #[cfg_attr(debug_assertions, track_caller)] 101 | pub unsafe fn transmute_copy(a: &A) -> B { 102 | debug_assert!(core::mem::size_of::() == core::mem::size_of::()); 103 | *(a as *const A as *const B) 104 | } 105 | 106 | #[cfg(feature = "std")] 107 | #[inline] 108 | pub fn print_fn_table(is_primary: impl Fn(u8) -> bool, f: impl Fn(u8) -> u8) { 109 | print!(" 0 1 2 3 4 5 6 7 8 9 A B C D E F"); 110 | for c in 0..=255u8 { 111 | let val = f(c); 112 | 113 | if 
c & 0x0f == 0 { 114 | println!(); 115 | print!("{:x} | ", c >> 4); 116 | } 117 | 118 | if is_primary(c) { 119 | print!("\x1b[1;31m{val:0>2X}\x1b[0m "); 120 | } else if val >= 0x80 { 121 | print!("\x1b[1;36m{val:0>2X}\x1b[0m "); 122 | } else { 123 | print!("\x1b[1;32m{val:0>2X}\x1b[0m "); 124 | } 125 | } 126 | println!(); 127 | println!(); 128 | } 129 | -------------------------------------------------------------------------------- /crates/vsimd/src/unstable.rs: -------------------------------------------------------------------------------- 1 | use core::simd::*; 2 | 3 | #[inline(always)] 4 | pub fn splat(x: T) -> Simd 5 | where 6 | T: SimdElement, 7 | LaneCount: SupportedLaneCount, 8 | { 9 | Simd::splat(x) 10 | } 11 | -------------------------------------------------------------------------------- /crates/vsimd/src/vector.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] // FIXME: temp workaround 2 | 3 | use core::mem::transmute; 4 | 5 | // vectors should have `repr(simd)` if possible. 6 | 7 | #[cfg(feature = "unstable")] 8 | item_group! { 9 | use core::simd::{u8x16, u8x32, u8x64, u8x8}; 10 | 11 | #[derive(Debug, Clone, Copy)] 12 | #[repr(transparent)] 13 | pub struct V64(u8x8); 14 | 15 | #[derive(Debug, Clone, Copy)] 16 | #[repr(transparent)] 17 | pub struct V128(u8x16); 18 | 19 | #[derive(Debug, Clone, Copy)] 20 | #[repr(transparent)] 21 | pub struct V256(u8x32); 22 | 23 | #[derive(Debug, Clone, Copy)] 24 | #[repr(transparent)] 25 | pub struct V512(u8x64); 26 | } 27 | 28 | #[cfg(all(not(feature = "unstable"), any(target_arch = "x86", target_arch = "x86_64")))] 29 | item_group! 
{ 30 | #[cfg(target_arch = "x86")] 31 | use core::arch::x86::*; 32 | 33 | #[cfg(target_arch = "x86_64")] 34 | use core::arch::x86_64::*; 35 | 36 | #[derive(Debug, Clone, Copy)] 37 | #[repr(transparent)] 38 | pub struct V64(u64); 39 | 40 | #[derive(Debug, Clone, Copy)] 41 | #[repr(transparent)] 42 | pub struct V128(__m128i); 43 | 44 | #[derive(Debug, Clone, Copy)] 45 | #[repr(transparent)] 46 | pub struct V256(__m256i); 47 | 48 | #[derive(Debug, Clone, Copy)] 49 | #[repr(C, align(64))] 50 | pub struct V512(__m256i, __m256i); 51 | } 52 | 53 | #[cfg(all(not(feature = "unstable"), target_arch = "aarch64"))] 54 | item_group! { 55 | use core::arch::aarch64::*; 56 | 57 | #[derive(Debug, Clone, Copy)] 58 | #[repr(transparent)] 59 | pub struct V64(uint8x8_t); 60 | 61 | #[derive(Debug, Clone, Copy)] 62 | #[repr(transparent)] 63 | pub struct V128(uint8x16_t); 64 | 65 | #[derive(Debug, Clone, Copy)] 66 | #[repr(transparent)] 67 | pub struct V256(uint8x16x2_t); 68 | 69 | #[derive(Debug, Clone, Copy)] 70 | #[repr(transparent)] 71 | pub struct V512(uint8x16x4_t); 72 | } 73 | 74 | #[cfg(all(not(feature = "unstable"), target_arch = "wasm32"))] 75 | item_group! { 76 | #[cfg(target_arch = "wasm32")] 77 | use core::arch::wasm32::*; 78 | 79 | #[derive(Debug, Clone, Copy)] 80 | #[repr(transparent)] 81 | pub struct V64(u64); 82 | 83 | #[derive(Debug, Clone, Copy)] 84 | #[repr(transparent)] 85 | pub struct V128(v128); 86 | 87 | #[derive(Debug, Clone, Copy)] 88 | #[repr(C, align(32))] 89 | pub struct V256(v128, v128); 90 | 91 | #[derive(Debug, Clone, Copy)] 92 | #[repr(C, align(64))] 93 | pub struct V512(v128, v128, v128, v128); 94 | } 95 | 96 | #[cfg(all( 97 | not(feature = "unstable"), 98 | not(any( 99 | any(target_arch = "x86", target_arch = "x86_64"), 100 | target_arch = "aarch64", 101 | target_arch = "wasm32" 102 | )) 103 | ))] 104 | item_group! 
{ 105 | #[derive(Debug, Clone, Copy)] 106 | #[repr(C, align(8))] 107 | pub struct V64([u8; 8]); 108 | 109 | #[derive(Debug, Clone, Copy)] 110 | #[repr(C, align(16))] 111 | pub struct V128([u8; 16]); 112 | 113 | #[derive(Debug, Clone, Copy)] 114 | #[repr(C, align(32))] 115 | pub struct V256([u8; 32]); 116 | 117 | #[derive(Debug, Clone, Copy)] 118 | #[repr(C, align(64))] 119 | pub struct V512([u8; 64]); 120 | } 121 | 122 | impl V64 { 123 | #[inline(always)] 124 | #[must_use] 125 | pub const fn from_bytes(bytes: [u8; 8]) -> Self { 126 | unsafe { transmute(bytes) } 127 | } 128 | 129 | #[inline(always)] 130 | #[must_use] 131 | pub const fn as_bytes(&self) -> &[u8; 8] { 132 | unsafe { transmute(self) } 133 | } 134 | 135 | #[inline(always)] 136 | #[must_use] 137 | pub fn to_u64(self) -> u64 { 138 | unsafe { transmute(self) } 139 | } 140 | } 141 | 142 | impl V128 { 143 | #[inline(always)] 144 | #[must_use] 145 | pub const fn from_bytes(bytes: [u8; 16]) -> Self { 146 | unsafe { transmute(bytes) } 147 | } 148 | 149 | #[inline(always)] 150 | #[must_use] 151 | pub const fn as_bytes(&self) -> &[u8; 16] { 152 | unsafe { transmute(self) } 153 | } 154 | 155 | #[inline(always)] 156 | #[must_use] 157 | pub const fn to_v64x2(self) -> (V64, V64) { 158 | let x: [V64; 2] = unsafe { transmute(self) }; 159 | (x[0], x[1]) 160 | } 161 | 162 | #[inline(always)] 163 | #[must_use] 164 | pub const fn x2(self) -> V256 { 165 | unsafe { transmute([self, self]) } 166 | } 167 | } 168 | 169 | impl V256 { 170 | #[inline(always)] 171 | #[must_use] 172 | pub const fn from_bytes(bytes: [u8; 32]) -> Self { 173 | unsafe { transmute(bytes) } 174 | } 175 | 176 | #[inline(always)] 177 | #[must_use] 178 | pub const fn as_bytes(&self) -> &[u8; 32] { 179 | unsafe { transmute(self) } 180 | } 181 | 182 | #[inline(always)] 183 | #[must_use] 184 | pub const fn from_v128x2(x: (V128, V128)) -> Self { 185 | unsafe { transmute([x.0, x.1]) } 186 | } 187 | 188 | #[inline(always)] 189 | #[must_use] 190 | pub const fn 
to_v128x2(self) -> (V128, V128) { 191 | let x: [V128; 2] = unsafe { transmute(self) }; 192 | (x[0], x[1]) 193 | } 194 | 195 | #[inline(always)] 196 | #[must_use] 197 | pub const fn double_bytes(bytes: [u8; 16]) -> Self { 198 | unsafe { transmute([bytes, bytes]) } 199 | } 200 | 201 | #[inline(always)] 202 | #[must_use] 203 | pub const fn x2(self) -> V512 { 204 | unsafe { transmute([self, self]) } 205 | } 206 | } 207 | 208 | impl V512 { 209 | #[inline(always)] 210 | #[must_use] 211 | pub const fn from_bytes(bytes: [u8; 64]) -> Self { 212 | unsafe { transmute(bytes) } 213 | } 214 | 215 | #[inline(always)] 216 | #[must_use] 217 | pub const fn as_bytes(&self) -> &[u8; 64] { 218 | unsafe { transmute(self) } 219 | } 220 | 221 | #[inline(always)] 222 | #[must_use] 223 | pub const fn from_v256x2(x: (V256, V256)) -> Self { 224 | unsafe { transmute([x.0, x.1]) } 225 | } 226 | 227 | #[inline(always)] 228 | #[must_use] 229 | pub const fn to_v256x2(self) -> (V256, V256) { 230 | let x: [V256; 2] = unsafe { transmute(self) }; 231 | (x[0], x[1]) 232 | } 233 | 234 | #[inline(always)] 235 | #[must_use] 236 | pub const fn double_bytes(bytes: [u8; 32]) -> Self { 237 | unsafe { transmute([bytes, bytes]) } 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /crates/vsimd/tests/it.rs: -------------------------------------------------------------------------------- 1 | use vsimd::isa::detect; 2 | use vsimd::isa::{NEON, SSE2, WASM128}; 3 | use vsimd::vector::V128; 4 | use vsimd::SIMD128; 5 | 6 | use const_str::hex; 7 | 8 | #[cfg(not(miri))] 9 | #[cfg_attr(not(target_arch = "wasm32"), test)] 10 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 11 | fn native_sum() { 12 | use vsimd::native::Native; 13 | 14 | let x: u32 = rand::random::() / 2; 15 | let y: u32 = rand::random::() / 2; 16 | 17 | const N: usize = 100; 18 | let a = [x; N]; 19 | let b = [y; N]; 20 | let mut c = [0; N]; 21 | 22 | Native::detect().exec(|| { 
23 | assert!(a.len() == N && b.len() == N && c.len() == N); 24 | for i in 0..N { 25 | c[i] = a[i] + b[i]; 26 | } 27 | }); 28 | 29 | assert!(c.iter().copied().all(|z| z == x + y)); 30 | } 31 | 32 | #[cfg_attr(not(target_arch = "wasm32"), test)] 33 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] 34 | fn u8x16_any_zero() { 35 | fn f(a: [u8; 16]) -> bool { 36 | let a = V128::from_bytes(a); 37 | if let Some(s) = detect::() { 38 | return s.u8x16_any_zero(a); 39 | } 40 | if let Some(s) = detect::() { 41 | return s.u8x16_any_zero(a); 42 | } 43 | if let Some(s) = detect::() { 44 | return s.u8x16_any_zero(a); 45 | } 46 | a.as_bytes().iter().any(|&x| x == 0) 47 | } 48 | 49 | fn test(a: [u8; 16], expected: bool) { 50 | assert_eq!(f(a), expected); 51 | } 52 | 53 | test([0x00; 16], true); 54 | test([0xff; 16], false); 55 | test(hex!("00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F"), true); 56 | test(hex!("10 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F"), false); 57 | } 58 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | dev: fmt clippy 2 | /usr/bin/time -v -o target/time-test-all.txt just test-all 3 | cat target/time-test-all.txt 4 | 5 | clippy: fmt 6 | cargo clippy --target x86_64-unknown-linux-gnu 7 | cargo clippy --target armv7-unknown-linux-gnueabihf 8 | cargo clippy --target aarch64-unknown-linux-gnu 9 | cargo clippy --target wasm32-unknown-unknown 10 | 11 | doc pkg="vsimd": 12 | #!/bin/bash -e 13 | export RUSTDOCFLAGS="--cfg docsrs" 14 | cargo doc --no-deps --all-features 15 | cargo doc --open -p {{pkg}} 16 | 17 | js-bench: 18 | #!/bin/bash -e 19 | cd {{justfile_directory()}} 20 | 21 | F=./scripts/base64.js 22 | echo "running $F" 23 | echo 24 | 25 | echo "node" `node -v` 26 | node ./scripts/base64.js 27 | echo 28 | 29 | deno -V 30 | deno run --allow-hrtime ./scripts/base64.js 31 | echo 32 | 33 | echo "bun" `bun 
# Format, lint, then run the full test matrix while recording resource usage.
dev: fmt clippy
    /usr/bin/time -v -o target/time-test-all.txt just test-all
    cat target/time-test-all.txt

# Run clippy once per supported target family.
clippy: fmt
    cargo clippy --target x86_64-unknown-linux-gnu
    cargo clippy --target armv7-unknown-linux-gnueabihf
    cargo clippy --target aarch64-unknown-linux-gnu
    cargo clippy --target wasm32-unknown-unknown

# Build the workspace docs with the `docsrs` cfg and open the page for `pkg`.
doc pkg="vsimd":
    #!/bin/bash -e
    export RUSTDOCFLAGS="--cfg docsrs"
    cargo doc --no-deps --all-features
    cargo doc --open -p {{pkg}}

# Run the JavaScript base64 benchmark under node, deno and bun.
js-bench:
    #!/bin/bash -e
    cd "{{justfile_directory()}}"

    # Single source of truth for the script path; every runtime below uses it.
    F=./scripts/base64.js
    echo "running $F"
    echo

    echo "node" "$(node -v)"
    node "$F"
    echo

    deno -V
    deno run --allow-hrtime "$F"
    echo

    echo "bun" "$(bun --version)"
    bun "$F"
    echo

# Set the same version on every crate of the workspace.
sync-version:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    VERSION='0.9.0-dev'
    for pkg in uuid-simd hex-simd base64-simd unicode-simd base32-simd vsimd; do
        cargo set-version -p "$pkg" "$VERSION"
    done

# Format the workspace.
fmt:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    cargo fmt
    # cargo sort -w > /dev/null

# Run the generated test matrix for one crate on every mode.
test crate:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/testgen.py --crate {{crate}} | bash -ex

# Run the generated test matrix for one crate, x86 targets only.
x86-test crate:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/testgen.py --crate {{crate}} --mode x86 | bash -ex

# Run the generated test matrix for one crate, ARM targets only.
arm-test crate:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/testgen.py --crate {{crate}} --mode arm | bash -ex

# Run the generated test matrix for one crate, wasm target only.
wasm-test crate:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/testgen.py --crate {{crate}} --mode wasm | bash -ex

# Run one crate's tests under miri.
miri-test crate:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    cargo miri test -p {{crate}}

# Run the generated test matrix for one crate, MIPS target only.
mips-test crate:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/testgen.py --crate {{crate}} --mode mips | bash -ex

# Run the whole generated test matrix, then miri on the workspace.
test-all:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/testgen.py | bash -ex
    cargo miri test --workspace --exclude simd-benches

# Dump assembly for every multiversioned symbol and summarise it with tokei.
dump-asm:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/dump-symbols.py --mode asm | bash -ex
    COMMIT_HASH=$(git rev-parse --short HEAD)
    cd target/symbols
    F="$COMMIT_HASH-asm.txt"
    tokei -f -s files -t assembly -c 150 > "$F"
    tokei -f -s lines -t assembly -c 150
    echo "target/symbols/$F"

# Dump LLVM IR for every multiversioned symbol and summarise it with tokei.
dump-llvm-ir:
    #!/bin/bash -ex
    cd "{{justfile_directory()}}"
    ./scripts/dump-symbols.py --mode llvm-ir | bash -ex
    COMMIT_HASH=$(git rev-parse --short HEAD)
    cd target/symbols
    F="$COMMIT_HASH-llvm-ir.txt"
    tokei -f -s files -t LLVM -c 150 > "$F"
    tokei -f -s lines -t LLVM -c 150
    echo "target/symbols/$F"
// Micro-benchmark for the runtime's built-in base64 functions (`btoa` / `atob`).
// Works under Node, Deno and Bun; all timestamps are in nanoseconds.

/** Picks a nanosecond clock for whichever runtime is executing the script. */
function selectClock() {
    if ("Deno" in globalThis) {
        // performance.now() reports milliseconds; scale to nanoseconds.
        return () => (performance.now() * 1e6);
    }
    if ("Bun" in globalThis) {
        return Bun.nanoseconds;
    }
    return () => Number(process.hrtime.bigint());
}

const now = selectClock();

/**
 * Calls `f(input)` `iter` times and logs one formatted result row.
 *
 * @param {string} name   label printed in the first column
 * @param {number} iter   number of calls to time
 * @param {string} input  argument passed to every call
 * @param {(s: string) => unknown} f  function under test
 */
function bench(name, iter, input, f) {
    const startNs = now();
    for (let round = 0; round < iter; ++round) {
        f(input);
    }
    const elapsedNs = now() - startNs;

    const dt = elapsedNs / 1e9;     // total seconds
    const freq = iter / dt;         // calls per second
    const time = elapsedNs / iter;  // nanoseconds per call

    const columns = [
        `${name.padEnd(16)}|`,
        `len = ${input.length.toString().padStart(8)} |`,
        `iter = ${iter.toString().padStart(7)} |`,
        `dt = ${dt.toFixed(3).toString().padStart(8)}s |`,
        `freq = ${freq.toFixed(3).toString().padStart(12)}/s |`,
        `time = ${time.toFixed(0).toString().padStart(10)}ns/op`
    ];

    console.log(columns.join(" "));
}

// [input, iteration count] pairs; longer inputs get fewer iterations.
const TEST_CASES = [
    ["helloworld".repeat(1e5), 100],
    ["helloworld".repeat(1e4), 1000],
    ["helloworld".repeat(1e2), 1e5],
    ["helloworld".repeat(10), 1e6],
    ["abcdefghijklmnopqrstuvwx", 1e5],
    ["123", 1e6],
].map(([data, iter]) => ({ data, iter }));

const FUNCTIONS = [
    { name: "encode+decode", call: (input) => atob(btoa(input)) },
    { name: "encode", call: (input) => btoa(input) },
    { name: "decode", call: (input) => atob(input) },
];

TEST_CASES.forEach(({ data, iter }) => {
    FUNCTIONS.forEach(({ name, call }) => {
        bench(name, iter, data, call);
    });
    console.log(" ");
});
#!/usr/bin/python3
"""Emit a bash script that dumps assembly or LLVM IR for each multiversioned symbol.

The script is written to stdout and is meant to be piped into ``bash -ex``.
"""
import argparse

# crate -> function -> instruction-set variants that exist for that function.
SYMBOLS = {
    "hex-simd": {
        "check": ["avx2", "sse2", "neon", "simd128"],
        "encode": ["avx2", "ssse3", "sse2", "neon", "simd128"],
        "decode": ["avx2", "ssse3", "sse2", "neon", "simd128"],
    },
    "base64-simd": {
        "encode": ["avx2", "ssse3", "neon", "simd128"],
        "decode": ["avx2", "ssse3", "neon", "simd128"],
        "check": ["avx2", "ssse3", "neon", "simd128"],
        "find_non_ascii_whitespace": ["avx2", "sse2", "neon", "simd128"],
    },
    "unicode-simd": {
        "is_ascii": ["avx2", "sse2", "neon", "simd128"],
        "is_utf32le": ["avx2", "sse4.1", "neon", "simd128"],
        "utf32_swap_endianness": ["avx2", "ssse3", "neon", "simd128"],
        "utf16_swap_endianness": ["avx2", "ssse3", "neon", "simd128"],
    },
    "uuid-simd": {
        "parse_simple": ["avx2", "ssse3", "sse2", "neon", "simd128"],
        "parse_hyphenated": ["avx2", "sse4.1", "neon", "simd128"],
        "format_simple": ["avx2", "ssse3", "sse2", "neon", "simd128"],
        "format_hyphenated": ["avx2", "sse4.1", "neon", "simd128"],
    },
    "base32-simd": {
        "check": ["avx2", "ssse3", "neon", "simd128"],
        "encode": ["avx2", "sse4.1", "neon", "simd128"],
        "decode": ["avx2", "sse4.1", "neon", "simd128"],
    },
}

# instruction-set variant -> target triples it is dumped for.
TARGETS = {
    "avx2": ["x86_64-unknown-linux-gnu", "i686-unknown-linux-gnu"],
    "sse4.1": ["x86_64-unknown-linux-gnu", "i686-unknown-linux-gnu"],
    "ssse3": ["x86_64-unknown-linux-gnu", "i686-unknown-linux-gnu"],
    "sse2": ["x86_64-unknown-linux-gnu", "i686-unknown-linux-gnu"],
    "neon": ["aarch64-unknown-linux-gnu", "armv7-unknown-linux-gnueabihf"],
    "simd128": ["wasm32-unknown-unknown"],
}

MODES = ["asm", "llvm-ir"]
WASM_TARGET = "wasm32-unknown-unknown"


def space_join(l):
    """Join the strings in ``l`` with single spaces."""
    return " ".join(l)


def generate_script(mode):
    """Return the lines of the dump script for ``mode`` ("asm" or "llvm-ir").

    The first line is the bash shebang. After it, every (crate, function,
    variant, target) combination contributes a ``mkdir -p`` line followed by
    one ``cargo asm`` command line.

    Raises:
        ValueError: if ``mode`` is not one of ``MODES``.
    """
    if mode not in MODES:
        raise ValueError(f"unknown mode: {mode!r}")

    lines = ["#!/bin/bash -ex"]

    for pkg, symbols in SYMBOLS.items():
        crate = pkg.replace("-", "_")
        for name, features in symbols.items():
            for feature in features:
                # Symbol names carry the variant without dots ("sse4.1" -> "sse41").
                symbol = f"{crate}::multiversion::{name}::{feature.replace('.', '')}"

                for target in TARGETS[feature]:
                    lines.append(f"mkdir -p target/symbols/{target}")

                    is_wasm = target == WASM_TARGET
                    rustflags = ["--cfg vsimd_dump_symbols"]
                    if is_wasm:
                        rustflags.append("-C target-feature=+simd128")

                    if mode == "asm":
                        extra_flags = "--wasm" if is_wasm else ""
                        lines.append(
                            f'RUSTFLAGS="{space_join(rustflags)}" '
                            f"cargo asm -p {pkg} --lib --simplify --features unstable --target {target} {extra_flags} -- {symbol} "
                            # `awk NF` drops blank lines; the trailing space keeps
                            # the redirection from being glued onto `NF`.
                            f"| awk NF "
                            f"> target/symbols/{target}/{symbol}.asm"
                        )
                    else:  # "llvm-ir"
                        rustflags.append("-Cdebuginfo=0")
                        lines.append(
                            f'RUSTFLAGS="{space_join(rustflags)}" '
                            f"cargo asm -p {pkg} --lib --llvm --features unstable --target {target} -- {symbol} "
                            f"> target/symbols/{target}/{symbol}.ll"
                        )

    return lines


if __name__ == "__main__":
    opt = argparse.ArgumentParser()
    opt.add_argument("--mode", type=str, choices=MODES, required=True)
    args = opt.parse_args()

    for line in generate_script(args.mode):
        print(line)
"base64-simd", 10 | "uuid-simd", 11 | "base32-simd", 12 | "unicode-simd", 13 | ] 14 | 15 | FEATURES = [ 16 | ["", "unstable"], 17 | ["", "detect"], 18 | ["", "alloc", "std"], 19 | ] 20 | 21 | RUSTFLAGS = { 22 | "x86": [ 23 | "-Zsanitizer=address -C target-feature=+avx2", 24 | "-Zsanitizer=address -C target-feature=+sse4.1", 25 | "-Zsanitizer=address", 26 | "-C target-feature=+avx2", 27 | "-C target-feature=+sse4.1", 28 | "", 29 | ], 30 | "arm": [ 31 | "-C target-feature=+neon", 32 | "", 33 | ], 34 | "wasm": [ 35 | "-C target-feature=+simd128", 36 | "", 37 | ], 38 | "mips": [""], 39 | } 40 | 41 | TARGETS = { 42 | "x86": [ 43 | "x86_64-unknown-linux-gnu", 44 | "i686-unknown-linux-gnu", 45 | ], 46 | "arm": [ 47 | "aarch64-unknown-linux-gnu", 48 | "armv7-unknown-linux-gnueabihf", 49 | ], 50 | "wasm": ["wasm32-unknown-unknown"], 51 | "mips": ["mips-unknown-linux-gnu"], 52 | } 53 | 54 | TARGET_REMAP = { 55 | "x86_64-unknown-linux-gnu": "x86", 56 | "i686-unknown-linux-gnu": "x86", 57 | "aarch64-unknown-linux-gnu": "arm", 58 | "armv7-unknown-linux-gnueabihf": "arm", 59 | "wasm32-unknown-unknown": "wasm", 60 | "mips-unknown-linux-gnu": "mips", 61 | } 62 | 63 | TEST_MODES = ["x86", "arm", "wasm", "mips"] 64 | 65 | 66 | def gen(mode: str, target: str, rustflag: str, host: str): 67 | for feat in product(*FEATURES): 68 | feat = ",".join(s for s in feat if len(s) > 0) 69 | if len(feat) > 0: 70 | feat = "--features " + feat 71 | 72 | if mode == "wasm": 73 | print(f'RUSTFLAGS="{rustflag}" wasm-pack test --node -- --no-default-features {feat} $@') 74 | continue 75 | 76 | prog = "cross" if target != host else "cargo" 77 | skip_others = "--lib --tests" if mode == "x86" else "" 78 | print(f'RUSTFLAGS="{rustflag}" {prog} test --target {target} {skip_others} --no-default-features {feat} $@') 79 | 80 | 81 | def get_rustc_host(): 82 | v = subprocess.check_output(["rustc", "-V", "-v"]).decode() 83 | for line in v.splitlines(): 84 | if line.startswith("host:"): 85 | return line.split()[1] 
86 | raise Exception("Failed to get host") 87 | 88 | 89 | if __name__ == "__main__": 90 | opt = argparse.ArgumentParser() 91 | opt.add_argument("--mode", type=str, choices=TEST_MODES) 92 | opt.add_argument("--crate", type=str, choices=CRATES) 93 | opt.add_argument("--target", type=str) 94 | args = opt.parse_args() 95 | 96 | host = get_rustc_host() 97 | 98 | modes = TEST_MODES 99 | targets = TARGETS 100 | if args.mode is not None: 101 | modes = [args.mode] 102 | if args.target is not None: 103 | assert args.target in TARGETS[args.mode] 104 | targets = {args.mode: [args.target]} 105 | else: 106 | if args.target is not None: 107 | modes = [TARGET_REMAP[args.target]] 108 | targets = {modes[0]: [args.target]} 109 | 110 | crates = CRATES 111 | if args.crate is not None: 112 | crates = [args.crate] 113 | 114 | print("#!/bin/bash -ex") 115 | 116 | for mode in modes: 117 | for target in targets[mode]: 118 | for rustflag in RUSTFLAGS[mode]: 119 | if target == "i686-unknown-linux-gnu" and "sanitizer" in rustflag: 120 | continue 121 | 122 | for crate in crates: 123 | print(f"pushd crates/{crate}") 124 | gen(mode, target, rustflag, host) 125 | print("popd") 126 | --------------------------------------------------------------------------------