├── .gitattributes ├── .github ├── FUNDING.yml └── workflows │ └── rust.yml ├── .gitignore ├── .reuse └── dep5 ├── .editorconfig ├── LICENSE-MIT ├── src ├── leading_zeros.rs ├── binary16 │ ├── arch │ │ ├── loongarch64.rs │ │ ├── x86.rs │ │ └── aarch64.rs │ └── arch.rs ├── rand_distr.rs ├── bfloat │ └── convert.rs ├── vec.rs ├── lib.rs └── slice.rs ├── Cargo.toml ├── .circleci └── config.yml ├── Makefile.toml ├── README.md ├── benches └── convert.rs ├── LICENSE-APACHE ├── CHANGELOG.md └── Cargo.lock /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | *.rs whitespace=tab-in-indent,trailing-space,tabwidth=4 -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: VoidStarKat 4 | patreon: StarKatradora 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Rust 2 | target/ 3 | **/*.rs.bak 4 | 5 | # IntelliJ 6 | .idea/ 7 | *.iml 8 | 9 | # VS Code 10 | .vscode/ 11 | -------------------------------------------------------------------------------- /.reuse/dep5: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Copyright: Kathryn Long 3 | License: MIT OR Apache-2.0 4 | 5 | Files: * 6 | Copyright: 2021 Kathryn Long 7 | License: MIT OR Apache-2.0 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | 
indent_size = 4 9 | end_of_line = lf 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | 14 | [*.md] 15 | # double whitespace at end of line 16 | # denotes a line break in Markdown 17 | trim_trailing_whitespace = false 18 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /src/leading_zeros.rs: -------------------------------------------------------------------------------- 1 | // https://doc.rust-lang.org/std/primitive.u16.html#method.leading_zeros 2 | 3 | #[cfg(not(any(all( 4 | target_arch = "spirv", 5 | not(all( 6 | target_feature = "IntegerFunctions2INTEL", 7 | target_feature = "SPV_INTEL_shader_integer_functions2" 8 | )) 9 | ))))] 10 | #[inline] 11 | pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { 12 | x.leading_zeros() 13 | } 14 | 15 | #[cfg(all( 16 | target_arch = "spirv", 17 | not(all( 18 | target_feature = "IntegerFunctions2INTEL", 19 | target_feature = "SPV_INTEL_shader_integer_functions2" 20 | )) 21 | ))] 22 | #[inline] 23 | pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { 24 | leading_zeros_u16_fallback(x) 25 | } 26 | 27 | #[cfg(any( 28 | test, 29 | all( 30 | target_arch = "spirv", 31 | not(all( 32 | target_feature = "IntegerFunctions2INTEL", 33 | target_feature = "SPV_INTEL_shader_integer_functions2" 34 | )) 35 | ) 36 | ))] 37 | #[inline] 38 | const fn leading_zeros_u16_fallback(mut x: u16) -> u32 { 39 | use crunchy::unroll; 40 | let mut c = 0; 41 | let msb = 1 << 15; 42 | unroll! { for i in 0 .. 
16 { 43 | if x & msb == 0 { 44 | c += 1; 45 | } else { 46 | return c; 47 | } 48 | #[allow(unused_assignments)] 49 | if i < 15 { 50 | x <<= 1; 51 | } 52 | }} 53 | c 54 | } 55 | 56 | #[cfg(test)] 57 | mod test { 58 | 59 | #[test] 60 | fn leading_zeros_u16_fallback() { 61 | for x in [44, 97, 304, 1179, 23571] { 62 | assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros()); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "half" 3 | # Remember to keep in sync with html_root_url crate attribute 4 | version = "2.7.1" 5 | authors = ["Kathryn Long "] 6 | description = "Half-precision floating point f16 and bf16 types for Rust implementing the IEEE 754-2008 standard binary16 and bfloat16 types." 7 | repository = "https://github.com/VoidStarKat/half-rs" 8 | readme = "README.md" 9 | keywords = ["f16", "bfloat16", "no_std"] 10 | license = "MIT OR Apache-2.0" 11 | categories = ["no-std", "data-structures", "encoding"] 12 | edition = "2021" 13 | rust-version = "1.81" 14 | exclude = [".git*", ".editorconfig", ".circleci"] 15 | 16 | [features] 17 | default = ["std"] 18 | std = ["alloc"] 19 | use-intrinsics = [] # Deprecated 20 | alloc = [] 21 | rand_distr = ["dep:rand", "dep:rand_distr"] 22 | zerocopy = [] # Deprecated 23 | nightly = [] 24 | 25 | [dependencies] 26 | cfg-if = "1.0.0" 27 | bytemuck = { version = "1.4.1", default-features = false, features = [ 28 | "derive", 29 | ], optional = true } 30 | serde = { version = "1.0", default-features = false, features = [ 31 | "derive", 32 | ], optional = true } 33 | num-traits = { version = "0.2.16", default-features = false, features = [ 34 | "libm", 35 | ], optional = true } 36 | zerocopy = { version = "0.8.26", default-features = false, features = [ 37 | "derive", 38 | "simd", 39 | ] } 40 | rand = { version = "0.9.0", default-features = false, 
features = [ 41 | "thread_rng", 42 | ], optional = true } 43 | rand_distr = { version = "0.5.0", default-features = false, optional = true } 44 | rkyv = { version = "0.8.0", optional = true } 45 | arbitrary = { version = "1.4.1", features = ["derive"], optional = true } 46 | 47 | [target.'cfg(target_arch = "spirv")'.dependencies] 48 | crunchy = "0.2.2" 49 | 50 | [dev-dependencies] 51 | criterion = "0.5" 52 | quickcheck = "1.0" 53 | quickcheck_macros = "1.0" 54 | rand = "0.9.0" 55 | crunchy = "0.2.2" 56 | 57 | [[bench]] 58 | name = "convert" 59 | harness = false 60 | 61 | [package.metadata.docs.rs] 62 | rustdoc-args = ["--cfg", "docsrs"] 63 | all-features = true 64 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Use the latest 2.1 version of CircleCI pipeline process engine. 2 | # See: https://circleci.com/docs/2.0/configuration-reference 3 | version: 2.1 4 | 5 | # Define a job to be invoked later in a workflow. 
6 | # See: https://circleci.com/docs/2.0/configuration-reference/#jobs 7 | jobs: 8 | rust: 9 | machine: 10 | image: ubuntu-2004:current 11 | resource_class: arm.medium 12 | parameters: 13 | toolchain: 14 | type: string 15 | features: 16 | type: string 17 | environment: 18 | CARGO_INCREMENTAL: 0 19 | CARGO_NET_RETRY: 10 20 | RUSTUP_MAX_RETRIES: 10 21 | CARGO_TERM_COLOR: always 22 | RUST_BACKTRACE: full 23 | RUSTFLAGS: "-D warnings" 24 | CARGO_PROFILE_DEV_DEBUG: 0 25 | CARGO_PROFILE_TEST_DEBUG: 0 26 | CARGO_PROFILE_BENCH_DEBUG: 0 27 | CI: 1 28 | # Add steps to the job 29 | # See: https://circleci.com/docs/2.0/configuration-reference/#steps 30 | steps: 31 | - checkout 32 | - run: 33 | name: Install rust 34 | command: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -v -y --profile minimal --default-toolchain << parameters.toolchain >> 35 | - run: source "$HOME/.cargo/env" 36 | - run: 37 | name: cargo test 38 | command: cargo -v test << parameters.features >> -- --nocapture 39 | - run: 40 | name: cargo test no_std 41 | command: cargo -v test --no-default-features -- --nocapture 42 | - run: 43 | name: cargo test no_std+alloc 44 | command: cargo -v test --no-default-features --features alloc -- --nocapture 45 | 46 | # Invoke jobs via workflows 47 | # See: https://circleci.com/docs/2.0/configuration-reference/#workflows 48 | workflows: 49 | rust-workflow: 50 | jobs: 51 | - rust: 52 | name: Rust AArch64 (stable) 53 | toolchain: stable 54 | features: --all-features 55 | - rust: 56 | name: Rust AArch64 (1.81.0) 57 | toolchain: 1.81.0 58 | features: --all-features 59 | - rust: 60 | name: Rust AArch64 (nightly) 61 | toolchain: nightly 62 | features: --all-features 63 | -------------------------------------------------------------------------------- /src/binary16/arch/loongarch64.rs: -------------------------------------------------------------------------------- 1 | use core::{mem::MaybeUninit, ptr}; 2 | 3 | #[cfg(target_arch = "loongarch64")] 4 | use 
core::arch::loongarch64::{lsx_vfcvt_h_s, lsx_vfcvtl_s_h, m128, m128i}; 5 | 6 | /////////////// loongarch64 lsx/lasx //////////////// 7 | 8 | #[target_feature(enable = "lsx")] 9 | #[inline] 10 | pub(super) unsafe fn f16_to_f32_lsx(i: u16) -> f32 { 11 | let mut vec = MaybeUninit::::zeroed(); 12 | vec.as_mut_ptr().cast::().write(i); 13 | let retval = lsx_vfcvtl_s_h(vec.assume_init()); 14 | *(&retval as *const m128).cast() 15 | } 16 | 17 | #[target_feature(enable = "lsx")] 18 | #[inline] 19 | pub(super) unsafe fn f32_to_f16_lsx(f: f32) -> u16 { 20 | let mut vec = MaybeUninit::::zeroed(); 21 | vec.as_mut_ptr().cast::().write(f); 22 | let retval = lsx_vfcvt_h_s(vec.assume_init(), vec.assume_init()); 23 | *(&retval as *const m128i).cast() 24 | } 25 | 26 | #[target_feature(enable = "lsx")] 27 | #[inline] 28 | pub(super) unsafe fn f16x4_to_f32x4_lsx(v: &[u16; 4]) -> [f32; 4] { 29 | let mut vec = MaybeUninit::::zeroed(); 30 | ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); 31 | let retval = lsx_vfcvtl_s_h(vec.assume_init()); 32 | *(&retval as *const m128).cast() 33 | } 34 | 35 | #[target_feature(enable = "lsx")] 36 | #[inline] 37 | pub(super) unsafe fn f32x4_to_f16x4_lsx(v: &[f32; 4]) -> [u16; 4] { 38 | let mut vec = MaybeUninit::::uninit(); 39 | ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); 40 | let retval = lsx_vfcvt_h_s(vec.assume_init(), vec.assume_init()); 41 | *(&retval as *const m128i).cast() 42 | } 43 | 44 | #[target_feature(enable = "lsx")] 45 | #[inline] 46 | pub(super) unsafe fn f16x4_to_f64x4_lsx(v: &[u16; 4]) -> [f64; 4] { 47 | let array = f16x4_to_f32x4_lsx(v); 48 | // Let compiler vectorize this regular cast for now. 49 | [ 50 | array[0] as f64, 51 | array[1] as f64, 52 | array[2] as f64, 53 | array[3] as f64, 54 | ] 55 | } 56 | 57 | #[target_feature(enable = "lsx")] 58 | #[inline] 59 | pub(super) unsafe fn f64x4_to_f16x4_lsx(v: &[f64; 4]) -> [u16; 4] { 60 | // Let compiler vectorize this regular cast for now. 
61 | let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32]; 62 | f32x4_to_f16x4_lsx(&v) 63 | } 64 | -------------------------------------------------------------------------------- /Makefile.toml: -------------------------------------------------------------------------------- 1 | [config] 2 | min_version = "0.35.0" 3 | 4 | [env] 5 | CI_CARGO_TEST_FLAGS = { value = "--locked -- --nocapture", condition = { env_true = [ 6 | "CARGO_MAKE_CI", 7 | ] } } 8 | CARGO_MAKE_CARGO_ALL_FEATURES = { source = "${CARGO_MAKE_RUST_CHANNEL}", default_value = "--all-features", mapping = { "nightly" = "--all-features" } } 9 | CARGO_MAKE_CLIPPY_ARGS = { value = "${CARGO_MAKE_CLIPPY_ALL_FEATURES_WARN}", condition = { env_true = [ 10 | "CARGO_MAKE_CI", 11 | ] } } 12 | 13 | # Override for CI flag additions 14 | [tasks.test] 15 | args = [ 16 | "test", 17 | "@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)", 18 | "@@split(CARGO_MAKE_CARGO_BUILD_TEST_FLAGS, )", 19 | "@@split(CI_CARGO_TEST_FLAGS, )", 20 | ] 21 | 22 | # Let clippy run on non-nightly CI 23 | [tasks.clippy-ci-flow] 24 | condition = { env_set = ["CARGO_MAKE_RUN_CLIPPY"] } 25 | 26 | # Let format check run on non-nightly CI 27 | [tasks.check-format-ci-flow] 28 | condition = { env_set = ["CARGO_MAKE_RUN_CHECK_FORMAT"] } 29 | 30 | [tasks.check-docs] 31 | description = "Checks docs for errors." 
32 | category = "Documentation" 33 | install_crate = false 34 | env = { RUSTDOCFLAGS = "-D warnings" } 35 | command = "cargo" 36 | args = [ 37 | "doc", 38 | "--workspace", 39 | "--no-deps", 40 | "@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)", 41 | "${CARGO_MAKE_CARGO_ALL_FEATURES}", 42 | ] 43 | 44 | # Build & Test with no features enabled 45 | [tasks.post-ci-flow] 46 | run_task = [ 47 | { name = [ 48 | "check-docs", 49 | "build-no-std", 50 | "test-no-std", 51 | "build-no-std-alloc", 52 | "test-no-std-alloc", 53 | ] }, 54 | ] 55 | 56 | [tasks.build-no-std] 57 | description = "Build without any features" 58 | category = "Build" 59 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" } 60 | run_task = "build" 61 | 62 | [tasks.test-no-std] 63 | description = "Run tests without any features" 64 | category = "Test" 65 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" } 66 | run_task = "test" 67 | 68 | [tasks.build-no-std-alloc] 69 | description = "Build without any features except alloc" 70 | category = "Build" 71 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features alloc" } 72 | run_task = "build" 73 | 74 | [tasks.test-no-std-alloc] 75 | description = "Run tests without any features except alloc" 76 | category = "Test" 77 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features alloc" } 78 | run_task = "test" 79 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | CARGO_INCREMENTAL: 0 7 | CARGO_NET_RETRY: 10 8 | RUSTUP_MAX_RETRIES: 10 9 | CARGO_TERM_COLOR: always 10 | RUST_BACKTRACE: full 11 | RUSTFLAGS: "-D warnings" 12 | # Disable debuginfo for faster compile 13 | CARGO_PROFILE_DEV_DEBUG: 0 14 | CARGO_PROFILE_TEST_DEBUG: 0 15 | CARGO_PROFILE_BENCH_DEBUG: 0 16 | CI: 1 17 | 
CARGO_MAKE_CI: 1 18 | CARGO_MAKE_RUN_CHECK_FORMAT: 1 19 | CARGO_MAKE_RUN_CLIPPY: 1 20 | CARGO_MAKE_BUILD_BENCH: 1 21 | 22 | jobs: 23 | rust: 24 | name: Rust 25 | runs-on: ${{ matrix.os }} 26 | continue-on-error: ${{ matrix.rust == 'nightly' }} 27 | strategy: 28 | matrix: 29 | os: [ubuntu-latest, windows-latest] 30 | target: [x86_64, i686, aarch64] 31 | rust: 32 | - stable 33 | - beta 34 | - nightly 35 | - 1.81.0 36 | 37 | steps: 38 | - name: Checkout 39 | uses: actions/checkout@v4 40 | 41 | - name: Linux Install Rust toolchain 42 | if: ${{ matrix.os == 'ubuntu-latest' }} 43 | uses: dtolnay/rust-toolchain@master 44 | with: 45 | toolchain: ${{ matrix.rust }} 46 | targets: ${{ matrix.target }}-unknown-linux-gnu 47 | components: clippy,rustfmt 48 | 49 | - name: Windows Install Rust toolchain 50 | if: ${{ matrix.os == 'windows-latest' }} 51 | uses: dtolnay/rust-toolchain@master 52 | with: 53 | toolchain: ${{ matrix.rust }} 54 | targets: ${{ matrix.target }}-pc-windows-msvc 55 | components: clippy,rustfmt 56 | 57 | - name: Cache Dependencies 58 | uses: Swatinem/rust-cache@v2 59 | 60 | - name: Install Cargo Make 61 | if: ${{ matrix.target == 'x86_64' }} 62 | uses: davidB/rust-cargo-make@v1 63 | 64 | - run: cargo make ci-flow 65 | if: ${{ matrix.target == 'x86_64' }} 66 | 67 | - name: linux cargo-make build-only 68 | if: ${{ matrix.target != 'x86_64' && matrix.os == 'ubuntu-latest' }} 69 | run: cargo build --all-features --target ${{ matrix.target }}-unknown-linux-gnu 70 | 71 | - name: windows cargo-make build-only 72 | if: ${{ matrix.target != 'x86_64' && matrix.os == 'windows-latest' }} 73 | run: cargo build --all-features --target ${{ matrix.target }}-pc-windows-msvc 74 | 75 | miri: 76 | name: Miri 77 | runs-on: ubuntu-latest 78 | steps: 79 | - name: Checkout 80 | uses: actions/checkout@v4 81 | - name: Install Rust toolchain 82 | uses: dtolnay/rust-toolchain@master 83 | with: 84 | toolchain: nightly 85 | components: miri 86 | - run: cargo miri test 87 | env: 88 | 
MIRIFLAGS: -Zmiri-strict-provenance 89 | -------------------------------------------------------------------------------- /src/binary16/arch/x86.rs: -------------------------------------------------------------------------------- 1 | use core::{mem::MaybeUninit, ptr}; 2 | use zerocopy::transmute; 3 | 4 | #[cfg(target_arch = "x86")] 5 | use core::arch::x86::{ 6 | __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, 7 | _MM_FROUND_TO_NEAREST_INT, 8 | }; 9 | #[cfg(target_arch = "x86_64")] 10 | use core::arch::x86_64::{ 11 | __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph, 12 | _MM_FROUND_TO_NEAREST_INT, 13 | }; 14 | 15 | #[cfg(target_arch = "x86")] 16 | use core::arch::x86::_mm_cvtps_ph; 17 | 18 | use super::convert_chunked_slice_8; 19 | 20 | /////////////// x86/x86_64 f16c //////////////// 21 | 22 | #[target_feature(enable = "f16c")] 23 | #[inline] 24 | pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 { 25 | let vec: __m128i = transmute!([i, 0, 0, 0, 0, 0, 0, 0]); 26 | let retval: [f32; 4] = transmute!(_mm_cvtph_ps(vec)); 27 | retval[0] 28 | } 29 | 30 | #[target_feature(enable = "f16c")] 31 | #[inline] 32 | pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 { 33 | let vec: __m128 = transmute!([f, 0.0, 0.0, 0.0]); 34 | let retval = _mm_cvtps_ph(vec, _MM_FROUND_TO_NEAREST_INT); 35 | let retval: [u16; 8] = transmute!(retval); 36 | retval[0] 37 | } 38 | 39 | #[target_feature(enable = "f16c")] 40 | #[inline] 41 | pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] { 42 | let vec: __m128i = transmute!([*v, [0, 0, 0, 0]]); 43 | transmute!(_mm_cvtph_ps(vec)) 44 | } 45 | 46 | #[target_feature(enable = "f16c")] 47 | #[inline] 48 | pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] { 49 | let vec: __m128 = zerocopy::transmute!(*v); 50 | let retval = _mm_cvtps_ph(vec, _MM_FROUND_TO_NEAREST_INT); 51 | let retval: [[u16; 4]; 2] = transmute!(retval); 52 | retval[0] 53 | } 54 
| 55 | #[target_feature(enable = "f16c")] 56 | #[inline] 57 | pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] { 58 | let array = f16x4_to_f32x4_x86_f16c(v); 59 | // Let compiler vectorize this regular cast for now. 60 | // TODO: investigate auto-detecting sse2/avx convert features 61 | [ 62 | array[0] as f64, 63 | array[1] as f64, 64 | array[2] as f64, 65 | array[3] as f64, 66 | ] 67 | } 68 | 69 | #[target_feature(enable = "f16c")] 70 | #[inline] 71 | pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] { 72 | // Let compiler vectorize this regular cast for now. 73 | // TODO: investigate auto-detecting sse2/avx convert features 74 | let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32]; 75 | f32x4_to_f16x4_x86_f16c(&v) 76 | } 77 | 78 | #[target_feature(enable = "f16c")] 79 | #[inline] 80 | pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] { 81 | let vec: __m128i = transmute!(*v); 82 | transmute!(_mm256_cvtph_ps(vec)) 83 | } 84 | 85 | #[target_feature(enable = "f16c")] 86 | #[inline] 87 | pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] { 88 | let vec: __m256 = transmute!(*v); 89 | let retval = _mm256_cvtps_ph(vec, _MM_FROUND_TO_NEAREST_INT); 90 | transmute!(retval) 91 | } 92 | 93 | #[target_feature(enable = "f16c")] 94 | #[inline] 95 | pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] { 96 | let array = f16x8_to_f32x8_x86_f16c(v); 97 | // Let compiler vectorize this regular cast for now. 98 | // TODO: investigate auto-detecting sse2/avx convert features 99 | [ 100 | array[0] as f64, 101 | array[1] as f64, 102 | array[2] as f64, 103 | array[3] as f64, 104 | array[4] as f64, 105 | array[5] as f64, 106 | array[6] as f64, 107 | array[7] as f64, 108 | ] 109 | } 110 | 111 | #[target_feature(enable = "f16c")] 112 | #[inline] 113 | pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] { 114 | // Let compiler vectorize this regular cast for now. 
115 | // TODO: investigate auto-detecting sse2/avx convert features 116 | let v = [ 117 | v[0] as f32, 118 | v[1] as f32, 119 | v[2] as f32, 120 | v[3] as f32, 121 | v[4] as f32, 122 | v[5] as f32, 123 | v[6] as f32, 124 | v[7] as f32, 125 | ]; 126 | f32x8_to_f16x8_x86_f16c(&v) 127 | } 128 | -------------------------------------------------------------------------------- /src/binary16/arch/aarch64.rs: -------------------------------------------------------------------------------- 1 | use core::{ 2 | arch::{ 3 | aarch64::{float32x4_t, float64x2_t, uint16x4_t}, 4 | asm, 5 | }, 6 | mem::MaybeUninit, 7 | ptr, 8 | }; 9 | use zerocopy::transmute; 10 | 11 | #[target_feature(enable = "fp16")] 12 | #[inline] 13 | pub(super) unsafe fn f16_to_f32_fp16(i: u16) -> f32 { 14 | let result: f32; 15 | asm!( 16 | "fcvt {0:s}, {1:h}", 17 | out(vreg) result, 18 | in(vreg) i, 19 | options(pure, nomem, nostack, preserves_flags)); 20 | result 21 | } 22 | 23 | #[target_feature(enable = "fp16")] 24 | #[inline] 25 | pub(super) unsafe fn f16_to_f64_fp16(i: u16) -> f64 { 26 | let result: f64; 27 | asm!( 28 | "fcvt {0:d}, {1:h}", 29 | out(vreg) result, 30 | in(vreg) i, 31 | options(pure, nomem, nostack, preserves_flags)); 32 | result 33 | } 34 | 35 | #[target_feature(enable = "fp16")] 36 | #[inline] 37 | pub(super) unsafe fn f32_to_f16_fp16(f: f32) -> u16 { 38 | let result: u16; 39 | asm!( 40 | "fcvt {0:h}, {1:s}", 41 | out(vreg) result, 42 | in(vreg) f, 43 | options(pure, nomem, nostack, preserves_flags)); 44 | result 45 | } 46 | 47 | #[target_feature(enable = "fp16")] 48 | #[inline] 49 | pub(super) unsafe fn f64_to_f16_fp16(f: f64) -> u16 { 50 | let result: u16; 51 | asm!( 52 | "fcvt {0:h}, {1:d}", 53 | out(vreg) result, 54 | in(vreg) f, 55 | options(pure, nomem, nostack, preserves_flags)); 56 | result 57 | } 58 | 59 | #[target_feature(enable = "fp16")] 60 | #[inline] 61 | pub(super) unsafe fn f16x4_to_f32x4_fp16(v: &[u16; 4]) -> [f32; 4] { 62 | let vec: uint16x4_t = transmute!(*v); 63 | let 
result: float32x4_t; 64 | asm!( 65 | "fcvtl {0:v}.4s, {1:v}.4h", 66 | out(vreg) result, 67 | in(vreg) vec, 68 | options(pure, nomem, nostack)); 69 | transmute!(result) 70 | } 71 | 72 | #[target_feature(enable = "fp16")] 73 | #[inline] 74 | pub(super) unsafe fn f32x4_to_f16x4_fp16(v: &[f32; 4]) -> [u16; 4] { 75 | let vec: float32x4_t = transmute!(*v); 76 | let result: uint16x4_t; 77 | asm!( 78 | "fcvtn {0:v}.4h, {1:v}.4s", 79 | out(vreg) result, 80 | in(vreg) vec, 81 | options(pure, nomem, nostack)); 82 | transmute!(result) 83 | } 84 | 85 | #[target_feature(enable = "fp16")] 86 | #[inline] 87 | pub(super) unsafe fn f16x4_to_f64x4_fp16(v: &[u16; 4]) -> [f64; 4] { 88 | let vec: uint16x4_t = transmute!(*v); 89 | let low: float64x2_t; 90 | let high: float64x2_t; 91 | asm!( 92 | "fcvtl {2:v}.4s, {3:v}.4h", // Convert to f32 93 | "fcvtl {0:v}.2d, {2:v}.2s", // Convert low part to f64 94 | "fcvtl2 {1:v}.2d, {2:v}.4s", // Convert high part to f64 95 | lateout(vreg) low, 96 | lateout(vreg) high, 97 | out(vreg) _, 98 | in(vreg) vec, 99 | options(pure, nomem, nostack)); 100 | transmute!([low, high]) 101 | } 102 | 103 | #[target_feature(enable = "fp16")] 104 | #[inline] 105 | pub(super) unsafe fn f64x4_to_f16x4_fp16(v: &[f64; 4]) -> [u16; 4] { 106 | let mut low = MaybeUninit::::uninit(); 107 | let mut high = MaybeUninit::::uninit(); 108 | ptr::copy_nonoverlapping(v.as_ptr(), low.as_mut_ptr().cast(), 2); 109 | ptr::copy_nonoverlapping(v[2..].as_ptr(), high.as_mut_ptr().cast(), 2); 110 | let result: uint16x4_t; 111 | asm!( 112 | "fcvtn {1:v}.2s, {2:v}.2d", // Convert low to f32 113 | "fcvtn2 {1:v}.4s, {3:v}.2d", // Convert high to f32 114 | "fcvtn {0:v}.4h, {1:v}.4s", // Convert to f16 115 | lateout(vreg) result, 116 | out(vreg) _, 117 | in(vreg) low.assume_init(), 118 | in(vreg) high.assume_init(), 119 | options(pure, nomem, nostack)); 120 | transmute!(result) 121 | } 122 | 123 | #[target_feature(enable = "fp16")] 124 | #[inline] 125 | pub(super) unsafe fn add_f16_fp16(a: u16, 
b: u16) -> u16 { 126 | let result: u16; 127 | asm!( 128 | "fadd {0:h}, {1:h}, {2:h}", 129 | out(vreg) result, 130 | in(vreg) a, 131 | in(vreg) b, 132 | options(pure, nomem, nostack)); 133 | result 134 | } 135 | 136 | #[target_feature(enable = "fp16")] 137 | #[inline] 138 | pub(super) unsafe fn subtract_f16_fp16(a: u16, b: u16) -> u16 { 139 | let result: u16; 140 | asm!( 141 | "fsub {0:h}, {1:h}, {2:h}", 142 | out(vreg) result, 143 | in(vreg) a, 144 | in(vreg) b, 145 | options(pure, nomem, nostack)); 146 | result 147 | } 148 | 149 | #[target_feature(enable = "fp16")] 150 | #[inline] 151 | pub(super) unsafe fn multiply_f16_fp16(a: u16, b: u16) -> u16 { 152 | let result: u16; 153 | asm!( 154 | "fmul {0:h}, {1:h}, {2:h}", 155 | out(vreg) result, 156 | in(vreg) a, 157 | in(vreg) b, 158 | options(pure, nomem, nostack)); 159 | result 160 | } 161 | 162 | #[target_feature(enable = "fp16")] 163 | #[inline] 164 | pub(super) unsafe fn divide_f16_fp16(a: u16, b: u16) -> u16 { 165 | let result: u16; 166 | asm!( 167 | "fdiv {0:h}, {1:h}, {2:h}", 168 | out(vreg) result, 169 | in(vreg) a, 170 | in(vreg) b, 171 | options(pure, nomem, nostack)); 172 | result 173 | } 174 | -------------------------------------------------------------------------------- /src/rand_distr.rs: -------------------------------------------------------------------------------- 1 | use crate::{bf16, f16}; 2 | 3 | use rand::{distr::Distribution, Rng}; 4 | use rand_distr::uniform::UniformFloat; 5 | 6 | macro_rules! 
impl_distribution_via_f32 { 7 | ($Ty:ty, $Distr:ty) => { 8 | impl Distribution<$Ty> for $Distr { 9 | fn sample(&self, rng: &mut R) -> $Ty { 10 | <$Ty>::from_f32(>::sample(self, rng)) 11 | } 12 | } 13 | }; 14 | } 15 | 16 | impl_distribution_via_f32!(f16, rand_distr::StandardUniform); 17 | impl_distribution_via_f32!(f16, rand_distr::StandardNormal); 18 | impl_distribution_via_f32!(f16, rand_distr::Exp1); 19 | impl_distribution_via_f32!(f16, rand_distr::Open01); 20 | impl_distribution_via_f32!(f16, rand_distr::OpenClosed01); 21 | 22 | impl_distribution_via_f32!(bf16, rand_distr::StandardUniform); 23 | impl_distribution_via_f32!(bf16, rand_distr::StandardNormal); 24 | impl_distribution_via_f32!(bf16, rand_distr::Exp1); 25 | impl_distribution_via_f32!(bf16, rand_distr::Open01); 26 | impl_distribution_via_f32!(bf16, rand_distr::OpenClosed01); 27 | 28 | impl rand::distr::weighted::Weight for f16 { 29 | const ZERO: Self = Self::ZERO; 30 | 31 | fn checked_add_assign(&mut self, v: &Self) -> Result<(), ()> { 32 | // Floats have an explicit representation for overflow 33 | *self += v; 34 | Ok(()) 35 | } 36 | } 37 | 38 | impl rand::distr::weighted::Weight for bf16 { 39 | const ZERO: Self = Self::ZERO; 40 | 41 | fn checked_add_assign(&mut self, v: &Self) -> Result<(), ()> { 42 | // Floats have an explicit representation for overflow 43 | *self += v; 44 | Ok(()) 45 | } 46 | } 47 | 48 | #[derive(Debug, Clone, Copy)] 49 | pub struct Float16Sampler(UniformFloat); 50 | 51 | impl rand_distr::uniform::SampleUniform for f16 { 52 | type Sampler = Float16Sampler; 53 | } 54 | 55 | impl rand_distr::uniform::UniformSampler for Float16Sampler { 56 | type X = f16; 57 | fn new(low: B1, high: B2) -> Result 58 | where 59 | B1: rand_distr::uniform::SampleBorrow + Sized, 60 | B2: rand_distr::uniform::SampleBorrow + Sized, 61 | { 62 | Ok(Self(UniformFloat::new( 63 | low.borrow().to_f32(), 64 | high.borrow().to_f32(), 65 | )?)) 66 | } 67 | fn new_inclusive(low: B1, high: B2) -> Result 68 | where 69 | 
B1: rand_distr::uniform::SampleBorrow + Sized, 70 | B2: rand_distr::uniform::SampleBorrow + Sized, 71 | { 72 | Ok(Self(UniformFloat::new_inclusive( 73 | low.borrow().to_f32(), 74 | high.borrow().to_f32(), 75 | )?)) 76 | } 77 | fn sample(&self, rng: &mut R) -> Self::X { 78 | f16::from_f32(self.0.sample(rng)) 79 | } 80 | } 81 | 82 | #[derive(Debug, Clone, Copy)] 83 | pub struct BFloat16Sampler(UniformFloat); 84 | 85 | impl rand_distr::uniform::SampleUniform for bf16 { 86 | type Sampler = BFloat16Sampler; 87 | } 88 | 89 | impl rand_distr::uniform::UniformSampler for BFloat16Sampler { 90 | type X = bf16; 91 | fn new(low: B1, high: B2) -> Result 92 | where 93 | B1: rand_distr::uniform::SampleBorrow + Sized, 94 | B2: rand_distr::uniform::SampleBorrow + Sized, 95 | { 96 | Ok(Self(UniformFloat::new( 97 | low.borrow().to_f32(), 98 | high.borrow().to_f32(), 99 | )?)) 100 | } 101 | fn new_inclusive(low: B1, high: B2) -> Result 102 | where 103 | B1: rand_distr::uniform::SampleBorrow + Sized, 104 | B2: rand_distr::uniform::SampleBorrow + Sized, 105 | { 106 | Ok(Self(UniformFloat::new_inclusive( 107 | low.borrow().to_f32(), 108 | high.borrow().to_f32(), 109 | )?)) 110 | } 111 | fn sample(&self, rng: &mut R) -> Self::X { 112 | bf16::from_f32(self.0.sample(rng)) 113 | } 114 | } 115 | 116 | #[cfg(test)] 117 | mod tests { 118 | use super::*; 119 | 120 | #[allow(unused_imports)] 121 | use rand::{rng, Rng}; 122 | use rand_distr::{StandardNormal, StandardUniform, Uniform}; 123 | 124 | #[test] 125 | fn test_sample_f16() { 126 | let mut rng = rng(); 127 | let _: f16 = rng.sample(StandardUniform); 128 | let _: f16 = rng.sample(StandardNormal); 129 | let _: f16 = rng.sample(Uniform::new(f16::from_f32(0.0), f16::from_f32(1.0)).unwrap()); 130 | #[cfg(feature = "num-traits")] 131 | let _: f16 = 132 | rng.sample(rand_distr::Normal::new(f16::from_f32(0.0), f16::from_f32(1.0)).unwrap()); 133 | } 134 | 135 | #[test] 136 | fn test_sample_bf16() { 137 | let mut rng = rng(); 138 | let _: bf16 = 
rng.sample(StandardUniform); 139 | let _: bf16 = rng.sample(StandardNormal); 140 | let _: bf16 = rng.sample(Uniform::new(bf16::from_f32(0.0), bf16::from_f32(1.0)).unwrap()); 141 | #[cfg(feature = "num-traits")] 142 | let _: bf16 = 143 | rng.sample(rand_distr::Normal::new(bf16::from_f32(0.0), bf16::from_f32(1.0)).unwrap()); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `f16` and `bf16` floating point types for Rust 2 | [![Crates.io](https://img.shields.io/crates/v/half.svg)](https://crates.io/crates/half/) [![Documentation](https://docs.rs/half/badge.svg)](https://docs.rs/half/) ![Crates.io](https://img.shields.io/crates/l/half) [![Build status](https://github.com/VoidStarKat/half-rs/actions/workflows/rust.yml/badge.svg?branch=main&event=push)](https://github.com/VoidStarKat/half-rs/actions/workflows/rust.yml) [![CircleCI](https://dl.circleci.com/status-badge/img/gh/VoidStarKat/half-rs/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/VoidStarKat/half-rs/tree/main) 3 | 4 | This crate implements a half-precision floating point `f16` type for Rust implementing the IEEE 5 | 754-2008 standard [`binary16`](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) 6 | a.k.a. "half" format, as well as a `bf16` type implementing the 7 | [`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) format. 8 | 9 | ## Usage 10 | 11 | The `f16` and `bf16` types attempt to match existing Rust floating point type functionality where possible, and provide both conversion operations (such as to/from `f32` and `f64`) and basic 12 | arithmetic operations. Hardware support for these operations will be used whenever hardware support 13 | is available—either through intrinsics or targeted assembly—although a nightly Rust toolchain may 14 | be required for some hardware. 
15 | 16 | This crate provides [`no_std`](https://rust-embedded.github.io/book/intro/no-std.html) support, so it can easily be used in embedded code where a smaller float format is most useful. 17 | 18 | *Requires Rust 1.81 or greater.* If you need support for older versions of Rust, use previous 19 | versions of this crate. 20 | 21 | See the [crate documentation](https://docs.rs/half/) for more details. 22 | 23 | ### Optional Features 24 | 25 | - **`alloc`** — Enable use of the [`alloc`](https://doc.rust-lang.org/alloc/) crate when not using 26 | the `std` library. 27 | 28 | This enables the `vec` module, which contains zero-copy conversions for the `Vec` type. This 29 | allows fast conversion between raw `Vec<u16>` bits and `Vec<f16>` or `Vec<bf16>` arrays, and vice 30 | versa. 31 | 32 | - **`std`** — Enable features that depend on the Rust `std` library, including everything in the 33 | `alloc` feature. 34 | 35 | Enabling the `std` feature enables runtime CPU feature detection of hardware support. 36 | Without this feature detection, hardware support is only used when the compiler target supports it. 37 | 38 | - **`serde`** - Implement `Serialize` and `Deserialize` traits for `f16` and `bf16`. This adds a 39 | dependency on the [`serde`](https://crates.io/crates/serde) crate. 40 | 41 | - **`num-traits`** — Enable `ToPrimitive`, `FromPrimitive`, `ToBytes`, `FromBytes`, `Num`, `Float`, 42 | `FloatCore`, `Signed`, and `Bounded` trait implementations from the 43 | [`num-traits`](https://crates.io/crates/num-traits) crate. 44 | 45 | - **`bytemuck`** — Enable `Zeroable` and `Pod` trait implementations from the 46 | [`bytemuck`](https://crates.io/crates/bytemuck) crate. 47 | 48 | - **`rand_distr`** — Enable sampling from distributions like `StandardUniform` and `StandardNormal` 49 | from the [`rand_distr`](https://crates.io/crates/rand_distr) crate. 50 | 51 | - **`rkyv`** -- Enable zero-copy deserialization with the [`rkyv`](https://crates.io/crates/rkyv) crate.
52 | 53 | - **`arbitrary`** -- Enable fuzzing support with the [`arbitrary`](https://crates.io/crates/arbitrary) 54 | crate by implementing the `Arbitrary` trait. 55 | 56 | - **`nightly`** -- Enable nightly-only features (currently `loongarch64` intrinsics). 57 | 58 | ### Hardware support 59 | 60 | The following table details hardware support for floating point types in this crate. When using the `std` 61 | library, runtime CPU target detection will be used. To get the most performance benefits, compile 62 | for specific CPU features, which avoids the runtime overhead and works in a `no_std` environment. 63 | 64 | | Architecture | CPU Target Feature | Notes | 65 | | ------------ | ------------------ |--------------------------------------------------------------------------------------------------------------------------------------------------------| 66 | | `x86`/`x86_64` | `f16c` | This supports conversion to/from `f16` only (including vector SIMD) and does not support any `bf16` or arithmetic operations. | 67 | | `aarch64` | `fp16` | This supports all operations on `f16` only. | 68 | | `loongarch64` | `lsx` | (`nightly` feature only) This supports conversion to/from `f16` only (including vector SIMD) and does not support any `bf16` or arithmetic operations. | 69 | 70 | ### More Documentation 71 | 72 | - [Crate API Reference](https://docs.rs/half/) 73 | - [Latest Changes](CHANGELOG.md) 74 | 75 | ## License 76 | 77 | All files in this library are dual-licensed and distributed under the terms of either of: 78 | 79 | * [MIT License](LICENSE-MIT) 80 | ([http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT)) 81 | * [Apache License, Version 2.0](LICENSE-APACHE) 82 | ([http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)) 83 | 84 | at your option.
85 | 86 | ### Contributing 87 | 88 | Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the 89 | work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any 90 | additional terms or conditions. 91 | -------------------------------------------------------------------------------- /src/bfloat/convert.rs: -------------------------------------------------------------------------------- 1 | use crate::leading_zeros::leading_zeros_u16; 2 | use zerocopy::transmute; 3 | 4 | #[inline] 5 | pub(crate) const fn f32_to_bf16(value: f32) -> u16 { 6 | // TODO: Replace transmute with to_bits() once to_bits is const-stabilized 7 | // Reinterpret the f32 as its raw 32-bit pattern 8 | let x: u32 = transmute!(value); 9 | 10 | // Check for NaN: magnitude bits strictly above the infinity pattern 0x7F80_0000 11 | if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 { 12 | // Keep high part of current mantissa but also set most significant mantissa bit (0x0040), so dropping the low 16 bits cannot leave an all-zero mantissa 13 | return ((x >> 16) | 0x0040u32) as u16; 14 | } 15 | 16 | // Round to nearest, ties to even, then shift: round up only when the round bit is set and either a lower (sticky) bit or the lowest kept bit is set 17 | let round_bit = 0x0000_8000u32; 18 | if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 { 19 | (x >> 16) as u16 + 1 20 | } else { 21 | (x >> 16) as u16 22 | } 23 | } 24 | 25 | #[inline] 26 | pub(crate) const fn f64_to_bf16(value: f64) -> u16 { 27 | // TODO: Replace transmute with to_bits() once to_bits is const-stabilized 28 | // Convert to raw bits, truncating the last 32-bits of mantissa; that precision will always 29 | // be lost on half-precision. NOTE(review): these low 32 bits are only consulted by the NaN check below, never as sticky bits when rounding, so an input just above a rounding tie is rounded as if it were an exact tie — confirm this is acceptable. 30 | let val: u64 = transmute!(value); 31 | let x = (val >> 32) as u32; 32 | 33 | // Extract IEEE754 components 34 | let sign = x & 0x8000_0000u32; 35 | let exp = x & 0x7FF0_0000u32; 36 | let man = x & 0x000F_FFFFu32; 37 | 38 | // Check for all exponent bits being set, which is Infinity or NaN 39 | if exp == 0x7FF0_0000u32 { 40 | // Set mantissa MSB for NaN (and also keep shifted mantissa bits). 41 | // We also have to check the last 32 bits.
42 | let nan_bit = if man == 0 && (val as u32 == 0) { 43 | 0 44 | } else { 45 | 0x0040u32 46 | }; 47 | return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16; 48 | } 49 | 50 | // The number is normalized, start assembling bfloat16 version 51 | let half_sign = sign >> 16; 52 | // Unbias the exponent, then bias for bfloat16 precision 53 | let unbiased_exp = ((exp >> 20) as i64) - 1023; 54 | let half_exp = unbiased_exp + 127; 55 | 56 | // Check for exponent overflow, return signed infinity 57 | if half_exp >= 0xFF { 58 | return (half_sign | 0x7F80u32) as u16; 59 | } 60 | 61 | // Check for underflow 62 | if half_exp <= 0 { 63 | // Check mantissa for what we can do 64 | if 7 - half_exp > 21 { 65 | // No rounding possibility, so this is a full underflow, return signed zero 66 | return half_sign as u16; 67 | } 68 | // Don't forget about hidden leading mantissa bit when assembling mantissa 69 | let man = man | 0x0010_0000u32; 70 | let mut half_man = man >> (14 - half_exp); 71 | // Check for rounding (round bit set, and a lower bit or the lowest kept bit set) 72 | let round_bit = 1 << (13 - half_exp); 73 | if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { 74 | half_man += 1; 75 | } 76 | // No exponent for subnormals 77 | return (half_sign | half_man) as u16; 78 | } 79 | 80 | // Rebias the exponent 81 | let half_exp = (half_exp as u32) << 7; 82 | let half_man = man >> 13; 83 | // Check for rounding (round bit set, and a lower bit or the lowest kept bit set) 84 | let round_bit = 0x0000_1000u32; 85 | if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { 86 | // Round up; a carry out of the mantissa propagates into the exponent field 87 | ((half_sign | half_exp | half_man) + 1) as u16 88 | } else { 89 | (half_sign | half_exp | half_man) as u16 90 | } 91 | } 92 | 93 | #[inline] 94 | pub(crate) const fn bf16_to_f32(i: u16) -> f32 { 95 | // TODO: Replace transmute with from_bits() once from_bits is const-stabilized 96 | // If NaN, keep current mantissa but also set most significant mantissa bit 97 | if i & 0x7FFFu16 > 0x7F80u16 { 98 | transmute!((i as u32 | 0x0040u32) << 16) 99 | } else { 100 | transmute!((i as u32) << 16) 101
| } 102 | } 103 | 104 | #[inline] 105 | pub(crate) const fn bf16_to_f64(i: u16) -> f64 { 106 | // TODO: Replace transmute with from_bits() once from_bits is const-stabilized 107 | // Check for signed zero: only the sign bit needs to move to the top of the 64-bit pattern 108 | if i & 0x7FFFu16 == 0 { 109 | return transmute!((i as u64) << 48); 110 | } 111 | 112 | let half_sign = (i & 0x8000u16) as u64; 113 | let half_exp = (i & 0x7F80u16) as u64; 114 | let half_man = (i & 0x007Fu16) as u64; 115 | 116 | // Check for an infinity or NaN when all exponent bits set 117 | if half_exp == 0x7F80u64 { 118 | // Check for signed infinity if mantissa is zero 119 | if half_man == 0 { 120 | return transmute!((half_sign << 48) | 0x7FF0_0000_0000_0000u64); 121 | } else { 122 | // NaN, keep current mantissa but also set most significant mantissa bit 123 | return transmute!((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45)); 124 | } 125 | } 126 | 127 | // Calculate double-precision components with adjusted exponent 128 | let sign = half_sign << 48; 129 | // Unbias exponent 130 | let unbiased_exp = ((half_exp as i64) >> 7) - 127; 131 | 132 | // Check for subnormals, which will be normalized by adjusting exponent 133 | if half_exp == 0 { 134 | // Calculate how much to adjust the exponent by 135 | let e = leading_zeros_u16(half_man as u16) - 9; 136 | 137 | // Rebias and adjust exponent 138 | let exp = ((1023 - 127 - e) as u64) << 52; 139 | let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64; 140 | return transmute!(sign | exp | man); 141 | } 142 | // Rebias exponent for a normal value 143 | let exp = ((unbiased_exp + 1023) as u64) << 52; 144 | let man = (half_man & 0x007Fu64) << 45; 145 | transmute!(sign | exp | man) 146 | } 147 | -------------------------------------------------------------------------------- /benches/convert.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion}; 2 | use
half::prelude::*; 3 | use std::{f32, f64, iter}; 4 | 5 | const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1024; 6 | 7 | fn bench_f32_to_f16(c: &mut Criterion) { 8 | let mut group = c.benchmark_group("Convert f16 From f32"); 9 | for val in &[ 10 | 0., 11 | -0., 12 | 1., 13 | f32::MIN, 14 | f32::MAX, 15 | f32::MIN_POSITIVE, 16 | f32::NEG_INFINITY, 17 | f32::INFINITY, 18 | f32::NAN, 19 | f32::consts::E, 20 | f32::consts::PI, 21 | ] { 22 | group.bench_with_input(BenchmarkId::new("f16::from_f32", val), val, |b, i| { 23 | b.iter(|| f16::from_f32(*i)) 24 | }); 25 | } 26 | } 27 | 28 | fn bench_f64_to_f16(c: &mut Criterion) { 29 | let mut group = c.benchmark_group("Convert f16 From f64"); 30 | for val in &[ 31 | 0., 32 | -0., 33 | 1., 34 | f64::MIN, 35 | f64::MAX, 36 | f64::MIN_POSITIVE, 37 | f64::NEG_INFINITY, 38 | f64::INFINITY, 39 | f64::NAN, 40 | f64::consts::E, 41 | f64::consts::PI, 42 | ] { 43 | group.bench_with_input(BenchmarkId::new("f16::from_f64", val), val, |b, i| { 44 | b.iter(|| f16::from_f64(*i)) 45 | }); 46 | } 47 | } 48 | 49 | fn bench_f16_to_f32(c: &mut Criterion) { 50 | let mut group = c.benchmark_group("Convert f16 to f32"); 51 | for val in &[ 52 | f16::ZERO, 53 | f16::NEG_ZERO, 54 | f16::ONE, 55 | f16::MIN, 56 | f16::MAX, 57 | f16::MIN_POSITIVE, 58 | f16::NEG_INFINITY, 59 | f16::INFINITY, 60 | f16::NAN, 61 | f16::E, 62 | f16::PI, 63 | ] { 64 | group.bench_with_input(BenchmarkId::new("f16::to_f32", val), val, |b, i| { 65 | b.iter(|| i.to_f32()) 66 | }); 67 | } 68 | } 69 | 70 | fn bench_f16_to_f64(c: &mut Criterion) { 71 | let mut group = c.benchmark_group("Convert f16 to f64"); 72 | for val in &[ 73 | f16::ZERO, 74 | f16::NEG_ZERO, 75 | f16::ONE, 76 | f16::MIN, 77 | f16::MAX, 78 | f16::MIN_POSITIVE, 79 | f16::NEG_INFINITY, 80 | f16::INFINITY, 81 | f16::NAN, 82 | f16::E, 83 | f16::PI, 84 | ] { 85 | group.bench_with_input(BenchmarkId::new("f16::to_f64", val), val, |b, i| { 86 | b.iter(|| i.to_f64()) 87 | }); 88 | } 89 | } 90 | 91 | criterion_group!( 92 | f16_sisd, 
93 | bench_f32_to_f16, 94 | bench_f64_to_f16, 95 | bench_f16_to_f32, 96 | bench_f16_to_f64 97 | ); 98 | 99 | fn bench_slice_f32_to_f16(c: &mut Criterion) { 100 | let mut constant_buffer = [f16::ZERO; 11]; 101 | let constants = [ 102 | 0., 103 | -0., 104 | 1., 105 | f32::MIN, 106 | f32::MAX, 107 | f32::MIN_POSITIVE, 108 | f32::NEG_INFINITY, 109 | f32::INFINITY, 110 | f32::NAN, 111 | f32::consts::E, 112 | f32::consts::PI, 113 | ]; 114 | c.bench_function( 115 | "HalfFloatSliceExt::convert_from_f32_slice/constants", 116 | |b: &mut Bencher<'_>| { 117 | b.iter(|| black_box(&mut constant_buffer).convert_from_f32_slice(black_box(&constants))) 118 | }, 119 | ); 120 | 121 | let large: Vec<_> = iter::repeat(0) 122 | .enumerate() 123 | .map(|(i, _)| i as f32) 124 | .take(SIMD_LARGE_BENCH_SLICE_LEN) 125 | .collect(); 126 | let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; 127 | c.bench_function( 128 | "HalfFloatSliceExt::convert_from_f32_slice/large", 129 | |b: &mut Bencher<'_>| { 130 | b.iter(|| black_box(&mut large_buffer).convert_from_f32_slice(black_box(&large))) 131 | }, 132 | ); 133 | } 134 | 135 | fn bench_slice_f64_to_f16(c: &mut Criterion) { 136 | let mut constant_buffer = [f16::ZERO; 11]; 137 | let constants = [ 138 | 0., 139 | -0., 140 | 1., 141 | f64::MIN, 142 | f64::MAX, 143 | f64::MIN_POSITIVE, 144 | f64::NEG_INFINITY, 145 | f64::INFINITY, 146 | f64::NAN, 147 | f64::consts::E, 148 | f64::consts::PI, 149 | ]; 150 | c.bench_function( 151 | "HalfFloatSliceExt::convert_from_f64_slice/constants", 152 | |b: &mut Bencher<'_>| { 153 | b.iter(|| black_box(&mut constant_buffer).convert_from_f64_slice(black_box(&constants))) 154 | }, 155 | ); 156 | 157 | let large: Vec<_> = iter::repeat(0) 158 | .enumerate() 159 | .map(|(i, _)| i as f64) 160 | .take(SIMD_LARGE_BENCH_SLICE_LEN) 161 | .collect(); 162 | let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; 163 | c.bench_function( 164 | "HalfFloatSliceExt::convert_from_f64_slice/large", 165 | |b: &mut 
Bencher<'_>| { 166 | b.iter(|| black_box(&mut large_buffer).convert_from_f64_slice(black_box(&large))) 167 | }, 168 | ); 169 | } 170 | 171 | fn bench_slice_f16_to_f32(c: &mut Criterion) { 172 | let mut constant_buffer = [0f32; 11]; 173 | let constants = [ 174 | f16::ZERO, 175 | f16::NEG_ZERO, 176 | f16::ONE, 177 | f16::MIN, 178 | f16::MAX, 179 | f16::MIN_POSITIVE, 180 | f16::NEG_INFINITY, 181 | f16::INFINITY, 182 | f16::NAN, 183 | f16::E, 184 | f16::PI, 185 | ]; 186 | c.bench_function( 187 | "HalfFloatSliceExt::convert_to_f32_slice/constants", 188 | |b: &mut Bencher<'_>| { 189 | b.iter(|| black_box(&constants).convert_to_f32_slice(black_box(&mut constant_buffer))) 190 | }, 191 | ); 192 | 193 | let large: Vec<_> = iter::repeat(0) 194 | .enumerate() 195 | .map(|(i, _)| f16::from_f32(i as f32)) 196 | .take(SIMD_LARGE_BENCH_SLICE_LEN) 197 | .collect(); 198 | let mut large_buffer = [0f32; SIMD_LARGE_BENCH_SLICE_LEN]; 199 | c.bench_function( 200 | "HalfFloatSliceExt::convert_to_f32_slice/large", 201 | |b: &mut Bencher<'_>| { 202 | b.iter(|| black_box(&large).convert_to_f32_slice(black_box(&mut large_buffer))) 203 | }, 204 | ); 205 | } 206 | 207 | fn bench_slice_f16_to_f64(c: &mut Criterion) { 208 | let mut constant_buffer = [0f64; 11]; 209 | let constants = [ 210 | f16::ZERO, 211 | f16::NEG_ZERO, 212 | f16::ONE, 213 | f16::MIN, 214 | f16::MAX, 215 | f16::MIN_POSITIVE, 216 | f16::NEG_INFINITY, 217 | f16::INFINITY, 218 | f16::NAN, 219 | f16::E, 220 | f16::PI, 221 | ]; 222 | c.bench_function( 223 | "HalfFloatSliceExt::convert_to_f64_slice/constants", 224 | |b: &mut Bencher<'_>| { 225 | b.iter(|| black_box(&constants).convert_to_f64_slice(black_box(&mut constant_buffer))) 226 | }, 227 | ); 228 | 229 | let large: Vec<_> = iter::repeat(0) 230 | .enumerate() 231 | .map(|(i, _)| f16::from_f64(i as f64)) 232 | .take(SIMD_LARGE_BENCH_SLICE_LEN) 233 | .collect(); 234 | let mut large_buffer = [0f64; SIMD_LARGE_BENCH_SLICE_LEN]; 235 | c.bench_function( 236 | 
"HalfFloatSliceExt::convert_to_f64_slice/large", 237 | |b: &mut Bencher<'_>| { 238 | b.iter(|| black_box(&large).convert_to_f64_slice(black_box(&mut large_buffer))) 239 | }, 240 | ); 241 | } 242 | 243 | criterion_group!( 244 | f16_simd, 245 | bench_slice_f32_to_f16, 246 | bench_slice_f64_to_f16, 247 | bench_slice_f16_to_f32, 248 | bench_slice_f16_to_f64 249 | ); 250 | 251 | fn bench_f32_to_bf16(c: &mut Criterion) { 252 | let mut group = c.benchmark_group("Convert bf16 From f32"); 253 | for val in &[ 254 | 0., 255 | -0., 256 | 1., 257 | f32::MIN, 258 | f32::MAX, 259 | f32::MIN_POSITIVE, 260 | f32::NEG_INFINITY, 261 | f32::INFINITY, 262 | f32::NAN, 263 | f32::consts::E, 264 | f32::consts::PI, 265 | ] { 266 | group.bench_with_input(BenchmarkId::new("bf16::from_f32", val), val, |b, i| { 267 | b.iter(|| bf16::from_f32(*i)) 268 | }); 269 | } 270 | } 271 | 272 | fn bench_f64_to_bf16(c: &mut Criterion) { 273 | let mut group = c.benchmark_group("Convert bf16 From f64"); 274 | for val in &[ 275 | 0., 276 | -0., 277 | 1., 278 | f64::MIN, 279 | f64::MAX, 280 | f64::MIN_POSITIVE, 281 | f64::NEG_INFINITY, 282 | f64::INFINITY, 283 | f64::NAN, 284 | f64::consts::E, 285 | f64::consts::PI, 286 | ] { 287 | group.bench_with_input(BenchmarkId::new("bf16::from_f64", val), val, |b, i| { 288 | b.iter(|| bf16::from_f64(*i)) 289 | }); 290 | } 291 | } 292 | 293 | fn bench_bf16_to_f32(c: &mut Criterion) { 294 | let mut group = c.benchmark_group("Convert bf16 to f32"); 295 | for val in &[ 296 | bf16::ZERO, 297 | bf16::NEG_ZERO, 298 | bf16::ONE, 299 | bf16::MIN, 300 | bf16::MAX, 301 | bf16::MIN_POSITIVE, 302 | bf16::NEG_INFINITY, 303 | bf16::INFINITY, 304 | bf16::NAN, 305 | bf16::E, 306 | bf16::PI, 307 | ] { 308 | group.bench_with_input(BenchmarkId::new("bf16::to_f32", val), val, |b, i| { 309 | b.iter(|| i.to_f32()) 310 | }); 311 | } 312 | } 313 | 314 | fn bench_bf16_to_f64(c: &mut Criterion) { 315 | let mut group = c.benchmark_group("Convert bf16 to f64"); 316 | for val in &[ 317 | bf16::ZERO, 
318 | bf16::NEG_ZERO, 319 | bf16::ONE, 320 | bf16::MIN, 321 | bf16::MAX, 322 | bf16::MIN_POSITIVE, 323 | bf16::NEG_INFINITY, 324 | bf16::INFINITY, 325 | bf16::NAN, 326 | bf16::E, 327 | bf16::PI, 328 | ] { 329 | group.bench_with_input(BenchmarkId::new("bf16::to_f64", val), val, |b, i| { 330 | b.iter(|| i.to_f64()) 331 | }); 332 | } 333 | } 334 | 335 | criterion_group!( 336 | bf16_sisd, 337 | bench_f32_to_bf16, 338 | bench_f64_to_bf16, 339 | bench_bf16_to_f32, 340 | bench_bf16_to_f64 341 | ); 342 | 343 | criterion_main!(f16_sisd, bf16_sisd, f16_simd); 344 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /src/vec.rs: -------------------------------------------------------------------------------- 1 | //! 
Contains utility functions and traits to convert between vectors of [`u16`] bits and [`struct@f16`] or 2 | //! [`bf16`] vectors. 3 | //! 4 | //! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for [`Vec<u16>`] vectors, 5 | //! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both 6 | //! [`Vec<f16>`] and [`Vec<bf16>`] vectors. These traits provide efficient conversions and 7 | //! reinterpret casting of larger buffers of floating point values, and are automatically included 8 | //! in the [`prelude`][crate::prelude] module. 9 | //! 10 | //! This module is only available with the `std` or `alloc` feature. 11 | 12 | use super::{bf16, f16, slice::HalfFloatSliceExt}; 13 | #[cfg(feature = "alloc")] 14 | #[allow(unused_imports)] 15 | use alloc::{vec, vec::Vec}; 16 | use core::mem; 17 | 18 | /// Extensions to [`Vec<f16>`] and [`Vec<bf16>`] to support reinterpret operations. 19 | /// 20 | /// This trait is sealed and cannot be implemented outside of this crate. 21 | pub trait HalfFloatVecExt: private::SealedHalfFloatVec { 22 | /// Reinterprets a vector of [`struct@f16`] or [`bf16`] numbers as a vector of [`u16`] bits. 23 | /// 24 | /// This is a zero-copy operation. The reinterpreted vector has the same memory location as 25 | /// `self`. 26 | /// 27 | /// # Examples 28 | /// 29 | /// ```rust 30 | /// # use half::prelude::*; 31 | /// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; 32 | /// let int_buffer = float_buffer.reinterpret_into(); 33 | /// 34 | /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); 35 | /// ``` 36 | #[must_use] 37 | fn reinterpret_into(self) -> Vec; 38 | 39 | /// Converts all of the elements of a `[f32]` slice into a new [`struct@f16`] or [`bf16`] vector.
40 | /// 41 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 42 | /// efficient than converting individual elements on some hardware that supports SIMD 43 | /// conversions. See [crate documentation][crate] for more information on hardware conversion 44 | /// support. 45 | /// 46 | /// # Examples 47 | /// ```rust 48 | /// # use half::prelude::*; 49 | /// let float_values = [1., 2., 3., 4.]; 50 | /// let vec: Vec = Vec::from_f32_slice(&float_values); 51 | /// 52 | /// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); 53 | /// ``` 54 | #[must_use] 55 | fn from_f32_slice(slice: &[f32]) -> Self; 56 | 57 | /// Converts all of the elements of a `[f64]` slice into a new [`struct@f16`] or [`bf16`] vector. 58 | /// 59 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 60 | /// efficient than converting individual elements on some hardware that supports SIMD 61 | /// conversions. See [crate documentation][crate] for more information on hardware conversion 62 | /// support. 63 | /// 64 | /// # Examples 65 | /// ```rust 66 | /// # use half::prelude::*; 67 | /// let float_values = [1., 2., 3., 4.]; 68 | /// let vec: Vec = Vec::from_f64_slice(&float_values); 69 | /// 70 | /// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); 71 | /// ``` 72 | #[must_use] 73 | fn from_f64_slice(slice: &[f64]) -> Self; 74 | } 75 | 76 | /// Extensions to [`Vec`] to support reinterpret operations. 77 | /// 78 | /// This trait is sealed and cannot be implemented outside of this crate. 79 | pub trait HalfBitsVecExt: private::SealedHalfBitsVec { 80 | /// Reinterprets a vector of [`u16`] bits as a vector of [`struct@f16`] or [`bf16`] numbers. 81 | /// 82 | /// `H` is the type to cast to, and must be either the [`struct@f16`] or [`bf16`] type. 83 | /// 84 | /// This is a zero-copy operation. 
The reinterpreted vector has the same memory location as 85 | /// `self`. 86 | /// 87 | /// # Examples 88 | /// 89 | /// ```rust 90 | /// # use half::prelude::*; 91 | /// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; 92 | /// let float_buffer = int_buffer.reinterpret_into::(); 93 | /// 94 | /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); 95 | /// ``` 96 | #[must_use] 97 | fn reinterpret_into(self) -> Vec 98 | where 99 | H: crate::private::SealedHalf; 100 | } 101 | 102 | mod private { 103 | use crate::{bf16, f16}; 104 | #[cfg(feature = "alloc")] 105 | #[allow(unused_imports)] 106 | use alloc::vec::Vec; 107 | 108 | pub trait SealedHalfFloatVec {} 109 | impl SealedHalfFloatVec for Vec {} 110 | impl SealedHalfFloatVec for Vec {} 111 | 112 | pub trait SealedHalfBitsVec {} 113 | impl SealedHalfBitsVec for Vec {} 114 | } 115 | 116 | impl HalfFloatVecExt for Vec { 117 | #[inline] 118 | fn reinterpret_into(mut self) -> Vec { 119 | // An f16 array has same length and capacity as u16 array 120 | let length = self.len(); 121 | let capacity = self.capacity(); 122 | 123 | // Actually reinterpret the contents of the Vec as u16, 124 | // knowing that structs are represented as only their members in memory, 125 | // which is the u16 part of `f16(u16)` 126 | let pointer = self.as_mut_ptr() as *mut u16; 127 | 128 | // Prevent running a destructor on the old Vec, so the pointer won't be deleted 129 | mem::forget(self); 130 | 131 | // Finally construct a new Vec from the raw pointer 132 | // SAFETY: We are reconstructing full length and capacity of original vector, 133 | // using its original pointer, and the size of elements are identical. 
134 | unsafe { Vec::from_raw_parts(pointer, length, capacity) } 135 | } 136 | 137 | #[allow(clippy::uninit_vec)] 138 | fn from_f32_slice(slice: &[f32]) -> Self { 139 | let mut vec = vec![f16::from_bits(0); slice.len()]; 140 | vec.convert_from_f32_slice(slice); 141 | vec 142 | } 143 | 144 | #[allow(clippy::uninit_vec)] 145 | fn from_f64_slice(slice: &[f64]) -> Self { 146 | let mut vec = vec![f16::from_bits(0); slice.len()]; 147 | vec.convert_from_f64_slice(slice); 148 | vec 149 | } 150 | } 151 | 152 | impl HalfFloatVecExt for Vec { 153 | #[inline] 154 | fn reinterpret_into(mut self) -> Vec { 155 | // An f16 array has same length and capacity as u16 array 156 | let length = self.len(); 157 | let capacity = self.capacity(); 158 | 159 | // Actually reinterpret the contents of the Vec as u16, 160 | // knowing that structs are represented as only their members in memory, 161 | // which is the u16 part of `f16(u16)` 162 | let pointer = self.as_mut_ptr() as *mut u16; 163 | 164 | // Prevent running a destructor on the old Vec, so the pointer won't be deleted 165 | mem::forget(self); 166 | 167 | // Finally construct a new Vec from the raw pointer 168 | // SAFETY: We are reconstructing full length and capacity of original vector, 169 | // using its original pointer, and the size of elements are identical. 
170 | unsafe { Vec::from_raw_parts(pointer, length, capacity) } 171 | } 172 | 173 | #[allow(clippy::uninit_vec)] 174 | fn from_f32_slice(slice: &[f32]) -> Self { 175 | let mut vec = vec![bf16::from_bits(0); slice.len()]; 176 | vec.convert_from_f32_slice(slice); 177 | vec 178 | } 179 | 180 | #[allow(clippy::uninit_vec)] 181 | fn from_f64_slice(slice: &[f64]) -> Self { 182 | let mut vec = vec![bf16::from_bits(0); slice.len()]; 183 | vec.convert_from_f64_slice(slice); 184 | vec 185 | } 186 | } 187 | 188 | impl HalfBitsVecExt for Vec { 189 | // This is safe because all traits are sealed 190 | #[inline] 191 | fn reinterpret_into(mut self) -> Vec 192 | where 193 | H: crate::private::SealedHalf, 194 | { 195 | // An f16 array has same length and capacity as u16 array 196 | let length = self.len(); 197 | let capacity = self.capacity(); 198 | 199 | // Actually reinterpret the contents of the Vec as f16, 200 | // knowing that structs are represented as only their members in memory, 201 | // which is the u16 part of `f16(u16)` 202 | let pointer = self.as_mut_ptr() as *mut H; 203 | 204 | // Prevent running a destructor on the old Vec, so the pointer won't be deleted 205 | mem::forget(self); 206 | 207 | // Finally construct a new Vec from the raw pointer 208 | // SAFETY: We are reconstructing full length and capacity of original vector, 209 | // using its original pointer, and the size of elements are identical. 
210 | unsafe { Vec::from_raw_parts(pointer, length, capacity) } 211 | } 212 | } 213 | 214 | #[cfg(test)] 215 | mod test { 216 | use super::{HalfBitsVecExt, HalfFloatVecExt}; 217 | use crate::{bf16, f16}; 218 | #[cfg(all(feature = "alloc", not(feature = "std")))] 219 | use alloc::vec; 220 | 221 | #[test] 222 | fn test_vec_conversions_f16() { 223 | let numbers = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; 224 | let bits = vec![ 225 | f16::E.to_bits(), 226 | f16::PI.to_bits(), 227 | f16::EPSILON.to_bits(), 228 | f16::FRAC_1_SQRT_2.to_bits(), 229 | ]; 230 | let bits_cloned = bits.clone(); 231 | 232 | // Convert from bits to numbers 233 | let from_bits = bits.reinterpret_into::(); 234 | assert_eq!(&from_bits[..], &numbers[..]); 235 | 236 | // Convert from numbers back to bits 237 | let to_bits = from_bits.reinterpret_into(); 238 | assert_eq!(&to_bits[..], &bits_cloned[..]); 239 | } 240 | 241 | #[test] 242 | fn test_vec_conversions_bf16() { 243 | let numbers = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; 244 | let bits = vec![ 245 | bf16::E.to_bits(), 246 | bf16::PI.to_bits(), 247 | bf16::EPSILON.to_bits(), 248 | bf16::FRAC_1_SQRT_2.to_bits(), 249 | ]; 250 | let bits_cloned = bits.clone(); 251 | 252 | // Convert from bits to numbers 253 | let from_bits = bits.reinterpret_into::(); 254 | assert_eq!(&from_bits[..], &numbers[..]); 255 | 256 | // Convert from numbers back to bits 257 | let to_bits = from_bits.reinterpret_into(); 258 | assert_eq!(&to_bits[..], &bits_cloned[..]); 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A crate that provides support for half-precision 16-bit floating point types. 2 | //! 3 | //! This crate provides the [`struct@f16`] type, which is an implementation of the IEEE 754-2008 standard 4 | //! [`binary16`] a.k.a "half" floating point type. 
This 16-bit floating point type is intended for 5 | //! efficient storage where the full range and precision of a larger floating point value is not 6 | //! required. This is especially useful for image storage formats. 7 | //! 8 | //! This crate also provides a [`struct@bf16`] type, an alternative 16-bit floating point format. The 9 | //! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the 10 | //! exponent to allow the same range as [`f32`] but with only 8 bits of precision (instead of 11 11 | //! bits for [`struct@f16`]). See the [`struct@bf16`] type for details. 12 | //! 13 | //! Because [`struct@f16`] and [`struct@bf16`] are primarily for efficient storage, floating point operations such 14 | //! as addition, multiplication, etc. are not always implemented by hardware. When hardware does not 15 | //! support these operations, this crate emulates them by converting the value to 16 | //! [`f32`] before performing the operation and then back afterward. 17 | //! 18 | //! Note that conversion from [`f32`]/[`f64`] to both [`struct@f16`] and [`struct@bf16`] are lossy operations, and 19 | //! just as converting a [`f64`] to [`f32`] is lossy and does not have `Into`/`From` trait 20 | //! implementations, so too do these smaller types not have those trait implementations either. 21 | //! Instead, use `from_f32`/`from_f64` functions for the types in this crate. If you don't care 22 | //! about lossy conversions and need trait conversions, use the appropriate [`num-traits`] 23 | //! traits that are implemented. 24 | //! 25 | //! This crate also provides a [`slice`][mod@slice] module for zero-copy in-place conversions of 26 | //! [`u16`] slices to both [`struct@f16`] and [`struct@bf16`], as well as efficient vectorized conversions of 27 | //! larger buffers of floating point values to and from these half formats. 28 | //! 29 | //! The crate supports `#[no_std]` when the `std` cargo feature is not enabled, so can be used in 30 | //! 
embedded environments without using the Rust [`std`] library. The `std` feature enables support 31 | //! for the standard library and is enabled by default, see the [Cargo Features](#cargo-features) 32 | //! section below. 33 | //! 34 | //! A [`prelude`] module is provided for easy importing of available utility traits. 35 | //! 36 | //! # Serialization 37 | //! 38 | //! When the `serde` feature is enabled, [`struct@f16`] and [`struct@bf16`] will be serialized as a newtype of 39 | //! [`u16`] by default. In binary formats this is ideal, as it will generally use just two bytes for 40 | //! storage. For string formats like JSON, however, this isn't as useful, and due to design 41 | //! limitations of serde, it's not possible for the default `Serialize` implementation to support 42 | //! different serialization for different formats. 43 | //! 44 | //! Instead, it's up to the container type of the floats to control how it is serialized. This can 45 | //! easily be controlled when using the derive macros using `#[serde(serialize_with="")]` 46 | //! attributes. For both [`struct@f16`] and [`struct@bf16`] a `serialize_as_f32` and `serialize_as_string` are 47 | //! provided for use with this attribute. 48 | //! 49 | //! Deserialization of both float types supports deserializing from the default serialization, 50 | //! strings, and `f32`/`f64` values, so no additional work is required. 51 | //! 52 | //! # Hardware support 53 | //! 54 | //! Hardware support for these conversions and arithmetic will be used 55 | //! whenever hardware support is available—either through intrinsics or targeted assembly—although 56 | //! a nightly Rust toolchain may be required for some hardware. When hardware supports it the 57 | //! functions and traits in the [`slice`][mod@slice] and [`vec`] modules will also use vectorized 58 | //! SIMD instructions for increased efficiency. 59 | //! 60 | //! The following list details hardware support for floating point types in this crate. 
When using 61 | //! `std` cargo feature, runtime CPU target detection will be used. To get the most performance 62 | //! benefits, compile for specific CPU features which avoids the runtime overhead and works in a 63 | //! `no_std` environment. 64 | //! 65 | //! | Architecture | CPU Target Feature | Notes | 66 | //! | ------------ | ------------------ | ----- | 67 | //! | `x86`/`x86_64` | `f16c` | This supports conversion to/from [`struct@f16`] only (including vector SIMD) and does not support any [`struct@bf16`] or arithmetic operations. | 68 | //! | `aarch64` | `fp16` | This supports all operations on [`struct@f16`] only. | 69 | //! | `loongarch64` | `lsx` | This supports conversion to/from [`struct@f16`] only (including vector SIMD) and does not support any [`struct@bf16`] or arithmetic operations. | 70 | //! 71 | //! # Cargo Features 72 | //! 73 | //! This crate supports a number of optional cargo features. Only the `std` feature is enabled by 74 | //! default. 75 | //! 76 | //! - **`alloc`** — Enable use of the [`alloc`] crate when not using the `std` library. 77 | //! 78 | //! Among other functions, this enables the [`vec`] module, which contains zero-copy 79 | //! conversions for the [`Vec`] type. This allows fast conversion between raw `Vec<u16>` bits and 80 | //! `Vec<f16>` or `Vec<bf16>` arrays, and vice versa. 81 | //! 82 | //! - **`std`** — Enable features that depend on the Rust [`std`] library. This also enables the 83 | //! `alloc` feature automatically. 84 | //! 85 | //! Enabling the `std` feature enables runtime CPU feature detection of hardware support. 86 | //! Without this feature detection, hardware support is only used when the compiler target supports it. 87 | //! 88 | //! - **`serde`** — Adds support for the [`serde`] crate by implementing [`Serialize`] and 89 | //! [`Deserialize`] traits for both [`struct@f16`] and [`struct@bf16`]. 90 | //! 91 | //! - **`num-traits`** — Adds support for the [`num-traits`] crate by implementing [`ToPrimitive`], 92 | //! 
[`FromPrimitive`], [`ToBytes`], `FromBytes`, [`AsPrimitive`], [`Num`], [`Float`], 93 | //! [`FloatCore`], [`Signed`], and [`Bounded`] traits for both [`struct@f16`] and [`struct@bf16`]. 94 | //! 95 | //! - **`bytemuck`** — Adds support for the [`bytemuck`] crate by implementing [`Zeroable`] and 96 | //! [`Pod`] traits for both [`struct@f16`] and [`struct@bf16`]. 97 | //! 98 | //! - **`rand_distr`** — Adds support for the [`rand_distr`] crate by implementing [`Distribution`] 99 | //! and other traits for both [`struct@f16`] and [`struct@bf16`]. 100 | //! 101 | //! - **`rkyv`** -- Enable zero-copy deserialization with [`rkyv`] crate. 102 | //! 103 | //! - **`arbitrary`** -- Enable fuzzing support with [`arbitrary`] crate by implementing 104 | //! [`Arbitrary`] trait. 105 | //! 106 | //! - **`nightly`** -- Enable nightly-only features. 107 | //! 108 | //! [`alloc`]: https://doc.rust-lang.org/alloc/ 109 | //! [`std`]: https://doc.rust-lang.org/std/ 110 | //! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format 111 | //! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format 112 | //! [`serde`]: https://crates.io/crates/serde 113 | //! [`bytemuck`]: https://crates.io/crates/bytemuck 114 | //! [`num-traits`]: https://crates.io/crates/num-traits 115 | //! [`zerocopy`]: https://crates.io/crates/zerocopy 116 | //! [`rand_distr`]: https://crates.io/crates/rand_distr 117 | //! [`rkyv`]: https://crates.io/crates/rkyv 118 | //! 
[`arbitrary`]: https://crates.io/crates/arbitrary 119 | #![cfg_attr( 120 | feature = "alloc", 121 | doc = " 122 | [`vec`]: mod@vec" 123 | )] 124 | #![cfg_attr( 125 | not(feature = "alloc"), 126 | doc = " 127 | [`vec`]: # 128 | [`Vec`]: https://docs.rust-lang.org/stable/alloc/vec/struct.Vec.html" 129 | )] 130 | #![cfg_attr( 131 | feature = "serde", 132 | doc = " 133 | [`Serialize`]: serde::Serialize 134 | [`Deserialize`]: serde::Deserialize" 135 | )] 136 | #![cfg_attr( 137 | not(feature = "serde"), 138 | doc = " 139 | [`Serialize`]: https://docs.rs/serde/*/serde/trait.Serialize.html 140 | [`Deserialize`]: https://docs.rs/serde/*/serde/trait.Deserialize.html" 141 | )] 142 | #![cfg_attr( 143 | feature = "num-traits", 144 | doc = " 145 | [`ToPrimitive`]: ::num_traits::ToPrimitive 146 | [`FromPrimitive`]: ::num_traits::FromPrimitive 147 | [`ToBytes`]: ::num_traits::ToBytes 148 | [`AsPrimitive`]: ::num_traits::AsPrimitive 149 | [`Num`]: ::num_traits::Num 150 | [`Float`]: ::num_traits::Float 151 | [`FloatCore`]: ::num_traits::float::FloatCore 152 | [`Signed`]: ::num_traits::Signed 153 | [`Bounded`]: ::num_traits::Bounded" 154 | )] 155 | #![cfg_attr( 156 | not(feature = "num-traits"), 157 | doc = " 158 | [`ToPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.ToPrimitive.html 159 | [`FromPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.FromPrimitive.html 160 | [`ToBytes`]: https://docs.rs/num-traits/*/num_traits/ops/bytes/trait.ToBytes.html 161 | [`AsPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.AsPrimitive.html 162 | [`Num`]: https://docs.rs/num-traits/*/num_traits/trait.Num.html 163 | [`Float`]: https://docs.rs/num-traits/*/num_traits/float/trait.Float.html 164 | [`FloatCore`]: https://docs.rs/num-traits/*/num_traits/float/trait.FloatCore.html 165 | [`Bounded`]: https://docs.rs/num-traits/*/num_traits/bounds/trait.Bounded.html" 166 | )] 167 | #![cfg_attr( 168 | feature = "bytemuck", 169 | doc = " 170 | [`Zeroable`]: 
bytemuck::Zeroable 171 | [`Pod`]: bytemuck::Pod" 172 | )] 173 | #![cfg_attr( 174 | not(feature = "bytemuck"), 175 | doc = " 176 | [`Zeroable`]: https://docs.rs/bytemuck/*/bytemuck/trait.Zeroable.html 177 | [`Pod`]: https://docs.rs/bytemuck/*bytemuck/trait.Pod.html" 178 | )] 179 | #![cfg_attr( 180 | feature = "zerocopy", 181 | doc = " 182 | [`IntoBytes`]: zerocopy::IntoBytes 183 | [`FromBytes`]: zerocopy::FromBytes" 184 | )] 185 | #![cfg_attr( 186 | not(feature = "zerocopy"), 187 | doc = " 188 | [`IntoBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.IntoBytes.html 189 | [`FromBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.FromBytes.html" 190 | )] 191 | #![cfg_attr( 192 | feature = "rand_distr", 193 | doc = " 194 | [`Distribution`]: rand::distr::Distribution" 195 | )] 196 | #![cfg_attr( 197 | not(feature = "rand_distr"), 198 | doc = " 199 | [`Distribution`]: https://docs.rs/rand/*/rand/distr/trait.Distribution.html" 200 | )] 201 | #![cfg_attr( 202 | feature = "arbitrary", 203 | doc = " 204 | [`Arbitrary`]: arbitrary::Arbitrary" 205 | )] 206 | #![cfg_attr( 207 | not(feature = "arbitrary"), 208 | doc = " 209 | [`Arbitrary`]: https://docs.rs/arbitrary/*/arbitrary/trait.Arbitrary.html" 210 | )] 211 | #![warn( 212 | missing_docs, 213 | missing_copy_implementations, 214 | trivial_numeric_casts, 215 | future_incompatible 216 | )] 217 | #![cfg_attr(not(target_arch = "spirv"), warn(missing_debug_implementations))] 218 | #![cfg_attr( 219 | all(feature = "nightly", target_arch = "loongarch64"), 220 | feature( 221 | stdarch_loongarch, 222 | stdarch_loongarch_feature_detection, 223 | loongarch_target_feature 224 | ) 225 | )] 226 | #![allow(clippy::verbose_bit_mask, clippy::cast_lossless, unexpected_cfgs)] 227 | #![cfg_attr(not(feature = "std"), no_std)] 228 | #![doc(html_root_url = "https://docs.rs/half/2.7.1")] 229 | #![doc(test(attr(deny(warnings), allow(unused))))] 230 | // Until updated to use newly stabilized `from_bits`, disable new lint warning about the transmutes 
231 | #![allow(unknown_lints, unnecessary_transmutes)] 232 | #![warn(unknown_lints)] 233 | 234 | #[cfg(feature = "alloc")] 235 | extern crate alloc; 236 | 237 | mod bfloat; 238 | mod binary16; 239 | mod leading_zeros; 240 | #[cfg(feature = "num-traits")] 241 | mod num_traits; 242 | 243 | #[cfg(not(target_arch = "spirv"))] 244 | pub mod slice; 245 | #[cfg(feature = "alloc")] 246 | pub mod vec; 247 | 248 | pub use bfloat::bf16; 249 | pub use binary16::f16; 250 | 251 | #[cfg(feature = "rand_distr")] 252 | mod rand_distr; 253 | 254 | /// A collection of the most used items and traits in this crate for easy importing. 255 | /// 256 | /// # Examples 257 | /// 258 | /// ```rust 259 | /// use half::prelude::*; 260 | /// ``` 261 | pub mod prelude { 262 | #[doc(no_inline)] 263 | pub use crate::{bf16, f16}; 264 | 265 | #[cfg(not(target_arch = "spirv"))] 266 | #[doc(no_inline)] 267 | pub use crate::slice::{HalfBitsSliceExt, HalfFloatSliceExt}; 268 | 269 | #[cfg(feature = "alloc")] 270 | #[doc(no_inline)] 271 | pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt}; 272 | } 273 | 274 | // Keep this module private to crate 275 | mod private { 276 | use crate::{bf16, f16}; 277 | use zerocopy::{FromBytes, Immutable, IntoBytes}; 278 | 279 | pub trait SealedHalf: FromBytes + IntoBytes + Immutable {} 280 | 281 | impl SealedHalf for f16 {} 282 | impl SealedHalf for bf16 {} 283 | } 284 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 4 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 5 | 6 | ## [Unreleased] 7 | 8 | ## [2.7.1] - 2025-10-13 9 | ### Fixed 10 | - `loongarch64` `lsx` hardware intrinsics for `f16` conversions now enabled only under 11 | `nightly` cargo feature, fixing compile errors on stable Rust. 
12 | 13 | ## [2.7.0] - 2025-10-08 14 | ### Changed 15 | - `zerocopy` is now a required dependency. The optional `zerocopy` crate feature is deprecated. 16 | This change is to ensure better code safety and prevent potential unsound behavior. 17 | - Git repository URL has changed due to GitHub user name change. Old URL is redirected. 18 | 19 | ### Added 20 | - New `num-traits` implementations: `Signed` for `f16` and `bf16`. By [@djsell]. 21 | - `loongarch64` `lsx` hardware intrinsic support for `f16` conversions. By [@heiher]. 22 | - Implemented `Weight` trait from `rand` crate for `f16` and `bf16` with `rand` optional cargo 23 | feature. By [@majian4work]. 24 | 25 | ### Fixed 26 | - `min` and `max` incorrectly propagate `NaN` values when `self` is `NaN`. Fixes [#126], 27 | by [@mgottscho]. 28 | - Suppressed warnings from new `unnecessary_transmutes` lint. 29 | 30 | ### Removed 31 | - `doc_auto_cfg` feature has been removed from docs.rs documentation due to removal of rust 32 | feature. 33 | 34 | ## [2.6.0] - 2025-04-08 35 | ### Changed 36 | - Fixed some incorrect minimum supported versions of dependencies that weren't caught due to 37 | improper `Cargo.lock`: 38 | * `num-traits` 0.2.14 -> 0.2.16 39 | * `zerocopy` 0.8.0 -> 0.8.23 40 | * `arbitrary` 1.3.2 -> 1.4.1 41 | 42 | ### Added 43 | - `f16` and `bf16` now implement `Immutable` and `KnownLayout` for `zerocopy` crate. By [@usamoi]. 44 | 45 | ## [2.5.0] - 2025-03-13 46 | ### Changed 47 | - Updated optional dependencies to latest major versions: 48 | * `zerocopy` 0.6 -> 0.8 49 | * `rand` 0.8 -> 0.9 50 | * `rand_distr` 0.4 -> 0.5 51 | * `rkyv` 0.7 -> 0.8 52 | * (dev) `criterion` 0.4 -> 0.5 53 | - Minimum supported Rust version has been changed to 1.81 due to above dependency updates. 54 | - Minor restructuring of included license file locations to be more consistent with crates ecosystem. 55 | 56 | ### Added 57 | - Added support for `arbitrary` crate. Fixes [#110]. By [@FL33TW00D]. 
58 | - New `num-traits` implementations: `FromBytes` and `ToBytes` for `f16` and `bf16`. By [@kpreid]. 59 | 60 | ### Fixed 61 | - Suppressed unexpected_cfg lint warnings on newer versions of stable Rust. 62 | - Resolved ambiguous rustdoc warnings due to new unstable `f16` primitive in compiler. 63 | 64 | ## [2.4.1] - 2024-04-06 65 | ### Fixed 66 | - Missing macro import causing build failure on `no_std` + `alloc` feature set. Fixes [#107]. 67 | - Clippy warning on nightly rust. 68 | 69 | ## [2.4.0] - 2024-02-25 70 | ### Added 71 | - Optional `rkyv` support. Fixes [#100], by [@comath]. 72 | - New `num-traits` implementations: `AsPrimitive` for `bf16` and `AsPrimitive` for 73 | `f16`, allowing lossy conversions between the two types. By [@charles-r-earp]. 74 | - `Cargo.lock` added to vcs as is now recommended for library crates. 75 | ### Fixed 76 | - Remove some unit NaN conversion sign tests due to non-deterministic hardware. Fixes [#103]. 77 | - Redundant import warnings on nightly Rust. 78 | 79 | ## [2.3.1] - 2023-06-24 80 | ### Fixed 81 | - Compile error on x86 (not x86_64) targets. Fixes [#93]. 82 | 83 | ## [2.3.0] - 2023-06-24 84 | ### Added 85 | - Support for Kani Rust Verifier. By [@cameron1024]. 86 | - Support for `rand_distr::Distribution` implementations behind `rand_distr` optional cargo 87 | feature. By [@coreylowman]. 88 | - Floating point formatting options in `Display` and `Debug` implementations. By [@eiz]. 89 | 90 | ### Changed 91 | - **Breaking Change** Minimum supported Rust version is now 1.70. 92 | - **Breaking Change** Minimum supported Rust version policy reverted to original policy of allowing 93 | minimum supported Rust version updates for minor releases instead of only major to avoid 94 | segmentation and allow optimizing hardware implementations without unnecessary major releases. 
95 | - Hardware intrinsics/assembly is finally available on stable Rust, including using hardware 96 | feature detection (`std` only), including: 97 | - AArch64 now uses FP16 hardware instructions for conversions and math operations when 98 | available. 99 | - x86/x86-64 now uses F16C hardware instructions for conversions (but no math operations) when 100 | available. Fixes [#54]. 101 | 102 | ### Deprecated 103 | - `use-intrinsics` cargo feature no longer used. Hardware support will now always be used whenever 104 | possible. A future version may output deprecation warnings if this feature is enabled. 105 | 106 | ### Fixed 107 | - Improve code generation of `leading_zeros` functions by inlining. By [@encounter]. 108 | - `Sum` implementation of `bf16` incorrectly performed product instead of sum. By [@wx-csy]. 109 | - Compile failed when `serde` cargo feature enabled but `std` not enabled. 110 | - Incorrect black boxing of benchmark tests. 111 | - Rustdoc cfg display on docs.rs not getting enabled. 112 | 113 | ## [2.2.1] - 2023-01-08 114 | ### Changed 115 | - Reduced unnecessary bounds checks for SIMD operations on slices. By [@Shnatsel]. 116 | - Further slice conversion optimizations for slices. Resolves [#66]. 117 | 118 | ## [2.2.0] - 2022-12-30 119 | ### Added 120 | - Add `serialize_as_f32` and `serialize_as_string` functions when `serde` cargo feature is enabled. 121 | They allow customizing the serialization by using 122 | `#[serde(serialize_with="f16::serialize_as_f32")]` attribute in serde derive macros. Closes [#60]. 123 | - Deserialize now supports deserializing from `f32`, `f64`, and string values in addition to its 124 | previous default deserialization. Closes [#60]. 125 | 126 | ### Changed 127 | - Add `#[inline]` on fallback functions, which improved conversion execution on non-nightly rust 128 | by up to 50%. By [@Shnatsel]. 129 | 130 | ## [2.1.0] - 2022-07-18 131 | ### Added 132 | - Add support for target_arch `spirv`. 
Some traits and functions are unavailable on this 133 | architecture. By [@charles-r-earp]. 134 | - Add `total_cmp` method to both float types. Closes [#55], by [@joseluis]. 135 | 136 | ## [2.0.0] - 2022-06-21 137 | ### Changed 138 | - **Breaking Change** Minimum supported Rust version is now 1.58. 139 | - **Breaking Change** `std` is now enabled as a default cargo feature. Disable default features to 140 | continue using `no_std` support. 141 | - Migrated to Rust Edition 2021. 142 | - Added `#[must_use]` attributes to functions, as appropriate. 143 | 144 | ### Fixed 145 | - Fix a soundness bug with `slice::as_ptr` not correctly using mutable reference. By [@Nilstrieb]. 146 | 147 | ### Added 148 | - Added `const` conversion methods to both `f16` and `bf16`. These methods never use hardware 149 | intrinsics, unlike the current conversion methods, which is why they are separated into new 150 | methods. The following `const` methods were added: 151 | - `from_f32_const` 152 | - `from_f64_const` 153 | - `to_f32_const` 154 | - `to_f64_const` 155 | - Added `Neg` trait support for borrowed values `&f16` and `&bf16`. By [@pthariensflame]. 156 | - Added `AsPrimitive` implementations from and to self, `usize`, and `isize`. By [@kali]. 157 | 158 | ### Removed 159 | - **Breaking Change** The deprecated `serialize` cargo feature has been removed. Use `serde` cargo 160 | feature instead. 161 | - **Breaking Change** The deprecated `consts` module has been removed. Use associated constants on 162 | `f16` instead. 163 | - **Breaking Change** The following deprecated functions have been removed: 164 | - `f16::as_bits` 165 | - `slice::from_bits_mut` 166 | - `slice::to_bits_mut` 167 | - `slice::from_bits` 168 | - `slice::to_bits` 169 | - `vec::from_bits` 170 | - `vec::to_bits` 171 | 172 | ## [1.8.2] - 2021-10-22 173 | ### Fixed 174 | - Remove cargo resolver=2 from manifest to resolve errors in older versions of Rust that still 175 | worked with 1.8.0. 
Going forward, MSRV increases will be major version increases. Fixes [#48]. 176 | 177 | ## [1.8.1] - 2021-10-21 - **Yanked** 178 | ### ***Yanked*** 179 | *Not recommended due to introducing compilation error in Rust versions that worked with 1.8.0.* 180 | ### Changed 181 | - Now uses cargo resolver version 2 to prevent dev-dependencies from enabling `std` feature on 182 | optional dependencies. 183 | 184 | ### Fixed 185 | - Fixed compile failure when `std` feature is not enabled and `num-traits` is enabled under new 186 | resolver. Now properly uses `libm` num-traits feature. 187 | 188 | ## [1.8.0] - 2021-10-13 189 | ### Changed 190 | - Now always implements `Add`, `Div`, `Mul`, `Neg`, `Rem`, and `Sub` traits. 191 | Previously, these were only implemented under the `num-traits` feature. Keep in mind they still 192 | convert to `f32` and back in the implementation. 193 | - Minimum supported Rust version is now 1.51. 194 | - Made crate package [REUSE compliant](https://reuse.software/). 195 | - Docs now use intra-doc links instead of manual (and hard to maintain) links. 196 | - The following methods on both `f16` and `bf16` are now `const`: 197 | - `to_le_bytes` 198 | - `to_be_bytes` 199 | - `to_ne_bytes` 200 | - `from_le_bytes` 201 | - `from_be_bytes` 202 | - `from_ne_bytes` 203 | - `is_normal` 204 | - `classify` 205 | - `signum` 206 | 207 | ### Added 208 | - Added optional implementations of `zerocopy` traits `AsBytes` and `FromBytes` 209 | under `zerocopy` cargo feature. By [@samcrow]. 210 | - Implemented the `core::iter::Product` and `core::iter::Sum` traits, with the same caveat as above 211 | about converting to `f32` and back under the hood. 212 | - Added new associated const `NEG_ONE` to both `f16` and `bf16`. 213 | - Added the following new methods on both `f16` and `bf16`: 214 | - `copysign` 215 | - `max` 216 | - `min` 217 | - `clamp` 218 | 219 | ### Fixed 220 | - Fixed a number of minor lints discovered due to improved CI. 
221 | 222 | ## [1.7.1] - 2021-01-17 223 | ### Fixed 224 | - Docs.rs now generates docs for `bytemuck` and `num-traits` optional features. 225 | 226 | ## [1.7.0] - 2021-01-17 227 | ### Added 228 | - Added optional implementations of `bytemuck` traits `Zeroable` and `Pod` under `bytemuck` cargo 229 | feature. By [@charles-r-earp]. 230 | - Added optional implementations of `num-traits` traits `ToPrimitive` and `FromPrimitive` under 231 | `num-traits` cargo feature. By [@charles-r-earp]. 232 | - Added implementations of `Binary`, `Octal`, `LowerHex`, and `UpperHex` string format traits to 233 | format raw `f16`/`bf16` bytes to string. 234 | 235 | ### Changed 236 | - `Debug` trait implementation now formats `f16`/`bf16` as float instead of raw bytes hex. Use newly 237 | implemented formatting traits to format in hex instead of `Debug`. Fixes [#37]. 238 | 239 | 240 | ## [1.6.0] - 2020-05-09 241 | ### Added 242 | - Added `LOG2_10` and `LOG10_2` constants to both `f16` and `bf16`, which were added to `f32` and 243 | `f64` in the standard library in 1.43.0. By [@tspiteri]. 244 | - Added `to_le/be/ne_bytes` and `from_le/be/ne_bytes` to both `f16` and `bf16`, which were added to 245 | the standard library in 1.40.0. By [@bzm3r]. 246 | 247 | ## [1.5.0] - 2020-03-03 248 | ### Added 249 | - Added the `alloc` feature to support the `alloc` crate in `no_std` environments. By [@zserik]. The 250 | `vec` module is now available with either `alloc` or `std` feature. 251 | 252 | ## [1.4.1] - 2020-02-10 253 | ### Fixed 254 | - Added `#[repr(transparent)]` to `f16`/`bf16` to remove undefined behavior. By [@jfrimmel]. 255 | 256 | ## [1.4.0] - 2019-10-13 257 | ### Added 258 | - Added a `bf16` type implementing the alternative 259 | [`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) 16-bit floating point 260 | format. By [@tspiteri]. 
261 | - `f16::from_bits`, `f16::to_bits`, `f16::is_nan`, `f16::is_infinite`, `f16::is_finite`, 262 | `f16::is_sign_positive`, and `f16::is_sign_negative` are now `const` fns. 263 | - `slice::HalfBitsSliceExt` and `slice::HalfFloatSliceExt` extension traits have been added for 264 | performing efficient reinterpret casts and conversions of slices to and from `[f16]` and 265 | `[bf16]`. These traits will use hardware SIMD conversion instructions when available and the 266 | `use-intrinsics` cargo feature is enabled. 267 | - `vec::HalfBitsVecExt` and `vec::HalfFloatVecExt` extension traits have been added for 268 | performing efficient reinterpret casts to and from `Vec<f16>` and `Vec<bf16>`. These traits 269 | are only available with the `std` cargo feature. 270 | - `prelude` has been added, for easy importing of most common functionality. Currently the 271 | prelude imports `f16`, `bf16`, and the new slice and vec extension traits. 272 | - New associated constants on `f16` type to replace deprecated `consts` module. 273 | 274 | ### Fixed 275 | - Software conversion (when not using `use-intrinsics` feature) now matches hardware rounding 276 | by rounding to nearest, ties to even. Fixes [#24], by [@tspiteri]. 277 | - NaN value conversions now behave like `f32` to `f64` conversions, retaining sign. Fixes [#23], 278 | by [@tspiteri]. 279 | 280 | ### Changed 281 | - Minimum rustc version bumped to 1.32. 282 | - Runtime target host feature detection is now used if both `std` and `use-intrinsics` features are 283 | enabled and the compile target host does not support required features. 284 | - When `use-intrinsics` feature is enabled, will now always compile and run without error correctly 285 | regardless of compile target options. 286 | 287 | ### Deprecated 288 | - `consts` module and all its constants have been deprecated; use the associated constants on `f16` 289 | instead. 290 | - `slice::from_bits` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast` instead. 
291 | - `slice::from_bits_mut` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast_mut` 292 | instead. 293 | - `slice::to_bits` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast` instead. 294 | - `slice::to_bits_mut` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast_mut` 295 | instead. 296 | - `vec::from_bits` has been deprecated; use `vec::HalfBitsVecExt::reinterpret_into` instead. 297 | - `vec::to_bits` has been deprecated; use `vec::HalfFloatVecExt::reinterpret_into` instead. 298 | 299 | ## [1.3.1] - 2019-10-04 300 | ### Fixed 301 | - Corrected values of constants `EPSILON`, `MAX_10_EXP`, `MAX_EXP`, `MIN_10_EXP`, and `MIN_EXP` 302 | in `consts` module, as well as setting `consts::NAN` to match value of `f32::NAN` converted to 303 | `f16`. By [@tspiteri]. 304 | 305 | ## [1.3.0] - 2018-10-02 306 | ### Added 307 | - `slice::from_bits_mut` and `slice::to_bits_mut` for conversion between mutable `u16` and `f16` 308 | slices. Fixes [#16], by [@johannesvollmer]. 309 | 310 | ## [1.2.0] - 2018-09-03 311 | ### Added 312 | - `slice` and optional `vec` (only included with `std` feature) modules for conversions between 313 | `u16` and `f16` buffers. Fixes [#14], by [@johannesvollmer]. 314 | - `to_bits` added to replace `as_bits`. Fixes [#12], by [@tspiteri]. 315 | ### Fixed 316 | - `serde` optional dependency no longer uses its default `std` feature. 317 | ### Deprecated 318 | - `as_bits` has been deprecated; use `to_bits` instead. 319 | - `serialize` cargo feature is deprecated; use `serde` instead. 320 | 321 | ## [1.1.2] - 2018-07-12 322 | ### Fixed 323 | - Fixed compilation error in 1.1.1 on rustc < 1.27, now compiles again on rustc >= 1.10. Fixes 324 | [#11]. 325 | 326 | ## [1.1.1] - 2018-06-24 - **Yanked** 327 | ### ***Yanked*** 328 | *Not recommended due to introducing compilation error on rustc versions prior to 1.27.* 329 | ### Fixed 330 | - Fix subnormal float conversions when `use-intrinsics` is not enabled. 
By [@Moongoodboy-K]. 331 | 332 | ## [1.1.0] - 2018-03-17 333 | ### Added 334 | - Made `to_f32` and `to_f64` public. Fixes [#7], by [@PSeitz]. 335 | 336 | ## [1.0.2] - 2018-01-12 337 | ### Changed 338 | - Update behavior of `is_sign_positive` and `is_sign_negative` to match the IEEE754 conforming 339 | behavior of the standard library since Rust 1.20.0. Fixes [#3], by [@tspiteri]. 340 | - Small optimization on `is_nan` and `is_infinite` from [@tspiteri]. 341 | ### Fixed 342 | - Fix comparisons of +0 to -0 and comparisons involving negative numbers. Fixes [#2], by 343 | [@tspiteri]. 344 | - Fix loss of sign when converting `f32` and `f64` to `f16`, and case where `f64` NaN could be 345 | converted to `f16` infinity instead of NaN. Fixes [#5], by [@tspiteri]. 346 | 347 | ## [1.0.1] - 2017-08-30 348 | ### Added 349 | - More README documentation. 350 | - Badges and categories in crate metadata. 351 | ### Changed 352 | - `serde` dependency updated to 1.0 stable. 353 | - Writing changelog manually. 354 | 355 | ## [1.0.0] - 2017-02-03 356 | ### Added 357 | - Update to `serde` 0.9 and stable Rust 1.15 for `serialize` feature. 358 | 359 | ## [0.1.1] - 2017-01-08 360 | ### Added 361 | - Add `serde` support under new `serialize` feature. 362 | ### Changed 363 | - Use `no_std` for crate by default. 364 | 365 | ## 0.1.0 - 2016-03-17 366 | ### Added 367 | - Initial release of `f16` type.
368 | 369 | [#2]: https://github.com/starkat99/half-rs/issues/2 370 | [#3]: https://github.com/starkat99/half-rs/issues/3 371 | [#5]: https://github.com/starkat99/half-rs/issues/5 372 | [#7]: https://github.com/starkat99/half-rs/issues/7 373 | [#11]: https://github.com/starkat99/half-rs/issues/11 374 | [#12]: https://github.com/starkat99/half-rs/issues/12 375 | [#14]: https://github.com/starkat99/half-rs/issues/14 376 | [#16]: https://github.com/starkat99/half-rs/issues/16 377 | [#23]: https://github.com/starkat99/half-rs/issues/23 378 | [#24]: https://github.com/starkat99/half-rs/issues/24 379 | [#37]: https://github.com/starkat99/half-rs/issues/37 380 | [#48]: https://github.com/starkat99/half-rs/issues/48 381 | [#55]: https://github.com/starkat99/half-rs/issues/55 382 | [#60]: https://github.com/starkat99/half-rs/issues/60 383 | [#66]: https://github.com/starkat99/half-rs/issues/66 384 | [#54]: https://github.com/starkat99/half-rs/issues/54 385 | [#93]: https://github.com/starkat99/half-rs/issues/93 386 | [#100]: https://github.com/starkat99/half-rs/issues/100 387 | [#103]: https://github.com/starkat99/half-rs/issues/103 388 | [#107]: https://github.com/starkat99/half-rs/issues/107 389 | [#110]: https://github.com/starkat99/half-rs/issues/110 390 | [#126]: https://github.com/starkat99/half-rs/issues/126 391 | 392 | [@tspiteri]: https://github.com/tspiteri 393 | [@PSeitz]: https://github.com/PSeitz 394 | [@Moongoodboy-K]: https://github.com/Moongoodboy-K 395 | [@johannesvollmer]: https://github.com/johannesvollmer 396 | [@jfrimmel]: https://github.com/jfrimmel 397 | [@zserik]: https://github.com/zserik 398 | [@bzm3r]: https://github.com/bzm3r 399 | [@charles-r-earp]: https://github.com/charles-r-earp 400 | [@samcrow]: https://github.com/samcrow 401 | [@pthariensflame]: https://github.com/pthariensflame 402 | [@kali]: https://github.com/kali 403 | [@Nilstrieb]: https://github.com/Nilstrieb 404 | [@joseluis]: https://github.com/joseluis 405 | [@Shnatsel]:
https://github.com/Shnatsel 406 | [@cameron1024]: https://github.com/cameron1024 407 | [@encounter]: https://github.com/encounter 408 | [@coreylowman]: https://github.com/coreylowman 409 | [@wx-csy]: https://github.com/wx-csy 410 | [@eiz]: https://github.com/eiz 411 | [@comath]: https://github.com/comath 412 | [@FL33TW00D]: https://github.com/FL33TW00D 413 | [@kpreid]: https://github.com/kpreid 414 | [@usamoi]: https://github.com/usamoi 415 | [@mgottscho]: https://github.com/mgottscho 416 | [@djsell]: https://github.com/djsell 417 | [@heiher]: https://github.com/heiher 418 | [@majian4work]: https://github.com/majian4work 419 | 420 | 421 | [Unreleased]: https://github.com/starkat99/half-rs/compare/v2.7.1...HEAD 422 | [2.7.1]: https://github.com/starkat99/half-rs/compare/v2.7.0...v2.7.1 423 | [2.7.0]: https://github.com/starkat99/half-rs/compare/v2.6.0...v2.7.0 424 | [2.6.0]: https://github.com/starkat99/half-rs/compare/v2.5.0...v2.6.0 425 | [2.5.0]: https://github.com/starkat99/half-rs/compare/v2.4.1...v2.5.0 426 | [2.4.1]: https://github.com/starkat99/half-rs/compare/v2.4.0...v2.4.1 427 | [2.4.0]: https://github.com/starkat99/half-rs/compare/v2.3.1...v2.4.0 428 | [2.3.1]: https://github.com/starkat99/half-rs/compare/v2.3.0...v2.3.1 429 | [2.3.0]: https://github.com/starkat99/half-rs/compare/v2.2.1...v2.3.0 430 | [2.2.1]: https://github.com/starkat99/half-rs/compare/v2.2.0...v2.2.1 431 | [2.2.0]: https://github.com/starkat99/half-rs/compare/v2.1.0...v2.2.0 432 | [2.1.0]: https://github.com/starkat99/half-rs/compare/v2.0.0...v2.1.0 433 | [2.0.0]: https://github.com/starkat99/half-rs/compare/v1.8.2...v2.0.0 434 | [1.8.2]: https://github.com/starkat99/half-rs/compare/v1.8.1...v1.8.2 435 | [1.8.1]: https://github.com/starkat99/half-rs/compare/v1.8.0...v1.8.1 436 | [1.8.0]: https://github.com/starkat99/half-rs/compare/v1.7.1...v1.8.0 437 | [1.7.1]: https://github.com/starkat99/half-rs/compare/v1.7.0...v1.7.1 438 | [1.7.0]: 
https://github.com/starkat99/half-rs/compare/v1.6.0...v1.7.0 439 | [1.6.0]: https://github.com/starkat99/half-rs/compare/v1.5.0...v1.6.0 440 | [1.5.0]: https://github.com/starkat99/half-rs/compare/v1.4.1...v1.5.0 441 | [1.4.1]: https://github.com/starkat99/half-rs/compare/v1.4.0...v1.4.1 442 | [1.4.0]: https://github.com/starkat99/half-rs/compare/v1.3.1...v1.4.0 443 | [1.3.1]: https://github.com/starkat99/half-rs/compare/v1.3.0...v1.3.1 444 | [1.3.0]: https://github.com/starkat99/half-rs/compare/v1.2.0...v1.3.0 445 | [1.2.0]: https://github.com/starkat99/half-rs/compare/v1.1.2...v1.2.0 446 | [1.1.2]: https://github.com/starkat99/half-rs/compare/v1.1.1...v1.1.2 447 | [1.1.1]: https://github.com/starkat99/half-rs/compare/v1.1.0...v1.1.1 448 | [1.1.0]: https://github.com/starkat99/half-rs/compare/v1.0.2...v1.1.0 449 | [1.0.2]: https://github.com/starkat99/half-rs/compare/v1.0.1...v1.0.2 450 | [1.0.1]: https://github.com/starkat99/half-rs/compare/v1.0.0...v1.0.1 451 | [1.0.0]: https://github.com/starkat99/half-rs/compare/v0.1.1...v1.0.0 452 | [0.1.1]: https://github.com/starkat99/half-rs/compare/v0.1.0...v0.1.1 453 | -------------------------------------------------------------------------------- /src/slice.rs: -------------------------------------------------------------------------------- 1 | //! Contains utility functions and traits to convert between slices of [`u16`] bits and [`struct@f16`] or 2 | //! [`struct@bf16`] numbers. 3 | //! 4 | //! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices, 5 | //! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]` 6 | //! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of 7 | //! larger buffers of floating point values, and are automatically included in the 8 | //! [`prelude`][crate::prelude] module. 
9 | 10 | use crate::{bf16, binary16::arch, f16}; 11 | #[cfg(feature = "alloc")] 12 | #[allow(unused_imports)] 13 | use alloc::{vec, vec::Vec}; 14 | use zerocopy::{transmute_mut, transmute_ref}; 15 | 16 | /// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations. 17 | /// 18 | /// This trait is sealed and cannot be implemented outside of this crate. 19 | pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice { 20 | /// Reinterprets a slice of [`struct@f16`] or [`struct@bf16`] numbers as a slice of [`u16`] bits. 21 | /// 22 | /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory 23 | /// location as `self`. 24 | /// 25 | /// # Examples 26 | /// 27 | /// ```rust 28 | /// # use half::prelude::*; 29 | /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; 30 | /// let int_buffer = float_buffer.reinterpret_cast(); 31 | /// 32 | /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]); 33 | /// ``` 34 | #[must_use] 35 | fn reinterpret_cast(&self) -> &[u16]; 36 | 37 | /// Reinterprets a mutable slice of [`struct@f16`] or [`struct@bf16`] numbers as a mutable slice of [`u16`]. 38 | /// bits 39 | /// 40 | /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, 41 | /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed. 
42 | /// 43 | /// # Examples 44 | /// 45 | /// ```rust 46 | /// # use half::prelude::*; 47 | /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; 48 | /// 49 | /// { 50 | /// let int_buffer = float_buffer.reinterpret_cast_mut(); 51 | /// 52 | /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); 53 | /// 54 | /// // Mutating the u16 slice will mutating the original 55 | /// int_buffer[0] = 0; 56 | /// } 57 | /// 58 | /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error. 59 | /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]); 60 | /// ``` 61 | #[must_use] 62 | fn reinterpret_cast_mut(&mut self) -> &mut [u16]; 63 | 64 | /// Converts all of the elements of a `[f32]` slice into [`struct@f16`] or [`struct@bf16`] values in `self`. 65 | /// 66 | /// The length of `src` must be the same as `self`. 67 | /// 68 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 69 | /// efficient than converting individual elements on some hardware that supports SIMD 70 | /// conversions. See [crate documentation](crate) for more information on hardware conversion 71 | /// support. 72 | /// 73 | /// # Panics 74 | /// 75 | /// This function will panic if the two slices have different lengths. 
76 | /// 77 | /// # Examples 78 | /// ```rust 79 | /// # use half::prelude::*; 80 | /// // Initialize an empty buffer 81 | /// let mut buffer = [0u16; 4]; 82 | /// let buffer = buffer.reinterpret_cast_mut::(); 83 | /// 84 | /// let float_values = [1., 2., 3., 4.]; 85 | /// 86 | /// // Now convert 87 | /// buffer.convert_from_f32_slice(&float_values); 88 | /// 89 | /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); 90 | /// ``` 91 | fn convert_from_f32_slice(&mut self, src: &[f32]); 92 | 93 | /// Converts all of the elements of a `[f64]` slice into [`struct@f16`] or [`struct@bf16`] values in `self`. 94 | /// 95 | /// The length of `src` must be the same as `self`. 96 | /// 97 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 98 | /// efficient than converting individual elements on some hardware that supports SIMD 99 | /// conversions. See [crate documentation](crate) for more information on hardware conversion 100 | /// support. 101 | /// 102 | /// # Panics 103 | /// 104 | /// This function will panic if the two slices have different lengths. 105 | /// 106 | /// # Examples 107 | /// ```rust 108 | /// # use half::prelude::*; 109 | /// // Initialize an empty buffer 110 | /// let mut buffer = [0u16; 4]; 111 | /// let buffer = buffer.reinterpret_cast_mut::(); 112 | /// 113 | /// let float_values = [1., 2., 3., 4.]; 114 | /// 115 | /// // Now convert 116 | /// buffer.convert_from_f64_slice(&float_values); 117 | /// 118 | /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); 119 | /// ``` 120 | fn convert_from_f64_slice(&mut self, src: &[f64]); 121 | 122 | /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f32`] values in `dst`. 123 | /// 124 | /// The length of `src` must be the same as `self`. 
125 | /// 126 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 127 | /// efficient than converting individual elements on some hardware that supports SIMD 128 | /// conversions. See [crate documentation](crate) for more information on hardware conversion 129 | /// support. 130 | /// 131 | /// # Panics 132 | /// 133 | /// This function will panic if the two slices have different lengths. 134 | /// 135 | /// # Examples 136 | /// ```rust 137 | /// # use half::prelude::*; 138 | /// // Initialize an empty buffer 139 | /// let mut buffer = [0f32; 4]; 140 | /// 141 | /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; 142 | /// 143 | /// // Now convert 144 | /// half_values.convert_to_f32_slice(&mut buffer); 145 | /// 146 | /// assert_eq!(buffer, [1., 2., 3., 4.]); 147 | /// ``` 148 | fn convert_to_f32_slice(&self, dst: &mut [f32]); 149 | 150 | /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f64`] values in `dst`. 151 | /// 152 | /// The length of `src` must be the same as `self`. 153 | /// 154 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 155 | /// efficient than converting individual elements on some hardware that supports SIMD 156 | /// conversions. See [crate documentation](crate) for more information on hardware conversion 157 | /// support. 158 | /// 159 | /// # Panics 160 | /// 161 | /// This function will panic if the two slices have different lengths. 
162 | /// 163 | /// # Examples 164 | /// ```rust 165 | /// # use half::prelude::*; 166 | /// // Initialize an empty buffer 167 | /// let mut buffer = [0f64; 4]; 168 | /// 169 | /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; 170 | /// 171 | /// // Now convert 172 | /// half_values.convert_to_f64_slice(&mut buffer); 173 | /// 174 | /// assert_eq!(buffer, [1., 2., 3., 4.]); 175 | /// ``` 176 | fn convert_to_f64_slice(&self, dst: &mut [f64]); 177 | 178 | // Because trait is sealed, we can get away with different interfaces between features. 179 | 180 | /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f32`] values in a new 181 | /// vector 182 | /// 183 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 184 | /// efficient than converting individual elements on some hardware that supports SIMD 185 | /// conversions. See [crate documentation](crate) for more information on hardware conversion 186 | /// support. 187 | /// 188 | /// This method is only available with the `std` or `alloc` feature. 189 | /// 190 | /// # Examples 191 | /// ```rust 192 | /// # use half::prelude::*; 193 | /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; 194 | /// let vec = half_values.to_f32_vec(); 195 | /// 196 | /// assert_eq!(vec, vec![1., 2., 3., 4.]); 197 | /// ``` 198 | #[cfg(any(feature = "alloc", feature = "std"))] 199 | #[must_use] 200 | fn to_f32_vec(&self) -> Vec; 201 | 202 | /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f64`] values in a new 203 | /// vector. 204 | /// 205 | /// The conversion operation is vectorized over the slice, meaning the conversion may be more 206 | /// efficient than converting individual elements on some hardware that supports SIMD 207 | /// conversions. 
See [crate documentation](crate) for more information on hardware conversion 208 | /// support. 209 | /// 210 | /// This method is only available with the `std` or `alloc` feature. 211 | /// 212 | /// # Examples 213 | /// ```rust 214 | /// # use half::prelude::*; 215 | /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; 216 | /// let vec = half_values.to_f64_vec(); 217 | /// 218 | /// assert_eq!(vec, vec![1., 2., 3., 4.]); 219 | /// ``` 220 | #[cfg(feature = "alloc")] 221 | #[must_use] 222 | fn to_f64_vec(&self) -> Vec; 223 | } 224 | 225 | /// Extensions to `[u16]` slices to support reinterpret operations. 226 | /// 227 | /// This trait is sealed and cannot be implemented outside of this crate. 228 | pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice { 229 | /// Reinterprets a slice of [`u16`] bits as a slice of [`struct@f16`] or [`struct@bf16`] numbers. 230 | /// 231 | /// `H` is the type to cast to, and must be either the [`struct@f16`] or [`struct@bf16`] type. 232 | /// 233 | /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory 234 | /// location as `self`. 235 | /// 236 | /// # Examples 237 | /// 238 | /// ```rust 239 | /// # use half::prelude::*; 240 | /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; 241 | /// let float_buffer: &[f16] = int_buffer.reinterpret_cast(); 242 | /// 243 | /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); 244 | /// 245 | /// // You may have to specify the cast type directly if the compiler can't infer the type. 246 | /// // The following is also valid in Rust. 
247 | /// let typed_buffer = int_buffer.reinterpret_cast::(); 248 | /// ``` 249 | #[must_use] 250 | fn reinterpret_cast(&self) -> &[H] 251 | where 252 | H: crate::private::SealedHalf; 253 | 254 | /// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`struct@f16`] or [`struct@bf16`] 255 | /// numbers. 256 | /// 257 | /// `H` is the type to cast to, and must be either the [`struct@f16`] or [`struct@bf16`] type. 258 | /// 259 | /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, 260 | /// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed. 261 | /// 262 | /// # Examples 263 | /// 264 | /// ```rust 265 | /// # use half::prelude::*; 266 | /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; 267 | /// 268 | /// { 269 | /// let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut(); 270 | /// 271 | /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); 272 | /// 273 | /// // Mutating the f16 slice will mutating the original 274 | /// float_buffer[0] = f16::from_f32(0.); 275 | /// } 276 | /// 277 | /// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error. 278 | /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); 279 | /// 280 | /// // You may have to specify the cast type directly if the compiler can't infer the type. 281 | /// // The following is also valid in Rust. 
282 | /// let typed_buffer = int_buffer.reinterpret_cast_mut::(); 283 | /// ``` 284 | #[must_use] 285 | fn reinterpret_cast_mut(&mut self) -> &mut [H] 286 | where 287 | H: crate::private::SealedHalf; 288 | } 289 | 290 | mod private { 291 | use crate::{bf16, f16}; 292 | 293 | pub trait SealedHalfFloatSlice {} 294 | impl SealedHalfFloatSlice for [f16] {} 295 | impl SealedHalfFloatSlice for [bf16] {} 296 | 297 | pub trait SealedHalfBitsSlice {} 298 | impl SealedHalfBitsSlice for [u16] {} 299 | } 300 | 301 | impl HalfFloatSliceExt for [f16] { 302 | #[inline] 303 | fn reinterpret_cast(&self) -> &[u16] { 304 | transmute_ref!(self) 305 | } 306 | 307 | #[inline] 308 | fn reinterpret_cast_mut(&mut self) -> &mut [u16] { 309 | transmute_mut!(self) 310 | } 311 | 312 | #[inline] 313 | fn convert_from_f32_slice(&mut self, src: &[f32]) { 314 | assert_eq!( 315 | self.len(), 316 | src.len(), 317 | "destination and source slices have different lengths" 318 | ); 319 | 320 | arch::f32_to_f16_slice(src, self.reinterpret_cast_mut()) 321 | } 322 | 323 | #[inline] 324 | fn convert_from_f64_slice(&mut self, src: &[f64]) { 325 | assert_eq!( 326 | self.len(), 327 | src.len(), 328 | "destination and source slices have different lengths" 329 | ); 330 | 331 | arch::f64_to_f16_slice(src, self.reinterpret_cast_mut()) 332 | } 333 | 334 | #[inline] 335 | fn convert_to_f32_slice(&self, dst: &mut [f32]) { 336 | assert_eq!( 337 | self.len(), 338 | dst.len(), 339 | "destination and source slices have different lengths" 340 | ); 341 | 342 | arch::f16_to_f32_slice(self.reinterpret_cast(), dst) 343 | } 344 | 345 | #[inline] 346 | fn convert_to_f64_slice(&self, dst: &mut [f64]) { 347 | assert_eq!( 348 | self.len(), 349 | dst.len(), 350 | "destination and source slices have different lengths" 351 | ); 352 | 353 | arch::f16_to_f64_slice(self.reinterpret_cast(), dst) 354 | } 355 | 356 | #[cfg(any(feature = "alloc", feature = "std"))] 357 | #[inline] 358 | #[allow(clippy::uninit_vec)] 359 | fn 
to_f32_vec(&self) -> Vec { 360 | let mut vec = vec![0f32; self.len()]; 361 | self.convert_to_f32_slice(&mut vec); 362 | vec 363 | } 364 | 365 | #[cfg(any(feature = "alloc", feature = "std"))] 366 | #[inline] 367 | #[allow(clippy::uninit_vec)] 368 | fn to_f64_vec(&self) -> Vec { 369 | let mut vec = vec![0f64; self.len()]; 370 | self.convert_to_f64_slice(&mut vec); 371 | vec 372 | } 373 | } 374 | 375 | impl HalfFloatSliceExt for [bf16] { 376 | #[inline] 377 | fn reinterpret_cast(&self) -> &[u16] { 378 | transmute_ref!(self) 379 | } 380 | 381 | #[inline] 382 | fn reinterpret_cast_mut(&mut self) -> &mut [u16] { 383 | transmute_mut!(self) 384 | } 385 | 386 | #[inline] 387 | fn convert_from_f32_slice(&mut self, src: &[f32]) { 388 | assert_eq!( 389 | self.len(), 390 | src.len(), 391 | "destination and source slices have different lengths" 392 | ); 393 | 394 | // Just use regular loop here until there's any bf16 SIMD support. 395 | for (i, f) in src.iter().enumerate() { 396 | self[i] = bf16::from_f32(*f); 397 | } 398 | } 399 | 400 | #[inline] 401 | fn convert_from_f64_slice(&mut self, src: &[f64]) { 402 | assert_eq!( 403 | self.len(), 404 | src.len(), 405 | "destination and source slices have different lengths" 406 | ); 407 | 408 | // Just use regular loop here until there's any bf16 SIMD support. 409 | for (i, f) in src.iter().enumerate() { 410 | self[i] = bf16::from_f64(*f); 411 | } 412 | } 413 | 414 | #[inline] 415 | fn convert_to_f32_slice(&self, dst: &mut [f32]) { 416 | assert_eq!( 417 | self.len(), 418 | dst.len(), 419 | "destination and source slices have different lengths" 420 | ); 421 | 422 | // Just use regular loop here until there's any bf16 SIMD support. 
423 | for (i, f) in self.iter().enumerate() { 424 | dst[i] = f.to_f32(); 425 | } 426 | } 427 | 428 | #[inline] 429 | fn convert_to_f64_slice(&self, dst: &mut [f64]) { 430 | assert_eq!( 431 | self.len(), 432 | dst.len(), 433 | "destination and source slices have different lengths" 434 | ); 435 | 436 | // Just use regular loop here until there's any bf16 SIMD support. 437 | for (i, f) in self.iter().enumerate() { 438 | dst[i] = f.to_f64(); 439 | } 440 | } 441 | 442 | #[cfg(any(feature = "alloc", feature = "std"))] 443 | #[inline] 444 | #[allow(clippy::uninit_vec)] 445 | fn to_f32_vec(&self) -> Vec { 446 | let mut vec = vec![0f32; self.len()]; 447 | self.convert_to_f32_slice(&mut vec); 448 | vec 449 | } 450 | 451 | #[cfg(any(feature = "alloc", feature = "std"))] 452 | #[inline] 453 | #[allow(clippy::uninit_vec)] 454 | fn to_f64_vec(&self) -> Vec { 455 | let mut vec = vec![0f64; self.len()]; 456 | self.convert_to_f64_slice(&mut vec); 457 | vec 458 | } 459 | } 460 | 461 | impl HalfBitsSliceExt for [u16] { 462 | // Since we sealed all the traits involved, these are safe. 
463 | #[inline] 464 | fn reinterpret_cast(&self) -> &[H] 465 | where 466 | H: crate::private::SealedHalf, 467 | { 468 | transmute_ref!(self) 469 | } 470 | 471 | #[inline] 472 | fn reinterpret_cast_mut(&mut self) -> &mut [H] 473 | where 474 | H: crate::private::SealedHalf, 475 | { 476 | transmute_mut!(self) 477 | } 478 | } 479 | 480 | #[allow(clippy::float_cmp)] 481 | #[cfg(test)] 482 | mod test { 483 | use super::{HalfBitsSliceExt, HalfFloatSliceExt}; 484 | use crate::{bf16, f16}; 485 | 486 | #[test] 487 | fn test_slice_conversions_f16() { 488 | let bits = &[ 489 | f16::E.to_bits(), 490 | f16::PI.to_bits(), 491 | f16::EPSILON.to_bits(), 492 | f16::FRAC_1_SQRT_2.to_bits(), 493 | ]; 494 | let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; 495 | 496 | // Convert from bits to numbers 497 | let from_bits = bits.reinterpret_cast::(); 498 | assert_eq!(from_bits, numbers); 499 | 500 | // Convert from numbers back to bits 501 | let to_bits = from_bits.reinterpret_cast(); 502 | assert_eq!(to_bits, bits); 503 | } 504 | 505 | #[test] 506 | fn test_mutablility_f16() { 507 | let mut bits_array = [f16::PI.to_bits()]; 508 | let bits = &mut bits_array[..]; 509 | 510 | { 511 | // would not compile without these braces 512 | let numbers = bits.reinterpret_cast_mut(); 513 | numbers[0] = f16::E; 514 | } 515 | 516 | assert_eq!(bits, &[f16::E.to_bits()]); 517 | 518 | bits[0] = f16::LN_2.to_bits(); 519 | assert_eq!(bits, &[f16::LN_2.to_bits()]); 520 | } 521 | 522 | #[test] 523 | fn test_slice_conversions_bf16() { 524 | let bits = &[ 525 | bf16::E.to_bits(), 526 | bf16::PI.to_bits(), 527 | bf16::EPSILON.to_bits(), 528 | bf16::FRAC_1_SQRT_2.to_bits(), 529 | ]; 530 | let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; 531 | 532 | // Convert from bits to numbers 533 | let from_bits = bits.reinterpret_cast::(); 534 | assert_eq!(from_bits, numbers); 535 | 536 | // Convert from numbers back to bits 537 | let to_bits = from_bits.reinterpret_cast(); 538 | 
assert_eq!(to_bits, bits); 539 | } 540 | 541 | #[test] 542 | fn test_mutablility_bf16() { 543 | let mut bits_array = [bf16::PI.to_bits()]; 544 | let bits = &mut bits_array[..]; 545 | 546 | { 547 | // would not compile without these braces 548 | let numbers = bits.reinterpret_cast_mut(); 549 | numbers[0] = bf16::E; 550 | } 551 | 552 | assert_eq!(bits, &[bf16::E.to_bits()]); 553 | 554 | bits[0] = bf16::LN_2.to_bits(); 555 | assert_eq!(bits, &[bf16::LN_2.to_bits()]); 556 | } 557 | 558 | #[test] 559 | fn slice_convert_f16_f32() { 560 | // Exact chunks 561 | let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; 562 | let vf16 = [ 563 | f16::from_f32(1.), 564 | f16::from_f32(2.), 565 | f16::from_f32(3.), 566 | f16::from_f32(4.), 567 | f16::from_f32(5.), 568 | f16::from_f32(6.), 569 | f16::from_f32(7.), 570 | f16::from_f32(8.), 571 | ]; 572 | let mut buf32 = vf32; 573 | let mut buf16 = vf16; 574 | 575 | vf16.convert_to_f32_slice(&mut buf32); 576 | assert_eq!(&vf32, &buf32); 577 | 578 | buf16.convert_from_f32_slice(&vf32); 579 | assert_eq!(&vf16, &buf16); 580 | 581 | // Partial with chunks 582 | let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; 583 | let vf16 = [ 584 | f16::from_f32(1.), 585 | f16::from_f32(2.), 586 | f16::from_f32(3.), 587 | f16::from_f32(4.), 588 | f16::from_f32(5.), 589 | f16::from_f32(6.), 590 | f16::from_f32(7.), 591 | f16::from_f32(8.), 592 | f16::from_f32(9.), 593 | ]; 594 | let mut buf32 = vf32; 595 | let mut buf16 = vf16; 596 | 597 | vf16.convert_to_f32_slice(&mut buf32); 598 | assert_eq!(&vf32, &buf32); 599 | 600 | buf16.convert_from_f32_slice(&vf32); 601 | assert_eq!(&vf16, &buf16); 602 | 603 | // Partial with chunks 604 | let vf32 = [1., 2.]; 605 | let vf16 = [f16::from_f32(1.), f16::from_f32(2.)]; 606 | let mut buf32 = vf32; 607 | let mut buf16 = vf16; 608 | 609 | vf16.convert_to_f32_slice(&mut buf32); 610 | assert_eq!(&vf32, &buf32); 611 | 612 | buf16.convert_from_f32_slice(&vf32); 613 | assert_eq!(&vf16, &buf16); 614 | } 615 | 616 | #[test] 617 | fn 
slice_convert_bf16_f32() { 618 | // Exact chunks 619 | let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; 620 | let vf16 = [ 621 | bf16::from_f32(1.), 622 | bf16::from_f32(2.), 623 | bf16::from_f32(3.), 624 | bf16::from_f32(4.), 625 | bf16::from_f32(5.), 626 | bf16::from_f32(6.), 627 | bf16::from_f32(7.), 628 | bf16::from_f32(8.), 629 | ]; 630 | let mut buf32 = vf32; 631 | let mut buf16 = vf16; 632 | 633 | vf16.convert_to_f32_slice(&mut buf32); 634 | assert_eq!(&vf32, &buf32); 635 | 636 | buf16.convert_from_f32_slice(&vf32); 637 | assert_eq!(&vf16, &buf16); 638 | 639 | // Partial with chunks 640 | let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; 641 | let vf16 = [ 642 | bf16::from_f32(1.), 643 | bf16::from_f32(2.), 644 | bf16::from_f32(3.), 645 | bf16::from_f32(4.), 646 | bf16::from_f32(5.), 647 | bf16::from_f32(6.), 648 | bf16::from_f32(7.), 649 | bf16::from_f32(8.), 650 | bf16::from_f32(9.), 651 | ]; 652 | let mut buf32 = vf32; 653 | let mut buf16 = vf16; 654 | 655 | vf16.convert_to_f32_slice(&mut buf32); 656 | assert_eq!(&vf32, &buf32); 657 | 658 | buf16.convert_from_f32_slice(&vf32); 659 | assert_eq!(&vf16, &buf16); 660 | 661 | // Short input: only 2 elements 662 | let vf32 = [1., 2.]; 663 | let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)]; 664 | let mut buf32 = vf32; 665 | let mut buf16 = vf16; 666 | 667 | vf16.convert_to_f32_slice(&mut buf32); 668 | assert_eq!(&vf32, &buf32); 669 | 670 | buf16.convert_from_f32_slice(&vf32); 671 | assert_eq!(&vf16, &buf16); 672 | } 673 | /// Checks `f16` slice conversion in both directions (to `f64` and from `f64`) for 8-, 9- and 2-element arrays. 674 | #[test] 675 | fn slice_convert_f16_f64() { 676 | // Exact chunks 677 | let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; 678 | let vf16 = [ 679 | f16::from_f64(1.), 680 | f16::from_f64(2.), 681 | f16::from_f64(3.), 682 | f16::from_f64(4.), 683 | f16::from_f64(5.), 684 | f16::from_f64(6.), 685 | f16::from_f64(7.), 686 | f16::from_f64(8.), 687 | ]; 688 | let mut buf64 = vf64; 689 | let mut buf16 = vf16; 690 | 691 | vf16.convert_to_f64_slice(&mut buf64); 692 | assert_eq!(&vf64, &buf64); 693 | 694 | 
buf16.convert_from_f64_slice(&vf64); 695 | assert_eq!(&vf16, &buf16); 696 | 697 | // Partial with chunks 698 | let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; 699 | let vf16 = [ 700 | f16::from_f64(1.), 701 | f16::from_f64(2.), 702 | f16::from_f64(3.), 703 | f16::from_f64(4.), 704 | f16::from_f64(5.), 705 | f16::from_f64(6.), 706 | f16::from_f64(7.), 707 | f16::from_f64(8.), 708 | f16::from_f64(9.), 709 | ]; 710 | let mut buf64 = vf64; 711 | let mut buf16 = vf16; 712 | 713 | vf16.convert_to_f64_slice(&mut buf64); 714 | assert_eq!(&vf64, &buf64); 715 | 716 | buf16.convert_from_f64_slice(&vf64); 717 | assert_eq!(&vf16, &buf16); 718 | 719 | // Short input: only 2 elements 720 | let vf64 = [1., 2.]; 721 | let vf16 = [f16::from_f64(1.), f16::from_f64(2.)]; 722 | let mut buf64 = vf64; 723 | let mut buf16 = vf16; 724 | 725 | vf16.convert_to_f64_slice(&mut buf64); 726 | assert_eq!(&vf64, &buf64); 727 | 728 | buf16.convert_from_f64_slice(&vf64); 729 | assert_eq!(&vf16, &buf16); 730 | } 731 | /// Checks `bf16` slice conversion in both directions (to `f64` and from `f64`) for 8-, 9- and 2-element arrays. 732 | #[test] 733 | fn slice_convert_bf16_f64() { 734 | // Exact chunks 735 | let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; 736 | let vf16 = [ 737 | bf16::from_f64(1.), 738 | bf16::from_f64(2.), 739 | bf16::from_f64(3.), 740 | bf16::from_f64(4.), 741 | bf16::from_f64(5.), 742 | bf16::from_f64(6.), 743 | bf16::from_f64(7.), 744 | bf16::from_f64(8.), 745 | ]; 746 | let mut buf64 = vf64; 747 | let mut buf16 = vf16; 748 | 749 | vf16.convert_to_f64_slice(&mut buf64); 750 | assert_eq!(&vf64, &buf64); 751 | 752 | buf16.convert_from_f64_slice(&vf64); 753 | assert_eq!(&vf16, &buf16); 754 | 755 | // Partial with chunks 756 | let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; 757 | let vf16 = [ 758 | bf16::from_f64(1.), 759 | bf16::from_f64(2.), 760 | bf16::from_f64(3.), 761 | bf16::from_f64(4.), 762 | bf16::from_f64(5.), 763 | bf16::from_f64(6.), 764 | bf16::from_f64(7.), 765 | bf16::from_f64(8.), 766 | bf16::from_f64(9.), 767 | ]; 768 | let mut buf64 = vf64; 769 | let mut buf16 = vf16; 770 | 771 | 
vf16.convert_to_f64_slice(&mut buf64); 772 | assert_eq!(&vf64, &buf64); 773 | 774 | buf16.convert_from_f64_slice(&vf64); 775 | assert_eq!(&vf16, &buf16); 776 | 777 | // Short input: only 2 elements 778 | let vf64 = [1., 2.]; 779 | let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)]; 780 | let mut buf64 = vf64; 781 | let mut buf16 = vf16; 782 | 783 | vf16.convert_to_f64_slice(&mut buf64); 784 | assert_eq!(&vf64, &buf64); 785 | 786 | buf16.convert_from_f64_slice(&vf64); 787 | assert_eq!(&vf16, &buf16); 788 | } 789 | /// `convert_from_f32_slice` is expected to panic when the destination has 3 elements and the source has 4. 790 | #[test] 791 | #[should_panic] 792 | fn convert_from_f32_slice_len_mismatch_panics() { 793 | let mut slice1 = [f16::ZERO; 3]; 794 | let slice2 = [0f32; 4]; 795 | slice1.convert_from_f32_slice(&slice2); 796 | } 797 | /// `convert_from_f64_slice` is expected to panic when the destination has 3 elements and the source has 4. 798 | #[test] 799 | #[should_panic] 800 | fn convert_from_f64_slice_len_mismatch_panics() { 801 | let mut slice1 = [f16::ZERO; 3]; 802 | let slice2 = [0f64; 4]; 803 | slice1.convert_from_f64_slice(&slice2); 804 | } 805 | /// `convert_to_f32_slice` is expected to panic when the source has 3 elements and the destination has 4. 806 | #[test] 807 | #[should_panic] 808 | fn convert_to_f32_slice_len_mismatch_panics() { 809 | let slice1 = [f16::ZERO; 3]; 810 | let mut slice2 = [0f32; 4]; 811 | slice1.convert_to_f32_slice(&mut slice2); 812 | } 813 | /// `convert_to_f64_slice` is expected to panic when the source has 3 elements and the destination has 4. 814 | #[test] 815 | #[should_panic] 816 | fn convert_to_f64_slice_len_mismatch_panics() { 817 | let slice1 = [f16::ZERO; 3]; 818 | let mut slice2 = [0f64; 4]; 819 | slice1.convert_to_f64_slice(&mut slice2); 820 | } 821 | } 822 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anes" 16 | version = "0.1.6" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 19 | 20 | [[package]] 21 | name = "anstyle" 22 | version = "1.0.10" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 25 | 26 | [[package]] 27 | name = "arbitrary" 28 | version = "1.4.1" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" 31 | dependencies = [ 32 | "derive_arbitrary", 33 | ] 34 | 35 | [[package]] 36 | name = "autocfg" 37 | version = "1.4.0" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 40 | 41 | [[package]] 42 | name = "bitflags" 43 | version = "2.9.0" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 46 | 47 | [[package]] 48 | name = "bumpalo" 49 | version = "3.17.0" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" 52 | 53 | [[package]] 54 | name = "bytecheck" 55 | version = "0.8.1" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "50690fb3370fb9fe3550372746084c46f2ac8c9685c583d2be10eefd89d3d1a3" 58 | dependencies = [ 59 | "bytecheck_derive", 60 | "ptr_meta", 61 | "rancor", 62 | "simdutf8", 63 | ] 64 | 65 | 
[[package]] 66 | name = "bytecheck_derive" 67 | version = "0.8.1" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "efb7846e0cb180355c2dec69e721edafa36919850f1a9f52ffba4ebc0393cb71" 70 | dependencies = [ 71 | "proc-macro2", 72 | "quote", 73 | "syn 2.0.100", 74 | ] 75 | 76 | [[package]] 77 | name = "bytemuck" 78 | version = "1.4.1" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "41aa2ec95ca3b5c54cf73c91acf06d24f4495d5f1b1c12506ae3483d646177ac" 81 | dependencies = [ 82 | "bytemuck_derive", 83 | ] 84 | 85 | [[package]] 86 | name = "bytemuck_derive" 87 | version = "1.8.1" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" 90 | dependencies = [ 91 | "proc-macro2", 92 | "quote", 93 | "syn 2.0.100", 94 | ] 95 | 96 | [[package]] 97 | name = "bytes" 98 | version = "1.10.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" 101 | 102 | [[package]] 103 | name = "cast" 104 | version = "0.3.0" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 107 | 108 | [[package]] 109 | name = "cfg-if" 110 | version = "1.0.0" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 113 | 114 | [[package]] 115 | name = "ciborium" 116 | version = "0.2.2" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 119 | dependencies = [ 120 | "ciborium-io", 121 | "ciborium-ll", 122 | "serde", 123 | ] 124 | 125 | [[package]] 126 | name = "ciborium-io" 127 | version = "0.2.2" 128 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 130 | 131 | [[package]] 132 | name = "ciborium-ll" 133 | version = "0.2.2" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 136 | dependencies = [ 137 | "ciborium-io", 138 | "half 2.4.1", 139 | ] 140 | 141 | [[package]] 142 | name = "clap" 143 | version = "4.5.32" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83" 146 | dependencies = [ 147 | "clap_builder", 148 | ] 149 | 150 | [[package]] 151 | name = "clap_builder" 152 | version = "4.5.32" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8" 155 | dependencies = [ 156 | "anstyle", 157 | "clap_lex", 158 | ] 159 | 160 | [[package]] 161 | name = "clap_lex" 162 | version = "0.7.4" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 165 | 166 | [[package]] 167 | name = "criterion" 168 | version = "0.5.1" 169 | source = "registry+https://github.com/rust-lang/crates.io-index" 170 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 171 | dependencies = [ 172 | "anes", 173 | "cast", 174 | "ciborium", 175 | "clap", 176 | "criterion-plot", 177 | "is-terminal", 178 | "itertools", 179 | "num-traits", 180 | "once_cell", 181 | "oorandom", 182 | "plotters", 183 | "rayon", 184 | "regex", 185 | "serde", 186 | "serde_derive", 187 | "serde_json", 188 | "tinytemplate", 189 | "walkdir", 190 | ] 191 | 192 | [[package]] 193 | name = "criterion-plot" 194 | version = "0.5.0" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = 
"6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 197 | dependencies = [ 198 | "cast", 199 | "itertools", 200 | ] 201 | 202 | [[package]] 203 | name = "crossbeam-deque" 204 | version = "0.8.6" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 207 | dependencies = [ 208 | "crossbeam-epoch", 209 | "crossbeam-utils", 210 | ] 211 | 212 | [[package]] 213 | name = "crossbeam-epoch" 214 | version = "0.9.18" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 217 | dependencies = [ 218 | "crossbeam-utils", 219 | ] 220 | 221 | [[package]] 222 | name = "crossbeam-utils" 223 | version = "0.8.21" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 226 | 227 | [[package]] 228 | name = "crunchy" 229 | version = "0.2.3" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" 232 | 233 | [[package]] 234 | name = "derive_arbitrary" 235 | version = "1.4.1" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" 238 | dependencies = [ 239 | "proc-macro2", 240 | "quote", 241 | "syn 2.0.100", 242 | ] 243 | 244 | [[package]] 245 | name = "either" 246 | version = "1.15.0" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 249 | 250 | [[package]] 251 | name = "env_logger" 252 | version = "0.8.4" 253 | source = "registry+https://github.com/rust-lang/crates.io-index" 254 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 255 | dependencies = [ 
256 | "log", 257 | "regex", 258 | ] 259 | 260 | [[package]] 261 | name = "equivalent" 262 | version = "1.0.2" 263 | source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 265 | 266 | [[package]] 267 | name = "getrandom" 268 | version = "0.2.15" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 271 | dependencies = [ 272 | "cfg-if", 273 | "libc", 274 | "wasi 0.11.0+wasi-snapshot-preview1", 275 | ] 276 | 277 | [[package]] 278 | name = "getrandom" 279 | version = "0.3.1" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" 282 | dependencies = [ 283 | "cfg-if", 284 | "libc", 285 | "wasi 0.13.3+wasi-0.2.2", 286 | "windows-targets", 287 | ] 288 | 289 | [[package]] 290 | name = "half" 291 | version = "2.4.1" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 294 | dependencies = [ 295 | "cfg-if", 296 | "crunchy", 297 | ] 298 | 299 | [[package]] 300 | name = "half" 301 | version = "2.7.1" 302 | dependencies = [ 303 | "arbitrary", 304 | "bytemuck", 305 | "cfg-if", 306 | "criterion", 307 | "crunchy", 308 | "num-traits", 309 | "quickcheck", 310 | "quickcheck_macros", 311 | "rand 0.9.0", 312 | "rand_distr", 313 | "rkyv", 314 | "serde", 315 | "zerocopy", 316 | ] 317 | 318 | [[package]] 319 | name = "hashbrown" 320 | version = "0.14.5" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 323 | 324 | [[package]] 325 | name = "hashbrown" 326 | version = "0.15.2" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = 
"bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" 329 | 330 | [[package]] 331 | name = "hermit-abi" 332 | version = "0.5.0" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" 335 | 336 | [[package]] 337 | name = "indexmap" 338 | version = "2.8.0" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" 341 | dependencies = [ 342 | "equivalent", 343 | "hashbrown 0.15.2", 344 | ] 345 | 346 | [[package]] 347 | name = "is-terminal" 348 | version = "0.4.16" 349 | source = "registry+https://github.com/rust-lang/crates.io-index" 350 | checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" 351 | dependencies = [ 352 | "hermit-abi", 353 | "libc", 354 | "windows-sys", 355 | ] 356 | 357 | [[package]] 358 | name = "itertools" 359 | version = "0.10.5" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 362 | dependencies = [ 363 | "either", 364 | ] 365 | 366 | [[package]] 367 | name = "itoa" 368 | version = "1.0.15" 369 | source = "registry+https://github.com/rust-lang/crates.io-index" 370 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 371 | 372 | [[package]] 373 | name = "js-sys" 374 | version = "0.3.77" 375 | source = "registry+https://github.com/rust-lang/crates.io-index" 376 | checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 377 | dependencies = [ 378 | "once_cell", 379 | "wasm-bindgen", 380 | ] 381 | 382 | [[package]] 383 | name = "libc" 384 | version = "0.2.171" 385 | source = "registry+https://github.com/rust-lang/crates.io-index" 386 | checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" 387 | 388 | [[package]] 389 | name = "libm" 390 | version = 
"0.2.11" 391 | source = "registry+https://github.com/rust-lang/crates.io-index" 392 | checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" 393 | 394 | [[package]] 395 | name = "log" 396 | version = "0.4.26" 397 | source = "registry+https://github.com/rust-lang/crates.io-index" 398 | checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" 399 | 400 | [[package]] 401 | name = "memchr" 402 | version = "2.7.4" 403 | source = "registry+https://github.com/rust-lang/crates.io-index" 404 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 405 | 406 | [[package]] 407 | name = "munge" 408 | version = "0.4.3" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "a0091202c98cf06da46c279fdf50cccb6b1c43b4521abdf6a27b4c7e71d5d9d7" 411 | dependencies = [ 412 | "munge_macro", 413 | ] 414 | 415 | [[package]] 416 | name = "munge_macro" 417 | version = "0.4.3" 418 | source = "registry+https://github.com/rust-lang/crates.io-index" 419 | checksum = "734799cf91479720b2f970c61a22850940dd91e27d4f02b1c6fc792778df2459" 420 | dependencies = [ 421 | "proc-macro2", 422 | "quote", 423 | "syn 2.0.100", 424 | ] 425 | 426 | [[package]] 427 | name = "num-traits" 428 | version = "0.2.16" 429 | source = "registry+https://github.com/rust-lang/crates.io-index" 430 | checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" 431 | dependencies = [ 432 | "autocfg", 433 | "libm", 434 | ] 435 | 436 | [[package]] 437 | name = "once_cell" 438 | version = "1.21.1" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" 441 | 442 | [[package]] 443 | name = "oorandom" 444 | version = "11.1.5" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 447 | 448 | [[package]] 449 | name = 
"plotters" 450 | version = "0.3.7" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 453 | dependencies = [ 454 | "num-traits", 455 | "plotters-backend", 456 | "plotters-svg", 457 | "wasm-bindgen", 458 | "web-sys", 459 | ] 460 | 461 | [[package]] 462 | name = "plotters-backend" 463 | version = "0.3.7" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 466 | 467 | [[package]] 468 | name = "plotters-svg" 469 | version = "0.3.7" 470 | source = "registry+https://github.com/rust-lang/crates.io-index" 471 | checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 472 | dependencies = [ 473 | "plotters-backend", 474 | ] 475 | 476 | [[package]] 477 | name = "ppv-lite86" 478 | version = "0.2.21" 479 | source = "registry+https://github.com/rust-lang/crates.io-index" 480 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 481 | dependencies = [ 482 | "zerocopy", 483 | ] 484 | 485 | [[package]] 486 | name = "proc-macro2" 487 | version = "1.0.94" 488 | source = "registry+https://github.com/rust-lang/crates.io-index" 489 | checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" 490 | dependencies = [ 491 | "unicode-ident", 492 | ] 493 | 494 | [[package]] 495 | name = "ptr_meta" 496 | version = "0.3.0" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "fe9e76f66d3f9606f44e45598d155cb13ecf09f4a28199e48daf8c8fc937ea90" 499 | dependencies = [ 500 | "ptr_meta_derive", 501 | ] 502 | 503 | [[package]] 504 | name = "ptr_meta_derive" 505 | version = "0.3.0" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "ca414edb151b4c8d125c12566ab0d74dc9cdba36fb80eb7b848c15f495fd32d1" 508 | dependencies = [ 509 | "proc-macro2", 510 | "quote", 511 | 
"syn 2.0.100", 512 | ] 513 | 514 | [[package]] 515 | name = "quickcheck" 516 | version = "1.0.3" 517 | source = "registry+https://github.com/rust-lang/crates.io-index" 518 | checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" 519 | dependencies = [ 520 | "env_logger", 521 | "log", 522 | "rand 0.8.5", 523 | ] 524 | 525 | [[package]] 526 | name = "quickcheck_macros" 527 | version = "1.0.0" 528 | source = "registry+https://github.com/rust-lang/crates.io-index" 529 | checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9" 530 | dependencies = [ 531 | "proc-macro2", 532 | "quote", 533 | "syn 1.0.109", 534 | ] 535 | 536 | [[package]] 537 | name = "quote" 538 | version = "1.0.40" 539 | source = "registry+https://github.com/rust-lang/crates.io-index" 540 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 541 | dependencies = [ 542 | "proc-macro2", 543 | ] 544 | 545 | [[package]] 546 | name = "rancor" 547 | version = "0.1.0" 548 | source = "registry+https://github.com/rust-lang/crates.io-index" 549 | checksum = "caf5f7161924b9d1cea0e4cabc97c372cea92b5f927fc13c6bca67157a0ad947" 550 | dependencies = [ 551 | "ptr_meta", 552 | ] 553 | 554 | [[package]] 555 | name = "rand" 556 | version = "0.8.5" 557 | source = "registry+https://github.com/rust-lang/crates.io-index" 558 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 559 | dependencies = [ 560 | "rand_core 0.6.4", 561 | ] 562 | 563 | [[package]] 564 | name = "rand" 565 | version = "0.9.0" 566 | source = "registry+https://github.com/rust-lang/crates.io-index" 567 | checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" 568 | dependencies = [ 569 | "rand_chacha", 570 | "rand_core 0.9.3", 571 | "zerocopy", 572 | ] 573 | 574 | [[package]] 575 | name = "rand_chacha" 576 | version = "0.9.0" 577 | source = "registry+https://github.com/rust-lang/crates.io-index" 578 | checksum = 
"d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 579 | dependencies = [ 580 | "ppv-lite86", 581 | "rand_core 0.9.3", 582 | ] 583 | 584 | [[package]] 585 | name = "rand_core" 586 | version = "0.6.4" 587 | source = "registry+https://github.com/rust-lang/crates.io-index" 588 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 589 | dependencies = [ 590 | "getrandom 0.2.15", 591 | ] 592 | 593 | [[package]] 594 | name = "rand_core" 595 | version = "0.9.3" 596 | source = "registry+https://github.com/rust-lang/crates.io-index" 597 | checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 598 | dependencies = [ 599 | "getrandom 0.3.1", 600 | ] 601 | 602 | [[package]] 603 | name = "rand_distr" 604 | version = "0.5.0" 605 | source = "registry+https://github.com/rust-lang/crates.io-index" 606 | checksum = "ddc3b5afe4c995c44540865b8ca5c52e6a59fa362da96c5d30886930ddc8da1c" 607 | dependencies = [ 608 | "num-traits", 609 | "rand 0.9.0", 610 | ] 611 | 612 | [[package]] 613 | name = "rayon" 614 | version = "1.10.0" 615 | source = "registry+https://github.com/rust-lang/crates.io-index" 616 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 617 | dependencies = [ 618 | "either", 619 | "rayon-core", 620 | ] 621 | 622 | [[package]] 623 | name = "rayon-core" 624 | version = "1.12.1" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 627 | dependencies = [ 628 | "crossbeam-deque", 629 | "crossbeam-utils", 630 | ] 631 | 632 | [[package]] 633 | name = "regex" 634 | version = "1.11.1" 635 | source = "registry+https://github.com/rust-lang/crates.io-index" 636 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 637 | dependencies = [ 638 | "aho-corasick", 639 | "memchr", 640 | "regex-automata", 641 | "regex-syntax", 642 | ] 643 | 644 | [[package]] 645 | name = 
"regex-automata" 646 | version = "0.4.9" 647 | source = "registry+https://github.com/rust-lang/crates.io-index" 648 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 649 | dependencies = [ 650 | "aho-corasick", 651 | "memchr", 652 | "regex-syntax", 653 | ] 654 | 655 | [[package]] 656 | name = "regex-syntax" 657 | version = "0.8.5" 658 | source = "registry+https://github.com/rust-lang/crates.io-index" 659 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 660 | 661 | [[package]] 662 | name = "rend" 663 | version = "0.5.2" 664 | source = "registry+https://github.com/rust-lang/crates.io-index" 665 | checksum = "a35e8a6bf28cd121053a66aa2e6a2e3eaffad4a60012179f0e864aa5ffeff215" 666 | dependencies = [ 667 | "bytecheck", 668 | ] 669 | 670 | [[package]] 671 | name = "rkyv" 672 | version = "0.8.0" 673 | source = "registry+https://github.com/rust-lang/crates.io-index" 674 | checksum = "6d7fa2297190bd08087add407c3dedf28eb3be1d75955ffbd3bc312834325760" 675 | dependencies = [ 676 | "bytecheck", 677 | "bytes", 678 | "hashbrown 0.14.5", 679 | "indexmap", 680 | "munge", 681 | "ptr_meta", 682 | "rancor", 683 | "rend", 684 | "rkyv_derive", 685 | "tinyvec", 686 | "uuid", 687 | ] 688 | 689 | [[package]] 690 | name = "rkyv_derive" 691 | version = "0.8.0" 692 | source = "registry+https://github.com/rust-lang/crates.io-index" 693 | checksum = "4aad510db4f88722adf0e4586ff0dedfca4af57b17c075b2420bac1db446d22c" 694 | dependencies = [ 695 | "proc-macro2", 696 | "quote", 697 | "syn 2.0.100", 698 | ] 699 | 700 | [[package]] 701 | name = "rustversion" 702 | version = "1.0.20" 703 | source = "registry+https://github.com/rust-lang/crates.io-index" 704 | checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" 705 | 706 | [[package]] 707 | name = "ryu" 708 | version = "1.0.20" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = 
"28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 711 | 712 | [[package]] 713 | name = "same-file" 714 | version = "1.0.6" 715 | source = "registry+https://github.com/rust-lang/crates.io-index" 716 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 717 | dependencies = [ 718 | "winapi-util", 719 | ] 720 | 721 | [[package]] 722 | name = "serde" 723 | version = "1.0.219" 724 | source = "registry+https://github.com/rust-lang/crates.io-index" 725 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 726 | dependencies = [ 727 | "serde_derive", 728 | ] 729 | 730 | [[package]] 731 | name = "serde_derive" 732 | version = "1.0.219" 733 | source = "registry+https://github.com/rust-lang/crates.io-index" 734 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 735 | dependencies = [ 736 | "proc-macro2", 737 | "quote", 738 | "syn 2.0.100", 739 | ] 740 | 741 | [[package]] 742 | name = "serde_json" 743 | version = "1.0.140" 744 | source = "registry+https://github.com/rust-lang/crates.io-index" 745 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" 746 | dependencies = [ 747 | "itoa", 748 | "memchr", 749 | "ryu", 750 | "serde", 751 | ] 752 | 753 | [[package]] 754 | name = "simdutf8" 755 | version = "0.1.5" 756 | source = "registry+https://github.com/rust-lang/crates.io-index" 757 | checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" 758 | 759 | [[package]] 760 | name = "syn" 761 | version = "1.0.109" 762 | source = "registry+https://github.com/rust-lang/crates.io-index" 763 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 764 | dependencies = [ 765 | "proc-macro2", 766 | "quote", 767 | "unicode-ident", 768 | ] 769 | 770 | [[package]] 771 | name = "syn" 772 | version = "2.0.100" 773 | source = "registry+https://github.com/rust-lang/crates.io-index" 774 | checksum = 
"b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" 775 | dependencies = [ 776 | "proc-macro2", 777 | "quote", 778 | "unicode-ident", 779 | ] 780 | 781 | [[package]] 782 | name = "tinytemplate" 783 | version = "1.2.1" 784 | source = "registry+https://github.com/rust-lang/crates.io-index" 785 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 786 | dependencies = [ 787 | "serde", 788 | "serde_json", 789 | ] 790 | 791 | [[package]] 792 | name = "tinyvec" 793 | version = "1.9.0" 794 | source = "registry+https://github.com/rust-lang/crates.io-index" 795 | checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" 796 | dependencies = [ 797 | "tinyvec_macros", 798 | ] 799 | 800 | [[package]] 801 | name = "tinyvec_macros" 802 | version = "0.1.1" 803 | source = "registry+https://github.com/rust-lang/crates.io-index" 804 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 805 | 806 | [[package]] 807 | name = "unicode-ident" 808 | version = "1.0.18" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 811 | 812 | [[package]] 813 | name = "uuid" 814 | version = "1.15.1" 815 | source = "registry+https://github.com/rust-lang/crates.io-index" 816 | checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587" 817 | 818 | [[package]] 819 | name = "walkdir" 820 | version = "2.5.0" 821 | source = "registry+https://github.com/rust-lang/crates.io-index" 822 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 823 | dependencies = [ 824 | "same-file", 825 | "winapi-util", 826 | ] 827 | 828 | [[package]] 829 | name = "wasi" 830 | version = "0.11.0+wasi-snapshot-preview1" 831 | source = "registry+https://github.com/rust-lang/crates.io-index" 832 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 833 | 834 | [[package]] 835 | 
name = "wasi" 836 | version = "0.13.3+wasi-0.2.2" 837 | source = "registry+https://github.com/rust-lang/crates.io-index" 838 | checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" 839 | dependencies = [ 840 | "wit-bindgen-rt", 841 | ] 842 | 843 | [[package]] 844 | name = "wasm-bindgen" 845 | version = "0.2.100" 846 | source = "registry+https://github.com/rust-lang/crates.io-index" 847 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 848 | dependencies = [ 849 | "cfg-if", 850 | "once_cell", 851 | "rustversion", 852 | "wasm-bindgen-macro", 853 | ] 854 | 855 | [[package]] 856 | name = "wasm-bindgen-backend" 857 | version = "0.2.100" 858 | source = "registry+https://github.com/rust-lang/crates.io-index" 859 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 860 | dependencies = [ 861 | "bumpalo", 862 | "log", 863 | "proc-macro2", 864 | "quote", 865 | "syn 2.0.100", 866 | "wasm-bindgen-shared", 867 | ] 868 | 869 | [[package]] 870 | name = "wasm-bindgen-macro" 871 | version = "0.2.100" 872 | source = "registry+https://github.com/rust-lang/crates.io-index" 873 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 874 | dependencies = [ 875 | "quote", 876 | "wasm-bindgen-macro-support", 877 | ] 878 | 879 | [[package]] 880 | name = "wasm-bindgen-macro-support" 881 | version = "0.2.100" 882 | source = "registry+https://github.com/rust-lang/crates.io-index" 883 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 884 | dependencies = [ 885 | "proc-macro2", 886 | "quote", 887 | "syn 2.0.100", 888 | "wasm-bindgen-backend", 889 | "wasm-bindgen-shared", 890 | ] 891 | 892 | [[package]] 893 | name = "wasm-bindgen-shared" 894 | version = "0.2.100" 895 | source = "registry+https://github.com/rust-lang/crates.io-index" 896 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 897 | dependencies = [ 898 | "unicode-ident", 
899 | ] 900 | 901 | [[package]] 902 | name = "web-sys" 903 | version = "0.3.77" 904 | source = "registry+https://github.com/rust-lang/crates.io-index" 905 | checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" 906 | dependencies = [ 907 | "js-sys", 908 | "wasm-bindgen", 909 | ] 910 | 911 | [[package]] 912 | name = "winapi-util" 913 | version = "0.1.9" 914 | source = "registry+https://github.com/rust-lang/crates.io-index" 915 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 916 | dependencies = [ 917 | "windows-sys", 918 | ] 919 | 920 | [[package]] 921 | name = "windows-sys" 922 | version = "0.59.0" 923 | source = "registry+https://github.com/rust-lang/crates.io-index" 924 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 925 | dependencies = [ 926 | "windows-targets", 927 | ] 928 | 929 | [[package]] 930 | name = "windows-targets" 931 | version = "0.52.6" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 934 | dependencies = [ 935 | "windows_aarch64_gnullvm", 936 | "windows_aarch64_msvc", 937 | "windows_i686_gnu", 938 | "windows_i686_gnullvm", 939 | "windows_i686_msvc", 940 | "windows_x86_64_gnu", 941 | "windows_x86_64_gnullvm", 942 | "windows_x86_64_msvc", 943 | ] 944 | 945 | [[package]] 946 | name = "windows_aarch64_gnullvm" 947 | version = "0.52.6" 948 | source = "registry+https://github.com/rust-lang/crates.io-index" 949 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 950 | 951 | [[package]] 952 | name = "windows_aarch64_msvc" 953 | version = "0.52.6" 954 | source = "registry+https://github.com/rust-lang/crates.io-index" 955 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 956 | 957 | [[package]] 958 | name = "windows_i686_gnu" 959 | version = "0.52.6" 960 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 961 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 962 | 963 | [[package]] 964 | name = "windows_i686_gnullvm" 965 | version = "0.52.6" 966 | source = "registry+https://github.com/rust-lang/crates.io-index" 967 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 968 | 969 | [[package]] 970 | name = "windows_i686_msvc" 971 | version = "0.52.6" 972 | source = "registry+https://github.com/rust-lang/crates.io-index" 973 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 974 | 975 | [[package]] 976 | name = "windows_x86_64_gnu" 977 | version = "0.52.6" 978 | source = "registry+https://github.com/rust-lang/crates.io-index" 979 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 980 | 981 | [[package]] 982 | name = "windows_x86_64_gnullvm" 983 | version = "0.52.6" 984 | source = "registry+https://github.com/rust-lang/crates.io-index" 985 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 986 | 987 | [[package]] 988 | name = "windows_x86_64_msvc" 989 | version = "0.52.6" 990 | source = "registry+https://github.com/rust-lang/crates.io-index" 991 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 992 | 993 | [[package]] 994 | name = "wit-bindgen-rt" 995 | version = "0.33.0" 996 | source = "registry+https://github.com/rust-lang/crates.io-index" 997 | checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" 998 | dependencies = [ 999 | "bitflags", 1000 | ] 1001 | 1002 | [[package]] 1003 | name = "zerocopy" 1004 | version = "0.8.26" 1005 | source = "registry+https://github.com/rust-lang/crates.io-index" 1006 | checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" 1007 | dependencies = [ 1008 | "zerocopy-derive", 1009 | ] 1010 | 1011 | [[package]] 1012 | name = "zerocopy-derive" 1013 | version = "0.8.26" 
1014 | source = "registry+https://github.com/rust-lang/crates.io-index" 1015 | checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" 1016 | dependencies = [ 1017 | "proc-macro2", 1018 | "quote", 1019 | "syn 2.0.100", 1020 | ] 1021 | -------------------------------------------------------------------------------- /src/binary16/arch.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code, unused_imports)] 2 | use crate::leading_zeros::leading_zeros_u16; 3 | use core::mem; 4 | 5 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 6 | mod x86; 7 | 8 | #[cfg(target_arch = "aarch64")] 9 | mod aarch64; 10 | 11 | #[cfg(all(feature = "nightly", target_arch = "loongarch64"))] 12 | mod loongarch64; 13 | 14 | macro_rules! convert_fn { 15 | (if x86_feature("f16c") { $f16c:expr } 16 | else if aarch64_feature("fp16") { $aarch64:expr } 17 | else if loongarch64_feature("lsx") { $loongarch64:expr } 18 | else { $fallback:expr }) => { 19 | cfg_if::cfg_if! 
/// Converts an `f64` to the raw bit pattern of a binary16 (half-precision) value.
///
/// The implementation is picked by `convert_fn!`: a hardware arm when the
/// matching target feature is enabled at compile time or (with `std`)
/// detected at runtime, otherwise the software fallback.
#[inline]
pub(crate) fn f64_to_f16(f: f64) -> u16 {
    convert_fn! {
        if x86_feature("f16c") {
            // The value is first narrowed to `f32` and only then converted to
            // half precision.
            // NOTE(review): this rounds twice (f64 -> f32 -> f16), which can
            // differ from a single correctly rounded f64 -> f16 conversion for
            // inputs that land on a half-precision tie after the first
            // rounding — confirm whether that is acceptable here.
            // SAFETY: presumably requires the `f16c` feature, which
            // `convert_fn!` has established for this arm — TODO confirm
            // against the `x86` module.
            unsafe { x86::f32_to_f16_x86_f16c(f as f32) }
        } else if aarch64_feature("fp16") {
            // SAFETY: presumably requires the `fp16` feature, which
            // `convert_fn!` has established for this arm — TODO confirm
            // against the `aarch64` module.
            unsafe { aarch64::f64_to_f16_fp16(f) }
        } else if loongarch64_feature("lsx") {
            // No LSX routine is used for f64 input; the software path is
            // taken instead.
            f64_to_f16_fallback(f)
        } else {
            f64_to_f16_fallback(f)
        }
    }
}
{ 163 | if x86_feature("f16c") { 164 | unsafe { x86::f16x4_to_f32x4_x86_f16c(i) } 165 | } else if aarch64_feature("fp16") { 166 | unsafe { aarch64::f16x4_to_f32x4_fp16(i) } 167 | } else if loongarch64_feature("lsx") { 168 | unsafe { loongarch64::f16x4_to_f32x4_lsx(i) } 169 | } else { 170 | f16x4_to_f32x4_fallback(i) 171 | } 172 | } 173 | } 174 | 175 | #[inline] 176 | pub(crate) fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] { 177 | convert_fn! { 178 | if x86_feature("f16c") { 179 | unsafe { x86::f64x4_to_f16x4_x86_f16c(f) } 180 | } else if aarch64_feature("fp16") { 181 | unsafe { aarch64::f64x4_to_f16x4_fp16(f) } 182 | } else if loongarch64_feature("lsx") { 183 | unsafe { loongarch64::f64x4_to_f16x4_lsx(f) } 184 | } else { 185 | f64x4_to_f16x4_fallback(f) 186 | } 187 | } 188 | } 189 | 190 | #[inline] 191 | pub(crate) fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] { 192 | convert_fn! { 193 | if x86_feature("f16c") { 194 | unsafe { x86::f16x4_to_f64x4_x86_f16c(i) } 195 | } else if aarch64_feature("fp16") { 196 | unsafe { aarch64::f16x4_to_f64x4_fp16(i) } 197 | } else if loongarch64_feature("lsx") { 198 | unsafe { loongarch64::f16x4_to_f64x4_lsx(i) } 199 | } else { 200 | f16x4_to_f64x4_fallback(i) 201 | } 202 | } 203 | } 204 | 205 | #[inline] 206 | pub(crate) fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] { 207 | convert_fn! 
{ 208 | if x86_feature("f16c") { 209 | unsafe { x86::f32x8_to_f16x8_x86_f16c(f) } 210 | } else if aarch64_feature("fp16") { 211 | { 212 | let mut result = [0u16; 8]; 213 | convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(), 214 | aarch64::f32x4_to_f16x4_fp16); 215 | result 216 | } 217 | } else if loongarch64_feature("lsx") { 218 | { 219 | let mut result = [0u16; 8]; 220 | convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(), 221 | loongarch64::f32x4_to_f16x4_lsx); 222 | result 223 | } 224 | } else { 225 | f32x8_to_f16x8_fallback(f) 226 | } 227 | } 228 | } 229 | 230 | #[inline] 231 | pub(crate) fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] { 232 | convert_fn! { 233 | if x86_feature("f16c") { 234 | unsafe { x86::f16x8_to_f32x8_x86_f16c(i) } 235 | } else if aarch64_feature("fp16") { 236 | { 237 | let mut result = [0f32; 8]; 238 | convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(), 239 | aarch64::f16x4_to_f32x4_fp16); 240 | result 241 | } 242 | } else if loongarch64_feature("lsx") { 243 | { 244 | let mut result = [0f32; 8]; 245 | convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(), 246 | loongarch64::f16x4_to_f32x4_lsx); 247 | result 248 | } 249 | } else { 250 | f16x8_to_f32x8_fallback(i) 251 | } 252 | } 253 | } 254 | 255 | #[inline] 256 | pub(crate) fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] { 257 | convert_fn! 
/// Converts every `f32` in `src` to binary16 bits, writing the results to the
/// same positions in `dst`.
///
/// The implementation is picked by `convert_fn!`: the x86 arm works on chunks
/// of 8 (with a 4-wide routine for short remainders), the aarch64 and
/// loongarch64 arms on chunks of 4, and the fallback converts one element at
/// a time.
///
/// # Panics
///
/// Panics if `src` and `dst` have different lengths (every helper used below
/// asserts this).
#[inline]
pub(crate) fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) {
    convert_fn! {
        if x86_feature("f16c") {
            convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c,
                x86::f32x4_to_f16x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            convert_chunked_slice_4(src, dst, aarch64::f32x4_to_f16x4_fp16)
        } else if loongarch64_feature("lsx") {
            convert_chunked_slice_4(src, dst, loongarch64::f32x4_to_f16x4_lsx)
        } else {
            slice_fallback(src, dst, f32_to_f16_fallback)
        }
    }
}
{ 324 | if x86_feature("f16c") { 325 | convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c, 326 | x86::f16x4_to_f32x4_x86_f16c) 327 | } else if aarch64_feature("fp16") { 328 | convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f32x4_fp16) 329 | } else if loongarch64_feature("lsx") { 330 | convert_chunked_slice_4(src, dst, loongarch64::f16x4_to_f32x4_lsx) 331 | } else { 332 | slice_fallback(src, dst, f16_to_f32_fallback) 333 | } 334 | } 335 | } 336 | 337 | #[inline] 338 | pub(crate) fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) { 339 | convert_fn! { 340 | if x86_feature("f16c") { 341 | convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c, 342 | x86::f64x4_to_f16x4_x86_f16c) 343 | } else if aarch64_feature("fp16") { 344 | convert_chunked_slice_4(src, dst, aarch64::f64x4_to_f16x4_fp16) 345 | } else if loongarch64_feature("lsx") { 346 | convert_chunked_slice_4(src, dst, loongarch64::f64x4_to_f16x4_lsx) 347 | } else { 348 | slice_fallback(src, dst, f64_to_f16_fallback) 349 | } 350 | } 351 | } 352 | 353 | #[inline] 354 | pub(crate) fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) { 355 | convert_fn! { 356 | if x86_feature("f16c") { 357 | convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c, 358 | x86::f16x4_to_f64x4_x86_f16c) 359 | } else if aarch64_feature("fp16") { 360 | convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f64x4_fp16) 361 | } else if loongarch64_feature("lsx") { 362 | convert_chunked_slice_4(src, dst, loongarch64::f16x4_to_f64x4_lsx) 363 | } else { 364 | slice_fallback(src, dst, f16_to_f64_fallback) 365 | } 366 | } 367 | } 368 | 369 | macro_rules! math_fn { 370 | (if aarch64_feature("fp16") { $aarch64:expr } 371 | else { $fallback:expr }) => { 372 | cfg_if::cfg_if! 
{ 373 | // Use intrinsics directly when a compile target or using no_std 374 | if #[cfg(all( 375 | target_arch = "aarch64", 376 | target_feature = "fp16" 377 | ))] { 378 | $aarch64 379 | } 380 | 381 | // Use CPU feature detection if using std 382 | else if #[cfg(all( 383 | feature = "std", 384 | target_arch = "aarch64", 385 | not(target_feature = "fp16") 386 | ))] { 387 | use std::arch::is_aarch64_feature_detected; 388 | if is_aarch64_feature_detected!("fp16") { 389 | $aarch64 390 | } else { 391 | $fallback 392 | } 393 | } 394 | 395 | // Fallback to software 396 | else { 397 | $fallback 398 | } 399 | } 400 | }; 401 | } 402 | 403 | #[inline] 404 | pub(crate) fn add_f16(a: u16, b: u16) -> u16 { 405 | math_fn! { 406 | if aarch64_feature("fp16") { 407 | unsafe { aarch64::add_f16_fp16(a, b) } 408 | } else { 409 | add_f16_fallback(a, b) 410 | } 411 | } 412 | } 413 | 414 | #[inline] 415 | pub(crate) fn subtract_f16(a: u16, b: u16) -> u16 { 416 | math_fn! { 417 | if aarch64_feature("fp16") { 418 | unsafe { aarch64::subtract_f16_fp16(a, b) } 419 | } else { 420 | subtract_f16_fallback(a, b) 421 | } 422 | } 423 | } 424 | 425 | #[inline] 426 | pub(crate) fn multiply_f16(a: u16, b: u16) -> u16 { 427 | math_fn! { 428 | if aarch64_feature("fp16") { 429 | unsafe { aarch64::multiply_f16_fp16(a, b) } 430 | } else { 431 | multiply_f16_fallback(a, b) 432 | } 433 | } 434 | } 435 | 436 | #[inline] 437 | pub(crate) fn divide_f16(a: u16, b: u16) -> u16 { 438 | math_fn! { 439 | if aarch64_feature("fp16") { 440 | unsafe { aarch64::divide_f16_fp16(a, b) } 441 | } else { 442 | divide_f16_fallback(a, b) 443 | } 444 | } 445 | } 446 | 447 | #[inline] 448 | pub(crate) fn remainder_f16(a: u16, b: u16) -> u16 { 449 | remainder_f16_fallback(a, b) 450 | } 451 | 452 | #[inline] 453 | pub(crate) fn product_f16>(iter: I) -> u16 { 454 | math_fn! 
{ 455 | if aarch64_feature("fp16") { 456 | iter.fold(0, |acc, x| unsafe { aarch64::multiply_f16_fp16(acc, x) }) 457 | } else { 458 | product_f16_fallback(iter) 459 | } 460 | } 461 | } 462 | 463 | #[inline] 464 | pub(crate) fn sum_f16>(iter: I) -> u16 { 465 | math_fn! { 466 | if aarch64_feature("fp16") { 467 | iter.fold(0, |acc, x| unsafe { aarch64::add_f16_fp16(acc, x) }) 468 | } else { 469 | sum_f16_fallback(iter) 470 | } 471 | } 472 | } 473 | 474 | /// Chunks sliced into x8 or x4 arrays 475 | #[inline] 476 | fn convert_chunked_slice_8( 477 | src: &[S], 478 | dst: &mut [D], 479 | fn8: unsafe fn(&[S; 8]) -> [D; 8], 480 | fn4: unsafe fn(&[S; 4]) -> [D; 4], 481 | ) { 482 | assert_eq!(src.len(), dst.len()); 483 | 484 | // TODO: Can be further optimized with array_chunks when it becomes stabilized 485 | 486 | let src_chunks = src.chunks_exact(8); 487 | let mut dst_chunks = dst.chunks_exact_mut(8); 488 | let src_remainder = src_chunks.remainder(); 489 | for (s, d) in src_chunks.zip(&mut dst_chunks) { 490 | let chunk: &[S; 8] = s.try_into().unwrap(); 491 | d.copy_from_slice(unsafe { &fn8(chunk) }); 492 | } 493 | 494 | // Process remainder 495 | if src_remainder.len() > 4 { 496 | let mut buf: [S; 8] = Default::default(); 497 | buf[..src_remainder.len()].copy_from_slice(src_remainder); 498 | let vec = unsafe { fn8(&buf) }; 499 | let dst_remainder = dst_chunks.into_remainder(); 500 | dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); 501 | } else if !src_remainder.is_empty() { 502 | let mut buf: [S; 4] = Default::default(); 503 | buf[..src_remainder.len()].copy_from_slice(src_remainder); 504 | let vec = unsafe { fn4(&buf) }; 505 | let dst_remainder = dst_chunks.into_remainder(); 506 | dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); 507 | } 508 | } 509 | 510 | /// Chunks sliced into x4 arrays 511 | #[inline] 512 | fn convert_chunked_slice_4( 513 | src: &[S], 514 | dst: &mut [D], 515 | f: unsafe fn(&[S; 4]) -> [D; 4], 516 | ) { 517 | assert_eq!(src.len(), 
dst.len()); 518 | 519 | // TODO: Can be further optimized with array_chunks when it becomes stabilized 520 | 521 | let src_chunks = src.chunks_exact(4); 522 | let mut dst_chunks = dst.chunks_exact_mut(4); 523 | let src_remainder = src_chunks.remainder(); 524 | for (s, d) in src_chunks.zip(&mut dst_chunks) { 525 | let chunk: &[S; 4] = s.try_into().unwrap(); 526 | d.copy_from_slice(unsafe { &f(chunk) }); 527 | } 528 | 529 | // Process remainder 530 | if !src_remainder.is_empty() { 531 | let mut buf: [S; 4] = Default::default(); 532 | buf[..src_remainder.len()].copy_from_slice(src_remainder); 533 | let vec = unsafe { f(&buf) }; 534 | let dst_remainder = dst_chunks.into_remainder(); 535 | dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); 536 | } 537 | } 538 | 539 | /////////////// Fallbacks //////////////// 540 | 541 | // In the below functions, round to nearest, with ties to even. 542 | // Let us call the most significant bit that will be shifted out the round_bit. 543 | // 544 | // Round up if either 545 | // a) Removed part > tie. 546 | // (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0 547 | // b) Removed part == tie, and retained part is odd. 548 | // (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0 549 | // (If removed part == tie and retained part is even, do not round up.) 
550 | // These two conditions can be combined into one: 551 | // (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0 552 | // which can be simplified into 553 | // (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0 554 | 555 | #[inline] 556 | pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 { 557 | // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized 558 | // Convert to raw bytes 559 | let x: u32 = unsafe { mem::transmute::(value) }; 560 | 561 | // Extract IEEE754 components 562 | let sign = x & 0x8000_0000u32; 563 | let exp = x & 0x7F80_0000u32; 564 | let man = x & 0x007F_FFFFu32; 565 | 566 | // Check for all exponent bits being set, which is Infinity or NaN 567 | if exp == 0x7F80_0000u32 { 568 | // Set mantissa MSB for NaN (and also keep shifted mantissa bits) 569 | let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; 570 | return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16; 571 | } 572 | 573 | // The number is normalized, start assembling half precision version 574 | let half_sign = sign >> 16; 575 | // Unbias the exponent, then bias for half precision 576 | let unbiased_exp = ((exp >> 23) as i32) - 127; 577 | let half_exp = unbiased_exp + 15; 578 | 579 | // Check for exponent overflow, return +infinity 580 | if half_exp >= 0x1F { 581 | return (half_sign | 0x7C00u32) as u16; 582 | } 583 | 584 | // Check for underflow 585 | if half_exp <= 0 { 586 | // Check mantissa for what we can do 587 | if 14 - half_exp > 24 { 588 | // No rounding possibility, so this is a full underflow, return signed zero 589 | return half_sign as u16; 590 | } 591 | // Don't forget about hidden leading mantissa bit when assembling mantissa 592 | let man = man | 0x0080_0000u32; 593 | let mut half_man = man >> (14 - half_exp); 594 | // Check for rounding (see comment above functions) 595 | let round_bit = 1 << (13 - half_exp); 596 | if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 
/// Software conversion of an `f64` to raw binary16 bits, rounding to nearest
/// with ties to even.
///
/// * Infinities map to the infinity of the same sign; NaNs stay NaN (the
///   quiet bit is always set and the top payload bits are kept).
/// * Magnitudes too large for binary16 become a signed infinity.
/// * Magnitudes too small for a binary16 subnormal become a signed zero.
///
/// Only the upper 32 bits of the input are carried through the arithmetic.
/// The lower 32 mantissa bits can never be retained in the result, but they
/// do decide whether a value sits exactly on a rounding tie or just above
/// it, so they are folded into a single "sticky" bit instead of being
/// discarded.
#[inline]
pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 {
    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
    let val: u64 = unsafe { mem::transmute::<f64, u64>(value) };
    let x = (val >> 32) as u32;
    // 1 if any of the 32 dropped low mantissa bits is set, else 0.
    let sticky = (val as u32 != 0) as u32;

    // Extract IEEE754 components (from the upper word)
    let sign = x & 0x8000_0000u32;
    let exp = x & 0x7FF0_0000u32;
    let man = x & 0x000F_FFFFu32;

    // All exponent bits set: Infinity or NaN
    if exp == 0x7FF0_0000u32 {
        // Set mantissa MSB for NaN (and also keep shifted mantissa bits).
        // A NaN whose payload lives only in the low 32 bits is still a NaN.
        let nan_bit = if man == 0 && sticky == 0 {
            0
        } else {
            0x0200u32
        };
        return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16;
    }

    // Record the dropped low bits in the least significant mantissa bit. That
    // bit is always below the round bit, so it can only influence the
    // "removed part > tie" test and never the retained mantissa.
    let man = man | sticky;

    // The number is normalized, start assembling half precision version
    let half_sign = sign >> 16;
    // Unbias the exponent, then bias for half precision
    let unbiased_exp = ((exp >> 20) as i64) - 1023;
    let half_exp = unbiased_exp + 15;

    // Exponent overflow: return signed infinity
    if half_exp >= 0x1F {
        return (half_sign | 0x7C00u32) as u16;
    }

    // Underflow: result is a subnormal or zero
    if half_exp <= 0 {
        if 10 - half_exp > 21 {
            // Even the round bit lies above the hidden bit, so nothing can
            // round up: full underflow, return signed zero
            return half_sign as u16;
        }
        // Don't forget about hidden leading mantissa bit when assembling mantissa
        let man = man | 0x0010_0000u32;
        let mut half_man = man >> (11 - half_exp);
        // round_bit is the most significant bit shifted out. Round up when it
        // is set and either a lower removed bit is set (above tie) or the
        // retained part is odd (tie to even).
        let round_bit = 1 << (10 - half_exp);
        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
            half_man += 1;
        }
        // No exponent for subnormals
        return (half_sign | half_man) as u16;
    }

    // Rebias the exponent
    let half_exp = (half_exp as u32) << 10;
    let half_man = man >> 10;
    // Same rounding rule as above with a fixed round bit
    let round_bit = 0x0000_0200u32;
    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
        // Round it; a mantissa carry correctly bumps the exponent
        ((half_sign | half_exp | half_man) + 1) as u16
    } else {
        (half_sign | half_exp | half_man) as u16
    }
}
mem::transmute::((i as u32) << 16) }; 690 | } 691 | 692 | let half_sign = (i & 0x8000u16) as u32; 693 | let half_exp = (i & 0x7C00u16) as u32; 694 | let half_man = (i & 0x03FFu16) as u32; 695 | 696 | // Check for an infinity or NaN when all exponent bits set 697 | if half_exp == 0x7C00u32 { 698 | // Check for signed infinity if mantissa is zero 699 | if half_man == 0 { 700 | return unsafe { mem::transmute::((half_sign << 16) | 0x7F80_0000u32) }; 701 | } else { 702 | // NaN, keep current mantissa but also set most significiant mantissa bit 703 | return unsafe { 704 | mem::transmute::((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13)) 705 | }; 706 | } 707 | } 708 | 709 | // Calculate single-precision components with adjusted exponent 710 | let sign = half_sign << 16; 711 | // Unbias exponent 712 | let unbiased_exp = ((half_exp as i32) >> 10) - 15; 713 | 714 | // Check for subnormals, which will be normalized by adjusting exponent 715 | if half_exp == 0 { 716 | // Calculate how much to adjust the exponent by 717 | let e = leading_zeros_u16(half_man as u16) - 6; 718 | 719 | // Rebias and adjust exponent 720 | let exp = (127 - 15 - e) << 23; 721 | let man = (half_man << (14 + e)) & 0x7F_FF_FFu32; 722 | return unsafe { mem::transmute::(sign | exp | man) }; 723 | } 724 | 725 | // Rebias exponent for a normalized normal 726 | let exp = ((unbiased_exp + 127) as u32) << 23; 727 | let man = (half_man & 0x03FFu32) << 13; 728 | unsafe { mem::transmute::(sign | exp | man) } 729 | } 730 | 731 | #[inline] 732 | pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 { 733 | // Check for signed zero 734 | // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized 735 | if i & 0x7FFFu16 == 0 { 736 | return unsafe { mem::transmute::((i as u64) << 48) }; 737 | } 738 | 739 | let half_sign = (i & 0x8000u16) as u64; 740 | let half_exp = (i & 0x7C00u16) as u64; 741 | let half_man = (i & 0x03FFu16) as u64; 742 | 743 | // Check for an infinity or NaN when all 
exponent bits set 744 | if half_exp == 0x7C00u64 { 745 | // Check for signed infinity if mantissa is zero 746 | if half_man == 0 { 747 | return unsafe { 748 | mem::transmute::((half_sign << 48) | 0x7FF0_0000_0000_0000u64) 749 | }; 750 | } else { 751 | // NaN, keep current mantissa but also set most significiant mantissa bit 752 | return unsafe { 753 | mem::transmute::( 754 | (half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42), 755 | ) 756 | }; 757 | } 758 | } 759 | 760 | // Calculate double-precision components with adjusted exponent 761 | let sign = half_sign << 48; 762 | // Unbias exponent 763 | let unbiased_exp = ((half_exp as i64) >> 10) - 15; 764 | 765 | // Check for subnormals, which will be normalized by adjusting exponent 766 | if half_exp == 0 { 767 | // Calculate how much to adjust the exponent by 768 | let e = leading_zeros_u16(half_man as u16) - 6; 769 | 770 | // Rebias and adjust exponent 771 | let exp = ((1023 - 15 - e) as u64) << 52; 772 | let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64; 773 | return unsafe { mem::transmute::(sign | exp | man) }; 774 | } 775 | 776 | // Rebias exponent for a normalized normal 777 | let exp = ((unbiased_exp + 1023) as u64) << 52; 778 | let man = (half_man & 0x03FFu64) << 42; 779 | unsafe { mem::transmute::(sign | exp | man) } 780 | } 781 | 782 | #[inline] 783 | fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] { 784 | [ 785 | f16_to_f32_fallback(v[0]), 786 | f16_to_f32_fallback(v[1]), 787 | f16_to_f32_fallback(v[2]), 788 | f16_to_f32_fallback(v[3]), 789 | ] 790 | } 791 | 792 | #[inline] 793 | fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] { 794 | [ 795 | f32_to_f16_fallback(v[0]), 796 | f32_to_f16_fallback(v[1]), 797 | f32_to_f16_fallback(v[2]), 798 | f32_to_f16_fallback(v[3]), 799 | ] 800 | } 801 | 802 | #[inline] 803 | fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] { 804 | [ 805 | f16_to_f64_fallback(v[0]), 806 | f16_to_f64_fallback(v[1]), 807 | f16_to_f64_fallback(v[2]), 808 | 
/// Applies the scalar conversion `f` to each element of `src`, storing the
/// result at the same index of `dst`.
///
/// # Panics
///
/// Panics if `src` and `dst` have different lengths.
#[inline]
fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) {
    assert_eq!(src.len(), dst.len());
    dst.iter_mut()
        .zip(src)
        .for_each(|(out, &value)| *out = f(value));
}
// Software arithmetic on raw binary16 bit patterns. Each operation widens its
// operands with `f16_to_f32`, computes in `f32`, and narrows the result back
// with `f32_to_f16`.

/// Adds two binary16 values (raw bits) by computing the sum in `f32`.
#[inline]
fn add_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) + f16_to_f32(b))
}

/// Subtracts `b` from `a` (raw binary16 bits) by computing in `f32`.
#[inline]
fn subtract_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) - f16_to_f32(b))
}

/// Multiplies two binary16 values (raw bits) by computing in `f32`.
#[inline]
fn multiply_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) * f16_to_f32(b))
}

/// Divides `a` by `b` (raw binary16 bits) by computing in `f32`.
#[inline]
fn divide_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) / f16_to_f32(b))
}

/// Computes `a % b` (raw binary16 bits) using the `f32` remainder operator.
#[inline]
fn remainder_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) % f16_to_f32(b))
}

/// Multiplies all binary16 values (raw bits) from `iter`. The running product
/// is kept in `f32` and narrowed to binary16 once at the end.
#[inline]
fn product_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
    f32_to_f16(iter.map(f16_to_f32).product())
}

/// Sums all binary16 values (raw bits) from `iter`. The running sum is kept
/// in `f32` and narrowed to binary16 once at the end.
#[inline]
fn sum_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
    f32_to_f16(iter.map(f16_to_f32).sum())
}

// TODO SIMD arithmetic