├── .gitattributes
├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── rust.yml
├── .gitignore
├── .reuse
    └── dep5
├── .editorconfig
├── LICENSE-MIT
├── src
    ├── leading_zeros.rs
    ├── binary16
    │   ├── arch
    │   │   ├── loongarch64.rs
    │   │   ├── x86.rs
    │   │   └── aarch64.rs
    │   └── arch.rs
    ├── rand_distr.rs
    ├── bfloat
    │   └── convert.rs
    ├── vec.rs
    ├── lib.rs
    └── slice.rs
├── Cargo.toml
├── .circleci
    └── config.yml
├── Makefile.toml
├── README.md
├── benches
    └── convert.rs
├── LICENSE-APACHE
├── CHANGELOG.md
└── Cargo.lock


/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | *.rs whitespace=tab-in-indent,trailing-space,tabwidth=4


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: VoidStarKat
4 | patreon: StarKatradora
5 | 
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Rust
 2 | target/
 3 | **/*.rs.bak
 4 | 
 5 | # IntelliJ
 6 | .idea/
 7 | *.iml
 8 | 
 9 | # VS Code
10 | .vscode/
11 | 


--------------------------------------------------------------------------------
/.reuse/dep5:
--------------------------------------------------------------------------------
1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
2 | Copyright: Kathryn Long <squeeself@gmail.com>
3 | License: MIT OR Apache-2.0
4 | 
5 | Files: *
6 | Copyright: 2021 Kathryn Long <squeeself@gmail.com>
7 | License: MIT OR Apache-2.0
8 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig is awesome: https://EditorConfig.org
 2 | 
 3 | # top-most EditorConfig file
 4 | root = true
 5 | 
 6 | [*]
 7 | indent_style = space
 8 | indent_size = 4
 9 | end_of_line = lf
10 | charset = utf-8
11 | trim_trailing_whitespace = true
12 | insert_final_newline = true
13 | 
14 | [*.md]
15 | # double whitespace at end of line
16 | # denotes a line break in Markdown
17 | trim_trailing_whitespace = false
18 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/src/leading_zeros.rs:
--------------------------------------------------------------------------------
 1 | // https://doc.rust-lang.org/std/primitive.u16.html#method.leading_zeros
 2 | 
 3 | /// Returns the number of leading zero bits in `x`.
 4 | ///
 5 | /// This variant defers to the built-in `u16::leading_zeros`. It is compiled for
 6 | /// every configuration except SPIR-V targets where the two Intel
 7 | /// integer-function target features are not both enabled.
 8 | #[cfg(not(any(all(
 9 |     target_arch = "spirv",
10 |     not(all(
11 |         target_feature = "IntegerFunctions2INTEL",
12 |         target_feature = "SPV_INTEL_shader_integer_functions2"
13 |     ))
14 | ))))]
15 | #[inline]
16 | pub(crate) const fn leading_zeros_u16(x: u16) -> u32 {
17 |     u16::leading_zeros(x)
18 | }
14 | 
15 | /// Returns the number of leading zero bits in `x`.
16 | ///
17 | /// Selected on SPIR-V targets where the two Intel integer-function target
18 | /// features are not both enabled; the count is delegated to
19 | /// `leading_zeros_u16_fallback` instead of `u16::leading_zeros`.
20 | #[cfg(all(
21 |     target_arch = "spirv",
22 |     not(all(
23 |         target_feature = "IntegerFunctions2INTEL",
24 |         target_feature = "SPV_INTEL_shader_integer_functions2"
25 |     ))
26 | ))]
27 | #[inline]
28 | pub(crate) const fn leading_zeros_u16(x: u16) -> u32 {
29 |     leading_zeros_u16_fallback(x)
30 | }
26 | 
27 | /// Bit-by-bit leading-zero count for `u16` that does not call
28 | /// `u16::leading_zeros`.
29 | ///
30 | /// Tests the most significant bit of `x` up to 16 times, shifting `x` left by
31 | /// one between tests, and returns how many zero bits were seen before the
32 | /// first set bit (16 when `x` is zero). The loop is expanded with
33 | /// `crunchy::unroll!`.
34 | ///
35 | /// Compiled for test builds and for the SPIR-V configuration that calls it.
36 | #[cfg(any(
37 |     test,
38 |     all(
39 |         target_arch = "spirv",
40 |         not(all(
41 |             target_feature = "IntegerFunctions2INTEL",
42 |             target_feature = "SPV_INTEL_shader_integer_functions2"
43 |         ))
44 |     )
45 | ))]
46 | #[inline]
47 | const fn leading_zeros_u16_fallback(mut x: u16) -> u32 {
48 |     use crunchy::unroll;
49 |     // Running count of zero bits seen so far.
50 |     let mut c = 0;
51 |     // Mask selecting bit 15, the most significant bit of a `u16`.
52 |     let msb = 1 << 15;
53 |     unroll! { for i in 0 .. 16 {
54 |         if x & msb == 0 {
55 |             c += 1;
56 |         } else {
57 |             // First set bit found: everything above it was zero.
58 |             return c;
59 |         }
60 |         // Skip the shift after the final test, where the shifted value would
61 |         // never be read. NOTE(review): the `allow` presumably silences a lint
62 |         // raised on the unrolled expansion; confirm it is still needed.
63 |         #[allow(unused_assignments)]
64 |         if i < 15 {
65 |             x <<= 1;
66 |         }
67 |     }}
68 |     c
69 | }
55 | 
56 | #[cfg(test)]
57 | mod test {
58 |     // Compares the unrolled fallback with `u16::leading_zeros`.
59 |     #[test]
60 |     fn leading_zeros_u16_fallback() {
61 |         // Besides a few arbitrary values, cover the boundaries: zero (no bit
62 |         // set, so all 16 steps run), a set top bit (immediate return) and
63 |         // all-ones.
64 |         let samples = [0, 1, 44, 97, 304, 1179, 23571, 0x7FFF, 0x8000, u16::MAX];
65 |         for x in samples {
66 |             assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros());
67 |         }
68 |         // Every single-bit value exits the unrolled loop at a different step.
69 |         for shift in 0..16 {
70 |             let x = 1u16 << shift;
71 |             assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros());
72 |         }
73 |     }
74 | }
66 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "half"
 3 | # Remember to keep in sync with html_root_url crate attribute
 4 | version = "2.7.1"
 5 | authors = ["Kathryn Long <squeeself@gmail.com>"]
 6 | description = "Half-precision floating point f16 and bf16 types for Rust implementing the IEEE 754-2008 standard binary16 and bfloat16 types."
 7 | repository = "https://github.com/VoidStarKat/half-rs"
 8 | readme = "README.md"
 9 | keywords = ["f16", "bfloat16", "no_std"]
10 | license = "MIT OR Apache-2.0"
11 | categories = ["no-std", "data-structures", "encoding"]
12 | edition = "2021"
13 | rust-version = "1.81"
14 | exclude = [".git*", ".editorconfig", ".circleci"]
15 | 
16 | [features]
17 | default = ["std"]
18 | std = ["alloc"]
19 | use-intrinsics = []                         # Deprecated
20 | alloc = []
21 | rand_distr = ["dep:rand", "dep:rand_distr"]
22 | zerocopy = []                               # Deprecated
23 | nightly = []
24 | 
25 | [dependencies]
26 | cfg-if = "1.0.0"
27 | bytemuck = { version = "1.4.1", default-features = false, features = [
28 |     "derive",
29 | ], optional = true }
30 | serde = { version = "1.0", default-features = false, features = [
31 |     "derive",
32 | ], optional = true }
33 | num-traits = { version = "0.2.16", default-features = false, features = [
34 |     "libm",
35 | ], optional = true }
36 | zerocopy = { version = "0.8.26", default-features = false, features = [
37 |     "derive",
38 |     "simd",
39 | ] }
40 | rand = { version = "0.9.0", default-features = false, features = [
41 |     "thread_rng",
42 | ], optional = true }
43 | rand_distr = { version = "0.5.0", default-features = false, optional = true }
44 | rkyv = { version = "0.8.0", optional = true }
45 | arbitrary = { version = "1.4.1", features = ["derive"], optional = true }
46 | 
47 | [target.'cfg(target_arch = "spirv")'.dependencies]
48 | crunchy = "0.2.2"
49 | 
50 | [dev-dependencies]
51 | criterion = "0.5"
52 | quickcheck = "1.0"
53 | quickcheck_macros = "1.0"
54 | rand = "0.9.0"
55 | crunchy = "0.2.2"
56 | 
57 | [[bench]]
58 | name = "convert"
59 | harness = false
60 | 
61 | [package.metadata.docs.rs]
62 | rustdoc-args = ["--cfg", "docsrs"]
63 | all-features = true
64 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # Use the latest 2.1 version of CircleCI pipeline process engine.
 2 | # See: https://circleci.com/docs/2.0/configuration-reference
 3 | version: 2.1
 4 | 
 5 | # Define a job to be invoked later in a workflow.
 6 | # See: https://circleci.com/docs/2.0/configuration-reference/#jobs
 7 | jobs:
 8 |   rust:
 9 |     machine:
10 |       image: ubuntu-2004:current
11 |     resource_class: arm.medium
12 |     parameters:
13 |       toolchain:
14 |         type: string
15 |       features:
16 |         type: string
17 |     environment:
18 |       CARGO_INCREMENTAL: 0
19 |       CARGO_NET_RETRY: 10
20 |       RUSTUP_MAX_RETRIES: 10
21 |       CARGO_TERM_COLOR: always
22 |       RUST_BACKTRACE: full
23 |       RUSTFLAGS: "-D warnings"
24 |       CARGO_PROFILE_DEV_DEBUG: 0
25 |       CARGO_PROFILE_TEST_DEBUG: 0
26 |       CARGO_PROFILE_BENCH_DEBUG: 0
27 |       CI: 1
28 |     # Add steps to the job
29 |     # See: https://circleci.com/docs/2.0/configuration-reference/#steps
30 |     steps:
31 |       - checkout
32 |       - run:
33 |           name: Install rust
34 |           command: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -v -y --profile minimal --default-toolchain << parameters.toolchain >>
35 |       - run: source "$HOME/.cargo/env"
36 |       - run:
37 |           name: cargo test
38 |           command: cargo -v test << parameters.features >> -- --nocapture
39 |       - run:
40 |           name: cargo test no_std
41 |           command: cargo -v test --no-default-features -- --nocapture
42 |       - run:
43 |           name: cargo test no_std+alloc
44 |           command: cargo -v test --no-default-features --features alloc -- --nocapture
45 | 
46 | # Invoke jobs via workflows
47 | # See: https://circleci.com/docs/2.0/configuration-reference/#workflows
48 | workflows:
49 |   rust-workflow:
50 |     jobs:
51 |       - rust:
52 |           name: Rust AArch64 (stable)
53 |           toolchain: stable
54 |           features: --all-features
55 |       - rust:
56 |           name: Rust AArch64 (1.81.0)
57 |           toolchain: 1.81.0
58 |           features: --all-features
59 |       - rust:
60 |           name: Rust AArch64 (nightly)
61 |           toolchain: nightly
62 |           features: --all-features
63 | 


--------------------------------------------------------------------------------
/src/binary16/arch/loongarch64.rs:
--------------------------------------------------------------------------------
 1 | use core::{mem::MaybeUninit, ptr};
 2 | 
 3 | #[cfg(target_arch = "loongarch64")]
 4 | use core::arch::loongarch64::{lsx_vfcvt_h_s, lsx_vfcvtl_s_h, m128, m128i};
 5 | 
 6 | /////////////// loongarch64 lsx/lasx ////////////////
 7 | 
 8 | /// Converts one binary16 bit pattern to `f32` using `lsx_vfcvtl_s_h`.
 9 | ///
10 | /// # Safety
11 | ///
12 | /// The caller must ensure the `lsx` target feature is available on the
13 | /// running CPU.
14 | #[target_feature(enable = "lsx")]
15 | #[inline]
16 | pub(super) unsafe fn f16_to_f32_lsx(i: u16) -> f32 {
17 |     // Zero the whole vector first so the lanes not written below are defined.
18 |     let mut lanes = MaybeUninit::<m128i>::zeroed();
19 |     lanes.as_mut_ptr().cast::<u16>().write(i);
20 |     let widened = lsx_vfcvtl_s_h(lanes.assume_init());
21 |     // The result is read back from the first `f32`-sized element.
22 |     ptr::addr_of!(widened).cast::<f32>().read()
23 | }
16 | 
17 | /// Converts one `f32` to a binary16 bit pattern using `lsx_vfcvt_h_s`.
18 | ///
19 | /// # Safety
20 | ///
21 | /// The caller must ensure the `lsx` target feature is available on the
22 | /// running CPU.
23 | #[target_feature(enable = "lsx")]
24 | #[inline]
25 | pub(super) unsafe fn f32_to_f16_lsx(f: f32) -> u16 {
26 |     // Zero the whole vector first so the lanes not written below are defined.
27 |     let mut lanes = MaybeUninit::<m128>::zeroed();
28 |     lanes.as_mut_ptr().cast::<f32>().write(f);
29 |     let source = lanes.assume_init();
30 |     // The same vector is passed for both operands; only the first `u16` of
31 |     // the result is returned.
32 |     let narrowed = lsx_vfcvt_h_s(source, source);
33 |     ptr::addr_of!(narrowed).cast::<u16>().read()
34 | }
25 | 
26 | /// Converts four binary16 bit patterns to `f32` using `lsx_vfcvtl_s_h`.
27 | ///
28 | /// # Safety
29 | ///
30 | /// The caller must ensure the `lsx` target feature is available on the
31 | /// running CPU.
32 | #[target_feature(enable = "lsx")]
33 | #[inline]
34 | pub(super) unsafe fn f16x4_to_f32x4_lsx(v: &[u16; 4]) -> [f32; 4] {
35 |     let mut lanes = MaybeUninit::<m128i>::zeroed();
36 |     // Only the first four `u16` elements are filled; the rest stay zero.
37 |     ptr::copy_nonoverlapping(v.as_ptr(), lanes.as_mut_ptr().cast::<u16>(), v.len());
38 |     let widened = lsx_vfcvtl_s_h(lanes.assume_init());
39 |     ptr::addr_of!(widened).cast::<[f32; 4]>().read()
40 | }
34 | 
35 | /// Converts four `f32` values to binary16 bit patterns using `lsx_vfcvt_h_s`.
36 | ///
37 | /// # Safety
38 | ///
39 | /// The caller must ensure the `lsx` target feature is available on the
40 | /// running CPU.
41 | #[target_feature(enable = "lsx")]
42 | #[inline]
43 | pub(super) unsafe fn f32x4_to_f16x4_lsx(v: &[f32; 4]) -> [u16; 4] {
44 |     // NOTE(review): unlike the sibling helpers this vector is not zeroed; the
45 |     // four `f32` copied below are presumably meant to initialize all of it.
46 |     let mut lanes = MaybeUninit::<m128>::uninit();
47 |     ptr::copy_nonoverlapping(v.as_ptr(), lanes.as_mut_ptr().cast::<f32>(), v.len());
48 |     let source = lanes.assume_init();
49 |     // The same vector is passed for both operands; the first four `u16` of
50 |     // the result are returned.
51 |     let narrowed = lsx_vfcvt_h_s(source, source);
52 |     ptr::addr_of!(narrowed).cast::<[u16; 4]>().read()
53 | }
43 | 
44 | /// Converts four binary16 bit patterns to `f64`, going through `f32` via
45 | /// `f16x4_to_f32x4_lsx`.
46 | ///
47 | /// # Safety
48 | ///
49 | /// The caller must ensure the `lsx` target feature is available on the
50 | /// running CPU.
51 | #[target_feature(enable = "lsx")]
52 | #[inline]
53 | pub(super) unsafe fn f16x4_to_f64x4_lsx(v: &[u16; 4]) -> [f64; 4] {
54 |     let [a, b, c, d] = f16x4_to_f32x4_lsx(v);
55 |     // Let compiler vectorize this regular cast for now.
56 |     [f64::from(a), f64::from(b), f64::from(c), f64::from(d)]
57 | }
56 | 
57 | /// Converts four `f64` values to binary16 bit patterns by first casting each
58 | /// to `f32` and then calling `f32x4_to_f16x4_lsx`.
59 | ///
60 | /// # Safety
61 | ///
62 | /// The caller must ensure the `lsx` target feature is available on the
63 | /// running CPU.
64 | #[target_feature(enable = "lsx")]
65 | #[inline]
66 | pub(super) unsafe fn f64x4_to_f16x4_lsx(v: &[f64; 4]) -> [u16; 4] {
67 |     let [a, b, c, d] = *v;
68 |     // Let compiler vectorize this regular cast for now.
69 |     f32x4_to_f16x4_lsx(&[a as f32, b as f32, c as f32, d as f32])
70 | }
64 | 


--------------------------------------------------------------------------------
/Makefile.toml:
--------------------------------------------------------------------------------
 1 | [config]
 2 | min_version = "0.35.0"
 3 | 
 4 | [env]
 5 | CI_CARGO_TEST_FLAGS = { value = "--locked -- --nocapture", condition = { env_true = [
 6 |     "CARGO_MAKE_CI",
 7 | ] } }
 8 | CARGO_MAKE_CARGO_ALL_FEATURES = { source = "${CARGO_MAKE_RUST_CHANNEL}", default_value = "--all-features", mapping = { "nightly" = "--all-features" } }
 9 | CARGO_MAKE_CLIPPY_ARGS = { value = "${CARGO_MAKE_CLIPPY_ALL_FEATURES_WARN}", condition = { env_true = [
10 |     "CARGO_MAKE_CI",
11 | ] } }
12 | 
13 | # Override for CI flag additions
14 | [tasks.test]
15 | args = [
16 |     "test",
17 |     "@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)",
18 |     "@@split(CARGO_MAKE_CARGO_BUILD_TEST_FLAGS, )",
19 |     "@@split(CI_CARGO_TEST_FLAGS, )",
20 | ]
21 | 
22 | # Let clippy run on non-nightly CI
23 | [tasks.clippy-ci-flow]
24 | condition = { env_set = ["CARGO_MAKE_RUN_CLIPPY"] }
25 | 
26 | # Let format check run on non-nightly CI
27 | [tasks.check-format-ci-flow]
28 | condition = { env_set = ["CARGO_MAKE_RUN_CHECK_FORMAT"] }
29 | 
30 | [tasks.check-docs]
31 | description = "Checks docs for errors."
32 | category = "Documentation"
33 | install_crate = false
34 | env = { RUSTDOCFLAGS = "-D warnings" }
35 | command = "cargo"
36 | args = [
37 |     "doc",
38 |     "--workspace",
39 |     "--no-deps",
40 |     "@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)",
41 |     "${CARGO_MAKE_CARGO_ALL_FEATURES}",
42 | ]
43 | 
 44 | # Check docs, then build & test the no_std configurations (no features, and alloc only)
45 | [tasks.post-ci-flow]
46 | run_task = [
47 |     { name = [
48 |         "check-docs",
49 |         "build-no-std",
50 |         "test-no-std",
51 |         "build-no-std-alloc",
52 |         "test-no-std-alloc",
53 |     ] },
54 | ]
55 | 
56 | [tasks.build-no-std]
57 | description = "Build without any features"
58 | category = "Build"
59 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" }
60 | run_task = "build"
61 | 
62 | [tasks.test-no-std]
63 | description = "Run tests without any features"
64 | category = "Test"
65 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" }
66 | run_task = "test"
67 | 
68 | [tasks.build-no-std-alloc]
69 | description = "Build without any features except alloc"
70 | category = "Build"
71 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features alloc" }
72 | run_task = "build"
73 | 
74 | [tasks.test-no-std-alloc]
75 | description = "Run tests without any features except alloc"
76 | category = "Test"
77 | env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features alloc" }
78 | run_task = "test"
79 | 


--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
 1 | name: Rust
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | env:
 6 |   CARGO_INCREMENTAL: 0
 7 |   CARGO_NET_RETRY: 10
 8 |   RUSTUP_MAX_RETRIES: 10
 9 |   CARGO_TERM_COLOR: always
10 |   RUST_BACKTRACE: full
11 |   RUSTFLAGS: "-D warnings"
12 |   # Disable debuginfo for faster compile
13 |   CARGO_PROFILE_DEV_DEBUG: 0
14 |   CARGO_PROFILE_TEST_DEBUG: 0
15 |   CARGO_PROFILE_BENCH_DEBUG: 0
16 |   CI: 1
17 |   CARGO_MAKE_CI: 1
18 |   CARGO_MAKE_RUN_CHECK_FORMAT: 1
19 |   CARGO_MAKE_RUN_CLIPPY: 1
20 |   CARGO_MAKE_BUILD_BENCH: 1
21 | 
22 | jobs:
23 |   rust:
24 |     name: Rust
25 |     runs-on: ${{ matrix.os }}
26 |     continue-on-error: ${{ matrix.rust == 'nightly' }}
27 |     strategy:
28 |       matrix:
29 |         os: [ubuntu-latest, windows-latest]
30 |         target: [x86_64, i686, aarch64]
31 |         rust:
32 |           - stable
33 |           - beta
34 |           - nightly
35 |           - 1.81.0
36 | 
37 |     steps:
38 |     - name: Checkout
39 |       uses: actions/checkout@v4
40 | 
41 |     - name: Linux Install Rust toolchain
42 |       if: ${{ matrix.os == 'ubuntu-latest' }}
43 |       uses: dtolnay/rust-toolchain@master
44 |       with:
45 |         toolchain: ${{ matrix.rust }}
46 |         targets: ${{ matrix.target }}-unknown-linux-gnu
47 |         components: clippy,rustfmt
48 | 
49 |     - name: Windows Install Rust toolchain
50 |       if: ${{ matrix.os == 'windows-latest' }}
51 |       uses: dtolnay/rust-toolchain@master
52 |       with:
53 |         toolchain: ${{ matrix.rust }}
54 |         targets: ${{ matrix.target }}-pc-windows-msvc
55 |         components: clippy,rustfmt
56 | 
57 |     - name: Cache Dependencies
58 |       uses: Swatinem/rust-cache@v2
59 | 
60 |     - name: Install Cargo Make
61 |       if: ${{ matrix.target == 'x86_64' }}
62 |       uses: davidB/rust-cargo-make@v1
63 | 
64 |     - run: cargo make ci-flow
65 |       if: ${{ matrix.target == 'x86_64' }}
66 | 
67 |     - name: linux cargo-make build-only
68 |       if: ${{ matrix.target != 'x86_64' && matrix.os == 'ubuntu-latest' }}
69 |       run: cargo build --all-features --target ${{ matrix.target }}-unknown-linux-gnu
70 | 
71 |     - name: windows cargo-make build-only
72 |       if: ${{ matrix.target != 'x86_64' && matrix.os == 'windows-latest' }}
73 |       run: cargo build --all-features --target ${{ matrix.target }}-pc-windows-msvc
74 | 
75 |   miri:
76 |     name: Miri
77 |     runs-on: ubuntu-latest
78 |     steps:
79 |       - name: Checkout
80 |         uses: actions/checkout@v4
81 |       - name: Install Rust toolchain
82 |         uses: dtolnay/rust-toolchain@master
83 |         with:
84 |           toolchain: nightly
85 |           components: miri
86 |       - run: cargo miri test
87 |         env:
88 |           MIRIFLAGS: -Zmiri-strict-provenance
89 | 


--------------------------------------------------------------------------------
/src/binary16/arch/x86.rs:
--------------------------------------------------------------------------------
  1 | use core::{mem::MaybeUninit, ptr};
  2 | use zerocopy::transmute;
  3 | 
  4 | #[cfg(target_arch = "x86")]
  5 | use core::arch::x86::{
  6 |     __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps,
  7 |     _MM_FROUND_TO_NEAREST_INT,
  8 | };
  9 | #[cfg(target_arch = "x86_64")]
 10 | use core::arch::x86_64::{
 11 |     __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph,
 12 |     _MM_FROUND_TO_NEAREST_INT,
 13 | };
 14 | 
 15 | #[cfg(target_arch = "x86")]
 16 | use core::arch::x86::_mm_cvtps_ph;
 17 | 
 18 | use super::convert_chunked_slice_8;
 19 | 
 20 | /////////////// x86/x86_64 f16c ////////////////
 21 | 
 22 | /// Converts one binary16 bit pattern to `f32` using `_mm_cvtph_ps`.
 23 | ///
 24 | /// # Safety
 25 | ///
 26 | /// The caller must ensure the `f16c` target feature is available on the
 27 | /// running CPU.
 28 | #[target_feature(enable = "f16c")]
 29 | #[inline]
 30 | pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 {
 31 |     // Put the input in the first lane and zero the remaining seven.
 32 |     let halves: [u16; 8] = [i, 0, 0, 0, 0, 0, 0, 0];
 33 |     let packed: __m128i = transmute!(halves);
 34 |     let [first, ..]: [f32; 4] = transmute!(_mm_cvtph_ps(packed));
 35 |     first
 36 | }
 29 | 
 30 | /// Converts one `f32` to a binary16 bit pattern using `_mm_cvtps_ph` with
 31 | /// `_MM_FROUND_TO_NEAREST_INT`.
 32 | ///
 33 | /// # Safety
 34 | ///
 35 | /// The caller must ensure the `f16c` target feature is available on the
 36 | /// running CPU.
 37 | #[target_feature(enable = "f16c")]
 38 | #[inline]
 39 | pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 {
 40 |     // Put the input in the first lane and zero the remaining three.
 41 |     let packed: __m128 = transmute!([f, 0.0, 0.0, 0.0]);
 42 |     let narrowed = _mm_cvtps_ph(packed, _MM_FROUND_TO_NEAREST_INT);
 43 |     let [first, ..]: [u16; 8] = transmute!(narrowed);
 44 |     first
 45 | }
 38 | 
 39 | /// Converts four binary16 bit patterns to `f32` using `_mm_cvtph_ps`.
 40 | ///
 41 | /// # Safety
 42 | ///
 43 | /// The caller must ensure the `f16c` target feature is available on the
 44 | /// running CPU.
 45 | #[target_feature(enable = "f16c")]
 46 | #[inline]
 47 | pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] {
 48 |     // The inputs fill the first half of the vector; the second half is zero.
 49 |     let padded: [[u16; 4]; 2] = [*v, [0; 4]];
 50 |     let packed: __m128i = transmute!(padded);
 51 |     transmute!(_mm_cvtph_ps(packed))
 52 | }
 45 | 
 46 | /// Converts four `f32` values to binary16 bit patterns using `_mm_cvtps_ph`
 47 | /// with `_MM_FROUND_TO_NEAREST_INT`.
 48 | ///
 49 | /// # Safety
 50 | ///
 51 | /// The caller must ensure the `f16c` target feature is available on the
 52 | /// running CPU.
 53 | #[target_feature(enable = "f16c")]
 54 | #[inline]
 55 | pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] {
 56 |     let packed: __m128 = transmute!(*v);
 57 |     let narrowed = _mm_cvtps_ph(packed, _MM_FROUND_TO_NEAREST_INT);
 58 |     // Only the first four `u16` of the 128-bit result are returned.
 59 |     let [low, _high]: [[u16; 4]; 2] = transmute!(narrowed);
 60 |     low
 61 | }
 54 | 
 55 | /// Converts four binary16 bit patterns to `f64`, going through `f32` via
 56 | /// `f16x4_to_f32x4_x86_f16c`.
 57 | ///
 58 | /// # Safety
 59 | ///
 60 | /// The caller must ensure the `f16c` target feature is available on the
 61 | /// running CPU.
 62 | #[target_feature(enable = "f16c")]
 63 | #[inline]
 64 | pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] {
 65 |     let [a, b, c, d] = f16x4_to_f32x4_x86_f16c(v);
 66 |     // Let compiler vectorize this regular cast for now.
 67 |     // TODO: investigate auto-detecting sse2/avx convert features
 68 |     [f64::from(a), f64::from(b), f64::from(c), f64::from(d)]
 69 | }
 68 | 
 69 | /// Converts four `f64` values to binary16 bit patterns by first casting each
 70 | /// to `f32` and then calling `f32x4_to_f16x4_x86_f16c`.
 71 | ///
 72 | /// # Safety
 73 | ///
 74 | /// The caller must ensure the `f16c` target feature is available on the
 75 | /// running CPU.
 76 | #[target_feature(enable = "f16c")]
 77 | #[inline]
 78 | pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] {
 79 |     let [a, b, c, d] = *v;
 80 |     // Let compiler vectorize this regular cast for now.
 81 |     // TODO: investigate auto-detecting sse2/avx convert features
 82 |     f32x4_to_f16x4_x86_f16c(&[a as f32, b as f32, c as f32, d as f32])
 83 | }
 77 | 
 78 | /// Converts eight binary16 bit patterns to `f32` using `_mm256_cvtph_ps`.
 79 | ///
 80 | /// # Safety
 81 | ///
 82 | /// The caller must ensure the `f16c` target feature is available on the
 83 | /// running CPU.
 84 | #[target_feature(enable = "f16c")]
 85 | #[inline]
 86 | pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] {
 87 |     let packed: __m128i = transmute!(*v);
 88 |     let widened: __m256 = _mm256_cvtph_ps(packed);
 89 |     transmute!(widened)
 90 | }
 84 | 
 85 | /// Converts eight `f32` values to binary16 bit patterns using
 86 | /// `_mm256_cvtps_ph` with `_MM_FROUND_TO_NEAREST_INT`.
 87 | ///
 88 | /// # Safety
 89 | ///
 90 | /// The caller must ensure the `f16c` target feature is available on the
 91 | /// running CPU.
 92 | #[target_feature(enable = "f16c")]
 93 | #[inline]
 94 | pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] {
 95 |     let packed: __m256 = transmute!(*v);
 96 |     let narrowed: __m128i = _mm256_cvtps_ph(packed, _MM_FROUND_TO_NEAREST_INT);
 97 |     transmute!(narrowed)
 98 | }
 92 | 
 93 | /// Converts eight binary16 bit patterns to `f64`, going through `f32` via
 94 | /// `f16x8_to_f32x8_x86_f16c`.
 95 | ///
 96 | /// # Safety
 97 | ///
 98 | /// The caller must ensure the `f16c` target feature is available on the
 99 | /// running CPU.
100 | #[target_feature(enable = "f16c")]
101 | #[inline]
102 | pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] {
103 |     let [a, b, c, d, e, f, g, h] = f16x8_to_f32x8_x86_f16c(v);
104 |     // Let compiler vectorize this regular cast for now.
105 |     // TODO: investigate auto-detecting sse2/avx convert features
106 |     [
107 |         f64::from(a),
108 |         f64::from(b),
109 |         f64::from(c),
110 |         f64::from(d),
111 |         f64::from(e),
112 |         f64::from(f),
113 |         f64::from(g),
114 |         f64::from(h),
115 |     ]
116 | }
110 | 
111 | /// Converts eight `f64` values to binary16 bit patterns by first casting each
112 | /// to `f32` and then calling `f32x8_to_f16x8_x86_f16c`.
113 | ///
114 | /// # Safety
115 | ///
116 | /// The caller must ensure the `f16c` target feature is available on the
117 | /// running CPU.
118 | #[target_feature(enable = "f16c")]
119 | #[inline]
120 | pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] {
121 |     let [a, b, c, d, e, f, g, h] = *v;
122 |     // Let compiler vectorize this regular cast for now.
123 |     // TODO: investigate auto-detecting sse2/avx convert features
124 |     let singles = [
125 |         a as f32, b as f32, c as f32, d as f32, e as f32, f as f32, g as f32, h as f32,
126 |     ];
127 |     f32x8_to_f16x8_x86_f16c(&singles)
128 | }
128 | 


--------------------------------------------------------------------------------
/src/binary16/arch/aarch64.rs:
--------------------------------------------------------------------------------
  1 | use core::{
  2 |     arch::{
  3 |         aarch64::{float32x4_t, float64x2_t, uint16x4_t},
  4 |         asm,
  5 |     },
  6 |     mem::MaybeUninit,
  7 |     ptr,
  8 | };
  9 | use zerocopy::transmute;
 10 | 
 11 | /// Converts one binary16 bit pattern to `f32` with a scalar `fcvt`.
 12 | ///
 13 | /// # Safety
 14 | ///
 15 | /// The caller must ensure the `fp16` target feature is available on the
 16 | /// running CPU.
 17 | #[target_feature(enable = "fp16")]
 18 | #[inline]
 19 | pub(super) unsafe fn f16_to_f32_fp16(i: u16) -> f32 {
 20 |     let widened: f32;
 21 |     asm!(
 22 |         "fcvt {dst:s}, {src:h}",
 23 |         dst = out(vreg) widened,
 24 |         src = in(vreg) i,
 25 |         options(pure, nomem, nostack, preserves_flags)
 26 |     );
 27 |     widened
 28 | }
 22 | 
 23 | /// Converts one binary16 bit pattern to `f64` with a scalar `fcvt`.
 24 | ///
 25 | /// # Safety
 26 | ///
 27 | /// The caller must ensure the `fp16` target feature is available on the
 28 | /// running CPU.
 29 | #[target_feature(enable = "fp16")]
 30 | #[inline]
 31 | pub(super) unsafe fn f16_to_f64_fp16(i: u16) -> f64 {
 32 |     let widened: f64;
 33 |     asm!(
 34 |         "fcvt {dst:d}, {src:h}",
 35 |         dst = out(vreg) widened,
 36 |         src = in(vreg) i,
 37 |         options(pure, nomem, nostack, preserves_flags)
 38 |     );
 39 |     widened
 40 | }
 34 | 
 35 | /// Converts one `f32` to a binary16 bit pattern with a scalar `fcvt`.
 36 | ///
 37 | /// # Safety
 38 | ///
 39 | /// The caller must ensure the `fp16` target feature is available on the
 40 | /// running CPU.
 41 | #[target_feature(enable = "fp16")]
 42 | #[inline]
 43 | pub(super) unsafe fn f32_to_f16_fp16(f: f32) -> u16 {
 44 |     let narrowed: u16;
 45 |     asm!(
 46 |         "fcvt {dst:h}, {src:s}",
 47 |         dst = out(vreg) narrowed,
 48 |         src = in(vreg) f,
 49 |         options(pure, nomem, nostack, preserves_flags)
 50 |     );
 51 |     narrowed
 52 | }
 46 | 
 47 | /// Converts one `f64` to a binary16 bit pattern with a scalar `fcvt`.
 48 | ///
 49 | /// # Safety
 50 | ///
 51 | /// The caller must ensure the `fp16` target feature is available on the
 52 | /// running CPU.
 53 | #[target_feature(enable = "fp16")]
 54 | #[inline]
 55 | pub(super) unsafe fn f64_to_f16_fp16(f: f64) -> u16 {
 56 |     let narrowed: u16;
 57 |     asm!(
 58 |         "fcvt {dst:h}, {src:d}",
 59 |         dst = out(vreg) narrowed,
 60 |         src = in(vreg) f,
 61 |         options(pure, nomem, nostack, preserves_flags)
 62 |     );
 63 |     narrowed
 64 | }
 58 | 
 59 | /// Converts four binary16 bit patterns to `f32` with a vector `fcvtl`.
 60 | ///
 61 | /// # Safety
 62 | ///
 63 | /// The caller must ensure the `fp16` target feature is available on the
 64 | /// running CPU.
 65 | #[target_feature(enable = "fp16")]
 66 | #[inline]
 67 | pub(super) unsafe fn f16x4_to_f32x4_fp16(v: &[u16; 4]) -> [f32; 4] {
 68 |     let halves: uint16x4_t = transmute!(*v);
 69 |     let widened: float32x4_t;
 70 |     asm!(
 71 |         "fcvtl {dst:v}.4s, {src:v}.4h",
 72 |         dst = out(vreg) widened,
 73 |         src = in(vreg) halves,
 74 |         options(pure, nomem, nostack)
 75 |     );
 76 |     transmute!(widened)
 77 | }
 71 | 
 72 | /// Converts four `f32` values to binary16 bit patterns with a vector `fcvtn`.
 73 | ///
 74 | /// # Safety
 75 | ///
 76 | /// The caller must ensure the `fp16` target feature is available on the
 77 | /// running CPU.
 78 | #[target_feature(enable = "fp16")]
 79 | #[inline]
 80 | pub(super) unsafe fn f32x4_to_f16x4_fp16(v: &[f32; 4]) -> [u16; 4] {
 81 |     let singles: float32x4_t = transmute!(*v);
 82 |     let narrowed: uint16x4_t;
 83 |     asm!(
 84 |         "fcvtn {dst:v}.4h, {src:v}.4s",
 85 |         dst = out(vreg) narrowed,
 86 |         src = in(vreg) singles,
 87 |         options(pure, nomem, nostack)
 88 |     );
 89 |     transmute!(narrowed)
 90 | }
 84 | 
 85 | /// Converts four binary16 bit patterns to `f64`.
 86 | ///
 87 | /// The values are first widened to `f32` in a scratch register, then the low
 88 | /// and high pairs are widened to `f64` separately.
 89 | ///
 90 | /// # Safety
 91 | ///
 92 | /// The caller must ensure the `fp16` target feature is available on the
 93 | /// running CPU.
 94 | #[target_feature(enable = "fp16")]
 95 | #[inline]
 96 | pub(super) unsafe fn f16x4_to_f64x4_fp16(v: &[u16; 4]) -> [f64; 4] {
 97 |     let halves: uint16x4_t = transmute!(*v);
 98 |     let lo: float64x2_t;
 99 |     let hi: float64x2_t;
100 |     asm!(
101 |         "fcvtl {tmp:v}.4s, {src:v}.4h", // Convert to f32
102 |         "fcvtl {lo:v}.2d, {tmp:v}.2s", // Convert low part to f64
103 |         "fcvtl2 {hi:v}.2d, {tmp:v}.4s", // Convert high part to f64
104 |         lo = lateout(vreg) lo,
105 |         hi = lateout(vreg) hi,
106 |         tmp = out(vreg) _,
107 |         src = in(vreg) halves,
108 |         options(pure, nomem, nostack)
109 |     );
110 |     transmute!([lo, hi])
111 | }
102 | 
103 | /// Converts four `f64` values to binary16 bit patterns.
104 | ///
105 | /// Both pairs are first narrowed to `f32` into one scratch register, which is
106 | /// then narrowed to binary16.
107 | ///
108 | /// # Safety
109 | ///
110 | /// The caller must ensure the `fp16` target feature is available on the
111 | /// running CPU.
112 | #[target_feature(enable = "fp16")]
113 | #[inline]
114 | pub(super) unsafe fn f64x4_to_f16x4_fp16(v: &[f64; 4]) -> [u16; 4] {
115 |     // Load the first and last two inputs into separate two-lane vectors.
116 |     let (front, back) = v.split_at(2);
117 |     let mut lo = MaybeUninit::<float64x2_t>::uninit();
118 |     let mut hi = MaybeUninit::<float64x2_t>::uninit();
119 |     ptr::copy_nonoverlapping(front.as_ptr(), lo.as_mut_ptr().cast::<f64>(), 2);
120 |     ptr::copy_nonoverlapping(back.as_ptr(), hi.as_mut_ptr().cast::<f64>(), 2);
121 |     let narrowed: uint16x4_t;
122 |     asm!(
123 |         "fcvtn {tmp:v}.2s, {lo:v}.2d", // Convert low to f32
124 |         "fcvtn2 {tmp:v}.4s, {hi:v}.2d", // Convert high to f32
125 |         "fcvtn {dst:v}.4h, {tmp:v}.4s", // Convert to f16
126 |         dst = lateout(vreg) narrowed,
127 |         tmp = out(vreg) _,
128 |         lo = in(vreg) lo.assume_init(),
129 |         hi = in(vreg) hi.assume_init(),
130 |         options(pure, nomem, nostack)
131 |     );
132 |     transmute!(narrowed)
133 | }
122 | 
123 | /// Adds two binary16 bit patterns with a half-precision `fadd` and returns
124 | /// the resulting bit pattern.
125 | ///
126 | /// # Safety
127 | ///
128 | /// The caller must ensure the `fp16` target feature is available on the
129 | /// running CPU.
130 | #[target_feature(enable = "fp16")]
131 | #[inline]
132 | pub(super) unsafe fn add_f16_fp16(a: u16, b: u16) -> u16 {
133 |     let sum: u16;
134 |     asm!(
135 |         "fadd {dst:h}, {lhs:h}, {rhs:h}",
136 |         dst = out(vreg) sum,
137 |         lhs = in(vreg) a,
138 |         rhs = in(vreg) b,
139 |         options(pure, nomem, nostack)
140 |     );
141 |     sum
142 | }
135 | 
136 | /// Subtracts `b` from `a` (both binary16 bit patterns) with a half-precision
137 | /// `fsub` and returns the resulting bit pattern.
138 | ///
139 | /// # Safety
140 | ///
141 | /// The caller must ensure the `fp16` target feature is available on the
142 | /// running CPU.
143 | #[target_feature(enable = "fp16")]
144 | #[inline]
145 | pub(super) unsafe fn subtract_f16_fp16(a: u16, b: u16) -> u16 {
146 |     let difference: u16;
147 |     asm!(
148 |         "fsub {dst:h}, {lhs:h}, {rhs:h}",
149 |         dst = out(vreg) difference,
150 |         lhs = in(vreg) a,
151 |         rhs = in(vreg) b,
152 |         options(pure, nomem, nostack)
153 |     );
154 |     difference
155 | }
148 | 
149 | /// Multiplies two binary16 bit patterns with a half-precision `fmul` and
150 | /// returns the resulting bit pattern.
151 | ///
152 | /// # Safety
153 | ///
154 | /// The caller must ensure the `fp16` target feature is available on the
155 | /// running CPU.
156 | #[target_feature(enable = "fp16")]
157 | #[inline]
158 | pub(super) unsafe fn multiply_f16_fp16(a: u16, b: u16) -> u16 {
159 |     let product: u16;
160 |     asm!(
161 |         "fmul {dst:h}, {lhs:h}, {rhs:h}",
162 |         dst = out(vreg) product,
163 |         lhs = in(vreg) a,
164 |         rhs = in(vreg) b,
165 |         options(pure, nomem, nostack)
166 |     );
167 |     product
168 | }
161 | 
162 | /// Divides `a` by `b` (both binary16 bit patterns) with a half-precision
163 | /// `fdiv` and returns the resulting bit pattern.
164 | ///
165 | /// # Safety
166 | ///
167 | /// The caller must ensure the `fp16` target feature is available on the
168 | /// running CPU.
169 | #[target_feature(enable = "fp16")]
170 | #[inline]
171 | pub(super) unsafe fn divide_f16_fp16(a: u16, b: u16) -> u16 {
172 |     let quotient: u16;
173 |     asm!(
174 |         "fdiv {dst:h}, {lhs:h}, {rhs:h}",
175 |         dst = out(vreg) quotient,
176 |         lhs = in(vreg) a,
177 |         rhs = in(vreg) b,
178 |         options(pure, nomem, nostack)
179 |     );
180 |     quotient
181 | }
174 | 


--------------------------------------------------------------------------------
/src/rand_distr.rs:
--------------------------------------------------------------------------------
  1 | use crate::{bf16, f16};
  2 | 
  3 | use rand::{distr::Distribution, Rng};
  4 | use rand_distr::uniform::UniformFloat;
  5 | 
  6 | // Implements `Distribution<$Ty>` for `$Distr` by drawing an `f32` from the
  7 | // distribution's existing `Distribution<f32>` impl and converting it with
  8 | // `<$Ty>::from_f32`.
  9 | macro_rules! impl_distribution_via_f32 {
 10 |     ($Ty:ty, $Distr:ty) => {
 11 |         impl Distribution<$Ty> for $Distr {
 12 |             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $Ty {
 13 |                 let wide: f32 = Distribution::<f32>::sample(self, rng);
 14 |                 <$Ty>::from_f32(wide)
 15 |             }
 16 |         }
 17 |     };
 18 | }
 15 | 
 16 | // `f16` sampling for these `rand_distr` distributions, produced by
 17 | // converting an `f32` sample.
 18 | impl_distribution_via_f32!(f16, rand_distr::StandardUniform);
 19 | impl_distribution_via_f32!(f16, rand_distr::StandardNormal);
 20 | impl_distribution_via_f32!(f16, rand_distr::Exp1);
 21 | impl_distribution_via_f32!(f16, rand_distr::Open01);
 22 | impl_distribution_via_f32!(f16, rand_distr::OpenClosed01);
 21 | 
 22 | // `bf16` sampling for these `rand_distr` distributions, produced by
 23 | // converting an `f32` sample.
 24 | impl_distribution_via_f32!(bf16, rand_distr::StandardUniform);
 25 | impl_distribution_via_f32!(bf16, rand_distr::StandardNormal);
 26 | impl_distribution_via_f32!(bf16, rand_distr::Exp1);
 27 | impl_distribution_via_f32!(bf16, rand_distr::Open01);
 28 | impl_distribution_via_f32!(bf16, rand_distr::OpenClosed01);
 27 | 
 28 | // Lets `f16` be used as a weight type for `rand`'s weighted distributions.
 29 | impl rand::distr::weighted::Weight for f16 {
 30 |     const ZERO: Self = Self::ZERO;
 31 | 
 32 |     // Adds `v` to `self` in place; this implementation never returns `Err`.
 33 |     fn checked_add_assign(&mut self, v: &Self) -> Result<(), ()> {
 34 |         // Floats have an explicit representation for overflow
 35 |         *self += v;
 36 |         Ok(())
 37 |     }
 38 | }
 37 | 
 38 | // Lets `bf16` be used as a weight type for `rand`'s weighted distributions.
 39 | impl rand::distr::weighted::Weight for bf16 {
 40 |     const ZERO: Self = Self::ZERO;
 41 | 
 42 |     // Adds `v` to `self` in place; this implementation never returns `Err`.
 43 |     fn checked_add_assign(&mut self, v: &Self) -> Result<(), ()> {
 44 |         // Floats have an explicit representation for overflow
 45 |         *self += v;
 46 |         Ok(())
 47 |     }
 48 | }
 47 | 
 48 | /// Uniform sampler for [`f16`], backed by a `UniformFloat<f32>`.
 49 | #[derive(Debug, Clone, Copy)]
 50 | pub struct Float16Sampler(UniformFloat<f32>);
 51 | 
 52 | // Registers `Float16Sampler` as the uniform sampler type for `f16`.
 53 | impl rand_distr::uniform::SampleUniform for f16 {
 54 |     type Sampler = Float16Sampler;
 55 | }
 54 | 
 55 | // Uniform sampling of `f16`: bounds are widened to `f32`, sampling happens in
 56 | // `f32`, and each sample is converted back with `f16::from_f32`.
 57 | impl rand_distr::uniform::UniformSampler for Float16Sampler {
 58 |     type X = f16;
 59 |     // Forwards to `UniformFloat::new` with both bounds converted to `f32`.
 60 |     fn new<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
 61 |     where
 62 |         B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
 63 |         B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
 64 |     {
 65 |         let lo = low.borrow().to_f32();
 66 |         let hi = high.borrow().to_f32();
 67 |         let inner: UniformFloat<f32> = UniformFloat::new(lo, hi)?;
 68 |         Ok(Self(inner))
 69 |     }
 70 |     // Forwards to `UniformFloat::new_inclusive` with both bounds converted to
 71 |     // `f32`.
 72 |     fn new_inclusive<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
 73 |     where
 74 |         B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
 75 |         B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
 76 |     {
 77 |         let lo = low.borrow().to_f32();
 78 |         let hi = high.borrow().to_f32();
 79 |         let inner: UniformFloat<f32> = UniformFloat::new_inclusive(lo, hi)?;
 80 |         Ok(Self(inner))
 81 |     }
 82 |     // Draws an `f32` from the wrapped sampler and converts it to `f16`.
 83 |     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
 84 |         let wide = self.0.sample(rng);
 85 |         f16::from_f32(wide)
 86 |     }
 87 | }
 81 | 
 82 | #[derive(Debug, Clone, Copy)] // Uniform sampler for `bf16`: a newtype around an `f32` uniform sampler.
 83 | pub struct BFloat16Sampler(UniformFloat<f32>);
 84 | 
 85 | impl rand_distr::uniform::SampleUniform for bf16 { // names the sampler type used for uniform `bf16` ranges
 86 |     type Sampler = BFloat16Sampler;
 87 | }
 88 | 
 89 | impl rand_distr::uniform::UniformSampler for BFloat16Sampler {
 90 |     type X = bf16;
 91 |     /// Builds the sampler by widening both bounds to `f32` and calling `UniformFloat::new`.
 92 |     fn new<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
 93 |     where
 94 |         B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
 95 |         B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
 96 |     {
 97 |         let (lo, hi) = (low.borrow().to_f32(), high.borrow().to_f32());
 98 |         // Any error reported by the wrapped `f32` sampler is propagated through `?`.
 99 |         Ok(Self(UniformFloat::new(lo, hi)?))
100 |     }
101 |     /// Same as `new`, but forwards to `UniformFloat::new_inclusive`.
102 |     fn new_inclusive<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
103 |     where
104 |         B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
105 |         B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
106 |     {
107 |         let (lo, hi) = (low.borrow().to_f32(), high.borrow().to_f32());
108 |         // Any error reported by the wrapped `f32` sampler is propagated through `?`.
109 |         Ok(Self(UniformFloat::new_inclusive(lo, hi)?))
110 |     }
111 |     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
112 |         bf16::from_f32(self.0.sample(rng)) // draw an f32, then narrow; NOTE(review): narrowing may round onto a bound - confirm
113 |     }
114 | }
115 | 
116 | #[cfg(test)]
117 | mod tests {
118 |     use super::*;
119 | 
120 |     #[allow(unused_imports)] // NOTE(review): reason for the allow is not visible in this excerpt - confirm it is still needed
121 |     use rand::{rng, Rng};
122 |     use rand_distr::{StandardNormal, StandardUniform, Uniform};
123 | 
124 |     #[test] // Smoke test: every sample below only has to compile and run; the values are discarded.
125 |     fn test_sample_f16() {
126 |         let mut rng = rng();
127 |         let _: f16 = rng.sample(StandardUniform);
128 |         let _: f16 = rng.sample(StandardNormal);
129 |         let _: f16 = rng.sample(Uniform::new(f16::from_f32(0.0), f16::from_f32(1.0)).unwrap());
130 |         #[cfg(feature = "num-traits")] // `Normal` is only sampled when the `num-traits` feature is enabled
131 |         let _: f16 =
132 |             rng.sample(rand_distr::Normal::new(f16::from_f32(0.0), f16::from_f32(1.0)).unwrap());
133 |     }
134 | 
135 |     #[test] // Smoke test: every sample below only has to compile and run; the values are discarded.
136 |     fn test_sample_bf16() {
137 |         let mut rng = rng();
138 |         let _: bf16 = rng.sample(StandardUniform);
139 |         let _: bf16 = rng.sample(StandardNormal);
140 |         let _: bf16 = rng.sample(Uniform::new(bf16::from_f32(0.0), bf16::from_f32(1.0)).unwrap());
141 |         #[cfg(feature = "num-traits")] // `Normal` is only sampled when the `num-traits` feature is enabled
142 |         let _: bf16 =
143 |             rng.sample(rand_distr::Normal::new(bf16::from_f32(0.0), bf16::from_f32(1.0)).unwrap());
144 |     }
145 | }
146 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # `f16` and `bf16` floating point types for Rust
 2 | [![Crates.io](https://img.shields.io/crates/v/half.svg)](https://crates.io/crates/half/) [![Documentation](https://docs.rs/half/badge.svg)](https://docs.rs/half/) ![Crates.io](https://img.shields.io/crates/l/half) [![Build status](https://github.com/VoidStarKat/half-rs/actions/workflows/rust.yml/badge.svg?branch=main&event=push)](https://github.com/VoidStarKat/half-rs/actions/workflows/rust.yml) [![CircleCI](https://dl.circleci.com/status-badge/img/gh/VoidStarKat/half-rs/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/VoidStarKat/half-rs/tree/main)
 3 | 
 4 | This crate implements a half-precision floating point `f16` type for Rust implementing the IEEE
 5 | 754-2008 standard [`binary16`](https://en.wikipedia.org/wiki/Half-precision_floating-point_format)
 6 | a.k.a "half" format, as well as a `bf16` type implementing the
 7 | [`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) format.
 8 | 
 9 | ## Usage
10 | 
11 | The `f16` and `bf16` types attempt to match existing Rust floating point type functionality where possible, and provide both conversion operations (such as to/from `f32` and `f64`) and basic
12 | arithmetic operations. Hardware support for these operations will be used whenever hardware support
13 | is available—either through intrinsics or targeted assembly—although a nightly Rust toolchain may
14 | be required for some hardware.
15 | 
16 | This crate provides [`no_std`](https://rust-embedded.github.io/book/intro/no-std.html) support, so it can easily be used in embedded code where a smaller float format is most useful.
17 | 
18 | *Requires Rust 1.81 or greater.* If you need support for older versions of Rust, use previous 
19 | versions of this crate.
20 | 
21 | See the [crate documentation](https://docs.rs/half/) for more details.
22 | 
23 | ### Optional Features
24 | 
25 | - **`alloc`** — Enable use of the [`alloc`](https://doc.rust-lang.org/alloc/) crate when not using
26 |   the `std` library.
27 | 
28 |   This enables the `vec` module, which contains zero-copy conversions for the `Vec` type. This
29 |   allows fast conversion between raw `Vec<u16>` bits and `Vec<f16>` or `Vec<bf16>` arrays, and vice
30 |   versa.
31 | 
32 | - **`std`** — Enable features that depend on the Rust `std` library, including everything in the
33 |   `alloc` feature.
34 | 
35 |   Enabling the `std` feature enables runtime CPU feature detection of hardware support.
36 |   Without this feature detection, hardware support is only used when the compiler target supports it.
37 | 
38 | - **`serde`** - Implement `Serialize` and `Deserialize` traits for `f16` and `bf16`. This adds a
39 |   dependency on the [`serde`](https://crates.io/crates/serde) crate.
40 | 
41 | - **`num-traits`** — Enable `ToPrimitive`, `FromPrimitive`, `ToBytes`, `FromBytes`, `Num`, `Float`,
42 |   `FloatCore`, `Signed`, and `Bounded` trait implementations from the
43 |   [`num-traits`](https://crates.io/crates/num-traits) crate.
44 | 
45 | - **`bytemuck`** — Enable `Zeroable` and `Pod` trait implementations from the
46 |   [`bytemuck`](https://crates.io/crates/bytemuck) crate.
47 | 
48 | - **`rand_distr`** — Enable sampling from distributions like `StandardUniform` and `StandardNormal` 
49 |   from the [`rand_distr`](https://crates.io/crates/rand_distr) crate.
50 | 
51 | - **`rkyv`** -- Enable zero-copy deserialization with the [`rkyv`](https://crates.io/crates/rkyv) crate.
52 | 
53 | - **`arbitrary`** -- Enable fuzzing support with the [`arbitrary`](https://crates.io/crates/arbitrary) 
54 |   crate by implementing the `Arbitrary` trait.
55 | 
56 | - **`nightly`** -- Enable nightly-only features (currently `loongarch64` intrinsics).
57 | 
58 | ### Hardware support
59 | 
60 | The following list details hardware support for floating point types in this crate. When using `std`
61 | library, runtime CPU target detection will be used. To get the most performance benefits, compile
62 | for specific CPU features which avoids the runtime overhead and works in a `no_std` environment.
63 | 
64 | | Architecture | CPU Target Feature | Notes                                                                                                                                                  |
65 | | ------------ | ------------------ |--------------------------------------------------------------------------------------------------------------------------------------------------------|
66 | | `x86`/`x86_64` | `f16c` | This supports conversion to/from `f16` only (including vector SIMD) and does not support any `bf16` or arithmetic operations.                          |
67 | | `aarch64` | `fp16` | This supports all operations on `f16` only.                                                                                                            |
68 | | `loongarch64` | `lsx` | (`nightly` feature only) This supports conversion to/from `f16` only (including vector SIMD) and does not support any `bf16` or arithmetic operations. |
69 | 
70 | ### More Documentation
71 | 
72 | - [Crate API Reference](https://docs.rs/half/)
73 | - [Latest Changes](CHANGELOG.md)
74 | 
75 | ## License
76 | 
77 | All files in this library are dual-licensed and distributed under the terms of either of:
78 | 
79 | * [MIT License](LICENSE-MIT)
80 |   ([http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT))
81 | * [Apache License, Version 2.0](LICENSE-APACHE)
82 |   ([http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0))
83 | 
84 | at your option.
85 | 
86 | ### Contributing
87 | 
88 | Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the
89 | work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
90 | additional terms or conditions.
91 | 


--------------------------------------------------------------------------------
/src/bfloat/convert.rs:
--------------------------------------------------------------------------------
  1 | use crate::leading_zeros::leading_zeros_u16;
  2 | use zerocopy::transmute;
  3 | 
  4 | #[inline] // Converts an `f32` to raw bfloat16 bits: the upper 16 bits of the f32 pattern, rounded.
  5 | pub(crate) const fn f32_to_bf16(value: f32) -> u16 {
  6 |     // TODO: Replace transmute with to_bits() once to_bits is const-stabilized
  7 |     // Reinterpret the float as its raw 32-bit pattern
  8 |     let x: u32 = transmute!(value);
  9 | 
 10 |     // NaN check: with the sign bit masked off, anything above the infinity pattern is a NaN
 11 |     if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 {
 12 |         // Keep high part of current mantissa but also set most significant mantissa bit
 13 |         return ((x >> 16) | 0x0040u32) as u16; // so the truncated mantissa can never end up zero
 14 |     }
 15 | 
 16 |     // Round to nearest, ties to even, then shift; `round_bit` is the highest discarded bit
 17 |     let round_bit = 0x0000_8000u32;
 18 |     if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 { // mask = lower discarded bits + lowest kept bit
 19 |         (x >> 16) as u16 + 1
 20 |     } else {
 21 |         (x >> 16) as u16
 22 |     }
 23 | }
 24 | 
 25 | #[inline] // Converts an `f64` to raw bfloat16 bits, working only on the upper 32 bits of the f64 pattern.
 26 | pub(crate) const fn f64_to_bf16(value: f64) -> u16 {
 27 |     // TODO: Replace transmute with to_bits() once to_bits is const-stabilized
 28 |     // Convert to raw bits, truncating the last 32 bits of mantissa; that precision will always
 29 |     // be lost in bfloat16. Note those low bits are only consulted again by the NaN check below.
 30 |     let val: u64 = transmute!(value);
 31 |     let x = (val >> 32) as u32;
 32 | 
 33 |     // Extract IEEE754 components (sign bit, 11 exponent bits, top 20 mantissa bits), still in place
 34 |     let sign = x & 0x8000_0000u32;
 35 |     let exp = x & 0x7FF0_0000u32;
 36 |     let man = x & 0x000F_FFFFu32;
 37 | 
 38 |     // Check for all exponent bits being set, which is Infinity or NaN
 39 |     if exp == 0x7FF0_0000u32 {
 40 |         // Set mantissa MSB for NaN (and also keep shifted mantissa bits).
 41 |         // We also have to check the last 32 bits: a NaN payload may live only in the truncated part.
 42 |         let nan_bit = if man == 0 && (val as u32 == 0) {
 43 |             0
 44 |         } else {
 45 |             0x0040u32
 46 |         };
 47 |         return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16;
 48 |     }
 49 | 
 50 |     // Not Infinity or NaN from here on; start assembling the bfloat16 version
 51 |     let half_sign = sign >> 16;
 52 |     // Unbias the exponent (f64 bias 1023), then bias for bfloat16 precision (bias 127)
 53 |     let unbiased_exp = ((exp >> 20) as i64) - 1023;
 54 |     let half_exp = unbiased_exp + 127;
 55 | 
 56 |     // Check for exponent overflow, return infinity carrying the input's sign
 57 |     if half_exp >= 0xFF {
 58 |         return (half_sign | 0x7F80u32) as u16;
 59 |     }
 60 | 
 61 |     // Check for underflow: the result is a bfloat16 subnormal or zero
 62 |     if half_exp <= 0 {
 63 |         // Check mantissa for what we can do
 64 |         if 7 - half_exp > 21 {
 65 |             // No rounding possibility, so this is a full underflow, return signed zero
 66 |             return half_sign as u16;
 67 |         }
 68 |         // Don't forget about hidden leading mantissa bit when assembling mantissa
 69 |         let man = man | 0x0010_0000u32;
 70 |         let mut half_man = man >> (14 - half_exp);
 71 |         // Check for rounding (nearest, ties to even); `round_bit` is the highest bit shifted out
 72 |         let round_bit = 1 << (13 - half_exp);
 73 |         if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
 74 |             half_man += 1;
 75 |         }
 76 |         // No exponent for subnormals
 77 |         return (half_sign | half_man) as u16;
 78 |     }
 79 | 
 80 |     // Rebias the exponent: move it into the bfloat16 exponent field (bits 7..=14)
 81 |     let half_exp = (half_exp as u32) << 7;
 82 |     let half_man = man >> 13;
 83 |     // Check for rounding (nearest, ties to even); bit 12 is the highest bit dropped by `>> 13`
 84 |     let round_bit = 0x0000_1000u32;
 85 |     if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
 86 |         // Round it; the increment may carry out of the mantissa into the exponent field
 87 |         ((half_sign | half_exp | half_man) + 1) as u16
 88 |     } else {
 89 |         (half_sign | half_exp | half_man) as u16
 90 |     }
 91 | }
 92 | 
 93 | /// Widens raw bfloat16 bits to an `f32` by placing them in the upper half of the 32-bit pattern.
 94 | #[inline]
 95 | pub(crate) const fn bf16_to_f32(i: u16) -> f32 {
 96 |     // TODO: Replace transmute with from_bits() once from_bits is const-stabilized
 97 |     let bits = i as u32;
 98 |     // A NaN (magnitude above the infinity pattern 0x7F80) keeps its mantissa and additionally
 99 |     // gets the most significant mantissa bit set.
100 |     let widened = if i & 0x7FFFu16 > 0x7F80u16 { bits | 0x0040u32 } else { bits };
101 |     transmute!(widened << 16)
102 | }
103 | 
104 | #[inline] // Converts raw bfloat16 bits to an `f64`; zero, infinity/NaN, subnormal and normal inputs are handled separately.
105 | pub(crate) const fn bf16_to_f64(i: u16) -> f64 {
106 |     // TODO: Replace transmute with from_bits() once from_bits is const-stabilized
107 |     // Check for signed zero: only the sign bit can be set, and it moves from bit 15 to bit 63
108 |     if i & 0x7FFFu16 == 0 {
109 |         return transmute!((i as u64) << 48);
110 |     }
111 | 
112 |     let half_sign = (i & 0x8000u16) as u64; // sign bit, still at bit 15
113 |     let half_exp = (i & 0x7F80u16) as u64; // 8 exponent bits, still at bits 7..=14
114 |     let half_man = (i & 0x007Fu16) as u64; // 7 mantissa bits
115 | 
116 |     // Check for an infinity or NaN when all exponent bits set
117 |     if half_exp == 0x7F80u64 {
118 |         // Check for signed infinity if mantissa is zero
119 |         if half_man == 0 {
120 |             return transmute!((half_sign << 48) | 0x7FF0_0000_0000_0000u64);
121 |         } else {
122 |             // NaN, keep current mantissa but also set most significant mantissa bit
123 |             return transmute!((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45));
124 |         }
125 |     }
126 | 
127 |     // Calculate double-precision components with adjusted exponent
128 |     let sign = half_sign << 48;
129 |     // Unbias exponent (bfloat16 bias is 127)
130 |     let unbiased_exp = ((half_exp as i64) >> 7) - 127;
131 | 
132 |     // Check for subnormals, which will be normalized by adjusting exponent
133 |     if half_exp == 0 {
134 |         // Calculate how much to adjust the exponent by: the 7-bit mantissa always has 9 leading
135 |         let e = leading_zeros_u16(half_man as u16) - 9; // zeros as a u16, so `e` counts only the extra ones
136 |         
137 |         // Rebias and adjust exponent; the shift moves the leading one to bit 52, which the mask drops
138 |         let exp = ((1023 - 127 - e) as u64) << 52;
139 |         let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64;
140 |         return transmute!(sign | exp | man);
141 |     }
142 |     // Rebias exponent for a normalized normal (f64 bias is 1023)
143 |     let exp = ((unbiased_exp + 1023) as u64) << 52;
144 |     let man = (half_man & 0x007Fu64) << 45;
145 |     transmute!(sign | exp | man)
146 | }
147 | 


--------------------------------------------------------------------------------
/benches/convert.rs:
--------------------------------------------------------------------------------
  1 | use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion};
  2 | use half::prelude::*;
  3 | use std::{f32, f64, iter};
  4 | 
  5 | const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1024; // element count of the "large" slice benchmarks below
  6 | 
  7 | /// Benchmarks scalar `f16::from_f32`, registering one benchmark per edge-case input value.
  8 | fn bench_f32_to_f16(c: &mut Criterion) {
  9 |     let mut group = c.benchmark_group("Convert f16 From f32");
 10 |     for value in &[
 11 |         0.,
 12 |         -0.,
 13 |         1.,
 14 |         f32::MIN,
 15 |         f32::MAX,
 16 |         f32::MIN_POSITIVE,
 17 |         f32::NEG_INFINITY,
 18 |         f32::INFINITY,
 19 |         f32::NAN,
 20 |         f32::consts::E,
 21 |         f32::consts::PI,
 22 |     ] {
 23 |         let id = BenchmarkId::new("f16::from_f32", value);
 24 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| f16::from_f32(*x)));
 25 |     }
 26 | }
 27 | 
 28 | /// Benchmarks scalar `f16::from_f64`, registering one benchmark per edge-case input value.
 29 | fn bench_f64_to_f16(c: &mut Criterion) {
 30 |     let mut group = c.benchmark_group("Convert f16 From f64");
 31 |     for value in &[
 32 |         0.,
 33 |         -0.,
 34 |         1.,
 35 |         f64::MIN,
 36 |         f64::MAX,
 37 |         f64::MIN_POSITIVE,
 38 |         f64::NEG_INFINITY,
 39 |         f64::INFINITY,
 40 |         f64::NAN,
 41 |         f64::consts::E,
 42 |         f64::consts::PI,
 43 |     ] {
 44 |         let id = BenchmarkId::new("f16::from_f64", value);
 45 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| f16::from_f64(*x)));
 46 |     }
 47 | }
 48 | 
 49 | /// Benchmarks scalar `f16::to_f32`, registering one benchmark per edge-case input value.
 50 | fn bench_f16_to_f32(c: &mut Criterion) {
 51 |     let mut group = c.benchmark_group("Convert f16 to f32");
 52 |     for value in &[
 53 |         f16::ZERO,
 54 |         f16::NEG_ZERO,
 55 |         f16::ONE,
 56 |         f16::MIN,
 57 |         f16::MAX,
 58 |         f16::MIN_POSITIVE,
 59 |         f16::NEG_INFINITY,
 60 |         f16::INFINITY,
 61 |         f16::NAN,
 62 |         f16::E,
 63 |         f16::PI,
 64 |     ] {
 65 |         let id = BenchmarkId::new("f16::to_f32", value);
 66 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| x.to_f32()));
 67 |     }
 68 | }
 69 | 
 70 | /// Benchmarks scalar `f16::to_f64`, registering one benchmark per edge-case input value.
 71 | fn bench_f16_to_f64(c: &mut Criterion) {
 72 |     let mut group = c.benchmark_group("Convert f16 to f64");
 73 |     for value in &[
 74 |         f16::ZERO,
 75 |         f16::NEG_ZERO,
 76 |         f16::ONE,
 77 |         f16::MIN,
 78 |         f16::MAX,
 79 |         f16::MIN_POSITIVE,
 80 |         f16::NEG_INFINITY,
 81 |         f16::INFINITY,
 82 |         f16::NAN,
 83 |         f16::E,
 84 |         f16::PI,
 85 |     ] {
 86 |         let id = BenchmarkId::new("f16::to_f64", value);
 87 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| x.to_f64()));
 88 |     }
 89 | }
 90 | 
 91 | criterion_group!( // groups the four scalar (one value at a time) `f16` conversion benchmarks
 92 |     f16_sisd,
 93 |     bench_f32_to_f16,
 94 |     bench_f64_to_f16,
 95 |     bench_f16_to_f32,
 96 |     bench_f16_to_f64
 97 | );
 98 | 
 99 | /// Benchmarks `convert_from_f32_slice` into an `f16` buffer, on a small and a large slice.
100 | fn bench_slice_f32_to_f16(c: &mut Criterion) {
101 |     // Small case: eleven edge-case values (zeros, one, extremes, infinities, NaN, e, pi).
102 |     let inputs = [
103 |         0.,
104 |         -0.,
105 |         1.,
106 |         f32::MIN,
107 |         f32::MAX,
108 |         f32::MIN_POSITIVE,
109 |         f32::NEG_INFINITY,
110 |         f32::INFINITY,
111 |         f32::NAN,
112 |         f32::consts::E,
113 |         f32::consts::PI,
114 |     ];
115 |     // Destination buffer with one slot per input value.
116 |     let mut outputs = [f16::ZERO; 11];
117 |     let name = "HalfFloatSliceExt::convert_from_f32_slice/constants";
118 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
119 |         bencher.iter(|| black_box(&mut outputs).convert_from_f32_slice(black_box(&inputs)))
120 |     });
121 | 
122 |     // Large case: SIMD_LARGE_BENCH_SLICE_LEN consecutive values 0.0, 1.0, 2.0, ...
123 |     let large: Vec<_> = iter::repeat(0)
124 |         .take(SIMD_LARGE_BENCH_SLICE_LEN)
125 |         .enumerate()
126 |         .map(|(index, _)| index as f32)
127 |         .collect();
128 |     let mut large_out = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN];
129 |     let name = "HalfFloatSliceExt::convert_from_f32_slice/large";
130 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
131 |         bencher.iter(|| black_box(&mut large_out).convert_from_f32_slice(black_box(&large)))
132 |     });
133 | }
134 | 
135 | /// Benchmarks `convert_from_f64_slice` into an `f16` buffer, on a small and a large slice.
136 | fn bench_slice_f64_to_f16(c: &mut Criterion) {
137 |     // Small case: eleven edge-case values (zeros, one, extremes, infinities, NaN, e, pi).
138 |     let inputs = [
139 |         0.,
140 |         -0.,
141 |         1.,
142 |         f64::MIN,
143 |         f64::MAX,
144 |         f64::MIN_POSITIVE,
145 |         f64::NEG_INFINITY,
146 |         f64::INFINITY,
147 |         f64::NAN,
148 |         f64::consts::E,
149 |         f64::consts::PI,
150 |     ];
151 |     // Destination buffer with one slot per input value.
152 |     let mut outputs = [f16::ZERO; 11];
153 |     let name = "HalfFloatSliceExt::convert_from_f64_slice/constants";
154 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
155 |         bencher.iter(|| black_box(&mut outputs).convert_from_f64_slice(black_box(&inputs)))
156 |     });
157 | 
158 |     // Large case: SIMD_LARGE_BENCH_SLICE_LEN consecutive values 0.0, 1.0, 2.0, ...
159 |     let large: Vec<_> = iter::repeat(0)
160 |         .take(SIMD_LARGE_BENCH_SLICE_LEN)
161 |         .enumerate()
162 |         .map(|(index, _)| index as f64)
163 |         .collect();
164 |     let mut large_out = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN];
165 |     let name = "HalfFloatSliceExt::convert_from_f64_slice/large";
166 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
167 |         bencher.iter(|| black_box(&mut large_out).convert_from_f64_slice(black_box(&large)))
168 |     });
169 | }
170 | 
171 | /// Benchmarks `convert_to_f32_slice` from `f16` values, on a small and a large slice.
172 | fn bench_slice_f16_to_f32(c: &mut Criterion) {
173 |     // Small case: eleven edge-case values (zeros, one, extremes, infinities, NaN, e, pi).
174 |     let inputs = [
175 |         f16::ZERO,
176 |         f16::NEG_ZERO,
177 |         f16::ONE,
178 |         f16::MIN,
179 |         f16::MAX,
180 |         f16::MIN_POSITIVE,
181 |         f16::NEG_INFINITY,
182 |         f16::INFINITY,
183 |         f16::NAN,
184 |         f16::E,
185 |         f16::PI,
186 |     ];
187 |     // Destination buffer with one slot per input value.
188 |     let mut outputs = [0f32; 11];
189 |     let name = "HalfFloatSliceExt::convert_to_f32_slice/constants";
190 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
191 |         bencher.iter(|| black_box(&inputs).convert_to_f32_slice(black_box(&mut outputs)))
192 |     });
193 | 
194 |     // Large case: SIMD_LARGE_BENCH_SLICE_LEN consecutive values 0, 1, 2, ... converted to f16.
195 |     let large: Vec<_> = iter::repeat(0)
196 |         .take(SIMD_LARGE_BENCH_SLICE_LEN)
197 |         .enumerate()
198 |         .map(|(index, _)| f16::from_f32(index as f32))
199 |         .collect();
200 |     let mut large_out = [0f32; SIMD_LARGE_BENCH_SLICE_LEN];
201 |     let name = "HalfFloatSliceExt::convert_to_f32_slice/large";
202 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
203 |         bencher.iter(|| black_box(&large).convert_to_f32_slice(black_box(&mut large_out)))
204 |     });
205 | }
206 | 
207 | /// Benchmarks `convert_to_f64_slice` from `f16` values, on a small and a large slice.
208 | fn bench_slice_f16_to_f64(c: &mut Criterion) {
209 |     // Small case: eleven edge-case values (zeros, one, extremes, infinities, NaN, e, pi).
210 |     let inputs = [
211 |         f16::ZERO,
212 |         f16::NEG_ZERO,
213 |         f16::ONE,
214 |         f16::MIN,
215 |         f16::MAX,
216 |         f16::MIN_POSITIVE,
217 |         f16::NEG_INFINITY,
218 |         f16::INFINITY,
219 |         f16::NAN,
220 |         f16::E,
221 |         f16::PI,
222 |     ];
223 |     // Destination buffer with one slot per input value.
224 |     let mut outputs = [0f64; 11];
225 |     let name = "HalfFloatSliceExt::convert_to_f64_slice/constants";
226 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
227 |         bencher.iter(|| black_box(&inputs).convert_to_f64_slice(black_box(&mut outputs)))
228 |     });
229 | 
230 |     // Large case: SIMD_LARGE_BENCH_SLICE_LEN consecutive values 0, 1, 2, ... converted to f16.
231 |     let large: Vec<_> = iter::repeat(0)
232 |         .take(SIMD_LARGE_BENCH_SLICE_LEN)
233 |         .enumerate()
234 |         .map(|(index, _)| f16::from_f64(index as f64))
235 |         .collect();
236 |     let mut large_out = [0f64; SIMD_LARGE_BENCH_SLICE_LEN];
237 |     let name = "HalfFloatSliceExt::convert_to_f64_slice/large";
238 |     c.bench_function(name, |bencher: &mut Bencher<'_>| {
239 |         bencher.iter(|| black_box(&large).convert_to_f64_slice(black_box(&mut large_out)))
240 |     });
241 | }
242 | 
243 | criterion_group!( // groups the four slice-conversion `f16` benchmarks
244 |     f16_simd,
245 |     bench_slice_f32_to_f16,
246 |     bench_slice_f64_to_f16,
247 |     bench_slice_f16_to_f32,
248 |     bench_slice_f16_to_f64
249 | );
250 | 
251 | /// Benchmarks scalar `bf16::from_f32`, registering one benchmark per edge-case input value.
252 | fn bench_f32_to_bf16(c: &mut Criterion) {
253 |     let mut group = c.benchmark_group("Convert bf16 From f32");
254 |     for value in &[
255 |         0.,
256 |         -0.,
257 |         1.,
258 |         f32::MIN,
259 |         f32::MAX,
260 |         f32::MIN_POSITIVE,
261 |         f32::NEG_INFINITY,
262 |         f32::INFINITY,
263 |         f32::NAN,
264 |         f32::consts::E,
265 |         f32::consts::PI,
266 |     ] {
267 |         let id = BenchmarkId::new("bf16::from_f32", value);
268 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| bf16::from_f32(*x)));
269 |     }
270 | }
271 | 
272 | /// Benchmarks scalar `bf16::from_f64`, registering one benchmark per edge-case input value.
273 | fn bench_f64_to_bf16(c: &mut Criterion) {
274 |     let mut group = c.benchmark_group("Convert bf16 From f64");
275 |     for value in &[
276 |         0.,
277 |         -0.,
278 |         1.,
279 |         f64::MIN,
280 |         f64::MAX,
281 |         f64::MIN_POSITIVE,
282 |         f64::NEG_INFINITY,
283 |         f64::INFINITY,
284 |         f64::NAN,
285 |         f64::consts::E,
286 |         f64::consts::PI,
287 |     ] {
288 |         let id = BenchmarkId::new("bf16::from_f64", value);
289 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| bf16::from_f64(*x)));
290 |     }
291 | }
292 | 
293 | /// Benchmarks scalar `bf16::to_f32`, registering one benchmark per edge-case input value.
294 | fn bench_bf16_to_f32(c: &mut Criterion) {
295 |     let mut group = c.benchmark_group("Convert bf16 to f32");
296 |     for value in &[
297 |         bf16::ZERO,
298 |         bf16::NEG_ZERO,
299 |         bf16::ONE,
300 |         bf16::MIN,
301 |         bf16::MAX,
302 |         bf16::MIN_POSITIVE,
303 |         bf16::NEG_INFINITY,
304 |         bf16::INFINITY,
305 |         bf16::NAN,
306 |         bf16::E,
307 |         bf16::PI,
308 |     ] {
309 |         let id = BenchmarkId::new("bf16::to_f32", value);
310 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| x.to_f32()));
311 |     }
312 | }
313 | 
314 | /// Benchmarks scalar `bf16::to_f64`, registering one benchmark per edge-case input value.
315 | fn bench_bf16_to_f64(c: &mut Criterion) {
316 |     let mut group = c.benchmark_group("Convert bf16 to f64");
317 |     for value in &[
318 |         bf16::ZERO,
319 |         bf16::NEG_ZERO,
320 |         bf16::ONE,
321 |         bf16::MIN,
322 |         bf16::MAX,
323 |         bf16::MIN_POSITIVE,
324 |         bf16::NEG_INFINITY,
325 |         bf16::INFINITY,
326 |         bf16::NAN,
327 |         bf16::E,
328 |         bf16::PI,
329 |     ] {
330 |         let id = BenchmarkId::new("bf16::to_f64", value);
331 |         group.bench_with_input(id, value, |bencher, x| bencher.iter(|| x.to_f64()));
332 |     }
333 | }
334 | 
335 | criterion_group!( // groups the four scalar (one value at a time) `bf16` conversion benchmarks
336 |     bf16_sisd,
337 |     bench_f32_to_bf16,
338 |     bench_f64_to_bf16,
339 |     bench_bf16_to_f32,
340 |     bench_bf16_to_f64
341 | );
342 | 
343 | criterion_main!(f16_sisd, bf16_sisd, f16_simd); // benchmark entry point: runs the three groups defined above
344 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 


--------------------------------------------------------------------------------
/src/vec.rs:
--------------------------------------------------------------------------------
  1 | //! Contains utility functions and traits to convert between vectors of [`u16`] bits and [`struct@f16`] or
  2 | //! [`bf16`] vectors.
  3 | //!
  4 | //! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for [`Vec<u16>`] vectors,
  5 | //! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both
  6 | //! [`Vec<f16>`] and [`Vec<bf16>`] vectors. These traits provide efficient conversions and
  7 | //! reinterpret casting of larger buffers of floating point values, and are automatically included
  8 | //! in the [`prelude`][crate::prelude] module.
  9 | //!
 10 | //! This module is only available with the `std` or `alloc` feature.
 11 | 
 12 | use super::{bf16, f16, slice::HalfFloatSliceExt};
 13 | #[cfg(feature = "alloc")]
 14 | #[allow(unused_imports)]
 15 | use alloc::{vec, vec::Vec};
 16 | use core::mem;
 17 | 
 18 | /// Extensions to [`Vec<f16>`] and [`Vec<bf16>`] to support reinterpret and conversion operations.
 19 | ///
 20 | /// This trait is sealed and cannot be implemented outside of this crate.
 21 | pub trait HalfFloatVecExt: private::SealedHalfFloatVec {
 22 |     /// Reinterprets a vector of [`struct@f16`] or [`bf16`] numbers as a vector of [`u16`] bits.
 23 |     ///
 24 |     /// This is a zero-copy operation. The reinterpreted vector has the same memory location as
 25 |     /// `self`.
 26 |     ///
 27 |     /// # Examples
 28 |     ///
 29 |     /// ```rust
 30 |     /// # use half::prelude::*;
 31 |     /// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)];
 32 |     /// let int_buffer = float_buffer.reinterpret_into();
 33 |     ///
 34 |     /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]);
 35 |     /// ```
 36 |     #[must_use]
 37 |     fn reinterpret_into(self) -> Vec<u16>;
 38 | 
 39 |     /// Converts all of the elements of a `[f32]` slice into a new [`struct@f16`] or [`bf16`] vector.
 40 |     ///
 41 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
 42 |     /// efficient than converting individual elements on some hardware that supports SIMD
 43 |     /// conversions. See [crate documentation][crate] for more information on hardware conversion
 44 |     /// support.
 45 |     ///
 46 |     /// # Examples
 47 |     /// ```rust
 48 |     /// # use half::prelude::*;
 49 |     /// let float_values = [1., 2., 3., 4.];
 50 |     /// let vec: Vec<f16> = Vec::from_f32_slice(&float_values);
 51 |     ///
 52 |     /// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]);
 53 |     /// ```
 54 |     #[must_use]
 55 |     fn from_f32_slice(slice: &[f32]) -> Self;
 56 | 
 57 |     /// Converts all of the elements of a `[f64]` slice into a new [`struct@f16`] or [`bf16`] vector.
 58 |     ///
 59 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
 60 |     /// efficient than converting individual elements on some hardware that supports SIMD
 61 |     /// conversions. See [crate documentation][crate] for more information on hardware conversion
 62 |     /// support.
 63 |     ///
 64 |     /// # Examples
 65 |     /// ```rust
 66 |     /// # use half::prelude::*;
 67 |     /// let float_values = [1., 2., 3., 4.];
 68 |     /// let vec: Vec<f16> = Vec::from_f64_slice(&float_values);
 69 |     ///
 70 |     /// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]);
 71 |     /// ```
 72 |     #[must_use]
 73 |     fn from_f64_slice(slice: &[f64]) -> Self;
 74 | }
 75 | 
 76 | /// Extensions to [`Vec<u16>`] to support reinterpreting raw bits as half-precision floats.
 77 | ///
 78 | /// This trait is sealed and cannot be implemented outside of this crate.
 79 | pub trait HalfBitsVecExt: private::SealedHalfBitsVec {
 80 |     /// Reinterprets a vector of [`u16`] bits as a vector of [`struct@f16`] or [`bf16`] numbers.
 81 |     ///
 82 |     /// `H` is the type to cast to, and must be either the [`struct@f16`] or [`bf16`] type.
 83 |     ///
 84 |     /// This is a zero-copy operation. The reinterpreted vector has the same memory location as
 85 |     /// `self`, and keeps its length and capacity.
 86 |     ///
 87 |     /// # Examples
 88 |     ///
 89 |     /// ```rust
 90 |     /// # use half::prelude::*;
 91 |     /// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
 92 |     /// let float_buffer = int_buffer.reinterpret_into::<f16>();
 93 |     ///
 94 |     /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
 95 |     /// ```
 96 |     #[must_use]
 97 |     fn reinterpret_into<H>(self) -> Vec<H>
 98 |     where
 99 |         H: crate::private::SealedHalf;
100 | }
101 | 
102 | mod private {
103 |     use crate::{bf16, f16};
104 |     #[cfg(feature = "alloc")]
105 |     #[allow(unused_imports)]
106 |     use alloc::vec::Vec;
107 |     // Seals `HalfBitsVecExt`: the only implementor is the raw bits vector.
108 |     pub trait SealedHalfBitsVec {}
109 |     impl SealedHalfBitsVec for Vec<u16> {}
110 |     // Seals `HalfFloatVecExt`: the only implementors are vectors of the two half float types.
111 |     pub trait SealedHalfFloatVec {}
112 |     impl SealedHalfFloatVec for Vec<bf16> {}
113 |     impl SealedHalfFloatVec for Vec<f16> {}
114 | }
115 | 
116 | impl HalfFloatVecExt for Vec<f16> {
117 |     #[inline]
118 |     fn reinterpret_into(self) -> Vec<u16> {
119 |         // Take ownership through `ManuallyDrop` so the buffer is never freed by the original
120 |         // `Vec<f16>`, and the vector is not moved again once its raw pointer has been taken
121 |         // (the pattern used by the `Vec::from_raw_parts` documentation instead of `mem::forget`).
122 |         let mut this = mem::ManuallyDrop::new(self);
123 | 
124 |         // The resulting `Vec<u16>` has the same length and capacity as the `Vec<f16>`.
125 |         let length = this.len();
126 |         let capacity = this.capacity();
127 | 
128 |         // Reinterpret the `f16` buffer as `u16`, knowing that structs are represented as only
129 |         // their members in memory, which is the u16 part of `f16(u16)`.
130 |         let pointer = this.as_mut_ptr() as *mut u16;
131 | 
132 |         // SAFETY: We are reconstructing full length and capacity of original vector,
133 |         // using its original pointer, and the size of elements are identical.
134 |         unsafe { Vec::from_raw_parts(pointer, length, capacity) }
135 |     }
136 | 
137 |     // The vector is zero-initialized first, so the conversion only overwrites valid elements.
138 |     fn from_f32_slice(slice: &[f32]) -> Self {
139 |         let mut vec = vec![f16::from_bits(0); slice.len()];
140 |         vec.convert_from_f32_slice(slice);
141 |         vec
142 |     }
143 | 
144 |     // The vector is zero-initialized first, so the conversion only overwrites valid elements.
145 |     fn from_f64_slice(slice: &[f64]) -> Self {
146 |         let mut vec = vec![f16::from_bits(0); slice.len()];
147 |         vec.convert_from_f64_slice(slice);
148 |         vec
149 |     }
150 | }
151 | 
152 | impl HalfFloatVecExt for Vec<bf16> {
153 |     #[inline]
154 |     fn reinterpret_into(self) -> Vec<u16> {
155 |         // Take ownership through `ManuallyDrop` so the buffer is never freed by the original
156 |         // `Vec<bf16>`, and the vector is not moved again once its raw pointer has been taken
157 |         // (the pattern used by the `Vec::from_raw_parts` documentation instead of `mem::forget`).
158 |         let mut this = mem::ManuallyDrop::new(self);
159 | 
160 |         // The resulting `Vec<u16>` has the same length and capacity as the `Vec<bf16>`.
161 |         let length = this.len();
162 |         let capacity = this.capacity();
163 | 
164 |         // Reinterpret the `bf16` buffer as `u16`, knowing that structs are represented as only
165 |         // their members in memory, which is the u16 part of the `bf16` newtype.
166 |         let pointer = this.as_mut_ptr() as *mut u16;
167 | 
168 |         // SAFETY: We are reconstructing full length and capacity of original vector,
169 |         // using its original pointer, and the size of elements are identical.
170 |         unsafe { Vec::from_raw_parts(pointer, length, capacity) }
171 |     }
172 | 
173 |     // The vector is zero-initialized first, so the conversion only overwrites valid elements.
174 |     fn from_f32_slice(slice: &[f32]) -> Self {
175 |         let mut vec = vec![bf16::from_bits(0); slice.len()];
176 |         vec.convert_from_f32_slice(slice);
177 |         vec
178 |     }
179 | 
180 |     // The vector is zero-initialized first, so the conversion only overwrites valid elements.
181 |     fn from_f64_slice(slice: &[f64]) -> Self {
182 |         let mut vec = vec![bf16::from_bits(0); slice.len()];
183 |         vec.convert_from_f64_slice(slice);
184 |         vec
185 |     }
186 | }
187 | 
188 | impl HalfBitsVecExt for Vec<u16> {
189 |     // This is safe because all traits are sealed: `H` can only be a half float type of this crate
190 |     #[inline]
191 |     fn reinterpret_into<H>(self) -> Vec<H>
192 |     where
193 |         H: crate::private::SealedHalf,
194 |     {
195 |         // Take ownership through `ManuallyDrop` so the buffer is never freed by the original
196 |         // `Vec<u16>`, and the vector is not moved again once its raw pointer has been taken
197 |         // (the pattern used by the `Vec::from_raw_parts` documentation instead of `mem::forget`).
198 |         let mut this = mem::ManuallyDrop::new(self);
199 | 
200 |         // The resulting `Vec<H>` has the same length and capacity as the `Vec<u16>`.
201 |         let length = this.len();
202 |         let capacity = this.capacity();
203 | 
204 |         // Reinterpret the `u16` buffer as `H`, whose only member in memory is a `u16`.
205 |         let pointer = this.as_mut_ptr() as *mut H;
206 | 
207 |         // SAFETY: We are reconstructing full length and capacity of original vector, using its
208 |         // original pointer, and the size of elements are identical. `SealedHalf` requires
209 |         // `FromBytes`, so the existing `u16` bit patterns are acceptable contents for `H`.
210 |         unsafe { Vec::from_raw_parts(pointer, length, capacity) }
211 |     }
212 | }
213 | 
214 | #[cfg(test)]
215 | mod test {
216 |     use super::{HalfBitsVecExt, HalfFloatVecExt};
217 |     use crate::{bf16, f16};
218 |     #[cfg(all(feature = "alloc", not(feature = "std")))]
219 |     use alloc::vec;
220 | 
221 |     #[test]
222 |     fn test_vec_conversions_f16() {
223 |         let expected_floats = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2];
224 |         let raw_bits = vec![
225 |             f16::E.to_bits(),
226 |             f16::PI.to_bits(),
227 |             f16::EPSILON.to_bits(),
228 |             f16::FRAC_1_SQRT_2.to_bits(),
229 |         ];
230 | 
231 |         // Bits -> numbers. A copy is reinterpreted because the call consumes its vector and
232 |         // the raw bits are still needed for the round-trip check below.
233 |         let floats = raw_bits.clone().reinterpret_into::<f16>();
234 |         assert_eq!(floats.as_slice(), expected_floats.as_slice());
235 | 
236 |         // Numbers -> bits must give back the bit patterns we started from
237 |         let round_trip = floats.reinterpret_into();
238 |         assert_eq!(round_trip.as_slice(), raw_bits.as_slice());
239 |     }
240 | 
241 |     #[test]
242 |     fn test_vec_conversions_bf16() {
243 |         let expected_floats = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2];
244 |         let raw_bits = vec![
245 |             bf16::E.to_bits(),
246 |             bf16::PI.to_bits(),
247 |             bf16::EPSILON.to_bits(),
248 |             bf16::FRAC_1_SQRT_2.to_bits(),
249 |         ];
250 | 
251 |         // Bits -> numbers. A copy is reinterpreted because the call consumes its vector and
252 |         // the raw bits are still needed for the round-trip check below.
253 |         let floats = raw_bits.clone().reinterpret_into::<bf16>();
254 |         assert_eq!(floats.as_slice(), expected_floats.as_slice());
255 | 
256 |         // Numbers -> bits must give back the bit patterns we started from
257 |         let round_trip = floats.reinterpret_into();
258 |         assert_eq!(round_trip.as_slice(), raw_bits.as_slice());
259 |     }
260 | }
261 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! A crate that provides support for half-precision 16-bit floating point types.
  2 | //!
  3 | //! This crate provides the [`struct@f16`] type, which is an implementation of the IEEE 754-2008 standard
  4 | //! [`binary16`] a.k.a "half" floating point type. This 16-bit floating point type is intended for
  5 | //! efficient storage where the full range and precision of a larger floating point value is not
  6 | //! required. This is especially useful for image storage formats.
  7 | //!
  8 | //! This crate also provides a [`struct@bf16`] type, an alternative 16-bit floating point format. The
  9 | //! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the
 10 | //! exponent to allow the same range as [`f32`] but with only 8 bits of precision (instead of 11
 11 | //! bits for [`struct@f16`]). See the [`struct@bf16`] type for details.
 12 | //!
 13 | //! Because [`struct@f16`] and [`struct@bf16`] are primarily for efficient storage, floating point operations such
 14 | //! as addition, multiplication, etc. are not always implemented by hardware. When hardware does not
 15 | //! support these operations, this crate emulates them by converting the value to
 16 | //! [`f32`] before performing the operation and then back afterward.
 17 | //!
 18 | //! Note that conversion from [`f32`]/[`f64`] to both [`struct@f16`] and [`struct@bf16`] are lossy operations, and
 19 | //! just as converting a [`f64`] to [`f32`] is lossy and does not have `Into`/`From` trait
 20 | //! implementations, so too do these smaller types not have those trait implementations either.
 21 | //! Instead, use `from_f32`/`from_f64` functions for the types in this crate. If you don't care
 22 | //! about lossy conversions and need trait conversions, use the appropriate [`num-traits`]
 23 | //! traits that are implemented.
 24 | //!
 25 | //! This crate also provides a [`slice`][mod@slice] module for zero-copy in-place conversions of
 26 | //! [`u16`] slices to both [`struct@f16`] and [`struct@bf16`], as well as efficient vectorized conversions of
 27 | //! larger buffers of floating point values to and from these half formats.
 28 | //!
 29 | //! The crate supports `#[no_std]` when the `std` cargo feature is not enabled, so can be used in
 30 | //! embedded environments without using the Rust [`std`] library. The `std` feature enables support
 31 | //! for the standard library and is enabled by default, see the [Cargo Features](#cargo-features)
 32 | //! section below.
 33 | //!
 34 | //! A [`prelude`] module is provided for easy importing of available utility traits.
 35 | //!
 36 | //! # Serialization
 37 | //!
 38 | //! When the `serde` feature is enabled, [`struct@f16`] and [`struct@bf16`] will be serialized as a newtype of
 39 | //! [`u16`] by default. In binary formats this is ideal, as it will generally use just two bytes for
 40 | //! storage. For string formats like JSON, however, this isn't as useful, and due to design
 41 | //! limitations of serde, it's not possible for the default `Serialize` implementation to support
 42 | //! different serialization for different formats.
 43 | //!
 44 | //! Instead, it's up to the container type of the floats to control how it is serialized. This can
 45 | //! easily be controlled when using the derive macros using `#[serde(serialize_with="")]`
 46 | //! attributes. For both [`struct@f16`] and [`struct@bf16`] a `serialize_as_f32` and `serialize_as_string` are
 47 | //! provided for use with this attribute.
 48 | //!
 49 | //! Deserialization of both float types supports deserializing from the default serialization,
 50 | //! strings, and `f32`/`f64` values, so no additional work is required.
 51 | //!
 52 | //! # Hardware support
 53 | //!
 54 | //! Hardware support for these conversions and arithmetic will be used
 55 | //! whenever hardware support is available—either through intrinsics or targeted assembly—although
 56 | //! a nightly Rust toolchain may be required for some hardware. When hardware supports it the
 57 | //! functions and traits in the [`slice`][mod@slice] and [`vec`] modules will also use vectorized
 58 | //! SIMD instructions for increased efficiency.
 59 | //!
 60 | //! The following list details hardware support for floating point types in this crate. When using
 61 | //! `std` cargo feature, runtime CPU target detection will be used. To get the most performance
 62 | //! benefits, compile for specific CPU features which avoids the runtime overhead and works in a
 63 | //! `no_std` environment.
 64 | //!
 65 | //! | Architecture | CPU Target Feature | Notes |
 66 | //! | ------------ | ------------------ | ----- |
 67 | //! | `x86`/`x86_64` | `f16c` | This supports conversion to/from [`struct@f16`] only (including vector SIMD) and does not support any [`struct@bf16`] or arithmetic operations. |
 68 | //! | `aarch64` | `fp16` | This supports all operations on [`struct@f16`] only. |
 69 | //! | `loongarch64` | `lsx` | This supports conversion to/from [`struct@f16`] only (including vector SIMD) and does not support any [`struct@bf16`] or arithmetic operations. |
 70 | //!
 71 | //! # Cargo Features
 72 | //!
 73 | //! This crate supports a number of optional cargo features. None of these features are enabled by
 74 | //! default, even `std`.
 75 | //!
 76 | //! - **`alloc`** — Enable use of the [`alloc`] crate when not using the `std` library.
 77 | //!
 78 | //!   Among other functions, this enables the [`vec`] module, which contains zero-copy
 79 | //!   conversions for the [`Vec`] type. This allows fast conversion between raw `Vec<u16>` bits and
 80 | //!   `Vec<f16>` or `Vec<bf16>` arrays, and vice versa.
 81 | //!
 82 | //! - **`std`** — Enable features that depend on the Rust [`std`] library. This also enables the
 83 | //!   `alloc` feature automatically.
 84 | //!
 85 | //!   Enabling the `std` feature enables runtime CPU feature detection of hardware support.
 86 | //!   Without this feature detection, hardware support is only used when the compiler target enables it.
 87 | //!
 88 | //! - **`serde`** — Adds support for the [`serde`] crate by implementing [`Serialize`] and
 89 | //!   [`Deserialize`] traits for both [`struct@f16`] and [`struct@bf16`].
 90 | //!
 91 | //! - **`num-traits`** — Adds support for the [`num-traits`] crate by implementing [`ToPrimitive`],
 92 | //!   [`FromPrimitive`], [`ToBytes`], `FromBytes`, [`AsPrimitive`], [`Num`], [`Float`],
 93 | //!   [`FloatCore`], [`Signed`], and [`Bounded`] traits for both [`struct@f16`] and [`struct@bf16`].
 94 | //!
 95 | //! - **`bytemuck`** — Adds support for the [`bytemuck`] crate by implementing [`Zeroable`] and
 96 | //!   [`Pod`] traits for both [`struct@f16`] and [`struct@bf16`].
 97 | //!
 98 | //! - **`rand_distr`** — Adds support for the [`rand_distr`] crate by implementing [`Distribution`]
 99 | //!   and other traits for both [`struct@f16`] and [`struct@bf16`].
100 | //!
101 | //! - **`rkyv`** -- Enable zero-copy deserialization with [`rkyv`] crate.
102 | //!
103 | //! - **`arbitrary`** -- Enable fuzzing support with [`arbitrary`] crate by implementing
104 | //!   [`Arbitrary`] trait.
105 | //!
106 | //! - **`nightly`** -- Enable nightly-only features.
107 | //!
108 | //! [`alloc`]: https://doc.rust-lang.org/alloc/
109 | //! [`std`]: https://doc.rust-lang.org/std/
110 | //! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
111 | //! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
112 | //! [`serde`]: https://crates.io/crates/serde
113 | //! [`bytemuck`]: https://crates.io/crates/bytemuck
114 | //! [`num-traits`]: https://crates.io/crates/num-traits
115 | //! [`zerocopy`]: https://crates.io/crates/zerocopy
116 | //! [`rand_distr`]: https://crates.io/crates/rand_distr
117 | //! [`rkyv`]: https://crates.io/crates/rkyv
118 | //! [`arbitrary`]: https://crates.io/crates/arbitrary
119 | #![cfg_attr(
120 |     feature = "alloc",
121 |     doc = "
122 | [`vec`]: mod@vec"
123 | )]
124 | #![cfg_attr(
125 |     not(feature = "alloc"),
126 |     doc = "
127 | [`vec`]: #
128 | [`Vec`]: https://doc.rust-lang.org/stable/alloc/vec/struct.Vec.html"
129 | )]
130 | #![cfg_attr(
131 |     feature = "serde",
132 |     doc = "
133 | [`Serialize`]: serde::Serialize
134 | [`Deserialize`]: serde::Deserialize"
135 | )]
136 | #![cfg_attr(
137 |     not(feature = "serde"),
138 |     doc = "
139 | [`Serialize`]: https://docs.rs/serde/*/serde/trait.Serialize.html
140 | [`Deserialize`]: https://docs.rs/serde/*/serde/trait.Deserialize.html"
141 | )]
142 | #![cfg_attr(
143 |     feature = "num-traits",
144 |     doc = "
145 | [`ToPrimitive`]: ::num_traits::ToPrimitive
146 | [`FromPrimitive`]: ::num_traits::FromPrimitive
147 | [`ToBytes`]: ::num_traits::ToBytes
148 | [`AsPrimitive`]: ::num_traits::AsPrimitive
149 | [`Num`]: ::num_traits::Num
150 | [`Float`]: ::num_traits::Float
151 | [`FloatCore`]: ::num_traits::float::FloatCore
152 | [`Signed`]: ::num_traits::Signed
153 | [`Bounded`]: ::num_traits::Bounded"
154 | )]
155 | #![cfg_attr(
156 |     not(feature = "num-traits"),
157 |     doc = "
158 | [`ToPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.ToPrimitive.html
159 | [`FromPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.FromPrimitive.html
160 | [`ToBytes`]: https://docs.rs/num-traits/*/num_traits/ops/bytes/trait.ToBytes.html
161 | [`AsPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.AsPrimitive.html
162 | [`Num`]: https://docs.rs/num-traits/*/num_traits/trait.Num.html
163 | [`Float`]: https://docs.rs/num-traits/*/num_traits/float/trait.Float.html
164 | [`FloatCore`]: https://docs.rs/num-traits/*/num_traits/float/trait.FloatCore.html
165 | [`Bounded`]: https://docs.rs/num-traits/*/num_traits/bounds/trait.Bounded.html"
166 | )]
167 | #![cfg_attr(
168 |     feature = "bytemuck",
169 |     doc = "
170 | [`Zeroable`]: bytemuck::Zeroable
171 | [`Pod`]: bytemuck::Pod"
172 | )]
173 | #![cfg_attr(
174 |     not(feature = "bytemuck"),
175 |     doc = "
176 | [`Zeroable`]: https://docs.rs/bytemuck/*/bytemuck/trait.Zeroable.html
177 | [`Pod`]: https://docs.rs/bytemuck/*/bytemuck/trait.Pod.html"
178 | )]
179 | #![cfg_attr(
180 |     feature = "zerocopy",
181 |     doc = "
182 | [`IntoBytes`]: zerocopy::IntoBytes
183 | [`FromBytes`]: zerocopy::FromBytes"
184 | )]
185 | #![cfg_attr(
186 |     not(feature = "zerocopy"),
187 |     doc = "
188 | [`IntoBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.IntoBytes.html
189 | [`FromBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.FromBytes.html"
190 | )]
191 | #![cfg_attr(
192 |     feature = "rand_distr",
193 |     doc = "
194 | [`Distribution`]: rand::distr::Distribution"
195 | )]
196 | #![cfg_attr(
197 |     not(feature = "rand_distr"),
198 |     doc = "
199 | [`Distribution`]: https://docs.rs/rand/*/rand/distr/trait.Distribution.html"
200 | )]
201 | #![cfg_attr(
202 |     feature = "arbitrary",
203 |     doc = "
204 | [`Arbitrary`]: arbitrary::Arbitrary"
205 | )]
206 | #![cfg_attr(
207 |     not(feature = "arbitrary"),
208 |     doc = "
209 | [`Arbitrary`]: https://docs.rs/arbitrary/*/arbitrary/trait.Arbitrary.html"
210 | )]
211 | #![warn(
212 |     missing_docs,
213 |     missing_copy_implementations,
214 |     trivial_numeric_casts,
215 |     future_incompatible
216 | )]
217 | #![cfg_attr(not(target_arch = "spirv"), warn(missing_debug_implementations))]
218 | #![cfg_attr(
219 |     all(feature = "nightly", target_arch = "loongarch64"),
220 |     feature(
221 |         stdarch_loongarch,
222 |         stdarch_loongarch_feature_detection,
223 |         loongarch_target_feature
224 |     )
225 | )]
226 | #![allow(clippy::verbose_bit_mask, clippy::cast_lossless, unexpected_cfgs)]
227 | #![cfg_attr(not(feature = "std"), no_std)]
228 | #![doc(html_root_url = "https://docs.rs/half/2.7.1")]
229 | #![doc(test(attr(deny(warnings), allow(unused))))]
230 | // Until updated to use newly stabilized `from_bits`, disable new lint warning about the transmutes
231 | #![allow(unknown_lints, unnecessary_transmutes)]
232 | #![warn(unknown_lints)]
233 | 
234 | #[cfg(feature = "alloc")]
235 | extern crate alloc;
236 | 
237 | mod bfloat;
238 | mod binary16;
239 | mod leading_zeros;
240 | #[cfg(feature = "num-traits")]
241 | mod num_traits;
242 | 
243 | #[cfg(not(target_arch = "spirv"))]
244 | pub mod slice;
245 | #[cfg(feature = "alloc")]
246 | pub mod vec;
247 | 
248 | pub use bfloat::bf16;
249 | pub use binary16::f16;
250 | 
251 | #[cfg(feature = "rand_distr")]
252 | mod rand_distr;
253 | 
254 | /// A collection of the most used items and traits in this crate for easy importing.
255 | ///
256 | /// # Examples
257 | ///
258 | /// ```rust
259 | /// use half::prelude::*;
260 | /// ```
261 | pub mod prelude {
262 |     #[doc(no_inline)]
263 |     pub use crate::{bf16, f16};
264 |     // Slice extension traits: gated like the `slice` module, which is not built for `spirv`.
265 |     #[cfg(not(target_arch = "spirv"))]
266 |     #[doc(no_inline)]
267 |     pub use crate::slice::{HalfBitsSliceExt, HalfFloatSliceExt};
268 |     // Vec extension traits: gated like the `vec` module, which needs the `alloc` feature.
269 |     #[cfg(feature = "alloc")]
270 |     #[doc(no_inline)]
271 |     pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt};
272 | }
273 | 
274 | // Crate-private on purpose: `SealedHalf` must not be nameable or implementable by other crates
275 | mod private {
276 |     use crate::{bf16, f16};
277 |     use zerocopy::{FromBytes, Immutable, IntoBytes};
278 | 
279 |     pub trait SealedHalf: Immutable + IntoBytes + FromBytes {}
280 | 
281 |     impl SealedHalf for bf16 {}
282 |     impl SealedHalf for f16 {}
283 | }
284 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
  4 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
  5 | 
  6 | ## [Unreleased]
  7 | 
  8 | ## [2.7.1] - 2025-10-13 <a name="2.7.1"></a>
  9 | ### Fixed
 10 | - `loongarch64` `lsx` hardware intrinsics for `f16` conversions now enabled only under
 11 |   `nightly` cargo feature, fixing compile errors on stable Rust.
 12 | 
 13 | ## [2.7.0] - 2025-10-08 <a name="2.7.0"></a>
 14 | ### Changed
 15 | - `zerocopy` is now a required dependency. The optional `zerocopy` crate feature is deprecated.
 16 |   This change is to ensure better code safety and prevent potential unsound behavior.
 17 | - Git repository URL has changed due to GitHub user name change. Old URL is redirected.
 18 | 
 19 | ### Added
 20 | - New `num-traits` implementations: `Signed` for `f16` and `bf16`. By [@djsell].
 21 | - `loongarch64` `lsx` hardware intrinsic support for `f16` conversions. By [@heiher].
 22 | - Implemented `Weight` trait from `rand` crate for `f16` and `bf16` with `rand` optional cargo
 23 |   feature. By [@majian4work].
 24 | 
 25 | ### Fixed
 26 | - `min` and `max` incorrectly propagate `NaN` values when `self` is `NaN`. Fixes [#126],
 27 |   by [@mgottscho].
 28 | - Suppressed warnings from new `unnecessary_transmutes` lint.
 29 | 
 30 | ### Removed 
 31 | - `doc_auto_cfg` feature has been removed from docs.rs documentation due to removal of rust
 32 |   feature.
 33 | 
 34 | ## [2.6.0] - 2025-04-08 <a name="2.6.0"></a>
 35 | ### Changed
 36 | - Fixed some incorrect minimum supported versions of dependencies that weren't caught due to
 37 |   improper `Cargo.lock`:
 38 |   * `num-traits` 0.2.14 -> 0.2.16
 39 |   * `zerocopy` 0.8.0 -> 0.8.23
 40 |   * `arbitrary` 1.3.2 -> 1.4.1
 41 | 
 42 | ### Added
 43 | - `f16` and `bf16` now implement `Immutable` and `KnownLayout` for `zerocopy` crate. By [@usamoi].
 44 | 
 45 | ## [2.5.0] - 2025-03-13 <a name="2.5.0"></a>
 46 | ### Changed
 47 | - Updated optional dependencies to latest major versions: 
 48 |   * `zerocopy` 0.6 -> 0.8
 49 |   * `rand` 0.8 -> 0.9
 50 |   * `rand_distr` 0.4 -> 0.5
 51 |   * `rkyv` 0.7 -> 0.8
 52 |   * (dev) `criterion` 0.4 -> 0.5
 53 | - Minimum supported Rust version has been changed to 1.81 due to above dependency updates.
 54 | - Minor restructuring of included license file locations to be more consistent with crates ecosystem.
 55 | 
 56 | ### Added
 57 | - Added support for `arbitrary` crate. Fixes [#110]. By [@FL33TW00D].
 58 | - New `num-traits` implementations: `FromBytes` and `ToBytes` for `f16` and `bf16`. By [@kpreid].
 59 | 
 60 | ### Fixed
 61 | - Suppressed `unexpected_cfgs` lint warnings on newer versions of stable Rust.
 62 | - Resolved ambiguous rustdoc warnings due to new unstable `f16` primitive in compiler.
 63 | 
 64 | ## [2.4.1] - 2024-04-06 <a name="2.4.1"></a>
 65 | ### Fixed
 66 | - Missing macro import causing build failure on `no_std` + `alloc` feature set. Fixes [#107].
 67 | - Clippy warning on nightly rust.
 68 | 
 69 | ## [2.4.0] - 2024-02-25 <a name="2.4.0"></a>
 70 | ### Added
 71 | - Optional `rkyv` support. Fixes [#100], by [@comath].
 72 | - New `num-traits` implementations: `AsPrimitive<f16>` for `bf16` and `AsPrimitive<bf16>` for
 73 |   `f16`, allowing lossy conversions between the two types. By [@charles-r-earp].
 74 | - `Cargo.lock` added to vcs as is now recommended for library crates.
 75 | ### Fixed
 76 | - Remove some unit NaN conversion sign tests due to non-deterministic hardware. Fixes [#103].
 77 | - Redundant import warnings on nightly Rust.
 78 | 
 79 | ## [2.3.1] - 2023-06-24 <a name="2.3.1"></a>
 80 | ### Fixed
 81 | - Compile error on x86 (not x86_64) targets. Fixes [#93].
 82 | 
 83 | ## [2.3.0] - 2023-06-24 <a name="2.3.0"></a>
 84 | ### Added
 85 | - Support for Kani Rust Verifier. By [@cameron1024].
 86 | - Support for `rand_distr::Distribution` implementations behind `rand_distr` optional cargo
 87 |   feature. By [@coreylowman].
 88 | - Floating point formatting options in `Display` and `Debug` implementations. By [@eiz].
 89 | 
 90 | ### Changed
 91 | - **Breaking Change** Minimum supported Rust version is now 1.70.
 92 | - **Breaking Change** Minimum supported Rust version policy reverted to original policy of allowing
 93 |   minimum supported Rust version updates for minor releases instead of only major to avoid
 94 |   segmentation and allow optimizing hardware implementations without unnecessary major releases.
 95 | - Hardware intrinsics/assembly are finally available on stable Rust, with hardware
 96 |   feature detection (`std` only). This includes:
 97 |     - AArch64 now uses FP16 hardware instructions for conversions and math operations when
 98 |     available.
 99 |     - x86/x86-64 now uses F16C hardware instructions for conversions (but no math operations) when
100 |     available. Fixes [#54].
101 | 
102 | ### Deprecated
103 | - `use-intrinsics` cargo feature no longer used. Hardware support will now always be used whenever
104 |   possible. A future version may output deprecation warnings if this feature is enabled.
105 | 
106 | ### Fixed
107 | - Improve code generation of `leading_zeros` functions by inlining. By [@encounter].
108 | - `Sum` implementation of `bf16` incorrectly performed product instead of sum. By [@wx-csy].
109 | - Compile failed when `serde` cargo feature enabled but `std` not enabled.
110 | - Incorrect black boxing of benchmark tests.
111 | - Rustdoc cfg display on docs.rs not getting enabled.
112 | 
113 | ## [2.2.1] - 2023-01-08 <a name="2.2.1"></a>
114 | ### Changed
115 | - Reduced unnecessary bounds checks for SIMD operations on slices. By [@Shnatsel].
116 | - Further slice conversion optimizations for slices. Resolves [#66].
117 | 
118 | ## [2.2.0] - 2022-12-30 <a name="2.2.0"></a>
119 | ### Added
120 | - Add `serialize_as_f32` and `serialize_as_string` functions when `serde` cargo feature is enabled.
121 |   They allow customizing the serialization by using
122 |   `#[serde(serialize_with="f16::serialize_as_f32")]` attribute in serde derive macros. Closes [#60].
123 | - Deserialize now supports deserializing from `f32`, `f64`, and string values in addition to its
124 |   previous default deserialization. Closes [#60].
125 | 
126 | ### Changed
127 | - Add `#[inline]` on fallback functions, which improved conversion execution on non-nightly rust 
128 |   by up to 50%. By [@Shnatsel].
129 | 
130 | ## [2.1.0] - 2022-07-18 <a name="2.1.0"></a>
131 | ### Added
132 | - Add support for target_arch `spirv`. Some traits and functions are unavailable on this
133 |   architecture. By [@charles-r-earp].
134 | - Add `total_cmp` method to both float types. Closes [#55], by [@joseluis].
135 | 
136 | ## [2.0.0] - 2022-06-21 <a name="2.0.0"></a>
137 | ### Changed
138 | - **Breaking Change** Minimum supported Rust version is now 1.58.
139 | - **Breaking Change** `std` is now enabled as a default cargo feature. Disable default features to
140 |   continue using `no_std` support.
141 | - Migrated to Rust Edition 2021.
142 | - Added `#[must_use]` attributes to functions, as appropriate.
143 | 
144 | ### Fixed
145 | - Fix a soundness bug with `slice::as_ptr` not correctly using mutable reference. By [@Nilstrieb].
146 | 
147 | ### Added
148 | - Added `const` conversion methods to both `f16` and `bf16`. These methods never use hardware
149 |   intrinsics, unlike the current conversion methods, which is why they are separated into new
150 |   methods. The following `const` methods were added:
151 |   - `from_f32_const`
152 |   - `from_f64_const`
153 |   - `to_f32_const`
154 |   - `to_f64_const`
155 | - Added `Neg` trait support for borrowed values `&f16` and `&bf16`. By [@pthariensflame].
156 | - Added `AsPrimitive` implementations from and to self, `usize`, and `isize`. By [@kali].
157 | 
158 | ### Removed
159 | - **Breaking Change** The deprecated `serialize` cargo feature has been removed. Use `serde` cargo
160 |   feature instead.
161 | - **Breaking Change** The deprecated `consts` module has been removed. Use associated constants on
162 |   `f16` instead.
163 | - **Breaking Change** The following deprecated functions have been removed:
164 |   - `f16::as_bits`
165 |   - `slice::from_bits_mut`
166 |   - `slice::to_bits_mut`
167 |   - `slice::from_bits`
168 |   - `slice::to_bits`
169 |   - `vec::from_bits`
170 |   - `vec::to_bits`
171 | 
172 | ## [1.8.2] - 2021-10-22 <a name="1.8.2"></a>
173 | ### Fixed
174 | - Remove cargo resolver=2 from manifest to resolve errors in older versions of Rust that still
175 |   worked with 1.8.0. Going forward, MSRV increases will be major version increases. Fixes [#48].
176 | 
177 | ## [1.8.1] - 2021-10-21 - **Yanked** <a name="1.8.1"></a>
178 | ### ***Yanked***
179 | *Not recommended due to introducing compilation error in Rust versions that worked with 1.8.0.*
180 | ### Changed
181 | - Now uses cargo resolver version 2 to prevent dev-dependencies from enabling `std` feature on
182 |   optional dependencies.
183 | 
184 | ### Fixed
185 | - Fixed compile failure when `std` feature is not enabled and `num-traits` is enabled under new
186 |   resolver. Now properly uses `libm` num-traits feature.
187 | 
188 | ## [1.8.0] - 2021-10-13 <a name="1.8.0"></a>
189 | ### Changed
190 | - Now always implements `Add`, `Div`, `Mul`, `Neg`, `Rem`, and `Sub` traits. 
191 |   Previously, these were only implemented under the `num-traits` feature. Keep in mind they still
192 |   convert to `f32` and back in the implementation.
193 | - Minimum supported Rust version is now 1.51.
194 | - Made crate package [REUSE compliant](https://reuse.software/).
195 | - Docs now use intra-doc links instead of manual (and hard to maintain) links.
196 | - The following methods on both `f16` and `bf16` are now `const`:
197 |   - `to_le_bytes`
198 |   - `to_be_bytes`
199 |   - `to_ne_bytes`
200 |   - `from_le_bytes`
201 |   - `from_be_bytes`
202 |   - `from_ne_bytes`
203 |   - `is_normal`
204 |   - `classify`
205 |   - `signum`
206 | 
207 | ### Added
208 | - Added optional implementations of `zerocopy` traits `AsBytes` and `FromBytes`
209 |   under `zerocopy` cargo feature. By [@samcrow].
210 | - Implemented the `core::iter::Product` and `core::iter::Sum` traits, with the same caveat as above
211 |   about converting to `f32` and back under the hood.
212 | - Added new associated const `NEG_ONE` to both `f16` and `bf16`.
213 | - Added the following new methods on both `f16` and `bf16`:
214 |   - `copysign`
215 |   - `max`
216 |   - `min`
217 |   - `clamp`
218 | 
219 | ### Fixed
220 | - Fixed a number of minor lints discovered due to improved CI.
221 | 
222 | ## [1.7.1] - 2021-01-17 <a name="1.7.1"></a>
223 | ### Fixed
224 | - Docs.rs now generates docs for `bytemuck` and `num-traits` optional features.
225 | 
226 | ## [1.7.0] - 2021-01-17 <a name="1.7.0"></a>
227 | ### Added
228 | - Added optional implementations of `bytemuck` traits `Zeroable` and `Pod` under `bytemuck` cargo
229 |   feature. By [@charles-r-earp].
230 | - Added optional implementations of `num-traits` traits `ToPrimitive` and `FromPrimitive` under
231 |   `num-traits` cargo feature. By [@charles-r-earp].
232 | - Added implementations of `Binary`, `Octal`, `LowerHex`, and `UpperHex` string format traits to
233 |   format raw `f16`/`bf16` bytes to string.
234 | 
235 | ### Changed
236 | - `Debug` trait implementation now formats `f16`/`bf16` as float instead of raw bytes hex. Use newly
237 |   implemented formatting traits to format in hex instead of `Debug`. Fixes [#37].
238 | 
239 | 
240 | ## [1.6.0] - 2020-05-09 <a name="1.6.0"></a>
241 | ### Added
242 | - Added `LOG2_10` and `LOG10_2` constants to both `f16` and `bf16`, which were added to `f32` and
243 |   `f64` in the standard library in 1.43.0. By [@tspiteri].
244 | - Added `to_le/be/ne_bytes` and `from_le/be/ne_bytes` to both `f16` and `bf16`, which were added to
245 |   the standard library in 1.40.0. By [@bzm3r].
246 | 
247 | ## [1.5.0] - 2020-03-03 <a name="1.5.0"></a>
248 | ### Added
249 | - Added the `alloc` feature to support the `alloc` crate in `no_std` environments. By [@zserik]. The
250 |   `vec` module is now available with either `alloc` or `std` feature.
251 | 
252 | ## [1.4.1] - 2020-02-10 <a name="1.4.1"></a>
253 | ### Fixed
254 | - Added `#[repr(transparent)]` to `f16`/`bf16` to remove undefined behavior. By [@jfrimmel].
255 | 
256 | ## [1.4.0] - 2019-10-13 <a name="1.4.0"></a>
257 | ### Added
258 | - Added a `bf16` type implementing the alternative
259 |   [`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) 16-bit floating point
260 |   format. By [@tspiteri].
261 | - `f16::from_bits`, `f16::to_bits`, `f16::is_nan`, `f16::is_infinite`, `f16::is_finite`,
262 |   `f16::is_sign_positive`, and `f16::is_sign_negative` are now `const` fns.
263 | - `slice::HalfBitsSliceExt` and `slice::HalfFloatSliceExt` extension traits have been added for
264 |   performing efficient reinterpret casts and conversions of slices to and from `[f16]` and
265 |   `[bf16]`.  These traits will use hardware SIMD conversion instructions when available and the
266 |   `use-intrinsics` cargo feature is enabled.
267 | - `vec::HalfBitsVecExt` and `vec::HalfFloatVecExt` extension traits have been added for
268 |    performing efficient reinterpret casts to and from `Vec<f16>` and `Vec<bf16>`. These traits
269 |    are only available with the `std` cargo feature.
270 | - `prelude` has been added, for easy importing of most common functionality. Currently the
271 |   prelude imports `f16`, `bf16`, and the new slice and vec extension traits.
272 | - New associated constants on `f16` type to replace deprecated `consts` module.
273 | 
274 | ### Fixed
275 | - Software conversion (when not using `use-intrinsics` feature) now matches hardware rounding
276 |   by rounding to nearest, ties to even. Fixes [#24], by [@tspiteri].
277 | - NaN value conversions now behave like `f32` to `f64` conversions, retaining sign. Fixes [#23],
278 |   by [@tspiteri].
279 | 
280 | ### Changed
281 | - Minimum rustc version bumped to 1.32.
282 | - Runtime target host feature detection is now used if both `std` and `use-intrinsics` features are
283 |   enabled and the compile target host does not support required features.
284 | - When `use-intrinsics` feature is enabled, will now always compile and run without error correctly
285 |   regardless of compile target options.
286 | 
287 | ### Deprecated
288 | - `consts` module and all its constants have been deprecated; use the associated constants on `f16`
289 |   instead.
290 | - `slice::from_bits` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast` instead.
291 | - `slice::from_bits_mut` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast_mut`
292 |   instead.
293 | - `slice::to_bits` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast` instead.
294 | - `slice::to_bits_mut` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast_mut`
295 |   instead.
296 | - `vec::from_bits` has been deprecated; use `vec::HalfBitsVecExt::reinterpret_into` instead.
297 | - `vec::to_bits` has been deprecated; use `vec::HalfFloatVecExt::reinterpret_into` instead.
298 | 
299 | ## [1.3.1] - 2019-10-04 <a name="1.3.1"></a>
300 | ### Fixed
301 | - Corrected values of constants `EPSILON`, `MAX_10_EXP`, `MAX_EXP`, `MIN_10_EXP`, and `MIN_EXP`
302 |   in `consts` module, as well as setting `consts::NAN` to match value of `f32::NAN` converted to
303 |   `f16`. By [@tspiteri].
304 | 
305 | ## [1.3.0] - 2018-10-02 <a name="1.3.0"></a>
306 | ### Added
307 | - `slice::from_bits_mut` and `slice::to_bits_mut` for conversion between mutable `u16` and `f16`
308 |   slices. Fixes [#16], by [@johannesvollmer].
309 | 
310 | ## [1.2.0] - 2018-09-03 <a name="1.2.0"></a>
311 | ### Added
312 | - `slice` and optional `vec` (only included with `std` feature) modules for conversions between
313 |   `u16` and `f16` buffers. Fixes [#14], by [@johannesvollmer].
314 | - `to_bits` added to replace `as_bits`. Fixes [#12], by [@tspiteri].
315 | ### Fixed
316 | - `serde` optional dependency no longer uses its default `std` feature.
317 | ### Deprecated
318 | - `as_bits` has been deprecated; use `to_bits` instead.
319 | - `serialize` cargo feature is deprecated; use `serde` instead.
320 | 
321 | ## [1.1.2] - 2018-07-12 <a name="1.1.2"></a>
322 | ### Fixed
323 | - Fixed compilation error in 1.1.1 on rustc < 1.27, now compiles again on rustc >= 1.10. Fixes
324 |   [#11].
325 | 
326 | ## [1.1.1] - 2018-06-24 - **Yanked** <a name="1.1.1"></a>
327 | ### ***Yanked***
328 | *Not recommended due to introducing compilation error on rustc versions prior to 1.27.*
329 | ### Fixed
330 | - Fix subnormal float conversions when `use-intrinsics` is not enabled. By [@Moongoodboy-K].
331 | 
332 | ## [1.1.0] - 2018-03-17 <a name="1.1.0"></a>
333 | ### Added
334 | - Made `to_f32` and `to_f64` public. Fixes [#7], by [@PSeitz].
335 | 
336 | ## [1.0.2] - 2018-01-12 <a name="1.0.2"></a>
337 | ### Changed
338 | - Update behavior of `is_sign_positive` and `is_sign_negative` to match the IEEE754 conforming
339 |   behavior of the standard library since Rust 1.20.0. Fixes [#3], by [@tspiteri].
340 | - Small optimization on `is_nan` and `is_infinite` from [@tspiteri].
341 | ### Fixed
342 | - Fix comparisons of +0 to -0 and comparisons involving negative numbers. Fixes [#2], by
343 |   [@tspiteri].
344 | - Fix loss of sign when converting `f16` and `f32` to `f16`, and case where `f64` NaN could be
345 |   converted to `f16` infinity instead of NaN. Fixes [#5], by [@tspiteri].
346 | 
347 | ## [1.0.1] - 2017-08-30 <a name="1.0.1"></a>
348 | ### Added
349 | - More README documentation.
350 | - Badges and categories in crate metadata.
351 | ### Changed
352 | - `serde` dependency updated to 1.0 stable.
353 | - Writing changelog manually.
354 | 
355 | ## [1.0.0] - 2017-02-03 <a name="1.0.0"></a>
356 | ### Added
357 | - Update to `serde` 0.9 and stable Rust 1.15 for `serialize` feature.
358 | 
359 | ## [0.1.1] - 2017-01-08 <a name="0.1.1"></a>
360 | ### Added
361 | - Add `serde` support under new `serialize` feature.
362 | ### Changed
363 | - Use `no_std` for crate by default.
364 | 
365 | ## 0.1.0 - 2016-03-17 <a name="0.1.0"></a>
366 | ### Added
367 | - Initial release of `f16` type.
368 | 
369 | [#2]: https://github.com/starkat99/half-rs/issues/2
370 | [#3]: https://github.com/starkat99/half-rs/issues/3
371 | [#5]: https://github.com/starkat99/half-rs/issues/5
372 | [#7]: https://github.com/starkat99/half-rs/issues/7
373 | [#11]: https://github.com/starkat99/half-rs/issues/11
374 | [#12]: https://github.com/starkat99/half-rs/issues/12
375 | [#14]: https://github.com/starkat99/half-rs/issues/14
376 | [#16]: https://github.com/starkat99/half-rs/issues/16
377 | [#23]: https://github.com/starkat99/half-rs/issues/23
378 | [#24]: https://github.com/starkat99/half-rs/issues/24
379 | [#37]: https://github.com/starkat99/half-rs/issues/37
380 | [#48]: https://github.com/starkat99/half-rs/issues/48
381 | [#55]: https://github.com/starkat99/half-rs/issues/55
382 | [#60]: https://github.com/starkat99/half-rs/issues/60
383 | [#66]: https://github.com/starkat99/half-rs/issues/66
384 | [#54]: https://github.com/starkat99/half-rs/issues/54
385 | [#93]: https://github.com/starkat99/half-rs/issues/93
386 | [#100]: https://github.com/starkat99/half-rs/issues/100
387 | [#103]: https://github.com/starkat99/half-rs/issues/103
388 | [#107]: https://github.com/starkat99/half-rs/issues/107
389 | [#110]: https://github.com/starkat99/half-rs/issues/110
390 | [#126]: https://github.com/starkat99/half-rs/issues/126
391 | 
392 | [@tspiteri]: https://github.com/tspiteri
393 | [@PSeitz]: https://github.com/PSeitz
394 | [@Moongoodboy-K]: https://github.com/Moongoodboy-K
395 | [@johannesvollmer]: https://github.com/johannesvollmer
396 | [@jfrimmel]: https://github.com/jfrimmel
397 | [@zserik]: https://github.com/zserik
398 | [@bzm3r]: https://github.com/bzm3r
399 | [@charles-r-earp]: https://github.com/charles-r-earp
400 | [@samcrow]: https://github.com/samcrow
401 | [@pthariensflame]: https://github.com/pthariensflame
402 | [@kali]: https://github.com/kali
403 | [@Nilstrieb]: https://github.com/Nilstrieb
404 | [@joseluis]: https://github.com/joseluis
405 | [@Shnatsel]: https://github.com/Shnatsel
406 | [@cameron1024]: https://github.com/cameron1024
407 | [@encounter]: https://github.com/encounter
408 | [@coreylowman]: https://github.com/coreylowman
409 | [@wx-csy]: https://github.com/wx-csy
410 | [@eiz]: https://github.com/eiz
411 | [@comath]: https://github.com/comath
412 | [@FL33TW00D]: https://github.com/FL33TW00D
413 | [@kpreid]: https://github.com/kpreid
414 | [@usamoi]: https://github.com/usamoi
415 | [@mgottscho]: https://github.com/mgottscho
416 | [@djsell]: https://github.com/djsell
417 | [@heiher]: https://github.com/heiher
418 | [@majian4work]: https://github.com/majian4work
419 | 
420 | 
421 | [Unreleased]: https://github.com/starkat99/half-rs/compare/v2.7.1...HEAD
422 | [2.7.1]: https://github.com/starkat99/half-rs/compare/v2.7.0...v2.7.1
423 | [2.7.0]: https://github.com/starkat99/half-rs/compare/v2.6.0...v2.7.0
424 | [2.6.0]: https://github.com/starkat99/half-rs/compare/v2.5.0...v2.6.0
425 | [2.5.0]: https://github.com/starkat99/half-rs/compare/v2.4.1...v2.5.0
426 | [2.4.1]: https://github.com/starkat99/half-rs/compare/v2.4.0...v2.4.1
427 | [2.4.0]: https://github.com/starkat99/half-rs/compare/v2.3.1...v2.4.0
428 | [2.3.1]: https://github.com/starkat99/half-rs/compare/v2.3.0...v2.3.1
429 | [2.3.0]: https://github.com/starkat99/half-rs/compare/v2.2.1...v2.3.0
430 | [2.2.1]: https://github.com/starkat99/half-rs/compare/v2.2.0...v2.2.1
431 | [2.2.0]: https://github.com/starkat99/half-rs/compare/v2.1.0...v2.2.0
432 | [2.1.0]: https://github.com/starkat99/half-rs/compare/v2.0.0...v2.1.0
433 | [2.0.0]: https://github.com/starkat99/half-rs/compare/v1.8.2...v2.0.0
434 | [1.8.2]: https://github.com/starkat99/half-rs/compare/v1.8.1...v1.8.2
435 | [1.8.1]: https://github.com/starkat99/half-rs/compare/v1.8.0...v1.8.1
436 | [1.8.0]: https://github.com/starkat99/half-rs/compare/v1.7.1...v1.8.0
437 | [1.7.1]: https://github.com/starkat99/half-rs/compare/v1.7.0...v1.7.1
438 | [1.7.0]: https://github.com/starkat99/half-rs/compare/v1.6.0...v1.7.0
439 | [1.6.0]: https://github.com/starkat99/half-rs/compare/v1.5.0...v1.6.0
440 | [1.5.0]: https://github.com/starkat99/half-rs/compare/v1.4.1...v1.5.0
441 | [1.4.1]: https://github.com/starkat99/half-rs/compare/v1.4.0...v1.4.1
442 | [1.4.0]: https://github.com/starkat99/half-rs/compare/v1.3.1...v1.4.0
443 | [1.3.1]: https://github.com/starkat99/half-rs/compare/v1.3.0...v1.3.1
444 | [1.3.0]: https://github.com/starkat99/half-rs/compare/v1.2.0...v1.3.0
445 | [1.2.0]: https://github.com/starkat99/half-rs/compare/v1.1.2...v1.2.0
446 | [1.1.2]: https://github.com/starkat99/half-rs/compare/v1.1.1...v1.1.2
447 | [1.1.1]: https://github.com/starkat99/half-rs/compare/v1.1.0...v1.1.1
448 | [1.1.0]: https://github.com/starkat99/half-rs/compare/v1.0.2...v1.1.0
449 | [1.0.2]: https://github.com/starkat99/half-rs/compare/v1.0.1...v1.0.2
450 | [1.0.1]: https://github.com/starkat99/half-rs/compare/v1.0.0...v1.0.1
451 | [1.0.0]: https://github.com/starkat99/half-rs/compare/v0.1.1...v1.0.0
452 | [0.1.1]: https://github.com/starkat99/half-rs/compare/v0.1.0...v0.1.1
453 | 


--------------------------------------------------------------------------------
/src/slice.rs:
--------------------------------------------------------------------------------
  1 | //! Contains utility functions and traits to convert between slices of [`u16`] bits and [`struct@f16`] or
  2 | //! [`struct@bf16`] numbers.
  3 | //!
  4 | //! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices,
  5 | //! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]`
  6 | //! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of
  7 | //! larger buffers of floating point values, and are automatically included in the
  8 | //! [`prelude`][crate::prelude] module.
  9 | 
 10 | use crate::{bf16, binary16::arch, f16};
 11 | #[cfg(feature = "alloc")]
 12 | #[allow(unused_imports)]
 13 | use alloc::{vec, vec::Vec};
 14 | use zerocopy::{transmute_mut, transmute_ref};
 15 | 
 16 | /// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations.
 17 | ///
 18 | /// This trait is sealed and cannot be implemented outside of this crate.
 19 | pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice {
 20 |     /// Reinterprets a slice of [`struct@f16`] or [`struct@bf16`] numbers as a slice of [`u16`] bits.
 21 |     ///
 22 |     /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory
 23 |     /// location as `self`.
 24 |     ///
 25 |     /// # Examples
 26 |     ///
 27 |     /// ```rust
 28 |     /// # use half::prelude::*;
 29 |     /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)];
 30 |     /// let int_buffer = float_buffer.reinterpret_cast();
 31 |     ///
 32 |     /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]);
 33 |     /// ```
 34 |     #[must_use]
 35 |     fn reinterpret_cast(&self) -> &[u16];
 36 | 
 37 |     /// Reinterprets a mutable slice of [`struct@f16`] or [`struct@bf16`] numbers as a mutable slice of [`u16`]
 38 |     /// bits.
 39 |     ///
 40 |     /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original,
 41 |     /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed.
 42 |     ///
 43 |     /// # Examples
 44 |     ///
 45 |     /// ```rust
 46 |     /// # use half::prelude::*;
 47 |     /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)];
 48 |     ///
 49 |     /// {
 50 |     ///     let int_buffer = float_buffer.reinterpret_cast_mut();
 51 |     ///
 52 |     ///     assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]);
 53 |     ///
 54 |     ///     // Mutating the u16 slice will mutate the original
 55 |     ///     int_buffer[0] = 0;
 56 |     /// }
 57 |     ///
 58 |     /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error.
 59 |     /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]);
 60 |     /// ```
 61 |     #[must_use]
 62 |     fn reinterpret_cast_mut(&mut self) -> &mut [u16];
 63 | 
 64 |     /// Converts all of the elements of a `[f32]` slice into [`struct@f16`] or [`struct@bf16`] values in `self`.
 65 |     ///
 66 |     /// The length of `src` must be the same as `self`.
 67 |     ///
 68 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
 69 |     /// efficient than converting individual elements on some hardware that supports SIMD
 70 |     /// conversions. See [crate documentation](crate) for more information on hardware conversion
 71 |     /// support.
 72 |     ///
 73 |     /// # Panics
 74 |     ///
 75 |     /// This function will panic if the two slices have different lengths.
 76 |     ///
 77 |     /// # Examples
 78 |     /// ```rust
 79 |     /// # use half::prelude::*;
 80 |     /// // Initialize a zero-filled buffer
 81 |     /// let mut buffer = [0u16; 4];
 82 |     /// let buffer = buffer.reinterpret_cast_mut::<f16>();
 83 |     ///
 84 |     /// let float_values = [1., 2., 3., 4.];
 85 |     ///
 86 |     /// // Now convert
 87 |     /// buffer.convert_from_f32_slice(&float_values);
 88 |     ///
 89 |     /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]);
 90 |     /// ```
 91 |     fn convert_from_f32_slice(&mut self, src: &[f32]);
 92 | 
 93 |     /// Converts all of the elements of a `[f64]` slice into [`struct@f16`] or [`struct@bf16`] values in `self`.
 94 |     ///
 95 |     /// The length of `src` must be the same as `self`.
 96 |     ///
 97 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
 98 |     /// efficient than converting individual elements on some hardware that supports SIMD
 99 |     /// conversions. See [crate documentation](crate) for more information on hardware conversion
100 |     /// support.
101 |     ///
102 |     /// # Panics
103 |     ///
104 |     /// This function will panic if the two slices have different lengths.
105 |     ///
106 |     /// # Examples
107 |     /// ```rust
108 |     /// # use half::prelude::*;
109 |     /// // Initialize a zero-filled buffer
110 |     /// let mut buffer = [0u16; 4];
111 |     /// let buffer = buffer.reinterpret_cast_mut::<f16>();
112 |     ///
113 |     /// let float_values = [1., 2., 3., 4.];
114 |     ///
115 |     /// // Now convert
116 |     /// buffer.convert_from_f64_slice(&float_values);
117 |     ///
118 |     /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]);
119 |     /// ```
120 |     fn convert_from_f64_slice(&mut self, src: &[f64]);
121 | 
122 |     /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f32`] values in `dst`.
123 |     ///
124 |     /// The length of `dst` must be the same as `self`.
125 |     ///
126 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
127 |     /// efficient than converting individual elements on some hardware that supports SIMD
128 |     /// conversions. See [crate documentation](crate) for more information on hardware conversion
129 |     /// support.
130 |     ///
131 |     /// # Panics
132 |     ///
133 |     /// This function will panic if the two slices have different lengths.
134 |     ///
135 |     /// # Examples
136 |     /// ```rust
137 |     /// # use half::prelude::*;
138 |     /// // Initialize a zero-filled buffer
139 |     /// let mut buffer = [0f32; 4];
140 |     ///
141 |     /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)];
142 |     ///
143 |     /// // Now convert
144 |     /// half_values.convert_to_f32_slice(&mut buffer);
145 |     ///
146 |     /// assert_eq!(buffer, [1., 2., 3., 4.]);
147 |     /// ```
148 |     fn convert_to_f32_slice(&self, dst: &mut [f32]);
149 | 
150 |     /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f64`] values in `dst`.
151 |     ///
152 |     /// The length of `dst` must be the same as `self`.
153 |     ///
154 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
155 |     /// efficient than converting individual elements on some hardware that supports SIMD
156 |     /// conversions. See [crate documentation](crate) for more information on hardware conversion
157 |     /// support.
158 |     ///
159 |     /// # Panics
160 |     ///
161 |     /// This function will panic if the two slices have different lengths.
162 |     ///
163 |     /// # Examples
164 |     /// ```rust
165 |     /// # use half::prelude::*;
166 |     /// // Initialize a zero-filled buffer
167 |     /// let mut buffer = [0f64; 4];
168 |     ///
169 |     /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)];
170 |     ///
171 |     /// // Now convert
172 |     /// half_values.convert_to_f64_slice(&mut buffer);
173 |     ///
174 |     /// assert_eq!(buffer, [1., 2., 3., 4.]);
175 |     /// ```
176 |     fn convert_to_f64_slice(&self, dst: &mut [f64]);
177 | 
178 |     // Because trait is sealed, we can get away with different interfaces between features.
179 | 
180 |     /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f32`] values in a new
181 |     /// vector.
182 |     ///
183 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
184 |     /// efficient than converting individual elements on some hardware that supports SIMD
185 |     /// conversions. See [crate documentation](crate) for more information on hardware conversion
186 |     /// support.
187 |     ///
188 |     /// This method is only available with the `std` or `alloc` feature.
189 |     ///
190 |     /// # Examples
191 |     /// ```rust
192 |     /// # use half::prelude::*;
193 |     /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)];
194 |     /// let vec = half_values.to_f32_vec();
195 |     ///
196 |     /// assert_eq!(vec, vec![1., 2., 3., 4.]);
197 |     /// ```
198 |     #[cfg(any(feature = "alloc", feature = "std"))]
199 |     #[must_use]
200 |     fn to_f32_vec(&self) -> Vec<f32>;
201 | 
202 |     /// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f64`] values in a new
203 |     /// vector.
204 |     ///
205 |     /// The conversion operation is vectorized over the slice, meaning the conversion may be more
206 |     /// efficient than converting individual elements on some hardware that supports SIMD
207 |     /// conversions. See [crate documentation](crate) for more information on hardware conversion
208 |     /// support.
209 |     ///
210 |     /// This method is only available with the `std` or `alloc` feature.
211 |     ///
212 |     /// # Examples
213 |     /// ```rust
214 |     /// # use half::prelude::*;
215 |     /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)];
216 |     /// let vec = half_values.to_f64_vec();
217 |     ///
218 |     /// assert_eq!(vec, vec![1., 2., 3., 4.]);
219 |     /// ```
220 |     #[cfg(feature = "alloc")]
221 |     #[must_use]
222 |     fn to_f64_vec(&self) -> Vec<f64>;
223 | }
224 | 
225 | /// Extensions to `[u16]` slices to support reinterpret operations.
226 | ///
227 | /// This trait is sealed and cannot be implemented outside of this crate.
228 | pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice {
229 |     /// Reinterprets a slice of [`u16`] bits as a slice of [`struct@f16`] or [`struct@bf16`] numbers.
230 |     ///
231 |     /// `H` is the type to cast to, and must be either the [`struct@f16`] or [`struct@bf16`] type.
232 |     ///
233 |     /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory
234 |     /// location as `self`.
235 |     ///
236 |     /// # Examples
237 |     ///
238 |     /// ```rust
239 |     /// # use half::prelude::*;
240 |     /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
241 |     /// let float_buffer: &[f16] = int_buffer.reinterpret_cast();
242 |     ///
243 |     /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
244 |     ///
245 |     /// // You may have to specify the cast type directly if the compiler can't infer the type.
246 |     /// // The following is also valid in Rust.
247 |     /// let typed_buffer = int_buffer.reinterpret_cast::<f16>();
248 |     /// ```
249 |     #[must_use]
250 |     fn reinterpret_cast<H>(&self) -> &[H]
251 |     where
252 |         H: crate::private::SealedHalf;
253 | 
254 |     /// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`struct@f16`] or [`struct@bf16`]
255 |     /// numbers.
256 |     ///
257 |     /// `H` is the type to cast to, and must be either the [`struct@f16`] or [`struct@bf16`] type.
258 |     ///
259 |     /// This is a zero-copy operation. The reinterpreted slice has the same lifetime as the original,
260 |     /// which prevents using `self` for as long as the returned mutable slice is borrowed.
261 |     ///
262 |     /// # Examples
263 |     ///
264 |     /// ```rust
265 |     /// # use half::prelude::*;
266 |     /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
267 |     ///
268 |     /// {
269 |     ///     let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut();
270 |     ///
271 |     ///     assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
272 |     ///
273 |     ///     // Mutating the f16 slice will mutate the original
274 |     ///     float_buffer[0] = f16::from_f32(0.);
275 |     /// }
276 |     ///
277 |     /// // The borrow held by float_buffer must end before int_buffer is used again, or we get a borrow error.
278 |     /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]);
279 |     ///
280 |     /// // You may have to specify the cast type directly if the compiler can't infer the type.
281 |     /// // The following is also valid in Rust.
282 |     /// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>();
283 |     /// ```
284 |     #[must_use]
285 |     fn reinterpret_cast_mut<H>(&mut self) -> &mut [H]
286 |     where
287 |         H: crate::private::SealedHalf;
288 | }
289 | 
290 | mod private {
291 |     use crate::{bf16, f16};
292 |     // Marker implemented only for `[f16]` and `[bf16]`; presumably the sealing bound of `HalfFloatSliceExt`.
293 |     pub trait SealedHalfFloatSlice {}
294 |     impl SealedHalfFloatSlice for [f16] {}
295 |     impl SealedHalfFloatSlice for [bf16] {}
296 |     // Marker implemented only for `[u16]`; it is the supertrait that seals `HalfBitsSliceExt`.
297 |     pub trait SealedHalfBitsSlice {}
298 |     impl SealedHalfBitsSlice for [u16] {}
299 | }
300 | 
301 | impl HalfFloatSliceExt for [f16] {
302 |     #[inline]
303 |     fn reinterpret_cast(&self) -> &[u16] {
304 |         transmute_ref!(self)
305 |     }
306 | 
307 |     #[inline]
308 |     fn reinterpret_cast_mut(&mut self) -> &mut [u16] {
309 |         transmute_mut!(self)
310 |     }
311 | 
312 |     #[inline]
313 |     fn convert_from_f32_slice(&mut self, src: &[f32]) {
314 |         assert_eq!(
315 |             self.len(),
316 |             src.len(),
317 |             "destination and source slices have different lengths"
318 |         );
319 |         // Lengths match: let `arch` convert `src` directly into the `u16` bit view of `self`.
320 |         arch::f32_to_f16_slice(src, self.reinterpret_cast_mut())
321 |     }
322 | 
323 |     #[inline]
324 |     fn convert_from_f64_slice(&mut self, src: &[f64]) {
325 |         assert_eq!(
326 |             self.len(),
327 |             src.len(),
328 |             "destination and source slices have different lengths"
329 |         );
330 |         // Lengths match: let `arch` convert `src` directly into the `u16` bit view of `self`.
331 |         arch::f64_to_f16_slice(src, self.reinterpret_cast_mut())
332 |     }
333 | 
334 |     #[inline]
335 |     fn convert_to_f32_slice(&self, dst: &mut [f32]) {
336 |         assert_eq!(
337 |             self.len(),
338 |             dst.len(),
339 |             "destination and source slices have different lengths"
340 |         );
341 |         // Lengths match: let `arch` convert the `u16` bit view of `self` into `dst`.
342 |         arch::f16_to_f32_slice(self.reinterpret_cast(), dst)
343 |     }
344 | 
345 |     #[inline]
346 |     fn convert_to_f64_slice(&self, dst: &mut [f64]) {
347 |         assert_eq!(
348 |             self.len(),
349 |             dst.len(),
350 |             "destination and source slices have different lengths"
351 |         );
352 |         // Lengths match: let `arch` convert the `u16` bit view of `self` into `dst`.
353 |         arch::f16_to_f64_slice(self.reinterpret_cast(), dst)
354 |     }
355 | 
356 |     #[cfg(any(feature = "alloc", feature = "std"))]
357 |     #[inline]
358 |     #[allow(clippy::uninit_vec)] // NOTE(review): buffer is zero-filled by `vec!`; allow looks stale
359 |     fn to_f32_vec(&self) -> Vec<f32> {
360 |         let mut vec = vec![0f32; self.len()];
361 |         self.convert_to_f32_slice(&mut vec);
362 |         vec
363 |     }
364 | 
365 |     #[cfg(any(feature = "alloc", feature = "std"))]
366 |     #[inline]
367 |     #[allow(clippy::uninit_vec)] // NOTE(review): buffer is zero-filled by `vec!`; allow looks stale
368 |     fn to_f64_vec(&self) -> Vec<f64> {
369 |         let mut vec = vec![0f64; self.len()];
370 |         self.convert_to_f64_slice(&mut vec);
371 |         vec
372 |     }
373 | }
374 | 
375 | impl HalfFloatSliceExt for [bf16] {
376 |     #[inline]
377 |     fn reinterpret_cast(&self) -> &[u16] {
378 |         transmute_ref!(self)
379 |     }
380 | 
381 |     #[inline]
382 |     fn reinterpret_cast_mut(&mut self) -> &mut [u16] {
383 |         transmute_mut!(self)
384 |     }
385 | 
386 |     #[inline]
387 |     fn convert_from_f32_slice(&mut self, src: &[f32]) {
388 |         assert_eq!(
389 |             self.len(),
390 |             src.len(),
391 |             "destination and source slices have different lengths"
392 |         );
393 | 
394 |         // No bf16 SIMD path exists yet, so convert element by element.
395 |         for (half, &single) in self.iter_mut().zip(src.iter()) {
396 |             *half = bf16::from_f32(single);
397 |         }
398 |     }
399 | 
400 |     #[inline]
401 |     fn convert_from_f64_slice(&mut self, src: &[f64]) {
402 |         assert_eq!(
403 |             self.len(),
404 |             src.len(),
405 |             "destination and source slices have different lengths"
406 |         );
407 | 
408 |         // No bf16 SIMD path exists yet, so convert element by element.
409 |         for (half, &double) in self.iter_mut().zip(src.iter()) {
410 |             *half = bf16::from_f64(double);
411 |         }
412 |     }
413 | 
414 |     #[inline]
415 |     fn convert_to_f32_slice(&self, dst: &mut [f32]) {
416 |         assert_eq!(
417 |             self.len(),
418 |             dst.len(),
419 |             "destination and source slices have different lengths"
420 |         );
421 | 
422 |         // No bf16 SIMD path exists yet, so convert element by element.
423 |         for (slot, half) in dst.iter_mut().zip(self.iter()) {
424 |             *slot = half.to_f32();
425 |         }
426 |     }
427 | 
428 |     #[inline]
429 |     fn convert_to_f64_slice(&self, dst: &mut [f64]) {
430 |         assert_eq!(
431 |             self.len(),
432 |             dst.len(),
433 |             "destination and source slices have different lengths"
434 |         );
435 | 
436 |         // No bf16 SIMD path exists yet, so convert element by element.
437 |         for (slot, half) in dst.iter_mut().zip(self.iter()) {
438 |             *slot = half.to_f64();
439 |         }
440 |     }
441 | 
442 |     #[cfg(any(feature = "alloc", feature = "std"))]
443 |     #[inline]
444 |     #[allow(clippy::uninit_vec)]
445 |     fn to_f32_vec(&self) -> Vec<f32> {
446 |         let mut converted = vec![0f32; self.len()];
447 |         self.convert_to_f32_slice(converted.as_mut_slice());
448 |         converted
449 |     }
450 | 
451 |     #[cfg(any(feature = "alloc", feature = "std"))]
452 |     #[inline]
453 |     #[allow(clippy::uninit_vec)]
454 |     fn to_f64_vec(&self) -> Vec<f64> {
455 |         let mut converted = vec![0f64; self.len()];
456 |         self.convert_to_f64_slice(converted.as_mut_slice());
457 |         converted
458 |     }
459 | }
460 | 
461 | impl HalfBitsSliceExt for [u16] {
462 |     // `H: SealedHalf` is a crate-private bound, so only this crate's half types can be cast targets.
463 |     #[inline]
464 |     fn reinterpret_cast<H>(&self) -> &[H]
465 |     where
466 |         H: crate::private::SealedHalf,
467 |     {
468 |         transmute_ref!(self)
469 |     }
470 | 
471 |     #[inline]
472 |     fn reinterpret_cast_mut<H>(&mut self) -> &mut [H]
473 |     where
474 |         H: crate::private::SealedHalf,
475 |     {
476 |         transmute_mut!(self)
477 |     }
478 | }
479 | 
480 | #[allow(clippy::float_cmp)]
481 | #[cfg(test)]
482 | mod test {
483 |     use super::{HalfBitsSliceExt, HalfFloatSliceExt};
484 |     use crate::{bf16, f16};
485 | 
486 |     #[test]
487 |     fn test_slice_conversions_f16() {
488 |         let bits = &[
489 |             f16::E.to_bits(),
490 |             f16::PI.to_bits(),
491 |             f16::EPSILON.to_bits(),
492 |             f16::FRAC_1_SQRT_2.to_bits(),
493 |         ];
494 |         let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2];
495 | 
496 |         // Convert from bits to numbers
497 |         let from_bits = bits.reinterpret_cast::<f16>();
498 |         assert_eq!(from_bits, numbers);
499 | 
500 |         // Convert from numbers back to bits
501 |         let to_bits = from_bits.reinterpret_cast();
502 |         assert_eq!(to_bits, bits);
503 |     }
504 | 
505 |     #[test]
506 |     fn test_mutablility_f16() {
507 |         let mut bits_array = [f16::PI.to_bits()];
508 |         let bits = &mut bits_array[..];
509 | 
510 |         {
511 |             // Scope the mutable view so its borrow of `bits` clearly ends before `bits` is read below
512 |             let numbers = bits.reinterpret_cast_mut();
513 |             numbers[0] = f16::E;
514 |         }
515 | 
516 |         assert_eq!(bits, &[f16::E.to_bits()]);
517 | 
518 |         bits[0] = f16::LN_2.to_bits();
519 |         assert_eq!(bits, &[f16::LN_2.to_bits()]);
520 |     }
521 | 
522 |     #[test]
523 |     fn test_slice_conversions_bf16() {
524 |         let bits = &[
525 |             bf16::E.to_bits(),
526 |             bf16::PI.to_bits(),
527 |             bf16::EPSILON.to_bits(),
528 |             bf16::FRAC_1_SQRT_2.to_bits(),
529 |         ];
530 |         let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2];
531 | 
532 |         // Convert from bits to numbers
533 |         let from_bits = bits.reinterpret_cast::<bf16>();
534 |         assert_eq!(from_bits, numbers);
535 | 
536 |         // Convert from numbers back to bits
537 |         let to_bits = from_bits.reinterpret_cast();
538 |         assert_eq!(to_bits, bits);
539 |     }
540 | 
541 |     #[test]
542 |     fn test_mutablility_bf16() {
543 |         let mut bits_array = [bf16::PI.to_bits()];
544 |         let bits = &mut bits_array[..];
545 | 
546 |         {
547 |             // Scope the mutable view so its borrow of `bits` clearly ends before `bits` is read below
548 |             let numbers = bits.reinterpret_cast_mut();
549 |             numbers[0] = bf16::E;
550 |         }
551 | 
552 |         assert_eq!(bits, &[bf16::E.to_bits()]);
553 | 
554 |         bits[0] = bf16::LN_2.to_bits();
555 |         assert_eq!(bits, &[bf16::LN_2.to_bits()]);
556 |     }
557 | 
558 |     #[test]
559 |     fn slice_convert_f16_f32() {
560 |         // Exact chunks
561 |         let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.];
562 |         let vf16 = [
563 |             f16::from_f32(1.),
564 |             f16::from_f32(2.),
565 |             f16::from_f32(3.),
566 |             f16::from_f32(4.),
567 |             f16::from_f32(5.),
568 |             f16::from_f32(6.),
569 |             f16::from_f32(7.),
570 |             f16::from_f32(8.),
571 |         ];
572 |         let mut buf32 = vf32;
573 |         let mut buf16 = vf16;
574 | 
575 |         vf16.convert_to_f32_slice(&mut buf32);
576 |         assert_eq!(&vf32, &buf32);
577 | 
578 |         buf16.convert_from_f32_slice(&vf32);
579 |         assert_eq!(&vf16, &buf16);
580 | 
581 |         // Partial with chunks
582 |         let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
583 |         let vf16 = [
584 |             f16::from_f32(1.),
585 |             f16::from_f32(2.),
586 |             f16::from_f32(3.),
587 |             f16::from_f32(4.),
588 |             f16::from_f32(5.),
589 |             f16::from_f32(6.),
590 |             f16::from_f32(7.),
591 |             f16::from_f32(8.),
592 |             f16::from_f32(9.),
593 |         ];
594 |         let mut buf32 = vf32;
595 |         let mut buf16 = vf16;
596 | 
597 |         vf16.convert_to_f32_slice(&mut buf32);
598 |         assert_eq!(&vf32, &buf32);
599 | 
600 |         buf16.convert_from_f32_slice(&vf32);
601 |         assert_eq!(&vf16, &buf16);
602 | 
603 |         // Two elements only (presumably shorter than one full chunk; TODO confirm chunk width)
604 |         let vf32 = [1., 2.];
605 |         let vf16 = [f16::from_f32(1.), f16::from_f32(2.)];
606 |         let mut buf32 = vf32;
607 |         let mut buf16 = vf16;
608 | 
609 |         vf16.convert_to_f32_slice(&mut buf32);
610 |         assert_eq!(&vf32, &buf32);
611 | 
612 |         buf16.convert_from_f32_slice(&vf32);
613 |         assert_eq!(&vf16, &buf16);
614 |     }
615 | 
616 |     #[test]
617 |     fn slice_convert_bf16_f32() {
618 |         // Exact chunks
619 |         let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.];
620 |         let vf16 = [
621 |             bf16::from_f32(1.),
622 |             bf16::from_f32(2.),
623 |             bf16::from_f32(3.),
624 |             bf16::from_f32(4.),
625 |             bf16::from_f32(5.),
626 |             bf16::from_f32(6.),
627 |             bf16::from_f32(7.),
628 |             bf16::from_f32(8.),
629 |         ];
630 |         let mut buf32 = vf32;
631 |         let mut buf16 = vf16;
632 | 
633 |         vf16.convert_to_f32_slice(&mut buf32);
634 |         assert_eq!(&vf32, &buf32);
635 | 
636 |         buf16.convert_from_f32_slice(&vf32);
637 |         assert_eq!(&vf16, &buf16);
638 | 
639 |         // Partial with chunks
640 |         let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
641 |         let vf16 = [
642 |             bf16::from_f32(1.),
643 |             bf16::from_f32(2.),
644 |             bf16::from_f32(3.),
645 |             bf16::from_f32(4.),
646 |             bf16::from_f32(5.),
647 |             bf16::from_f32(6.),
648 |             bf16::from_f32(7.),
649 |             bf16::from_f32(8.),
650 |             bf16::from_f32(9.),
651 |         ];
652 |         let mut buf32 = vf32;
653 |         let mut buf16 = vf16;
654 | 
655 |         vf16.convert_to_f32_slice(&mut buf32);
656 |         assert_eq!(&vf32, &buf32);
657 | 
658 |         buf16.convert_from_f32_slice(&vf32);
659 |         assert_eq!(&vf16, &buf16);
660 | 
661 |         // Two elements only (presumably shorter than one full chunk; TODO confirm chunk width)
662 |         let vf32 = [1., 2.];
663 |         let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)];
664 |         let mut buf32 = vf32;
665 |         let mut buf16 = vf16;
666 | 
667 |         vf16.convert_to_f32_slice(&mut buf32);
668 |         assert_eq!(&vf32, &buf32);
669 | 
670 |         buf16.convert_from_f32_slice(&vf32);
671 |         assert_eq!(&vf16, &buf16);
672 |     }
673 | 
674 |     #[test]
675 |     fn slice_convert_f16_f64() {
676 |         // Exact chunks
677 |         let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.];
678 |         let vf16 = [
679 |             f16::from_f64(1.),
680 |             f16::from_f64(2.),
681 |             f16::from_f64(3.),
682 |             f16::from_f64(4.),
683 |             f16::from_f64(5.),
684 |             f16::from_f64(6.),
685 |             f16::from_f64(7.),
686 |             f16::from_f64(8.),
687 |         ];
688 |         let mut buf64 = vf64;
689 |         let mut buf16 = vf16;
690 | 
691 |         vf16.convert_to_f64_slice(&mut buf64);
692 |         assert_eq!(&vf64, &buf64);
693 | 
694 |         buf16.convert_from_f64_slice(&vf64);
695 |         assert_eq!(&vf16, &buf16);
696 | 
697 |         // Partial with chunks
698 |         let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
699 |         let vf16 = [
700 |             f16::from_f64(1.),
701 |             f16::from_f64(2.),
702 |             f16::from_f64(3.),
703 |             f16::from_f64(4.),
704 |             f16::from_f64(5.),
705 |             f16::from_f64(6.),
706 |             f16::from_f64(7.),
707 |             f16::from_f64(8.),
708 |             f16::from_f64(9.),
709 |         ];
710 |         let mut buf64 = vf64;
711 |         let mut buf16 = vf16;
712 | 
713 |         vf16.convert_to_f64_slice(&mut buf64);
714 |         assert_eq!(&vf64, &buf64);
715 | 
716 |         buf16.convert_from_f64_slice(&vf64);
717 |         assert_eq!(&vf16, &buf16);
718 | 
719 |         // Two elements only (presumably shorter than one full chunk; TODO confirm chunk width)
720 |         let vf64 = [1., 2.];
721 |         let vf16 = [f16::from_f64(1.), f16::from_f64(2.)];
722 |         let mut buf64 = vf64;
723 |         let mut buf16 = vf16;
724 | 
725 |         vf16.convert_to_f64_slice(&mut buf64);
726 |         assert_eq!(&vf64, &buf64);
727 | 
728 |         buf16.convert_from_f64_slice(&vf64);
729 |         assert_eq!(&vf16, &buf16);
730 |     }
731 | 
732 |     #[test]
733 |     fn slice_convert_bf16_f64() {
734 |         // Exact chunks
735 |         let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.];
736 |         let vf16 = [
737 |             bf16::from_f64(1.),
738 |             bf16::from_f64(2.),
739 |             bf16::from_f64(3.),
740 |             bf16::from_f64(4.),
741 |             bf16::from_f64(5.),
742 |             bf16::from_f64(6.),
743 |             bf16::from_f64(7.),
744 |             bf16::from_f64(8.),
745 |         ];
746 |         let mut buf64 = vf64;
747 |         let mut buf16 = vf16;
748 | 
749 |         vf16.convert_to_f64_slice(&mut buf64);
750 |         assert_eq!(&vf64, &buf64);
751 | 
752 |         buf16.convert_from_f64_slice(&vf64);
753 |         assert_eq!(&vf16, &buf16);
754 | 
755 |         // Partial with chunks
756 |         let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
757 |         let vf16 = [
758 |             bf16::from_f64(1.),
759 |             bf16::from_f64(2.),
760 |             bf16::from_f64(3.),
761 |             bf16::from_f64(4.),
762 |             bf16::from_f64(5.),
763 |             bf16::from_f64(6.),
764 |             bf16::from_f64(7.),
765 |             bf16::from_f64(8.),
766 |             bf16::from_f64(9.),
767 |         ];
768 |         let mut buf64 = vf64;
769 |         let mut buf16 = vf16;
770 | 
771 |         vf16.convert_to_f64_slice(&mut buf64);
772 |         assert_eq!(&vf64, &buf64);
773 | 
774 |         buf16.convert_from_f64_slice(&vf64);
775 |         assert_eq!(&vf16, &buf16);
776 | 
777 |         // Two elements only (presumably shorter than one full chunk; TODO confirm chunk width)
778 |         let vf64 = [1., 2.];
779 |         let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)];
780 |         let mut buf64 = vf64;
781 |         let mut buf16 = vf16;
782 | 
783 |         vf16.convert_to_f64_slice(&mut buf64);
784 |         assert_eq!(&vf64, &buf64);
785 | 
786 |         buf16.convert_from_f64_slice(&vf64);
787 |         assert_eq!(&vf16, &buf16);
788 |     }
789 | 
790 |     #[test]
791 |     #[should_panic(expected = "destination and source slices have different lengths")]
792 |     fn convert_from_f32_slice_len_mismatch_panics() {
793 |         let mut slice1 = [f16::ZERO; 3];
794 |         let slice2 = [0f32; 4];
795 |         slice1.convert_from_f32_slice(&slice2);
796 |     }
797 | 
798 |     #[test]
799 |     #[should_panic(expected = "destination and source slices have different lengths")]
800 |     fn convert_from_f64_slice_len_mismatch_panics() {
801 |         let mut slice1 = [f16::ZERO; 3];
802 |         let slice2 = [0f64; 4];
803 |         slice1.convert_from_f64_slice(&slice2);
804 |     }
805 | 
806 |     #[test]
807 |     #[should_panic(expected = "destination and source slices have different lengths")]
808 |     fn convert_to_f32_slice_len_mismatch_panics() {
809 |         let slice1 = [f16::ZERO; 3];
810 |         let mut slice2 = [0f32; 4];
811 |         slice1.convert_to_f32_slice(&mut slice2);
812 |     }
813 | 
814 |     #[test]
815 |     #[should_panic(expected = "destination and source slices have different lengths")]
816 |     fn convert_to_f64_slice_len_mismatch_panics() {
817 |         let slice1 = [f16::ZERO; 3];
818 |         let mut slice2 = [0f64; 4];
819 |         slice1.convert_to_f64_slice(&mut slice2);
820 |     }
821 | }
822 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
   1 | # This file is automatically @generated by Cargo.
   2 | # It is not intended for manual editing.
   3 | version = 3
   4 | 
   5 | [[package]]
   6 | name = "aho-corasick"
   7 | version = "1.1.3"
   8 | source = "registry+https://github.com/rust-lang/crates.io-index"
   9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
  10 | dependencies = [
  11 |  "memchr",
  12 | ]
  13 | 
  14 | [[package]]
  15 | name = "anes"
  16 | version = "0.1.6"
  17 | source = "registry+https://github.com/rust-lang/crates.io-index"
  18 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
  19 | 
  20 | [[package]]
  21 | name = "anstyle"
  22 | version = "1.0.10"
  23 | source = "registry+https://github.com/rust-lang/crates.io-index"
  24 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
  25 | 
  26 | [[package]]
  27 | name = "arbitrary"
  28 | version = "1.4.1"
  29 | source = "registry+https://github.com/rust-lang/crates.io-index"
  30 | checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
  31 | dependencies = [
  32 |  "derive_arbitrary",
  33 | ]
  34 | 
  35 | [[package]]
  36 | name = "autocfg"
  37 | version = "1.4.0"
  38 | source = "registry+https://github.com/rust-lang/crates.io-index"
  39 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
  40 | 
  41 | [[package]]
  42 | name = "bitflags"
  43 | version = "2.9.0"
  44 | source = "registry+https://github.com/rust-lang/crates.io-index"
  45 | checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
  46 | 
  47 | [[package]]
  48 | name = "bumpalo"
  49 | version = "3.17.0"
  50 | source = "registry+https://github.com/rust-lang/crates.io-index"
  51 | checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
  52 | 
  53 | [[package]]
  54 | name = "bytecheck"
  55 | version = "0.8.1"
  56 | source = "registry+https://github.com/rust-lang/crates.io-index"
  57 | checksum = "50690fb3370fb9fe3550372746084c46f2ac8c9685c583d2be10eefd89d3d1a3"
  58 | dependencies = [
  59 |  "bytecheck_derive",
  60 |  "ptr_meta",
  61 |  "rancor",
  62 |  "simdutf8",
  63 | ]
  64 | 
  65 | [[package]]
  66 | name = "bytecheck_derive"
  67 | version = "0.8.1"
  68 | source = "registry+https://github.com/rust-lang/crates.io-index"
  69 | checksum = "efb7846e0cb180355c2dec69e721edafa36919850f1a9f52ffba4ebc0393cb71"
  70 | dependencies = [
  71 |  "proc-macro2",
  72 |  "quote",
  73 |  "syn 2.0.100",
  74 | ]
  75 | 
  76 | [[package]]
  77 | name = "bytemuck"
  78 | version = "1.4.1"
  79 | source = "registry+https://github.com/rust-lang/crates.io-index"
  80 | checksum = "41aa2ec95ca3b5c54cf73c91acf06d24f4495d5f1b1c12506ae3483d646177ac"
  81 | dependencies = [
  82 |  "bytemuck_derive",
  83 | ]
  84 | 
  85 | [[package]]
  86 | name = "bytemuck_derive"
  87 | version = "1.8.1"
  88 | source = "registry+https://github.com/rust-lang/crates.io-index"
  89 | checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a"
  90 | dependencies = [
  91 |  "proc-macro2",
  92 |  "quote",
  93 |  "syn 2.0.100",
  94 | ]
  95 | 
  96 | [[package]]
  97 | name = "bytes"
  98 | version = "1.10.1"
  99 | source = "registry+https://github.com/rust-lang/crates.io-index"
 100 | checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
 101 | 
 102 | [[package]]
 103 | name = "cast"
 104 | version = "0.3.0"
 105 | source = "registry+https://github.com/rust-lang/crates.io-index"
 106 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 107 | 
 108 | [[package]]
 109 | name = "cfg-if"
 110 | version = "1.0.0"
 111 | source = "registry+https://github.com/rust-lang/crates.io-index"
 112 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 113 | 
 114 | [[package]]
 115 | name = "ciborium"
 116 | version = "0.2.2"
 117 | source = "registry+https://github.com/rust-lang/crates.io-index"
 118 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
 119 | dependencies = [
 120 |  "ciborium-io",
 121 |  "ciborium-ll",
 122 |  "serde",
 123 | ]
 124 | 
 125 | [[package]]
 126 | name = "ciborium-io"
 127 | version = "0.2.2"
 128 | source = "registry+https://github.com/rust-lang/crates.io-index"
 129 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
 130 | 
 131 | [[package]]
 132 | name = "ciborium-ll"
 133 | version = "0.2.2"
 134 | source = "registry+https://github.com/rust-lang/crates.io-index"
 135 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
 136 | dependencies = [
 137 |  "ciborium-io",
 138 |  "half 2.4.1",
 139 | ]
 140 | 
 141 | [[package]]
 142 | name = "clap"
 143 | version = "4.5.32"
 144 | source = "registry+https://github.com/rust-lang/crates.io-index"
 145 | checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83"
 146 | dependencies = [
 147 |  "clap_builder",
 148 | ]
 149 | 
 150 | [[package]]
 151 | name = "clap_builder"
 152 | version = "4.5.32"
 153 | source = "registry+https://github.com/rust-lang/crates.io-index"
 154 | checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8"
 155 | dependencies = [
 156 |  "anstyle",
 157 |  "clap_lex",
 158 | ]
 159 | 
 160 | [[package]]
 161 | name = "clap_lex"
 162 | version = "0.7.4"
 163 | source = "registry+https://github.com/rust-lang/crates.io-index"
 164 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
 165 | 
 166 | [[package]]
 167 | name = "criterion"
 168 | version = "0.5.1"
 169 | source = "registry+https://github.com/rust-lang/crates.io-index"
 170 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
 171 | dependencies = [
 172 |  "anes",
 173 |  "cast",
 174 |  "ciborium",
 175 |  "clap",
 176 |  "criterion-plot",
 177 |  "is-terminal",
 178 |  "itertools",
 179 |  "num-traits",
 180 |  "once_cell",
 181 |  "oorandom",
 182 |  "plotters",
 183 |  "rayon",
 184 |  "regex",
 185 |  "serde",
 186 |  "serde_derive",
 187 |  "serde_json",
 188 |  "tinytemplate",
 189 |  "walkdir",
 190 | ]
 191 | 
 192 | [[package]]
 193 | name = "criterion-plot"
 194 | version = "0.5.0"
 195 | source = "registry+https://github.com/rust-lang/crates.io-index"
 196 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
 197 | dependencies = [
 198 |  "cast",
 199 |  "itertools",
 200 | ]
 201 | 
 202 | [[package]]
 203 | name = "crossbeam-deque"
 204 | version = "0.8.6"
 205 | source = "registry+https://github.com/rust-lang/crates.io-index"
 206 | checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
 207 | dependencies = [
 208 |  "crossbeam-epoch",
 209 |  "crossbeam-utils",
 210 | ]
 211 | 
 212 | [[package]]
 213 | name = "crossbeam-epoch"
 214 | version = "0.9.18"
 215 | source = "registry+https://github.com/rust-lang/crates.io-index"
 216 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
 217 | dependencies = [
 218 |  "crossbeam-utils",
 219 | ]
 220 | 
 221 | [[package]]
 222 | name = "crossbeam-utils"
 223 | version = "0.8.21"
 224 | source = "registry+https://github.com/rust-lang/crates.io-index"
 225 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 226 | 
 227 | [[package]]
 228 | name = "crunchy"
 229 | version = "0.2.3"
 230 | source = "registry+https://github.com/rust-lang/crates.io-index"
 231 | checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
 232 | 
 233 | [[package]]
 234 | name = "derive_arbitrary"
 235 | version = "1.4.1"
 236 | source = "registry+https://github.com/rust-lang/crates.io-index"
 237 | checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
 238 | dependencies = [
 239 |  "proc-macro2",
 240 |  "quote",
 241 |  "syn 2.0.100",
 242 | ]
 243 | 
 244 | [[package]]
 245 | name = "either"
 246 | version = "1.15.0"
 247 | source = "registry+https://github.com/rust-lang/crates.io-index"
 248 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 249 | 
 250 | [[package]]
 251 | name = "env_logger"
 252 | version = "0.8.4"
 253 | source = "registry+https://github.com/rust-lang/crates.io-index"
 254 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
 255 | dependencies = [
 256 |  "log",
 257 |  "regex",
 258 | ]
 259 | 
 260 | [[package]]
 261 | name = "equivalent"
 262 | version = "1.0.2"
 263 | source = "registry+https://github.com/rust-lang/crates.io-index"
 264 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 265 | 
 266 | [[package]]
 267 | name = "getrandom"
 268 | version = "0.2.15"
 269 | source = "registry+https://github.com/rust-lang/crates.io-index"
 270 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
 271 | dependencies = [
 272 |  "cfg-if",
 273 |  "libc",
 274 |  "wasi 0.11.0+wasi-snapshot-preview1",
 275 | ]
 276 | 
 277 | [[package]]
 278 | name = "getrandom"
 279 | version = "0.3.1"
 280 | source = "registry+https://github.com/rust-lang/crates.io-index"
 281 | checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
 282 | dependencies = [
 283 |  "cfg-if",
 284 |  "libc",
 285 |  "wasi 0.13.3+wasi-0.2.2",
 286 |  "windows-targets",
 287 | ]
 288 | 
 289 | [[package]]
 290 | name = "half"
 291 | version = "2.4.1"
 292 | source = "registry+https://github.com/rust-lang/crates.io-index"
 293 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
 294 | dependencies = [
 295 |  "cfg-if",
 296 |  "crunchy",
 297 | ]
 298 | 
 299 | [[package]]
 300 | name = "half"
 301 | version = "2.7.1"
 302 | dependencies = [
 303 |  "arbitrary",
 304 |  "bytemuck",
 305 |  "cfg-if",
 306 |  "criterion",
 307 |  "crunchy",
 308 |  "num-traits",
 309 |  "quickcheck",
 310 |  "quickcheck_macros",
 311 |  "rand 0.9.0",
 312 |  "rand_distr",
 313 |  "rkyv",
 314 |  "serde",
 315 |  "zerocopy",
 316 | ]
 317 | 
 318 | [[package]]
 319 | name = "hashbrown"
 320 | version = "0.14.5"
 321 | source = "registry+https://github.com/rust-lang/crates.io-index"
 322 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 323 | 
 324 | [[package]]
 325 | name = "hashbrown"
 326 | version = "0.15.2"
 327 | source = "registry+https://github.com/rust-lang/crates.io-index"
 328 | checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
 329 | 
 330 | [[package]]
 331 | name = "hermit-abi"
 332 | version = "0.5.0"
 333 | source = "registry+https://github.com/rust-lang/crates.io-index"
 334 | checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e"
 335 | 
 336 | [[package]]
 337 | name = "indexmap"
 338 | version = "2.8.0"
 339 | source = "registry+https://github.com/rust-lang/crates.io-index"
 340 | checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058"
 341 | dependencies = [
 342 |  "equivalent",
 343 |  "hashbrown 0.15.2",
 344 | ]
 345 | 
 346 | [[package]]
 347 | name = "is-terminal"
 348 | version = "0.4.16"
 349 | source = "registry+https://github.com/rust-lang/crates.io-index"
 350 | checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
 351 | dependencies = [
 352 |  "hermit-abi",
 353 |  "libc",
 354 |  "windows-sys",
 355 | ]
 356 | 
 357 | [[package]]
 358 | name = "itertools"
 359 | version = "0.10.5"
 360 | source = "registry+https://github.com/rust-lang/crates.io-index"
 361 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
 362 | dependencies = [
 363 |  "either",
 364 | ]
 365 | 
 366 | [[package]]
 367 | name = "itoa"
 368 | version = "1.0.15"
 369 | source = "registry+https://github.com/rust-lang/crates.io-index"
 370 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
 371 | 
 372 | [[package]]
 373 | name = "js-sys"
 374 | version = "0.3.77"
 375 | source = "registry+https://github.com/rust-lang/crates.io-index"
 376 | checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
 377 | dependencies = [
 378 |  "once_cell",
 379 |  "wasm-bindgen",
 380 | ]
 381 | 
 382 | [[package]]
 383 | name = "libc"
 384 | version = "0.2.171"
 385 | source = "registry+https://github.com/rust-lang/crates.io-index"
 386 | checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
 387 | 
 388 | [[package]]
 389 | name = "libm"
 390 | version = "0.2.11"
 391 | source = "registry+https://github.com/rust-lang/crates.io-index"
 392 | checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa"
 393 | 
 394 | [[package]]
 395 | name = "log"
 396 | version = "0.4.26"
 397 | source = "registry+https://github.com/rust-lang/crates.io-index"
 398 | checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
 399 | 
 400 | [[package]]
 401 | name = "memchr"
 402 | version = "2.7.4"
 403 | source = "registry+https://github.com/rust-lang/crates.io-index"
 404 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 405 | 
 406 | [[package]]
 407 | name = "munge"
 408 | version = "0.4.3"
 409 | source = "registry+https://github.com/rust-lang/crates.io-index"
 410 | checksum = "a0091202c98cf06da46c279fdf50cccb6b1c43b4521abdf6a27b4c7e71d5d9d7"
 411 | dependencies = [
 412 |  "munge_macro",
 413 | ]
 414 | 
 415 | [[package]]
 416 | name = "munge_macro"
 417 | version = "0.4.3"
 418 | source = "registry+https://github.com/rust-lang/crates.io-index"
 419 | checksum = "734799cf91479720b2f970c61a22850940dd91e27d4f02b1c6fc792778df2459"
 420 | dependencies = [
 421 |  "proc-macro2",
 422 |  "quote",
 423 |  "syn 2.0.100",
 424 | ]
 425 | 
 426 | [[package]]
 427 | name = "num-traits"
 428 | version = "0.2.16"
 429 | source = "registry+https://github.com/rust-lang/crates.io-index"
 430 | checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
 431 | dependencies = [
 432 |  "autocfg",
 433 |  "libm",
 434 | ]
 435 | 
 436 | [[package]]
 437 | name = "once_cell"
 438 | version = "1.21.1"
 439 | source = "registry+https://github.com/rust-lang/crates.io-index"
 440 | checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
 441 | 
 442 | [[package]]
 443 | name = "oorandom"
 444 | version = "11.1.5"
 445 | source = "registry+https://github.com/rust-lang/crates.io-index"
 446 | checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
 447 | 
 448 | [[package]]
 449 | name = "plotters"
 450 | version = "0.3.7"
 451 | source = "registry+https://github.com/rust-lang/crates.io-index"
 452 | checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
 453 | dependencies = [
 454 |  "num-traits",
 455 |  "plotters-backend",
 456 |  "plotters-svg",
 457 |  "wasm-bindgen",
 458 |  "web-sys",
 459 | ]
 460 | 
 461 | [[package]]
 462 | name = "plotters-backend"
 463 | version = "0.3.7"
 464 | source = "registry+https://github.com/rust-lang/crates.io-index"
 465 | checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
 466 | 
 467 | [[package]]
 468 | name = "plotters-svg"
 469 | version = "0.3.7"
 470 | source = "registry+https://github.com/rust-lang/crates.io-index"
 471 | checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
 472 | dependencies = [
 473 |  "plotters-backend",
 474 | ]
 475 | 
 476 | [[package]]
 477 | name = "ppv-lite86"
 478 | version = "0.2.21"
 479 | source = "registry+https://github.com/rust-lang/crates.io-index"
 480 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
 481 | dependencies = [
 482 |  "zerocopy",
 483 | ]
 484 | 
 485 | [[package]]
 486 | name = "proc-macro2"
 487 | version = "1.0.94"
 488 | source = "registry+https://github.com/rust-lang/crates.io-index"
 489 | checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
 490 | dependencies = [
 491 |  "unicode-ident",
 492 | ]
 493 | 
 494 | [[package]]
 495 | name = "ptr_meta"
 496 | version = "0.3.0"
 497 | source = "registry+https://github.com/rust-lang/crates.io-index"
 498 | checksum = "fe9e76f66d3f9606f44e45598d155cb13ecf09f4a28199e48daf8c8fc937ea90"
 499 | dependencies = [
 500 |  "ptr_meta_derive",
 501 | ]
 502 | 
 503 | [[package]]
 504 | name = "ptr_meta_derive"
 505 | version = "0.3.0"
 506 | source = "registry+https://github.com/rust-lang/crates.io-index"
 507 | checksum = "ca414edb151b4c8d125c12566ab0d74dc9cdba36fb80eb7b848c15f495fd32d1"
 508 | dependencies = [
 509 |  "proc-macro2",
 510 |  "quote",
 511 |  "syn 2.0.100",
 512 | ]
 513 | 
 514 | [[package]]
 515 | name = "quickcheck"
 516 | version = "1.0.3"
 517 | source = "registry+https://github.com/rust-lang/crates.io-index"
 518 | checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
 519 | dependencies = [
 520 |  "env_logger",
 521 |  "log",
 522 |  "rand 0.8.5",
 523 | ]
 524 | 
 525 | [[package]]
 526 | name = "quickcheck_macros"
 527 | version = "1.0.0"
 528 | source = "registry+https://github.com/rust-lang/crates.io-index"
 529 | checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9"
 530 | dependencies = [
 531 |  "proc-macro2",
 532 |  "quote",
 533 |  "syn 1.0.109",
 534 | ]
 535 | 
 536 | [[package]]
 537 | name = "quote"
 538 | version = "1.0.40"
 539 | source = "registry+https://github.com/rust-lang/crates.io-index"
 540 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
 541 | dependencies = [
 542 |  "proc-macro2",
 543 | ]
 544 | 
 545 | [[package]]
 546 | name = "rancor"
 547 | version = "0.1.0"
 548 | source = "registry+https://github.com/rust-lang/crates.io-index"
 549 | checksum = "caf5f7161924b9d1cea0e4cabc97c372cea92b5f927fc13c6bca67157a0ad947"
 550 | dependencies = [
 551 |  "ptr_meta",
 552 | ]
 553 | 
 554 | [[package]]
 555 | name = "rand"
 556 | version = "0.8.5"
 557 | source = "registry+https://github.com/rust-lang/crates.io-index"
 558 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 559 | dependencies = [
 560 |  "rand_core 0.6.4",
 561 | ]
 562 | 
 563 | [[package]]
 564 | name = "rand"
 565 | version = "0.9.0"
 566 | source = "registry+https://github.com/rust-lang/crates.io-index"
 567 | checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
 568 | dependencies = [
 569 |  "rand_chacha",
 570 |  "rand_core 0.9.3",
 571 |  "zerocopy",
 572 | ]
 573 | 
 574 | [[package]]
 575 | name = "rand_chacha"
 576 | version = "0.9.0"
 577 | source = "registry+https://github.com/rust-lang/crates.io-index"
 578 | checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
 579 | dependencies = [
 580 |  "ppv-lite86",
 581 |  "rand_core 0.9.3",
 582 | ]
 583 | 
 584 | [[package]]
 585 | name = "rand_core"
 586 | version = "0.6.4"
 587 | source = "registry+https://github.com/rust-lang/crates.io-index"
 588 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 589 | dependencies = [
 590 |  "getrandom 0.2.15",
 591 | ]
 592 | 
 593 | [[package]]
 594 | name = "rand_core"
 595 | version = "0.9.3"
 596 | source = "registry+https://github.com/rust-lang/crates.io-index"
 597 | checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
 598 | dependencies = [
 599 |  "getrandom 0.3.1",
 600 | ]
 601 | 
 602 | [[package]]
 603 | name = "rand_distr"
 604 | version = "0.5.0"
 605 | source = "registry+https://github.com/rust-lang/crates.io-index"
 606 | checksum = "ddc3b5afe4c995c44540865b8ca5c52e6a59fa362da96c5d30886930ddc8da1c"
 607 | dependencies = [
 608 |  "num-traits",
 609 |  "rand 0.9.0",
 610 | ]
 611 | 
 612 | [[package]]
 613 | name = "rayon"
 614 | version = "1.10.0"
 615 | source = "registry+https://github.com/rust-lang/crates.io-index"
 616 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
 617 | dependencies = [
 618 |  "either",
 619 |  "rayon-core",
 620 | ]
 621 | 
 622 | [[package]]
 623 | name = "rayon-core"
 624 | version = "1.12.1"
 625 | source = "registry+https://github.com/rust-lang/crates.io-index"
 626 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
 627 | dependencies = [
 628 |  "crossbeam-deque",
 629 |  "crossbeam-utils",
 630 | ]
 631 | 
 632 | [[package]]
 633 | name = "regex"
 634 | version = "1.11.1"
 635 | source = "registry+https://github.com/rust-lang/crates.io-index"
 636 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
 637 | dependencies = [
 638 |  "aho-corasick",
 639 |  "memchr",
 640 |  "regex-automata",
 641 |  "regex-syntax",
 642 | ]
 643 | 
 644 | [[package]]
 645 | name = "regex-automata"
 646 | version = "0.4.9"
 647 | source = "registry+https://github.com/rust-lang/crates.io-index"
 648 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
 649 | dependencies = [
 650 |  "aho-corasick",
 651 |  "memchr",
 652 |  "regex-syntax",
 653 | ]
 654 | 
 655 | [[package]]
 656 | name = "regex-syntax"
 657 | version = "0.8.5"
 658 | source = "registry+https://github.com/rust-lang/crates.io-index"
 659 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 660 | 
 661 | [[package]]
 662 | name = "rend"
 663 | version = "0.5.2"
 664 | source = "registry+https://github.com/rust-lang/crates.io-index"
 665 | checksum = "a35e8a6bf28cd121053a66aa2e6a2e3eaffad4a60012179f0e864aa5ffeff215"
 666 | dependencies = [
 667 |  "bytecheck",
 668 | ]
 669 | 
 670 | [[package]]
 671 | name = "rkyv"
 672 | version = "0.8.0"
 673 | source = "registry+https://github.com/rust-lang/crates.io-index"
 674 | checksum = "6d7fa2297190bd08087add407c3dedf28eb3be1d75955ffbd3bc312834325760"
 675 | dependencies = [
 676 |  "bytecheck",
 677 |  "bytes",
 678 |  "hashbrown 0.14.5",
 679 |  "indexmap",
 680 |  "munge",
 681 |  "ptr_meta",
 682 |  "rancor",
 683 |  "rend",
 684 |  "rkyv_derive",
 685 |  "tinyvec",
 686 |  "uuid",
 687 | ]
 688 | 
 689 | [[package]]
 690 | name = "rkyv_derive"
 691 | version = "0.8.0"
 692 | source = "registry+https://github.com/rust-lang/crates.io-index"
 693 | checksum = "4aad510db4f88722adf0e4586ff0dedfca4af57b17c075b2420bac1db446d22c"
 694 | dependencies = [
 695 |  "proc-macro2",
 696 |  "quote",
 697 |  "syn 2.0.100",
 698 | ]
 699 | 
 700 | [[package]]
 701 | name = "rustversion"
 702 | version = "1.0.20"
 703 | source = "registry+https://github.com/rust-lang/crates.io-index"
 704 | checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
 705 | 
 706 | [[package]]
 707 | name = "ryu"
 708 | version = "1.0.20"
 709 | source = "registry+https://github.com/rust-lang/crates.io-index"
 710 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
 711 | 
 712 | [[package]]
 713 | name = "same-file"
 714 | version = "1.0.6"
 715 | source = "registry+https://github.com/rust-lang/crates.io-index"
 716 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
 717 | dependencies = [
 718 |  "winapi-util",
 719 | ]
 720 | 
 721 | [[package]]
 722 | name = "serde"
 723 | version = "1.0.219"
 724 | source = "registry+https://github.com/rust-lang/crates.io-index"
 725 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
 726 | dependencies = [
 727 |  "serde_derive",
 728 | ]
 729 | 
 730 | [[package]]
 731 | name = "serde_derive"
 732 | version = "1.0.219"
 733 | source = "registry+https://github.com/rust-lang/crates.io-index"
 734 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
 735 | dependencies = [
 736 |  "proc-macro2",
 737 |  "quote",
 738 |  "syn 2.0.100",
 739 | ]
 740 | 
 741 | [[package]]
 742 | name = "serde_json"
 743 | version = "1.0.140"
 744 | source = "registry+https://github.com/rust-lang/crates.io-index"
 745 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
 746 | dependencies = [
 747 |  "itoa",
 748 |  "memchr",
 749 |  "ryu",
 750 |  "serde",
 751 | ]
 752 | 
 753 | [[package]]
 754 | name = "simdutf8"
 755 | version = "0.1.5"
 756 | source = "registry+https://github.com/rust-lang/crates.io-index"
 757 | checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
 758 | 
 759 | [[package]]
 760 | name = "syn"
 761 | version = "1.0.109"
 762 | source = "registry+https://github.com/rust-lang/crates.io-index"
 763 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
 764 | dependencies = [
 765 |  "proc-macro2",
 766 |  "quote",
 767 |  "unicode-ident",
 768 | ]
 769 | 
 770 | [[package]]
 771 | name = "syn"
 772 | version = "2.0.100"
 773 | source = "registry+https://github.com/rust-lang/crates.io-index"
 774 | checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
 775 | dependencies = [
 776 |  "proc-macro2",
 777 |  "quote",
 778 |  "unicode-ident",
 779 | ]
 780 | 
 781 | [[package]]
 782 | name = "tinytemplate"
 783 | version = "1.2.1"
 784 | source = "registry+https://github.com/rust-lang/crates.io-index"
 785 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
 786 | dependencies = [
 787 |  "serde",
 788 |  "serde_json",
 789 | ]
 790 | 
 791 | [[package]]
 792 | name = "tinyvec"
 793 | version = "1.9.0"
 794 | source = "registry+https://github.com/rust-lang/crates.io-index"
 795 | checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71"
 796 | dependencies = [
 797 |  "tinyvec_macros",
 798 | ]
 799 | 
 800 | [[package]]
 801 | name = "tinyvec_macros"
 802 | version = "0.1.1"
 803 | source = "registry+https://github.com/rust-lang/crates.io-index"
 804 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 805 | 
 806 | [[package]]
 807 | name = "unicode-ident"
 808 | version = "1.0.18"
 809 | source = "registry+https://github.com/rust-lang/crates.io-index"
 810 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
 811 | 
 812 | [[package]]
 813 | name = "uuid"
 814 | version = "1.15.1"
 815 | source = "registry+https://github.com/rust-lang/crates.io-index"
 816 | checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587"
 817 | 
 818 | [[package]]
 819 | name = "walkdir"
 820 | version = "2.5.0"
 821 | source = "registry+https://github.com/rust-lang/crates.io-index"
 822 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
 823 | dependencies = [
 824 |  "same-file",
 825 |  "winapi-util",
 826 | ]
 827 | 
 828 | [[package]]
 829 | name = "wasi"
 830 | version = "0.11.0+wasi-snapshot-preview1"
 831 | source = "registry+https://github.com/rust-lang/crates.io-index"
 832 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 833 | 
 834 | [[package]]
 835 | name = "wasi"
 836 | version = "0.13.3+wasi-0.2.2"
 837 | source = "registry+https://github.com/rust-lang/crates.io-index"
 838 | checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
 839 | dependencies = [
 840 |  "wit-bindgen-rt",
 841 | ]
 842 | 
 843 | [[package]]
 844 | name = "wasm-bindgen"
 845 | version = "0.2.100"
 846 | source = "registry+https://github.com/rust-lang/crates.io-index"
 847 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
 848 | dependencies = [
 849 |  "cfg-if",
 850 |  "once_cell",
 851 |  "rustversion",
 852 |  "wasm-bindgen-macro",
 853 | ]
 854 | 
 855 | [[package]]
 856 | name = "wasm-bindgen-backend"
 857 | version = "0.2.100"
 858 | source = "registry+https://github.com/rust-lang/crates.io-index"
 859 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
 860 | dependencies = [
 861 |  "bumpalo",
 862 |  "log",
 863 |  "proc-macro2",
 864 |  "quote",
 865 |  "syn 2.0.100",
 866 |  "wasm-bindgen-shared",
 867 | ]
 868 | 
 869 | [[package]]
 870 | name = "wasm-bindgen-macro"
 871 | version = "0.2.100"
 872 | source = "registry+https://github.com/rust-lang/crates.io-index"
 873 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
 874 | dependencies = [
 875 |  "quote",
 876 |  "wasm-bindgen-macro-support",
 877 | ]
 878 | 
 879 | [[package]]
 880 | name = "wasm-bindgen-macro-support"
 881 | version = "0.2.100"
 882 | source = "registry+https://github.com/rust-lang/crates.io-index"
 883 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
 884 | dependencies = [
 885 |  "proc-macro2",
 886 |  "quote",
 887 |  "syn 2.0.100",
 888 |  "wasm-bindgen-backend",
 889 |  "wasm-bindgen-shared",
 890 | ]
 891 | 
 892 | [[package]]
 893 | name = "wasm-bindgen-shared"
 894 | version = "0.2.100"
 895 | source = "registry+https://github.com/rust-lang/crates.io-index"
 896 | checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
 897 | dependencies = [
 898 |  "unicode-ident",
 899 | ]
 900 | 
 901 | [[package]]
 902 | name = "web-sys"
 903 | version = "0.3.77"
 904 | source = "registry+https://github.com/rust-lang/crates.io-index"
 905 | checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
 906 | dependencies = [
 907 |  "js-sys",
 908 |  "wasm-bindgen",
 909 | ]
 910 | 
 911 | [[package]]
 912 | name = "winapi-util"
 913 | version = "0.1.9"
 914 | source = "registry+https://github.com/rust-lang/crates.io-index"
 915 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 916 | dependencies = [
 917 |  "windows-sys",
 918 | ]
 919 | 
 920 | [[package]]
 921 | name = "windows-sys"
 922 | version = "0.59.0"
 923 | source = "registry+https://github.com/rust-lang/crates.io-index"
 924 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
 925 | dependencies = [
 926 |  "windows-targets",
 927 | ]
 928 | 
 929 | [[package]]
 930 | name = "windows-targets"
 931 | version = "0.52.6"
 932 | source = "registry+https://github.com/rust-lang/crates.io-index"
 933 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 934 | dependencies = [
 935 |  "windows_aarch64_gnullvm",
 936 |  "windows_aarch64_msvc",
 937 |  "windows_i686_gnu",
 938 |  "windows_i686_gnullvm",
 939 |  "windows_i686_msvc",
 940 |  "windows_x86_64_gnu",
 941 |  "windows_x86_64_gnullvm",
 942 |  "windows_x86_64_msvc",
 943 | ]
 944 | 
 945 | [[package]]
 946 | name = "windows_aarch64_gnullvm"
 947 | version = "0.52.6"
 948 | source = "registry+https://github.com/rust-lang/crates.io-index"
 949 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 950 | 
 951 | [[package]]
 952 | name = "windows_aarch64_msvc"
 953 | version = "0.52.6"
 954 | source = "registry+https://github.com/rust-lang/crates.io-index"
 955 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 956 | 
 957 | [[package]]
 958 | name = "windows_i686_gnu"
 959 | version = "0.52.6"
 960 | source = "registry+https://github.com/rust-lang/crates.io-index"
 961 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
 962 | 
 963 | [[package]]
 964 | name = "windows_i686_gnullvm"
 965 | version = "0.52.6"
 966 | source = "registry+https://github.com/rust-lang/crates.io-index"
 967 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 968 | 
 969 | [[package]]
 970 | name = "windows_i686_msvc"
 971 | version = "0.52.6"
 972 | source = "registry+https://github.com/rust-lang/crates.io-index"
 973 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 974 | 
 975 | [[package]]
 976 | name = "windows_x86_64_gnu"
 977 | version = "0.52.6"
 978 | source = "registry+https://github.com/rust-lang/crates.io-index"
 979 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 980 | 
 981 | [[package]]
 982 | name = "windows_x86_64_gnullvm"
 983 | version = "0.52.6"
 984 | source = "registry+https://github.com/rust-lang/crates.io-index"
 985 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 986 | 
 987 | [[package]]
 988 | name = "windows_x86_64_msvc"
 989 | version = "0.52.6"
 990 | source = "registry+https://github.com/rust-lang/crates.io-index"
 991 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 992 | 
 993 | [[package]]
 994 | name = "wit-bindgen-rt"
 995 | version = "0.33.0"
 996 | source = "registry+https://github.com/rust-lang/crates.io-index"
 997 | checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
 998 | dependencies = [
 999 |  "bitflags",
1000 | ]
1001 | 
1002 | [[package]]
1003 | name = "zerocopy"
1004 | version = "0.8.26"
1005 | source = "registry+https://github.com/rust-lang/crates.io-index"
1006 | checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
1007 | dependencies = [
1008 |  "zerocopy-derive",
1009 | ]
1010 | 
1011 | [[package]]
1012 | name = "zerocopy-derive"
1013 | version = "0.8.26"
1014 | source = "registry+https://github.com/rust-lang/crates.io-index"
1015 | checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
1016 | dependencies = [
1017 |  "proc-macro2",
1018 |  "quote",
1019 |  "syn 2.0.100",
1020 | ]
1021 | 


--------------------------------------------------------------------------------
/src/binary16/arch.rs:
--------------------------------------------------------------------------------
  1 | #![allow(dead_code, unused_imports)]
  2 | use crate::leading_zeros::leading_zeros_u16;
  3 | use core::mem;
  4 | 
  5 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
  6 | mod x86;
  7 | 
  8 | #[cfg(target_arch = "aarch64")]
  9 | mod aarch64;
 10 | 
 11 | #[cfg(all(feature = "nightly", target_arch = "loongarch64"))]
 12 | mod loongarch64;
 13 | 
 14 | macro_rules! convert_fn {
          // Selects one of four caller-supplied expressions for a conversion routine.
          //
          // The invocation must spell out all four arms literally:
          //     if x86_feature("f16c") { .. }
          //     else if aarch64_feature("fp16") { .. }
          //     else if loongarch64_feature("lsx") { .. }
          //     else { .. }
          // The `x86_feature(..)`-style conditions are only matched as literal tokens; the real
          // selection is made by the `cfg_if!` chain below, in this order:
          //   1. the CPU feature is enabled for the compile target -> that arch's expression,
          //      with no runtime check;
          //   2. `std` is enabled on a matching arch -> runtime feature detection, using
          //      `$fallback` when the feature is absent;
          //   3. anything else -> `$fallback`.
          // Both loongarch64 branches additionally require the `nightly` crate feature.
 15 |     (if x86_feature("f16c") { $f16c:expr }
 16 |     else if aarch64_feature("fp16") { $aarch64:expr }
 17 |     else if loongarch64_feature("lsx") { $loongarch64:expr }
 18 |     else { $fallback:expr }) => {
 19 |         cfg_if::cfg_if! {
 20 |             // Use intrinsics directly when the feature is enabled at compile time (no `std` needed)
 21 |             if #[cfg(all(
 22 |                 any(target_arch = "x86", target_arch = "x86_64"),
 23 |                 target_feature = "f16c"
 24 |             ))] {
 25 |                 $f16c
 26 |             }
 27 |             else if #[cfg(all(
 28 |                 target_arch = "aarch64",
 29 |                 target_feature = "fp16"
 30 |             ))] {
 31 |                 $aarch64
 32 |             }
 33 |             else if #[cfg(all(
 34 |                 feature = "nightly",
 35 |                 target_arch = "loongarch64",
 36 |                 target_feature = "lsx"
 37 |             ))] {
 38 |                 $loongarch64
 39 |             }
 40 | 
 41 |             // Use CPU feature detection if using std
 42 |             else if #[cfg(all(
 43 |                 feature = "std",
 44 |                 any(target_arch = "x86", target_arch = "x86_64")
 45 |             ))] {
 46 |                 use std::arch::is_x86_feature_detected;
 47 |                 if is_x86_feature_detected!("f16c") {
 48 |                     $f16c
 49 |                 } else {
 50 |                     $fallback
 51 |                 }
 52 |             }
 53 |             else if #[cfg(all(
 54 |                 feature = "std",
 55 |                 target_arch = "aarch64",
 56 |             ))] {
 57 |                 use std::arch::is_aarch64_feature_detected;
 58 |                 if is_aarch64_feature_detected!("fp16") {
 59 |                     $aarch64
 60 |                 } else {
 61 |                     $fallback
 62 |                 }
 63 |             }
 64 |             else if #[cfg(all(
 65 |                 feature = "std",
 66 |                 feature = "nightly",
 67 |                 target_arch = "loongarch64",
 68 |             ))] {
 69 |                 use std::arch::is_loongarch_feature_detected;
 70 |                 if is_loongarch_feature_detected!("lsx") {
 71 |                     $loongarch64
 72 |                 } else {
 73 |                     $fallback
 74 |                 }
 75 |             }
 76 | 
 77 |             // Fallback to software
 78 |             else {
 79 |                 $fallback
 80 |             }
 81 |         }
 82 |     };
 83 | }
 84 | 
 85 | #[inline]
      /// Converts an `f32` to a `u16` (presumably the raw binary16 bits, going by the function
      /// and module names).
      ///
      /// `convert_fn!` picks the implementation: the F16C routine on x86/x86_64, the fp16
      /// routine on aarch64, the LSX routine on loongarch64, and `f32_to_f16_fallback` when
      /// none of those is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
 86 | pub(crate) fn f32_to_f16(f: f32) -> u16 {
 87 |     convert_fn! {
 88 |         if x86_feature("f16c") {
 89 |             unsafe { x86::f32_to_f16_x86_f16c(f) }
 90 |         } else if aarch64_feature("fp16") {
 91 |             unsafe { aarch64::f32_to_f16_fp16(f) }
 92 |         } else if loongarch64_feature("lsx") {
 93 |             unsafe { loongarch64::f32_to_f16_lsx(f) }
 94 |         } else {
 95 |             f32_to_f16_fallback(f)
 96 |         }
 97 |     }
 98 | }
 99 | 
100 | #[inline]
      /// Converts an `f64` to a `u16` (presumably the raw binary16 bits, going by the function
      /// and module names).
      ///
      /// `convert_fn!` picks the implementation. Only aarch64 has a dedicated `f64` routine:
      /// the x86 arm narrows to `f32` and reuses the F16C `f32` routine, and the loongarch64
      /// arm calls the same `f64_to_f16_fallback` as the final `else` arm.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
101 | pub(crate) fn f64_to_f16(f: f64) -> u16 {
102 |     convert_fn! {
103 |         if x86_feature("f16c") {
                  // NOTE(review): `f as f32` rounds once and the f32 -> f16 step rounds again, so
                  // this arm may differ from a direct f64 -> f16 rounding near ties. Confirm that
                  // double rounding is acceptable here.
104 |             unsafe { x86::f32_to_f16_x86_f16c(f as f32) }
105 |         } else if aarch64_feature("fp16") {
106 |             unsafe { aarch64::f64_to_f16_fp16(f) }
107 |         } else if loongarch64_feature("lsx") {
                  // No LSX routine is used for f64 input; this arm is the software path.
108 |             f64_to_f16_fallback(f)
109 |         } else {
110 |             f64_to_f16_fallback(f)
111 |         }
112 |     }
113 | }
114 | 
115 | #[inline]
      /// Converts a `u16` (presumably raw binary16 bits, going by the function and module
      /// names) to an `f32`.
      ///
      /// `convert_fn!` picks the implementation: the F16C routine on x86/x86_64, the fp16
      /// routine on aarch64, the LSX routine on loongarch64, and `f16_to_f32_fallback` when
      /// none of those is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
116 | pub(crate) fn f16_to_f32(i: u16) -> f32 {
117 |     convert_fn! {
118 |         if x86_feature("f16c") {
119 |             unsafe { x86::f16_to_f32_x86_f16c(i) }
120 |         } else if aarch64_feature("fp16") {
121 |             unsafe { aarch64::f16_to_f32_fp16(i) }
122 |         } else if loongarch64_feature("lsx") {
123 |             unsafe { loongarch64::f16_to_f32_lsx(i) }
124 |         } else {
125 |             f16_to_f32_fallback(i)
126 |         }
127 |     }
128 | }
129 | 
130 | #[inline]
      /// Converts a `u16` (presumably raw binary16 bits, going by the function and module
      /// names) to an `f64`.
      ///
      /// `convert_fn!` picks the implementation. Only aarch64 has a dedicated `f64` routine:
      /// the x86 and loongarch64 arms convert to `f32` with the hardware routine and then
      /// widen the result with `as f64`. `f16_to_f64_fallback` is used when no hardware arm
      /// is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
131 | pub(crate) fn f16_to_f64(i: u16) -> f64 {
132 |     convert_fn! {
133 |         if x86_feature("f16c") {
134 |             unsafe { x86::f16_to_f32_x86_f16c(i) as f64 }
135 |         } else if aarch64_feature("fp16") {
136 |             unsafe { aarch64::f16_to_f64_fp16(i) }
137 |         } else if loongarch64_feature("lsx") {
138 |             unsafe { loongarch64::f16_to_f32_lsx(i) as f64 }
139 |         } else {
140 |             f16_to_f64_fallback(i)
141 |         }
142 |     }
143 | }
144 | 
145 | #[inline]
      /// Converts four `f32` values to four `u16` values (presumably raw binary16 bits, going
      /// by the function and module names).
      ///
      /// `convert_fn!` picks the implementation: the F16C routine on x86/x86_64, the fp16
      /// routine on aarch64, the LSX routine on loongarch64, and `f32x4_to_f16x4_fallback`
      /// when none of those is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
146 | pub(crate) fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] {
147 |     convert_fn! {
148 |         if x86_feature("f16c") {
149 |             unsafe { x86::f32x4_to_f16x4_x86_f16c(f) }
150 |         } else if aarch64_feature("fp16") {
151 |             unsafe { aarch64::f32x4_to_f16x4_fp16(f) }
152 |         } else if loongarch64_feature("lsx") {
153 |             unsafe { loongarch64::f32x4_to_f16x4_lsx(f) }
154 |         } else {
155 |             f32x4_to_f16x4_fallback(f)
156 |         }
157 |     }
158 | }
159 | 
160 | #[inline]
      /// Converts four `u16` values (presumably raw binary16 bits, going by the function and
      /// module names) to four `f32` values.
      ///
      /// `convert_fn!` picks the implementation: the F16C routine on x86/x86_64, the fp16
      /// routine on aarch64, the LSX routine on loongarch64, and `f16x4_to_f32x4_fallback`
      /// when none of those is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
161 | pub(crate) fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] {
162 |     convert_fn! {
163 |         if x86_feature("f16c") {
164 |             unsafe { x86::f16x4_to_f32x4_x86_f16c(i) }
165 |         } else if aarch64_feature("fp16") {
166 |             unsafe { aarch64::f16x4_to_f32x4_fp16(i) }
167 |         } else if loongarch64_feature("lsx") {
168 |             unsafe { loongarch64::f16x4_to_f32x4_lsx(i) }
169 |         } else {
170 |             f16x4_to_f32x4_fallback(i)
171 |         }
172 |     }
173 | }
174 | 
175 | #[inline]
      /// Converts four `f64` values to four `u16` values (presumably raw binary16 bits, going
      /// by the function and module names).
      ///
      /// `convert_fn!` picks the implementation: the F16C routine on x86/x86_64, the fp16
      /// routine on aarch64, the LSX routine on loongarch64, and `f64x4_to_f16x4_fallback`
      /// when none of those is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
176 | pub(crate) fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] {
177 |     convert_fn! {
178 |         if x86_feature("f16c") {
179 |             unsafe { x86::f64x4_to_f16x4_x86_f16c(f) }
180 |         } else if aarch64_feature("fp16") {
181 |             unsafe { aarch64::f64x4_to_f16x4_fp16(f) }
182 |         } else if loongarch64_feature("lsx") {
183 |             unsafe { loongarch64::f64x4_to_f16x4_lsx(f) }
184 |         } else {
185 |             f64x4_to_f16x4_fallback(f)
186 |         }
187 |     }
188 | }
189 | 
190 | #[inline]
      /// Converts four `u16` values (presumably raw binary16 bits, going by the function and
      /// module names) to four `f64` values.
      ///
      /// `convert_fn!` picks the implementation: the F16C routine on x86/x86_64, the fp16
      /// routine on aarch64, the LSX routine on loongarch64, and `f16x4_to_f64x4_fallback`
      /// when none of those is selected.
      ///
      /// The `unsafe` arms are only reached when `convert_fn!` found the named CPU feature
      /// enabled at compile time or (with `std`) detected it at runtime. The callees are not
      /// visible here, so that is assumed to be their only safety requirement.
191 | pub(crate) fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] {
192 |     convert_fn! {
193 |         if x86_feature("f16c") {
194 |             unsafe { x86::f16x4_to_f64x4_x86_f16c(i) }
195 |         } else if aarch64_feature("fp16") {
196 |             unsafe { aarch64::f16x4_to_f64x4_fp16(i) }
197 |         } else if loongarch64_feature("lsx") {
198 |             unsafe { loongarch64::f16x4_to_f64x4_lsx(i) }
199 |         } else {
200 |             f16x4_to_f64x4_fallback(i)
201 |         }
202 |     }
203 | }
204 | 
205 | #[inline]
      /// Converts eight `f32` values to eight `u16` values (presumably raw binary16 bits, going
      /// by the function and module names).
      ///
      /// `convert_fn!` picks the implementation. Only x86 has a dedicated 8-lane routine. The
      /// aarch64 and loongarch64 arms fill a zero-initialised `[u16; 8]` by handing the 4-lane
      /// routine to `convert_chunked_slice_4` (not visible here; presumably it applies the
      /// routine to each group of four elements). `f32x8_to_f16x8_fallback` is used when no
      /// hardware arm is selected.
      ///
      /// The `unsafe` x86 arm is only reached when `convert_fn!` found `f16c` enabled at
      /// compile time or (with `std`) detected it at runtime.
206 | pub(crate) fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] {
207 |     convert_fn! {
208 |         if x86_feature("f16c") {
209 |             unsafe { x86::f32x8_to_f16x8_x86_f16c(f) }
210 |         } else if aarch64_feature("fp16") {
211 |             {
212 |                 let mut result = [0u16; 8];
213 |                 convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
214 |                     aarch64::f32x4_to_f16x4_fp16);
215 |                 result
216 |             }
217 |         } else if loongarch64_feature("lsx") {
218 |             {
219 |                 let mut result = [0u16; 8];
220 |                 convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
221 |                     loongarch64::f32x4_to_f16x4_lsx);
222 |                 result
223 |             }
224 |         } else {
225 |             f32x8_to_f16x8_fallback(f)
226 |         }
227 |     }
228 | }
229 | 
230 | #[inline]
      /// Converts eight `u16` values (presumably raw binary16 bits, going by the function and
      /// module names) to eight `f32` values.
      ///
      /// `convert_fn!` picks the implementation. Only x86 has a dedicated 8-lane routine. The
      /// aarch64 and loongarch64 arms fill a zero-initialised `[f32; 8]` by handing the 4-lane
      /// routine to `convert_chunked_slice_4` (not visible here; presumably it applies the
      /// routine to each group of four elements). `f16x8_to_f32x8_fallback` is used when no
      /// hardware arm is selected.
      ///
      /// The `unsafe` x86 arm is only reached when `convert_fn!` found `f16c` enabled at
      /// compile time or (with `std`) detected it at runtime.
231 | pub(crate) fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] {
232 |     convert_fn! {
233 |         if x86_feature("f16c") {
234 |             unsafe { x86::f16x8_to_f32x8_x86_f16c(i) }
235 |         } else if aarch64_feature("fp16") {
236 |             {
237 |                 let mut result = [0f32; 8];
238 |                 convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
239 |                     aarch64::f16x4_to_f32x4_fp16);
240 |                 result
241 |             }
242 |         } else if loongarch64_feature("lsx") {
243 |             {
244 |                 let mut result = [0f32; 8];
245 |                 convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
246 |                     loongarch64::f16x4_to_f32x4_lsx);
247 |                 result
248 |             }
249 |         } else {
250 |             f16x8_to_f32x8_fallback(i)
251 |         }
252 |     }
253 | }
254 | 
255 | #[inline]
256 | pub(crate) fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] {
257 |     convert_fn! {
258 |         if x86_feature("f16c") {
259 |             unsafe { x86::f64x8_to_f16x8_x86_f16c(f) }
260 |         } else if aarch64_feature("fp16") {
261 |             {
262 |                 let mut result = [0u16; 8];
263 |                 convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
264 |                     aarch64::f64x4_to_f16x4_fp16);
265 |                 result
266 |             }
267 |         } else if loongarch64_feature("lsx") {
268 |             {
269 |                 let mut result = [0u16; 8];
270 |                 convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
271 |                     loongarch64::f64x4_to_f16x4_lsx);
272 |                 result
273 |             }
274 |         } else {
275 |             f64x8_to_f16x8_fallback(f)
276 |         }
277 |     }
278 | }
279 | 
280 | #[inline]
281 | pub(crate) fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] {
282 |     convert_fn! {
283 |         if x86_feature("f16c") {
284 |             unsafe { x86::f16x8_to_f64x8_x86_f16c(i) }
285 |         } else if aarch64_feature("fp16") {
286 |             {
287 |                 let mut result = [0f64; 8];
288 |                 convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
289 |                     aarch64::f16x4_to_f64x4_fp16);
290 |                 result
291 |             }
292 |         } else if loongarch64_feature("lsx") {
293 |             {
294 |                 let mut result = [0f64; 8];
295 |                 convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
296 |                     loongarch64::f16x4_to_f64x4_lsx);
297 |                 result
298 |             }
299 |         } else {
300 |             f16x8_to_f64x8_fallback(i)
301 |         }
302 |     }
303 | }
304 | 
/// Converts every `f32` in `src` to binary16 bits and stores the results in `dst`.
///
/// # Panics
///
/// Each helper called below asserts `src.len() == dst.len()`, so mismatched lengths panic.
#[inline]
pub(crate) fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) {
    convert_fn! {
        if x86_feature("f16c") {
            // Eight lanes at a time; the four-lane routine is used for a tail of 1..=4.
            convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c,
                x86::f32x4_to_f16x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, aarch64::f32x4_to_f16x4_fp16)
        } else if loongarch64_feature("lsx") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, loongarch64::f32x4_to_f16x4_lsx)
        } else {
            // Element-by-element software conversion.
            slice_fallback(src, dst, f32_to_f16_fallback)
        }
    }
}
320 | 
/// Converts every binary16 bit pattern in `src` to `f32` and stores the results in `dst`.
///
/// # Panics
///
/// Each helper called below asserts `src.len() == dst.len()`, so mismatched lengths panic.
#[inline]
pub(crate) fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) {
    convert_fn! {
        if x86_feature("f16c") {
            // Eight lanes at a time; the four-lane routine is used for a tail of 1..=4.
            convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c,
                x86::f16x4_to_f32x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f32x4_fp16)
        } else if loongarch64_feature("lsx") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, loongarch64::f16x4_to_f32x4_lsx)
        } else {
            // Element-by-element software conversion.
            slice_fallback(src, dst, f16_to_f32_fallback)
        }
    }
}
336 | 
/// Converts every `f64` in `src` to binary16 bits and stores the results in `dst`.
///
/// # Panics
///
/// Each helper called below asserts `src.len() == dst.len()`, so mismatched lengths panic.
#[inline]
pub(crate) fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) {
    convert_fn! {
        if x86_feature("f16c") {
            // Eight lanes at a time; the four-lane routine is used for a tail of 1..=4.
            convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c,
                x86::f64x4_to_f16x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, aarch64::f64x4_to_f16x4_fp16)
        } else if loongarch64_feature("lsx") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, loongarch64::f64x4_to_f16x4_lsx)
        } else {
            // Element-by-element software conversion.
            slice_fallback(src, dst, f64_to_f16_fallback)
        }
    }
}
352 | 
/// Converts every binary16 bit pattern in `src` to `f64` and stores the results in `dst`.
///
/// # Panics
///
/// Each helper called below asserts `src.len() == dst.len()`, so mismatched lengths panic.
#[inline]
pub(crate) fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) {
    convert_fn! {
        if x86_feature("f16c") {
            // Eight lanes at a time; the four-lane routine is used for a tail of 1..=4.
            convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c,
                x86::f16x4_to_f64x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f64x4_fp16)
        } else if loongarch64_feature("lsx") {
            // Four lanes at a time.
            convert_chunked_slice_4(src, dst, loongarch64::f16x4_to_f64x4_lsx)
        } else {
            // Element-by-element software conversion.
            slice_fallback(src, dst, f16_to_f64_fallback)
        }
    }
}
368 | 
// Selects between a hardware (aarch64 `fp16`) expression and a software expression.
//
// The invocation must be written exactly as
// `if aarch64_feature("fp16") { <hardware> } else { <software> }`.
macro_rules! math_fn {
    (if aarch64_feature("fp16") { $hardware:expr }
    else { $software:expr }) => {
        cfg_if::cfg_if! {
            // `fp16` is enabled for the whole build: no run-time check is needed, which also
            // makes this the only hardware path available without `std`.
            if #[cfg(all(
                target_arch = "aarch64",
                target_feature = "fp16"
            ))] {
                $hardware
            }

            // `fp16` is not guaranteed at compile time but `std` is available: ask the CPU.
            else if #[cfg(all(
                feature = "std",
                target_arch = "aarch64",
                not(target_feature = "fp16")
            ))] {
                if std::arch::is_aarch64_feature_detected!("fp16") {
                    $hardware
                } else {
                    $software
                }
            }

            // Any other target or configuration: software only.
            else {
                $software
            }
        }
    };
}
402 | 
/// Adds two binary16 values given as raw bit patterns and returns the result's bits.
///
/// `math_fn!` picks the aarch64 `fp16` routine when that feature is enabled at compile time or
/// detected at run time; otherwise the software fallback is used.
#[inline]
pub(crate) fn add_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // SAFETY: `math_fn!` only reaches this arm when `fp16` is available; presumably that
            // is the callee's sole requirement — confirm in the aarch64 module.
            unsafe { aarch64::add_f16_fp16(a, b) }
        } else {
            add_f16_fallback(a, b)
        }
    }
}
413 | 
/// Subtracts `b` from `a`, both given as raw binary16 bit patterns, and returns the result's bits.
///
/// `math_fn!` picks the aarch64 `fp16` routine when that feature is enabled at compile time or
/// detected at run time; otherwise the software fallback is used.
#[inline]
pub(crate) fn subtract_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // SAFETY: `math_fn!` only reaches this arm when `fp16` is available; presumably that
            // is the callee's sole requirement — confirm in the aarch64 module.
            unsafe { aarch64::subtract_f16_fp16(a, b) }
        } else {
            subtract_f16_fallback(a, b)
        }
    }
}
424 | 
/// Multiplies two binary16 values given as raw bit patterns and returns the result's bits.
///
/// `math_fn!` picks the aarch64 `fp16` routine when that feature is enabled at compile time or
/// detected at run time; otherwise the software fallback is used.
#[inline]
pub(crate) fn multiply_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // SAFETY: `math_fn!` only reaches this arm when `fp16` is available; presumably that
            // is the callee's sole requirement — confirm in the aarch64 module.
            unsafe { aarch64::multiply_f16_fp16(a, b) }
        } else {
            multiply_f16_fallback(a, b)
        }
    }
}
435 | 
/// Divides `a` by `b`, both given as raw binary16 bit patterns, and returns the result's bits.
///
/// `math_fn!` picks the aarch64 `fp16` routine when that feature is enabled at compile time or
/// detected at run time; otherwise the software fallback is used.
#[inline]
pub(crate) fn divide_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // SAFETY: `math_fn!` only reaches this arm when `fp16` is available; presumably that
            // is the callee's sole requirement — confirm in the aarch64 module.
            unsafe { aarch64::divide_f16_fp16(a, b) }
        } else {
            divide_f16_fallback(a, b)
        }
    }
}
446 | 
/// Computes the remainder `a % b` of two binary16 values given as raw bit patterns.
///
/// Unlike the other arithmetic entry points in this file, there is no `math_fn!` dispatch
/// here: the software fallback is always used.
#[inline]
pub(crate) fn remainder_f16(a: u16, b: u16) -> u16 {
    remainder_f16_fallback(a, b)
}
451 | 
452 | #[inline]
453 | pub(crate) fn product_f16<I: Iterator<Item = u16>>(iter: I) -> u16 {
454 |     math_fn! {
455 |         if aarch64_feature("fp16") {
456 |             iter.fold(0, |acc, x| unsafe { aarch64::multiply_f16_fp16(acc, x) })
457 |         } else {
458 |             product_f16_fallback(iter)
459 |         }
460 |     }
461 | }
462 | 
/// Adds together all binary16 bit patterns yielded by `iter`.
///
/// The `fp16` arm starts from `0` (the bits of `+0.0`) and produces a `u16` after every
/// addition, whereas the fallback adds in `f32` and converts once at the end.
#[inline]
pub(crate) fn sum_f16<I: Iterator<Item = u16>>(iter: I) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // SAFETY: `math_fn!` only reaches this arm when `fp16` is available; presumably that
            // is the callee's sole requirement — confirm in the aarch64 module.
            iter.fold(0, |acc, x| unsafe { aarch64::add_f16_fp16(acc, x) })
        } else {
            sum_f16_fallback(iter)
        }
    }
}
473 | 
/// Converts `src` into `dst` eight lanes at a time.
///
/// Full groups of eight go through `fn8`. A leftover tail of 5..=7 elements is copied into a
/// default-filled eight-lane buffer and also goes through `fn8`; a tail of 1..=4 elements is
/// copied into a default-filled four-lane buffer and goes through `fn4`. Only the lanes that
/// correspond to real input are written back.
///
/// # Panics
///
/// Panics if `src` and `dst` differ in length.
#[inline]
fn convert_chunked_slice_8<S: Copy + Default, D: Copy>(
    src: &[S],
    dst: &mut [D],
    fn8: unsafe fn(&[S; 8]) -> [D; 8],
    fn4: unsafe fn(&[S; 4]) -> [D; 4],
) {
    assert_eq!(src.len(), dst.len());

    // TODO: Can be further optimized with array_chunks when it becomes stabilized

    // Split both slices into the part made of whole groups and the leftover tail.
    let tail_len = src.len() % 8;
    let (src_body, src_tail) = src.split_at(src.len() - tail_len);
    let (dst_body, dst_tail) = dst.split_at_mut(src_body.len());

    for (input, output) in src_body.chunks_exact(8).zip(dst_body.chunks_exact_mut(8)) {
        let lanes: &[S; 8] = input.try_into().unwrap();
        let converted = unsafe { fn8(lanes) };
        output.copy_from_slice(&converted);
    }

    match tail_len {
        0 => {}
        1..=4 => {
            let mut padded: [S; 4] = Default::default();
            padded[..tail_len].copy_from_slice(src_tail);
            let converted = unsafe { fn4(&padded) };
            dst_tail.copy_from_slice(&converted[..tail_len]);
        }
        _ => {
            let mut padded: [S; 8] = Default::default();
            padded[..tail_len].copy_from_slice(src_tail);
            let converted = unsafe { fn8(&padded) };
            dst_tail.copy_from_slice(&converted[..tail_len]);
        }
    }
}
509 | 
/// Converts `src` into `dst` four lanes at a time.
///
/// Full groups of four go through `f` directly. A leftover tail of 1..=3 elements is copied
/// into a default-filled four-lane buffer, converted with `f`, and only the lanes that
/// correspond to real input are written back.
///
/// # Panics
///
/// Panics if `src` and `dst` differ in length.
#[inline]
fn convert_chunked_slice_4<S: Copy + Default, D: Copy>(
    src: &[S],
    dst: &mut [D],
    f: unsafe fn(&[S; 4]) -> [D; 4],
) {
    assert_eq!(src.len(), dst.len());

    // TODO: Can be further optimized with array_chunks when it becomes stabilized

    // Split both slices into the part made of whole groups and the leftover tail.
    let tail_len = src.len() % 4;
    let (src_body, src_tail) = src.split_at(src.len() - tail_len);
    let (dst_body, dst_tail) = dst.split_at_mut(src_body.len());

    for (input, output) in src_body.chunks_exact(4).zip(dst_body.chunks_exact_mut(4)) {
        let lanes: &[S; 4] = input.try_into().unwrap();
        let converted = unsafe { f(lanes) };
        output.copy_from_slice(&converted);
    }

    if tail_len != 0 {
        let mut padded: [S; 4] = Default::default();
        padded[..tail_len].copy_from_slice(src_tail);
        let converted = unsafe { f(&padded) };
        dst_tail.copy_from_slice(&converted[..tail_len]);
    }
}
538 | 
539 | /////////////// Fallbacks ////////////////
540 | 
541 | // In the below functions, round to nearest, with ties to even.
542 | // Let us call the most significant bit that will be shifted out the round_bit.
543 | //
544 | // Round up if either
545 | //  a) Removed part > tie.
546 | //     (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0
547 | //  b) Removed part == tie, and retained part is odd.
548 | //     (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0
549 | // (If removed part == tie and retained part is even, do not round up.)
550 | // These two conditions can be combined into one:
551 | //     (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0
552 | // which can be simplified into
553 | //     (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0
554 | 
/// Converts an `f32` to binary16 bits in software, rounding to nearest with ties to even.
///
/// Infinities stay infinities, NaNs stay NaNs (with the top mantissa bit forced on), values
/// too large for binary16 become a signed infinity and values too small become a signed zero.
#[inline]
pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 {
    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
    let bits: u32 = unsafe { mem::transmute::<f32, u32>(value) };

    // Split into IEEE754 fields; the sign is moved straight to its binary16 position.
    let sign16 = (bits & 0x8000_0000u32) >> 16;
    let exp_field = bits & 0x7F80_0000u32;
    let frac = bits & 0x007F_FFFFu32;

    // All exponent bits set: infinity or NaN.
    if exp_field == 0x7F80_0000u32 {
        // Force the top mantissa bit for NaN so a payload that shifts out entirely cannot
        // turn the NaN into an infinity.
        let quiet = if frac != 0 { 0x0200u32 } else { 0 };
        return (sign16 | 0x7C00u32 | quiet | (frac >> 13)) as u16;
    }

    // Exponent re-biased from f32 (127) to binary16 (15).
    let rebased = ((exp_field >> 23) as i32) - 127 + 15;

    // Too large for binary16: signed infinity.
    if rebased >= 0x1F {
        return (sign16 | 0x7C00u32) as u16;
    }

    if rebased > 0 {
        // Normal result. Adding one to the packed value on round-up lets a mantissa carry
        // ripple into the exponent field.
        let packed = sign16 | ((rebased as u32) << 10) | (frac >> 13);
        let round_bit = 0x0000_1000u32;
        let round_up = (frac & round_bit) != 0 && (frac & (3 * round_bit - 1)) != 0;
        return if round_up {
            (packed + 1) as u16
        } else {
            packed as u16
        };
    }

    // Subnormal result or underflow. A shift of more than 24 leaves neither a retained bit
    // nor a round bit, so the result is a signed zero.
    if rebased < -10 {
        return sign16 as u16;
    }

    // Restore the hidden leading bit before shifting the mantissa into place.
    let full = frac | 0x0080_0000u32;
    let round_bit = 1u32 << (13 - rebased);
    let mut half_frac = full >> (14 - rebased);
    if (full & round_bit) != 0 && (full & (3 * round_bit - 1)) != 0 {
        half_frac += 1;
    }
    // Subnormals carry no exponent bits.
    (sign16 | half_frac) as u16
}
615 | 
/// Converts an `f64` to binary16 bits in software, rounding to nearest with ties to even.
///
/// The bit manipulation works on the upper 32 bits of the `f64`. The lower 32 mantissa bits
/// can never be retained, but they are kept as a single "sticky" flag: a value whose upper
/// bits look like an exact tie is really above the tie whenever any lower bit is set, and
/// must then be rounded up.
#[inline]
pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 {
    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
    let val: u64 = unsafe { mem::transmute::<f64, u64>(value) };
    let x = (val >> 32) as u32;
    // True when any of the 32 discarded low mantissa bits is set.
    let sticky = (val as u32) != 0;

    // Extract IEEE754 components
    let sign = x & 0x8000_0000u32;
    let exp = x & 0x7FF0_0000u32;
    let man = x & 0x000F_FFFFu32;

    // Check for all exponent bits being set, which is Infinity or NaN
    if exp == 0x7FF0_0000u32 {
        // Set mantissa MSB for NaN (and also keep shifted mantissa bits).
        // A payload that lives only in the low 32 bits still makes this a NaN.
        let nan_bit = if man == 0 && !sticky { 0 } else { 0x0200u32 };
        return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16;
    }

    // The number is normalized, start assembling half precision version
    let half_sign = sign >> 16;
    // Unbias the exponent, then bias for half precision
    let unbiased_exp = ((exp >> 20) as i64) - 1023;
    let half_exp = unbiased_exp + 15;

    // Check for exponent overflow, return signed infinity
    if half_exp >= 0x1F {
        return (half_sign | 0x7C00u32) as u16;
    }

    // Check for underflow
    if half_exp <= 0 {
        // Check mantissa for what we can do
        if 10 - half_exp > 21 {
            // No rounding possibility, so this is a full underflow, return signed zero
            return half_sign as u16;
        }
        // Don't forget about hidden leading mantissa bit when assembling mantissa
        let man = man | 0x0010_0000u32;
        let mut half_man = man >> (11 - half_exp);
        // Round up when above the tie (including via the sticky low bits), or on a tie with
        // an odd retained part.
        let round_bit = 1 << (10 - half_exp);
        if (man & round_bit) != 0 && ((man & (3 * round_bit - 1)) != 0 || sticky) {
            half_man += 1;
        }
        // No exponent for subnormals
        return (half_sign | half_man) as u16;
    }

    // Rebias the exponent
    let half_exp = (half_exp as u32) << 10;
    let half_man = man >> 10;
    // Same rounding rule as above, with the round bit at a fixed position
    let round_bit = 0x0000_0200u32;
    if (man & round_bit) != 0 && ((man & (3 * round_bit - 1)) != 0 || sticky) {
        // Round it; a mantissa carry ripples into the exponent field
        ((half_sign | half_exp | half_man) + 1) as u16
    } else {
        (half_sign | half_exp | half_man) as u16
    }
}
683 | 
684 | #[inline]
685 | pub(crate) const fn f16_to_f32_fallback(i: u16) -> f32 {
686 |     // Check for signed zero
687 |     // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
688 |     if i & 0x7FFFu16 == 0 {
689 |         return unsafe { mem::transmute::<u32, f32>((i as u32) << 16) };
690 |     }
691 | 
692 |     let half_sign = (i & 0x8000u16) as u32;
693 |     let half_exp = (i & 0x7C00u16) as u32;
694 |     let half_man = (i & 0x03FFu16) as u32;
695 | 
696 |     // Check for an infinity or NaN when all exponent bits set
697 |     if half_exp == 0x7C00u32 {
698 |         // Check for signed infinity if mantissa is zero
699 |         if half_man == 0 {
700 |             return unsafe { mem::transmute::<u32, f32>((half_sign << 16) | 0x7F80_0000u32) };
701 |         } else {
702 |             // NaN, keep current mantissa but also set most significiant mantissa bit
703 |             return unsafe {
704 |                 mem::transmute::<u32, f32>((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13))
705 |             };
706 |         }
707 |     }
708 | 
709 |     // Calculate single-precision components with adjusted exponent
710 |     let sign = half_sign << 16;
711 |     // Unbias exponent
712 |     let unbiased_exp = ((half_exp as i32) >> 10) - 15;
713 | 
714 |     // Check for subnormals, which will be normalized by adjusting exponent
715 |     if half_exp == 0 {
716 |         // Calculate how much to adjust the exponent by
717 |         let e = leading_zeros_u16(half_man as u16) - 6;
718 | 
719 |         // Rebias and adjust exponent
720 |         let exp = (127 - 15 - e) << 23;
721 |         let man = (half_man << (14 + e)) & 0x7F_FF_FFu32;
722 |         return unsafe { mem::transmute::<u32, f32>(sign | exp | man) };
723 |     }
724 | 
725 |     // Rebias exponent for a normalized normal
726 |     let exp = ((unbiased_exp + 127) as u32) << 23;
727 |     let man = (half_man & 0x03FFu32) << 13;
728 |     unsafe { mem::transmute::<u32, f32>(sign | exp | man) }
729 | }
730 | 
731 | #[inline]
732 | pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 {
733 |     // Check for signed zero
734 |     // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
735 |     if i & 0x7FFFu16 == 0 {
736 |         return unsafe { mem::transmute::<u64, f64>((i as u64) << 48) };
737 |     }
738 | 
739 |     let half_sign = (i & 0x8000u16) as u64;
740 |     let half_exp = (i & 0x7C00u16) as u64;
741 |     let half_man = (i & 0x03FFu16) as u64;
742 | 
743 |     // Check for an infinity or NaN when all exponent bits set
744 |     if half_exp == 0x7C00u64 {
745 |         // Check for signed infinity if mantissa is zero
746 |         if half_man == 0 {
747 |             return unsafe {
748 |                 mem::transmute::<u64, f64>((half_sign << 48) | 0x7FF0_0000_0000_0000u64)
749 |             };
750 |         } else {
751 |             // NaN, keep current mantissa but also set most significiant mantissa bit
752 |             return unsafe {
753 |                 mem::transmute::<u64, f64>(
754 |                     (half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42),
755 |                 )
756 |             };
757 |         }
758 |     }
759 | 
760 |     // Calculate double-precision components with adjusted exponent
761 |     let sign = half_sign << 48;
762 |     // Unbias exponent
763 |     let unbiased_exp = ((half_exp as i64) >> 10) - 15;
764 | 
765 |     // Check for subnormals, which will be normalized by adjusting exponent
766 |     if half_exp == 0 {
767 |         // Calculate how much to adjust the exponent by
768 |         let e = leading_zeros_u16(half_man as u16) - 6;
769 | 
770 |         // Rebias and adjust exponent
771 |         let exp = ((1023 - 15 - e) as u64) << 52;
772 |         let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64;
773 |         return unsafe { mem::transmute::<u64, f64>(sign | exp | man) };
774 |     }
775 | 
776 |     // Rebias exponent for a normalized normal
777 |     let exp = ((unbiased_exp + 1023) as u64) << 52;
778 |     let man = (half_man & 0x03FFu64) << 42;
779 |     unsafe { mem::transmute::<u64, f64>(sign | exp | man) }
780 | }
781 | 
782 | #[inline]
783 | fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] {
784 |     [
785 |         f16_to_f32_fallback(v[0]),
786 |         f16_to_f32_fallback(v[1]),
787 |         f16_to_f32_fallback(v[2]),
788 |         f16_to_f32_fallback(v[3]),
789 |     ]
790 | }
791 | 
792 | #[inline]
793 | fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] {
794 |     [
795 |         f32_to_f16_fallback(v[0]),
796 |         f32_to_f16_fallback(v[1]),
797 |         f32_to_f16_fallback(v[2]),
798 |         f32_to_f16_fallback(v[3]),
799 |     ]
800 | }
801 | 
802 | #[inline]
803 | fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] {
804 |     [
805 |         f16_to_f64_fallback(v[0]),
806 |         f16_to_f64_fallback(v[1]),
807 |         f16_to_f64_fallback(v[2]),
808 |         f16_to_f64_fallback(v[3]),
809 |     ]
810 | }
811 | 
812 | #[inline]
813 | fn f64x4_to_f16x4_fallback(v: &[f64; 4]) -> [u16; 4] {
814 |     [
815 |         f64_to_f16_fallback(v[0]),
816 |         f64_to_f16_fallback(v[1]),
817 |         f64_to_f16_fallback(v[2]),
818 |         f64_to_f16_fallback(v[3]),
819 |     ]
820 | }
821 | 
822 | #[inline]
823 | fn f16x8_to_f32x8_fallback(v: &[u16; 8]) -> [f32; 8] {
824 |     [
825 |         f16_to_f32_fallback(v[0]),
826 |         f16_to_f32_fallback(v[1]),
827 |         f16_to_f32_fallback(v[2]),
828 |         f16_to_f32_fallback(v[3]),
829 |         f16_to_f32_fallback(v[4]),
830 |         f16_to_f32_fallback(v[5]),
831 |         f16_to_f32_fallback(v[6]),
832 |         f16_to_f32_fallback(v[7]),
833 |     ]
834 | }
835 | 
836 | #[inline]
837 | fn f32x8_to_f16x8_fallback(v: &[f32; 8]) -> [u16; 8] {
838 |     [
839 |         f32_to_f16_fallback(v[0]),
840 |         f32_to_f16_fallback(v[1]),
841 |         f32_to_f16_fallback(v[2]),
842 |         f32_to_f16_fallback(v[3]),
843 |         f32_to_f16_fallback(v[4]),
844 |         f32_to_f16_fallback(v[5]),
845 |         f32_to_f16_fallback(v[6]),
846 |         f32_to_f16_fallback(v[7]),
847 |     ]
848 | }
849 | 
850 | #[inline]
851 | fn f16x8_to_f64x8_fallback(v: &[u16; 8]) -> [f64; 8] {
852 |     [
853 |         f16_to_f64_fallback(v[0]),
854 |         f16_to_f64_fallback(v[1]),
855 |         f16_to_f64_fallback(v[2]),
856 |         f16_to_f64_fallback(v[3]),
857 |         f16_to_f64_fallback(v[4]),
858 |         f16_to_f64_fallback(v[5]),
859 |         f16_to_f64_fallback(v[6]),
860 |         f16_to_f64_fallback(v[7]),
861 |     ]
862 | }
863 | 
864 | #[inline]
865 | fn f64x8_to_f16x8_fallback(v: &[f64; 8]) -> [u16; 8] {
866 |     [
867 |         f64_to_f16_fallback(v[0]),
868 |         f64_to_f16_fallback(v[1]),
869 |         f64_to_f16_fallback(v[2]),
870 |         f64_to_f16_fallback(v[3]),
871 |         f64_to_f16_fallback(v[4]),
872 |         f64_to_f16_fallback(v[5]),
873 |         f64_to_f16_fallback(v[6]),
874 |         f64_to_f16_fallback(v[7]),
875 |     ]
876 | }
877 | 
/// Applies `f` to every element of `src`, writing each result to the same index of `dst`.
///
/// # Panics
///
/// Panics if `src` and `dst` differ in length.
#[inline]
fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) {
    assert_eq!(src.len(), dst.len());
    dst.iter_mut()
        .zip(src)
        .for_each(|(slot, &input)| *slot = f(input));
}
885 | 
886 | #[inline]
887 | fn add_f16_fallback(a: u16, b: u16) -> u16 {
888 |     f32_to_f16(f16_to_f32(a) + f16_to_f32(b))
889 | }
890 | 
891 | #[inline]
892 | fn subtract_f16_fallback(a: u16, b: u16) -> u16 {
893 |     f32_to_f16(f16_to_f32(a) - f16_to_f32(b))
894 | }
895 | 
896 | #[inline]
897 | fn multiply_f16_fallback(a: u16, b: u16) -> u16 {
898 |     f32_to_f16(f16_to_f32(a) * f16_to_f32(b))
899 | }
900 | 
901 | #[inline]
902 | fn divide_f16_fallback(a: u16, b: u16) -> u16 {
903 |     f32_to_f16(f16_to_f32(a) / f16_to_f32(b))
904 | }
905 | 
906 | #[inline]
907 | fn remainder_f16_fallback(a: u16, b: u16) -> u16 {
908 |     f32_to_f16(f16_to_f32(a) % f16_to_f32(b))
909 | }
910 | 
911 | #[inline]
912 | fn product_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
913 |     f32_to_f16(iter.map(f16_to_f32).product())
914 | }
915 | 
916 | #[inline]
917 | fn sum_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
918 |     f32_to_f16(iter.map(f16_to_f32).sum())
919 | }
920 | 
921 | // TODO SIMD arithmetic
922 | 


--------------------------------------------------------------------------------