├── .github
    ├── ISSUE_TEMPLATE
    │   ├── blank_issue.md
    │   ├── bug_report.md
    │   ├── config.yml
    │   └── feature_request.md
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   ├── ci.yml
    │   └── doc.yml
├── .gitignore
├── CONTRIBUTING.md
├── Cargo.lock
├── Cargo.toml
├── Cross.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── beginners-guide.md
├── crates
    ├── core_simd
    │   ├── Cargo.toml
    │   ├── LICENSE-APACHE
    │   ├── LICENSE-MIT
    │   ├── examples
    │   │   ├── README.md
    │   │   ├── dot_product.rs
    │   │   ├── matrix_inversion.rs
    │   │   ├── nbody.rs
    │   │   └── spectral_norm.rs
    │   ├── src
    │   │   ├── alias.rs
    │   │   ├── cast.rs
    │   │   ├── core_simd_docs.md
    │   │   ├── fmt.rs
    │   │   ├── iter.rs
    │   │   ├── lane_count.rs
    │   │   ├── lib.rs
    │   │   ├── masks.rs
    │   │   ├── masks
    │   │   │   ├── bitmask.rs
    │   │   │   └── full_masks.rs
    │   │   ├── mod.rs
    │   │   ├── ops.rs
    │   │   ├── ops
    │   │   │   ├── assign.rs
    │   │   │   ├── deref.rs
    │   │   │   ├── shift_scalar.rs
    │   │   │   └── unary.rs
    │   │   ├── select.rs
    │   │   ├── simd
    │   │   │   ├── cmp.rs
    │   │   │   ├── cmp
    │   │   │   │   ├── eq.rs
    │   │   │   │   └── ord.rs
    │   │   │   ├── num.rs
    │   │   │   ├── num
    │   │   │   │   ├── float.rs
    │   │   │   │   ├── int.rs
    │   │   │   │   └── uint.rs
    │   │   │   ├── prelude.rs
    │   │   │   ├── ptr.rs
    │   │   │   └── ptr
    │   │   │   │   ├── const_ptr.rs
    │   │   │   │   └── mut_ptr.rs
    │   │   ├── swizzle.rs
    │   │   ├── swizzle_dyn.rs
    │   │   ├── to_bytes.rs
    │   │   ├── vector.rs
    │   │   ├── vendor.rs
    │   │   └── vendor
    │   │   │   ├── arm.rs
    │   │   │   ├── loongarch64.rs
    │   │   │   ├── powerpc.rs
    │   │   │   ├── wasm32.rs
    │   │   │   └── x86.rs
    │   ├── tests
    │   │   ├── autoderef.rs
    │   │   ├── cast.rs
    │   │   ├── f32_ops.rs
    │   │   ├── f64_ops.rs
    │   │   ├── i16_ops.rs
    │   │   ├── i32_ops.rs
    │   │   ├── i64_ops.rs
    │   │   ├── i8_ops.rs
    │   │   ├── isize_ops.rs
    │   │   ├── layout.rs
    │   │   ├── mask_ops.rs
    │   │   ├── mask_ops_impl
    │   │   │   ├── mask16.rs
    │   │   │   ├── mask32.rs
    │   │   │   ├── mask64.rs
    │   │   │   ├── mask8.rs
    │   │   │   ├── mask_macros.rs
    │   │   │   ├── masksize.rs
    │   │   │   └── mod.rs
    │   │   ├── masked_load_store.rs
    │   │   ├── masks.rs
    │   │   ├── ops_macros.rs
    │   │   ├── pointers.rs
    │   │   ├── round.rs
    │   │   ├── swizzle.rs
    │   │   ├── swizzle_dyn.rs
    │   │   ├── to_bytes.rs
    │   │   ├── try_from_slice.rs
    │   │   ├── u16_ops.rs
    │   │   ├── u32_ops.rs
    │   │   ├── u64_ops.rs
    │   │   ├── u8_ops.rs
    │   │   └── usize_ops.rs
    │   └── webdriver.json
    ├── std_float
    │   ├── Cargo.toml
    │   ├── src
    │   │   └── lib.rs
    │   └── tests
    │   │   └── float.rs
    └── test_helpers
    │   ├── Cargo.toml
    │   └── src
    │       ├── array.rs
    │       ├── biteq.rs
    │       ├── lib.rs
    │       ├── subnormals.rs
    │       └── wasm.rs
├── rust-toolchain.toml
└── subtree-sync.sh


/.github/ISSUE_TEMPLATE/blank_issue.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Blank Issue
3 | about: Create a blank issue.
4 | ---
5 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug Report
 3 | about: Create a bug report for Rust.
 4 | labels: C-bug
 5 | ---
 6 | <!--
 7 | Thank you for filing a bug report! 🐛 Please provide a short summary of the bug,
 8 | along with any information you feel relevant to replicating the bug.
 9 | -->
10 | 
11 | I tried this code:
12 | 
13 | ```rust
14 | <code>
15 | ```
16 | 
17 | I expected to see this happen: *explanation*
18 | 
19 | Instead, this happened: *explanation*
20 | 
21 | ### Meta
22 | 
23 | `rustc --version --verbose`:
24 | ```
25 | <version>
26 | ```
27 | 
28 | 
29 | `crate version in Cargo.toml`:
30 | ```toml
31 | [dependencies]
32 | stdsimd = 
33 | ```
34 | <!-- If this specifies the repo at HEAD, please include the latest commit. -->
35 | 
36 | 
37 | <!--
38 | If a backtrace is available, please include a backtrace in the code block by
39 | setting `RUST_BACKTRACE=1` in your environment. e.g.
40 | `RUST_BACKTRACE=1 cargo build`.
41 | -->
42 | <details><summary>Backtrace</summary>
43 | <p>
44 | 
45 | ```
46 | <backtrace>
47 | ```
48 | 
49 | </p>
50 | </details>
51 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | # This only controls whether a tiny, hard-to-find "open a blank issue" link appears at the end of
 2 | # the template list.
 3 | blank_issues_enabled: true
 4 | contact_links:
 5 |   - name: Intrinsic Support
 6 |     url: https://github.com/rust-lang/stdarch/issues
 7 |     about: Please direct issues about Rust's support for vendor intrinsics to core::arch
 8 |   - name: Internal Compiler Error
 9 |     url: https://github.com/rust-lang/rust/issues
10 |     about: Please report ICEs to the rustc repository
11 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature Request
 3 | about: Request an addition to the core::simd API
 4 | labels: C-feature-request
 5 | ---
 6 | <!--
 7 |   Hello!
 8 | 
 9 |   We are very interested in any feature requests you may have.
10 | 
11 |   However, please be aware that core::simd exists to address concerns with creating a portable SIMD API for Rust.
12 |   Requests for extensions to compiler features, such as `target_feature`, binary versioning for SIMD APIs, or
13 |   improving specific compilation issues in general should be discussed at https://internals.rust-lang.org/
14 | -->
15 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | Hello, welcome to `std::simd`!
 2 | 
 3 | It seems this pull request template checklist was created while a lot of vector math ops were being implemented, and only really applies to ops. Feel free to delete everything here if it's not applicable, or ask for help if you're not sure what it means!
 4 | 
 5 | For a given vector math operation on TxN, please add tests for interactions with:
 6 |   - [ ] `T::MAX`
 7 |   - [ ] `T::MIN`
 8 |   - [ ] -1
 9 |   - [ ] 1
10 |   - [ ] 0
11 | 
12 | 
13 | For a given vector math operation on TxN where T is a float, please add tests for interactions with:
14 |   - [ ] a really large number, larger than the mantissa
15 |   - [ ] a really small "subnormal" number
16 |   - [ ] NaN
17 |   - [ ] Infinity
18 |   - [ ] Negative Infinity
19 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | 
  3 | on:
  4 |   pull_request:
  5 |   push:
  6 |     branches:
  7 |       - master
  8 | 
  9 | env:
 10 |   CARGO_NET_RETRY: 10
 11 |   RUSTUP_MAX_RETRIES: 10
 12 |   PROPTEST_CASES: 64
 13 | 
 14 | jobs:
 15 |   rustfmt:
 16 |     name: "rustfmt"
 17 |     runs-on: ubuntu-latest
 18 | 
 19 |     steps:
 20 |       - uses: actions/checkout@v4
 21 |       - name: Run rustfmt
 22 |         run: cargo fmt --all -- --check
 23 | 
 24 |   clippy:
 25 |     name: "clippy on ${{ matrix.target }}"
 26 |     runs-on: ubuntu-latest
 27 |     strategy:
 28 |       fail-fast: false
 29 |       matrix:
 30 |         target:
 31 |           # We shouldn't really have any OS-specific code, so think of this as a list of architectures
 32 |           - x86_64-unknown-linux-gnu
 33 |           - i686-unknown-linux-gnu
 34 |           - i586-unknown-linux-gnu
 35 |           - aarch64-unknown-linux-gnu
 36 |           - arm64ec-pc-windows-msvc
 37 |           - armv7-unknown-linux-gnueabihf
 38 |           - loongarch64-unknown-linux-gnu
 39 |           # non-nightly since https://github.com/rust-lang/rust/pull/113274
 40 |           # - mips-unknown-linux-gnu
 41 |           # - mips64-unknown-linux-gnuabi64
 42 |           - powerpc-unknown-linux-gnu
 43 |           - powerpc64-unknown-linux-gnu
 44 |           - riscv64gc-unknown-linux-gnu
 45 |           - s390x-unknown-linux-gnu
 46 |           - sparc64-unknown-linux-gnu
 47 |           - wasm32-unknown-unknown
 48 | 
 49 |     steps:
 50 |       - uses: actions/checkout@v4
 51 |       - name: Setup Rust
 52 |         run: rustup target add ${{ matrix.target }}
 53 |       - name: Run Clippy
 54 |         run: cargo clippy --all-targets --target ${{ matrix.target }}
 55 | 
 56 |   x86-tests:
 57 |     name: "${{ matrix.target_feature }} on ${{ matrix.target }}"
 58 |     runs-on: ${{ matrix.os }}
 59 |     strategy:
 60 |       fail-fast: false
 61 |       matrix:
 62 |         target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu]
 63 |         # `default` means we use the default target config for the target,
 64 |         # `native` means we run with `-Ctarget-cpu=native`, and anything else is
 65 |         # an arg to `-Ctarget-feature`
 66 |         target_feature: [default, native, +sse3, +ssse3, +sse4.1, +sse4.2, +avx, +avx2]
 67 | 
 68 |         exclude:
 69 |           # -Ctarget-cpu=native sounds like bad-news if target != host
 70 |           - { target: i686-pc-windows-msvc, target_feature: native }
 71 |           - { target: i586-pc-windows-msvc, target_feature: native }
 72 | 
 73 |         include:
 74 |           # Populate the `matrix.os` field
 75 |           - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
 76 |           - { target: x86_64-pc-windows-msvc,   os: windows-latest }
 77 |           - { target: i686-pc-windows-msvc,     os: windows-latest }
 78 |           - { target: i586-pc-windows-msvc,     os: windows-latest }
 79 | 
 80 |           # These are globally available on all the other targets.
 81 |           - { target: i586-pc-windows-msvc, target_feature: +sse, os: windows-latest }
 82 |           - { target: i586-pc-windows-msvc, target_feature: +sse2, os: windows-latest }
 83 | 
 84 |           # Annoyingly, the x86_64-unknown-linux-gnu runner *almost* always has
 85 |           # avx512vl, but occasionally doesn't.  Maybe one day we can enable it.
 86 | 
 87 |     steps:
 88 |       - uses: actions/checkout@v4
 89 |       - name: Setup Rust
 90 |         run: rustup target add ${{ matrix.target }}
 91 | 
 92 |       - name: Configure RUSTFLAGS
 93 |         shell: bash
 94 |         run: |
 95 |           case "${{ matrix.target_feature }}" in
 96 |             default)
 97 |               echo "RUSTFLAGS=-Dwarnings" >> $GITHUB_ENV;;
 98 |             native)
 99 |               echo "RUSTFLAGS=-Dwarnings -Ctarget-cpu=native" >> $GITHUB_ENV
100 |               ;;
101 |             *)
102 |               echo "RUSTFLAGS=-Dwarnings -Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
103 |               ;;
104 |           esac
105 | 
106 |       # Super useful for debugging why a SIGILL occurred.
107 |       - name: Dump target configuration and support
108 |         run: |
109 |           rustc -Vv
110 | 
111 |           echo "Caveat: not all target features are expected to be logged"
112 | 
113 |           echo "## Requested target configuration (RUSTFLAGS=$RUSTFLAGS)"
114 |           rustc --print=cfg --target=${{ matrix.target }} $RUSTFLAGS
115 | 
116 |           echo "## Supported target configuration for --target=${{ matrix.target }}"
117 |           rustc --print=cfg --target=${{ matrix.target }} -Ctarget-cpu=native
118 | 
119 |           echo "## Natively supported target configuration"
120 |           rustc --print=cfg -Ctarget-cpu=native
121 | 
122 |       - name: Test (debug)
123 |         run: cargo test --verbose --target=${{ matrix.target }}
124 | 
125 |       - name: Test (release)
126 |         run: cargo test --verbose --target=${{ matrix.target }} --release
127 | 
128 |       - name: Generate docs
129 |         run: cargo doc --verbose --target=${{ matrix.target }}
130 |         env:
131 |           RUSTDOCFLAGS: -Dwarnings
132 |     
133 |   macos-tests:
134 |     name: ${{ matrix.target }}
135 |     runs-on: macos-latest
136 |     strategy:
137 |       fail-fast: false
138 |       matrix:
139 |         target:
140 |           - aarch64-apple-darwin
141 |           - x86_64-apple-darwin
142 |     steps:
143 |       - uses: actions/checkout@v4
144 |       - name: Setup Rust
145 |         run: rustup target add ${{ matrix.target }}
146 | 
147 |       - name: Configure RUSTFLAGS
148 |         shell: bash
149 |         run: echo "RUSTFLAGS=-Dwarnings" >> $GITHUB_ENV
150 | 
151 |       - name: Test (debug)
152 |         run: cargo test --verbose --target=${{ matrix.target }}
153 | 
154 |       - name: Test (release)
155 |         run: cargo test --verbose --target=${{ matrix.target }} --release
156 | 
157 |       - name: Generate docs
158 |         run: cargo doc --verbose --target=${{ matrix.target }}
159 |         env:
160 |           RUSTDOCFLAGS: -Dwarnings
161 | 
162 |   wasm-tests:
163 |     name: "wasm (firefox, ${{ matrix.name }})"
164 |     runs-on: ubuntu-latest
165 |     strategy:
166 |       matrix:
167 |         include:
168 |           - { name: default, RUSTFLAGS: "" }
169 |           - { name: simd128, RUSTFLAGS: "-C target-feature=+simd128" }
170 |     steps:
171 |       - uses: actions/checkout@v4
172 |       - name: Install wasm-pack
173 |         run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
174 |       - name: Test (debug)
175 |         run: wasm-pack test --firefox --headless crates/core_simd
176 |         env:
177 |             RUSTFLAGS: ${{ matrix.rustflags }}
178 |       - name: Test (release)
179 |         run: wasm-pack test --firefox --headless crates/core_simd --release
180 |         env:
181 |             RUSTFLAGS: ${{ matrix.rustflags }}
182 | 
183 |   cross-tests:
184 |     name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
185 |     runs-on: ubuntu-latest
186 |     env:
187 |       PROPTEST_CASES: 16
188 |     strategy:
189 |       fail-fast: false
190 | 
191 |       matrix:
192 |         target:
193 |           - armv7-unknown-linux-gnueabihf
194 |           - thumbv7neon-unknown-linux-gnueabihf # includes neon by default
195 |           - aarch64-unknown-linux-gnu           # includes neon by default
196 |           - powerpc-unknown-linux-gnu
197 |           - powerpc64le-unknown-linux-gnu       # includes altivec by default
198 |           - riscv64gc-unknown-linux-gnu
199 |           - loongarch64-unknown-linux-gnu
200 |           # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
201 |           # non-nightly since https://github.com/rust-lang/rust/pull/113274
202 |           # - mips-unknown-linux-gnu
203 |           # - mips64-unknown-linux-gnuabi64
204 |           # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
205 |           # - powerpc64-unknown-linux-gnu
206 |         target_feature: [default]
207 |         include:
208 |           - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
209 |           # Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
210 |           # This target is somewhat redundant, since ppc64le has altivec as well.
211 |           # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
212 |           # We should test this, but cross currently can't run it
213 |           # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
214 | 
215 |     steps:
216 |       - uses: actions/checkout@v4
217 |       - name: Setup Rust
218 |         run: rustup target add ${{ matrix.target }}
219 | 
220 |       - name: Install Cross
221 |         # Install the latest git version for newer targets.
222 |         run: |
223 |           cargo install cross --git https://github.com/cross-rs/cross --rev 4090beca3cfffa44371a5bba524de3a578aa46c3
224 | 
225 |       - name: Configure Emulated CPUs
226 |         run: |
227 |           echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
228 |           # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
229 | 
230 |       - name: Configure RUSTFLAGS
231 |         shell: bash
232 |         run: |
233 |           case "${{ matrix.target_feature }}" in
234 |             default)
235 |               echo "RUSTFLAGS=" >> $GITHUB_ENV;;
236 |             *)
237 |               echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
238 |               ;;
239 |           esac
240 | 
241 |       - name: Test (debug)
242 |         run: cross test --verbose --target=${{ matrix.target }}
243 | 
244 |       - name: Test (release)
245 |         run: cross test --verbose --target=${{ matrix.target }} --release
246 | 
247 |   miri:
248 |     runs-on: ubuntu-latest
249 |     env:
250 |       PROPTEST_CASES: 16
251 |     steps:
252 |       - uses: actions/checkout@v4
253 |       - name: Test (Miri)
254 |         run: cargo miri test
255 | 


--------------------------------------------------------------------------------
/.github/workflows/doc.yml:
--------------------------------------------------------------------------------
 1 | name: Documentation
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 | 
 8 | jobs:
 9 |   release:
10 |     name: Deploy Documentation
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - name: Checkout Repository
15 |         uses: actions/checkout@v4
16 | 
17 |       - name: Setup Rust
18 |         run: |
19 |           rustup update nightly --no-self-update
20 |           rustup default nightly
21 | 
22 |       - name: Build Documentation
23 |         run: cargo doc --no-deps
24 |       
25 |       - name: Deploy Documentation
26 |         uses: peaceiris/actions-gh-pages@v3
27 |         with:
28 |           github_token: ${{ secrets.GITHUB_TOKEN }}
29 |           publish_branch: gh-pages
30 |           publish_dir: ./target/doc
31 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | git-subtree.sh
3 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to `std::simd`
 2 | 
 3 | Simple version:
 4 | 1. Fork it and `git clone` it
 5 | 2. Create your feature branch: `git checkout -b my-branch`
 6 | 3. Write your changes.
 7 | 4. Test it: `cargo test`. Remember to enable whatever SIMD features you intend to test by setting `RUSTFLAGS`.
 8 | 5. Commit your changes: `git add ./path/to/changes && git commit -m 'Fix some bug'`
 9 | 6. Push the branch: `git push --set-upstream origin my-branch`
10 | 7. Submit a pull request!
11 | 
12 | ## Taking on an Issue
13 | 
14 | SIMD can be quite complex, and even a "simple" issue can be huge. If an issue is organized like a tracking issue, with an itemized list of items that don't necessarily have to be done in a specific order, please take the issue one item at a time. This will help by letting work proceed apace on the rest of the issue. If it's a (relatively) small issue, feel free to announce your intention to solve it on the issue tracker and take it in one go!
15 | 
16 | ## CI
17 | 
18 | We currently use GitHub Actions which will automatically build and test your change in order to verify that `std::simd`'s portable API is, in fact, portable. If your change builds locally, but does not build in CI, this is likely due to a platform-specific concern that your code has not addressed. Please consult the build logs and address the error, or ask for help if you need it.
19 | 
20 | ## Beyond stdsimd
21 | 
22 | A large amount of the core SIMD implementation is found in the rustc_codegen_* crates in the [main rustc repo](https://github.com/rust-lang/rust). In addition, actual platform-specific functions are implemented in [stdarch]. Not all changes to `std::simd` require interacting with either of these, but if you're wondering where something is and it doesn't seem to be in this repository, those might be where to start looking.
23 | 
24 | ## Questions? Concerns? Need Help?
25 | 
26 | Please feel free to ask in the [#project-portable-simd][zulip-portable-simd] stream on the [rust-lang Zulip][zulip] for help with making changes to `std::simd`!
27 | If your changes include directly modifying the compiler, it might also be useful to ask in [#t-compiler/help][zulip-compiler-help].
28 | 
29 | [zulip-portable-simd]: https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd
30 | [zulip-compiler-help]: https://rust-lang.zulipchat.com/#narrow/stream/182449-t-compiler.2Fhelp
31 | [zulip]: https://rust-lang.zulipchat.com
32 | [stdarch]: https://github.com/rust-lang/stdarch
33 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "autocfg"
  7 | version = "1.1.0"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 10 | 
 11 | [[package]]
 12 | name = "bitflags"
 13 | version = "1.3.2"
 14 | source = "registry+https://github.com/rust-lang/crates.io-index"
 15 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 16 | 
 17 | [[package]]
 18 | name = "bumpalo"
 19 | version = "3.13.0"
 20 | source = "registry+https://github.com/rust-lang/crates.io-index"
 21 | checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
 22 | 
 23 | [[package]]
 24 | name = "byteorder"
 25 | version = "1.4.3"
 26 | source = "registry+https://github.com/rust-lang/crates.io-index"
 27 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
 28 | 
 29 | [[package]]
 30 | name = "cfg-if"
 31 | version = "1.0.0"
 32 | source = "registry+https://github.com/rust-lang/crates.io-index"
 33 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 34 | 
 35 | [[package]]
 36 | name = "console_error_panic_hook"
 37 | version = "0.1.7"
 38 | source = "registry+https://github.com/rust-lang/crates.io-index"
 39 | checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
 40 | dependencies = [
 41 |  "cfg-if",
 42 |  "wasm-bindgen",
 43 | ]
 44 | 
 45 | [[package]]
 46 | name = "core_simd"
 47 | version = "0.1.0"
 48 | dependencies = [
 49 |  "proptest",
 50 |  "std_float",
 51 |  "test_helpers",
 52 |  "wasm-bindgen",
 53 |  "wasm-bindgen-test",
 54 | ]
 55 | 
 56 | [[package]]
 57 | name = "js-sys"
 58 | version = "0.3.64"
 59 | source = "registry+https://github.com/rust-lang/crates.io-index"
 60 | checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
 61 | dependencies = [
 62 |  "wasm-bindgen",
 63 | ]
 64 | 
 65 | [[package]]
 66 | name = "log"
 67 | version = "0.4.20"
 68 | source = "registry+https://github.com/rust-lang/crates.io-index"
 69 | checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 70 | 
 71 | [[package]]
 72 | name = "num-traits"
 73 | version = "0.2.16"
 74 | source = "registry+https://github.com/rust-lang/crates.io-index"
 75 | checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
 76 | dependencies = [
 77 |  "autocfg",
 78 | ]
 79 | 
 80 | [[package]]
 81 | name = "once_cell"
 82 | version = "1.18.0"
 83 | source = "registry+https://github.com/rust-lang/crates.io-index"
 84 | checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
 85 | 
 86 | [[package]]
 87 | name = "ppv-lite86"
 88 | version = "0.2.17"
 89 | source = "registry+https://github.com/rust-lang/crates.io-index"
 90 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 91 | 
 92 | [[package]]
 93 | name = "proc-macro2"
 94 | version = "1.0.66"
 95 | source = "registry+https://github.com/rust-lang/crates.io-index"
 96 | checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
 97 | dependencies = [
 98 |  "unicode-ident",
 99 | ]
100 | 
101 | [[package]]
102 | name = "proptest"
103 | version = "0.10.1"
104 | source = "registry+https://github.com/rust-lang/crates.io-index"
105 | checksum = "12e6c80c1139113c28ee4670dc50cc42915228b51f56a9e407f0ec60f966646f"
106 | dependencies = [
107 |  "bitflags",
108 |  "byteorder",
109 |  "num-traits",
110 |  "rand",
111 |  "rand_chacha",
112 |  "rand_xorshift",
113 | ]
114 | 
115 | [[package]]
116 | name = "quote"
117 | version = "1.0.33"
118 | source = "registry+https://github.com/rust-lang/crates.io-index"
119 | checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
120 | dependencies = [
121 |  "proc-macro2",
122 | ]
123 | 
124 | [[package]]
125 | name = "rand"
126 | version = "0.7.3"
127 | source = "registry+https://github.com/rust-lang/crates.io-index"
128 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
129 | dependencies = [
130 |  "rand_chacha",
131 |  "rand_core",
132 |  "rand_hc",
133 | ]
134 | 
135 | [[package]]
136 | name = "rand_chacha"
137 | version = "0.2.2"
138 | source = "registry+https://github.com/rust-lang/crates.io-index"
139 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
140 | dependencies = [
141 |  "ppv-lite86",
142 |  "rand_core",
143 | ]
144 | 
145 | [[package]]
146 | name = "rand_core"
147 | version = "0.5.1"
148 | source = "registry+https://github.com/rust-lang/crates.io-index"
149 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
150 | 
151 | [[package]]
152 | name = "rand_hc"
153 | version = "0.2.0"
154 | source = "registry+https://github.com/rust-lang/crates.io-index"
155 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
156 | dependencies = [
157 |  "rand_core",
158 | ]
159 | 
160 | [[package]]
161 | name = "rand_xorshift"
162 | version = "0.2.0"
163 | source = "registry+https://github.com/rust-lang/crates.io-index"
164 | checksum = "77d416b86801d23dde1aa643023b775c3a462efc0ed96443add11546cdf1dca8"
165 | dependencies = [
166 |  "rand_core",
167 | ]
168 | 
169 | [[package]]
170 | name = "scoped-tls"
171 | version = "1.0.1"
172 | source = "registry+https://github.com/rust-lang/crates.io-index"
173 | checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
174 | 
175 | [[package]]
176 | name = "std_float"
177 | version = "0.1.0"
178 | dependencies = [
179 |  "core_simd",
180 |  "test_helpers",
181 |  "wasm-bindgen",
182 |  "wasm-bindgen-test",
183 | ]
184 | 
185 | [[package]]
186 | name = "syn"
187 | version = "2.0.29"
188 | source = "registry+https://github.com/rust-lang/crates.io-index"
189 | checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
190 | dependencies = [
191 |  "proc-macro2",
192 |  "quote",
193 |  "unicode-ident",
194 | ]
195 | 
196 | [[package]]
197 | name = "test_helpers"
198 | version = "0.1.0"
199 | dependencies = [
200 |  "proptest",
201 | ]
202 | 
203 | [[package]]
204 | name = "unicode-ident"
205 | version = "1.0.11"
206 | source = "registry+https://github.com/rust-lang/crates.io-index"
207 | checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
208 | 
209 | [[package]]
210 | name = "wasm-bindgen"
211 | version = "0.2.87"
212 | source = "registry+https://github.com/rust-lang/crates.io-index"
213 | checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
214 | dependencies = [
215 |  "cfg-if",
216 |  "wasm-bindgen-macro",
217 | ]
218 | 
219 | [[package]]
220 | name = "wasm-bindgen-backend"
221 | version = "0.2.87"
222 | source = "registry+https://github.com/rust-lang/crates.io-index"
223 | checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
224 | dependencies = [
225 |  "bumpalo",
226 |  "log",
227 |  "once_cell",
228 |  "proc-macro2",
229 |  "quote",
230 |  "syn",
231 |  "wasm-bindgen-shared",
232 | ]
233 | 
234 | [[package]]
235 | name = "wasm-bindgen-futures"
236 | version = "0.4.37"
237 | source = "registry+https://github.com/rust-lang/crates.io-index"
238 | checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
239 | dependencies = [
240 |  "cfg-if",
241 |  "js-sys",
242 |  "wasm-bindgen",
243 |  "web-sys",
244 | ]
245 | 
246 | [[package]]
247 | name = "wasm-bindgen-macro"
248 | version = "0.2.87"
249 | source = "registry+https://github.com/rust-lang/crates.io-index"
250 | checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
251 | dependencies = [
252 |  "quote",
253 |  "wasm-bindgen-macro-support",
254 | ]
255 | 
256 | [[package]]
257 | name = "wasm-bindgen-macro-support"
258 | version = "0.2.87"
259 | source = "registry+https://github.com/rust-lang/crates.io-index"
260 | checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
261 | dependencies = [
262 |  "proc-macro2",
263 |  "quote",
264 |  "syn",
265 |  "wasm-bindgen-backend",
266 |  "wasm-bindgen-shared",
267 | ]
268 | 
269 | [[package]]
270 | name = "wasm-bindgen-shared"
271 | version = "0.2.87"
272 | source = "registry+https://github.com/rust-lang/crates.io-index"
273 | checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
274 | 
275 | [[package]]
276 | name = "wasm-bindgen-test"
277 | version = "0.3.37"
278 | source = "registry+https://github.com/rust-lang/crates.io-index"
279 | checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671"
280 | dependencies = [
281 |  "console_error_panic_hook",
282 |  "js-sys",
283 |  "scoped-tls",
284 |  "wasm-bindgen",
285 |  "wasm-bindgen-futures",
286 |  "wasm-bindgen-test-macro",
287 | ]
288 | 
289 | [[package]]
290 | name = "wasm-bindgen-test-macro"
291 | version = "0.3.37"
292 | source = "registry+https://github.com/rust-lang/crates.io-index"
293 | checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575"
294 | dependencies = [
295 |  "proc-macro2",
296 |  "quote",
297 | ]
298 | 
299 | [[package]]
300 | name = "web-sys"
301 | version = "0.3.64"
302 | source = "registry+https://github.com/rust-lang/crates.io-index"
303 | checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
304 | dependencies = [
305 |  "js-sys",
306 |  "wasm-bindgen",
307 | ]
308 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | resolver = "1"
 3 | members = [
 4 |     "crates/core_simd",
 5 |     "crates/std_float",
 6 |     "crates/test_helpers",
 7 | ]
 8 | 
 9 | [profile.test.package."*"]
10 | opt-level = 2
11 | 
12 | [profile.test.package.test_helpers]
13 | opt-level = 2
14 | 


--------------------------------------------------------------------------------
/Cross.toml:
--------------------------------------------------------------------------------
1 | [build.env]
2 | passthrough = ["PROPTEST_CASES"]
3 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2020 The Rust Project Developers
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # The Rust standard library's portable SIMD API
 2 | ![Build Status](https://github.com/rust-lang/portable-simd/actions/workflows/ci.yml/badge.svg?branch=master)
 3 | 
 4 | Code repository for the [Portable SIMD Project Group](https://github.com/rust-lang/project-portable-simd).
 5 | Please refer to [CONTRIBUTING.md](./CONTRIBUTING.md) for our contributing guidelines.
 6 | 
 7 | The docs for this crate are published from the main branch.
 8 | You can [read them here][docs].
 9 | 
10 | If you have questions about SIMD, we have begun writing a [guide][simd-guide].
11 | We can also be found on [Zulip][zulip-project-portable-simd].
12 | 
13 | If you are interested in support for a specific architecture, you may want [stdarch] instead.
14 | 
15 | ## Hello World
16 | 
17 | Now we're gonna dip our toes into this world with a small SIMD "Hello, World!" example. Make sure your compiler is up to date and using `nightly`. We can do that by running 
18 | 
19 | ```bash
20 | rustup update -- nightly
21 | ```
22 | 
23 | or by making nightly your default toolchain with `rustup default nightly`, or else by selecting it per command with `cargo +nightly {build,test,run}`. After updating, run
24 | ```bash
25 | cargo new hellosimd
26 | ```
27 | to create a new crate. Finally write this in `src/main.rs`:
28 | ```rust
29 | #![feature(portable_simd)]
30 | use std::simd::f32x4;
31 | fn main() {
32 |     let a = f32x4::splat(10.0);
33 |     let b = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
34 |     println!("{:?}", a + b);
35 | }
36 | ```
37 | 
38 | Explanation: We construct our SIMD vectors with methods like `splat` or `from_array`. Next, we can use operators like `+` on them, and the appropriate SIMD instructions will be carried out. When you run `cargo run`, you should get `[11.0, 12.0, 13.0, 14.0]`.
39 | 
40 | ## Supported vectors
41 | 
42 | Currently, vectors may have up to 64 elements, but aliases are provided only up to 512-bit vectors.
43 | 
44 | Depending on the size of the primitive type, the number of lanes the vector will have varies. For example, 128-bit vectors have four `f32` lanes and two `f64` lanes.
45 | 
46 | The supported element types are as follows:
47 | * **Floating Point:** `f32`, `f64`
48 | * **Signed Integers:** `i8`, `i16`, `i32`, `i64`, `isize` (`i128` excluded)
49 | * **Unsigned Integers:** `u8`, `u16`, `u32`, `u64`, `usize` (`u128` excluded)
50 | * **Pointers:** `*const T` and `*mut T` (zero-sized metadata only)
51 | * **Masks:** 8-bit, 16-bit, 32-bit, 64-bit, and `usize`-sized masks
52 | 
53 | Floating point, signed integers, unsigned integers, and pointers are the [primitive types](https://doc.rust-lang.org/core/primitive/index.html) you're already used to.
54 | The mask types have elements that are "truthy" values, like `bool`, but have an unspecified layout because different architectures prefer different layouts for mask types.
55 | 
56 | [simd-guide]: ./beginners-guide.md
57 | [zulip-project-portable-simd]: https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd
58 | [stdarch]: https://github.com/rust-lang/stdarch
59 | [docs]: https://rust-lang.github.io/portable-simd/core_simd
60 | 


--------------------------------------------------------------------------------
/crates/core_simd/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "core_simd"
 3 | version = "0.1.0"
 4 | edition = "2024"
 5 | homepage = "https://github.com/rust-lang/portable-simd"
 6 | repository = "https://github.com/rust-lang/portable-simd"
 7 | keywords = ["core", "simd", "intrinsics"]
 8 | categories = ["hardware-support", "no-std"]
 9 | license = "MIT OR Apache-2.0"
10 | 
11 | [features]
12 | default = ["as_crate", "std"]
13 | as_crate = []
14 | std = []
15 | 
16 | [target.'cfg(target_arch = "wasm32")'.dev-dependencies]
17 | wasm-bindgen = "0.2"
18 | wasm-bindgen-test = "0.3"
19 | 
20 | [dev-dependencies.proptest]
21 | version = "0.10"
22 | default-features = false
23 | features = ["alloc"]
24 | 
25 | [dev-dependencies.test_helpers]
26 | path = "../test_helpers"
27 | 
28 | [dev-dependencies]
29 | std_float = { path = "../std_float/", features = ["as_crate"] }
30 | 


--------------------------------------------------------------------------------
/crates/core_simd/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2020 The Rust Project Developers
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/crates/core_simd/examples/README.md:
--------------------------------------------------------------------------------
 1 | ### `stdsimd` examples
 2 | 
 3 | This crate is a port of example uses of `stdsimd`, mostly taken from the `packed_simd` crate.
 4 | 
 5 | Some examples, such as `dot_product.rs`, contain multiple ways of solving the same problem, in order to show idiomatic uses of SIMD and how a design can be iterated on for performance.
 6 | 
 7 | Run the tests with the command 
 8 | 
 9 | ```
10 | cargo test --example dot_product
11 | ```
12 | 
13 | and verify the code for `dot_product.rs` on your machine.
14 | 


--------------------------------------------------------------------------------
/crates/core_simd/examples/dot_product.rs:
--------------------------------------------------------------------------------
  1 | //! Code taken from the `packed_simd` crate.
  2 | //! Run this code with `cargo test --example dot_product`.
  3 | 
  4 | #![feature(array_chunks)]
  5 | #![feature(slice_as_chunks)]
  6 | // Add these imports to use the stdsimd library
  7 | #![feature(portable_simd)]
  8 | use core_simd::simd::prelude::*;
  9 | 
 10 | // This is the baseline dot product implementation:
 11 | // pair up the elements of the 2 slices, multiply each pair, and *then*
 12 | // walk over the resulting products and add them all up.
 13 | // The next example looks at whether there
 14 | //  is any difference when multiplying and adding in tandem.
 15 | pub fn dot_prod_scalar_0(a: &[f32], b: &[f32]) -> f32 {
 16 |     assert_eq!(a.len(), b.len());
 17 |     let products = a.iter().zip(b).map(|(x, y)| x * y);
 18 |     products.sum()
 19 | }
 20 | 
 21 | // With SIMD, it pays to think carefully about how much data is moved around
 22 | // and when that happens. Even for computations as simple as the ones in this file,
 23 | // it is not trivial to tell what does or does not contribute to performance
 24 | // changes. Eventually you will need tools to inspect the generated assembly and to check your
 25 | // hypotheses against benchmarks - we will mention them later on.
 26 | // Using `fold`, each step performs one multiplication
 27 | // and adds it to the running sum, taking one element from each slice at a time.
 28 | pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 {
 29 |     assert_eq!(a.len(), b.len());
 30 |     let pairs = a.iter().zip(b);
 31 |     // Multiply and accumulate in the same step, in slice order.
 32 |     pairs.fold(0.0, |acc, (x, y)| acc + x * y)
 33 | }
 34 | 
 35 | // We now move on to the SIMD implementations. Notice the following constructs:
 36 | // `array_chunks::<4>`: iterating over this lets us construct SIMD vectors from 4-element chunks
 37 | // `f32x4::from_array`: construct the SIMD vector from an array
 38 | // `(a * b).reduce_sum()`: multiply both f32x4 vectors together, and then reduce the product to a scalar.
 39 | // This approach essentially uses SIMD to produce N/4 partial sums of products,
 40 | // and then adds those up with `sum()`. This is suboptimal.
 41 | // TODO: ASCII diagrams
 42 | pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 {
 43 |     assert_eq!(a.len(), b.len());
 44 |     // TODO handle remainder when a.len() % 4 != 0
 45 |     let lhs = a.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
 46 |     let rhs = b.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
 47 |     // One horizontal reduction per pair of chunks,
 48 |     // followed by a scalar sum of those partial results.
 49 |     lhs.zip(rhs).map(|(x, y)| (x * y).reduce_sum()).sum()
 50 | }
 51 | 
 52 | // There are some simple ways to improve the previous code:
 53 | // 1. Make a `zero` `f32x4` SIMD vector that we will be accumulating into,
 54 | // so that there is only one `reduce_sum()` reduction, after the last `f32x4` has been processed.
 55 | // 2. Exploit Fused Multiply Add so that the multiplication, addition and sinking into the reduction
 56 | // happen in the same step.
 57 | // If the arrays are large, minimizing the data shuffling will lead to great perf.
 58 | // If the arrays are small, handling the remainder elements when the length isn't a multiple of 4
 59 | // can become a problem.
 60 | pub fn dot_prod_simd_1(a: &[f32], b: &[f32]) -> f32 {
 61 |     assert_eq!(a.len(), b.len());
 62 |     // TODO handle remainder when a.len() % 4 != 0
 63 |     let lhs = a.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
 64 |     let rhs = b.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
 65 |     // Accumulate lane-wise; the single horizontal reduction happens after the fold.
 66 |     let acc = lhs.zip(rhs).fold(f32x4::splat(0.0), |acc, (x, y)| acc + x * y);
 67 |     acc.reduce_sum()
 68 | }
 69 | 
 70 | // A lot of knowledgeable use of SIMD comes from knowing specific instructions that are
 71 | // available - let's try to use the `mul_add` instruction, which is the fused-multiply-add we were looking for.
 72 | use std_float::StdFloat;
 73 | pub fn dot_prod_simd_2(a: &[f32], b: &[f32]) -> f32 {
 74 |     assert_eq!(a.len(), b.len());
 75 |     // TODO handle remainder when a.len() % 4 != 0
 76 |     let lhs = a.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
 77 |     let rhs = b.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
 78 |     // `acc` is the only accumulator; each step updates it with one `mul_add` (`x * y + acc`).
 79 |     let mut acc = f32x4::splat(0.0);
 80 |     for (x, y) in lhs.zip(rhs) {
 81 |         acc = x.mul_add(y, acc);
 82 |     }
 83 |     acc.reduce_sum()
 84 | }
 85 | 
 86 | // Finally, we will write the same operation but handling the loop remainder.
 87 | const LANES: usize = 4;
 88 | pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 {
 89 |     assert_eq!(a.len(), b.len());
 90 | 
 91 |     let (a_head, a_body) = a.as_rchunks::<LANES>();
 92 |     let (b_head, b_body) = b.as_rchunks::<LANES>();
 93 | 
 94 |     // Both of these follow from the length check above, but for emphasis:
 95 |     assert_eq!(a_body.len(), b_body.len());
 96 |     assert_eq!(a_head.len(), b_head.len());
 97 |     // Seed the accumulator with the products of the leftover head elements; unused lanes stay 0.0.
 98 |     let mut head_products = [0.0; LANES];
 99 |     for (slot, (x, y)) in std::iter::zip(&mut head_products, std::iter::zip(a_head, b_head)) {
100 |         *slot = x * y;
101 |     }
102 | 
103 |     let mut acc = f32x4::from_array(head_products);
104 |     for (x, y) in std::iter::zip(a_body, b_body) {
105 |         acc += f32x4::from_array(*x) * f32x4::from_array(*y);
106 |     }
107 | 
108 |     acc.reduce_sum()
109 | }
110 | 
111 | // Next, we present an iterator version that handles the remainder in a scalar fashion after the SIMD loop.
112 | // Unfortunately, this is allocating 1 `XMM` register on the order of `~len(a)` - we'll see how we can get around it in the
113 | // next example.
114 | pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 {
115 |     // Enforce equal lengths like the other versions: otherwise a shorter `b` can panic when
116 |     // the tail is sliced below, and a longer `b` is silently truncated.
117 |     assert_eq!(a.len(), b.len());
118 |     let lhs = a.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
119 |     let rhs = b.array_chunks::<4>().map(|&chunk| f32x4::from_array(chunk));
120 |     let simd_sum = lhs
121 |         .zip(rhs)
122 |         .map(|(x, y)| x * y)
123 |         .fold(f32x4::splat(0.0), std::ops::Add::add)
124 |         .reduce_sum();
125 |     // Scalar pass over the `a.len() % 4` trailing elements that did not fill a whole chunk.
126 |     let remain = a.len() - (a.len() % 4);
127 |     let tail_sum: f32 = std::iter::zip(&a[remain..], &b[remain..]).map(|(x, y)| x * y).sum();
128 |     simd_sum + tail_sum
129 | }
130 | 
131 | // This version keeps a single `XMM` register as the accumulator, and the fold doesn't allocate on top of that.
132 | // Notice the use of `mul_add`, which can do a multiply and an add operation per iteration.
133 | pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 {
134 |     // Only whole 4-element chunks present in both slices contribute to the result.
135 |     std::iter::zip(a.array_chunks::<4>(), b.array_chunks::<4>())
136 |         .map(|(&x, &y)| (f32x4::from_array(x), f32x4::from_array(y)))
137 |         .fold(f32x4::splat(0.), |acc, (x, y)| x.mul_add(y, acc))
138 |         .reduce_sum()
139 | }
140 | 
141 | fn main() {
142 |     // Intentionally empty: cargo needs a `main` here; the code runs via `cargo test --example dot_product`.
143 | }
144 | 
145 | #[cfg(test)]
146 | mod tests {
147 |     use super::*;
148 |     #[test]
149 |     fn smoke_test() {
150 |         let a = vec![1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
151 |         let b = vec![-8.0_f32, -7.0, -6.0, -5.0, 4.0, 3.0, 2.0, 1.0];
152 |         let x = vec![0.5_f32; 1003];
153 |         let y = vec![2.0_f32; 1003];
154 | 
155 |         // Basic check: the products of `a` and `b` cancel out, so every version must return 0.
156 |         assert_eq!(0.0, dot_prod_scalar_0(&a, &b));
157 |         assert_eq!(0.0, dot_prod_scalar_1(&a, &b));
158 |         assert_eq!(0.0, dot_prod_simd_0(&a, &b));
159 |         assert_eq!(0.0, dot_prod_simd_1(&a, &b));
160 |         assert_eq!(0.0, dot_prod_simd_2(&a, &b));
161 |         assert_eq!(0.0, dot_prod_simd_3(&a, &b));
162 |         assert_eq!(0.0, dot_prod_simd_4(&a, &b));
163 |         assert_eq!(0.0, dot_prod_simd_5(&a, &b));
164 | 
165 |         // Lengths that are not a multiple of 4 are handled too (1003 * 0.5 * 2.0)
166 |         assert_eq!(1003.0, dot_prod_simd_3(&x, &y));
167 |     }
168 | }
169 | 


--------------------------------------------------------------------------------
/crates/core_simd/examples/nbody.rs:
--------------------------------------------------------------------------------
  1 | #![feature(portable_simd)]
  2 | #![allow(clippy::excessive_precision)]
  3 | extern crate std_float;
  4 | 
  5 | /// Benchmarks game nbody code
  6 | /// Taken from the `packed_simd` crate
  7 | /// Run this benchmark with `cargo test --example nbody`
  8 | mod nbody {
  9 |     use core_simd::simd::prelude::*;
 10 |     #[allow(unused)] // False positive?
 11 |     use std_float::StdFloat;
 12 | 
 13 |     use std::f64::consts::PI;
 14 |     const SOLAR_MASS: f64 = 4.0 * PI * PI;
 15 |     const DAYS_PER_YEAR: f64 = 365.24;
 16 | 
 17 |     #[derive(Debug, Clone, Copy)]
 18 |     struct Body {
 19 |         pub x: f64x4,
 20 |         pub v: f64x4,
 21 |         pub mass: f64,
 22 |     }
 23 | 
 24 |     const N_BODIES: usize = 5;
 25 |     const BODIES: [Body; N_BODIES] = [
 26 |         // sun:
 27 |         Body {
 28 |             x: f64x4::from_array([0., 0., 0., 0.]),
 29 |             v: f64x4::from_array([0., 0., 0., 0.]),
 30 |             mass: SOLAR_MASS,
 31 |         },
 32 |         // jupiter:
 33 |         Body {
 34 |             x: f64x4::from_array([
 35 |                 4.84143144246472090e+00,
 36 |                 -1.16032004402742839e+00,
 37 |                 -1.03622044471123109e-01,
 38 |                 0.,
 39 |             ]),
 40 |             v: f64x4::from_array([
 41 |                 1.66007664274403694e-03 * DAYS_PER_YEAR,
 42 |                 7.69901118419740425e-03 * DAYS_PER_YEAR,
 43 |                 -6.90460016972063023e-05 * DAYS_PER_YEAR,
 44 |                 0.,
 45 |             ]),
 46 |             mass: 9.54791938424326609e-04 * SOLAR_MASS,
 47 |         },
 48 |         // saturn:
 49 |         Body {
 50 |             x: f64x4::from_array([
 51 |                 8.34336671824457987e+00,
 52 |                 4.12479856412430479e+00,
 53 |                 -4.03523417114321381e-01,
 54 |                 0.,
 55 |             ]),
 56 |             v: f64x4::from_array([
 57 |                 -2.76742510726862411e-03 * DAYS_PER_YEAR,
 58 |                 4.99852801234917238e-03 * DAYS_PER_YEAR,
 59 |                 2.30417297573763929e-05 * DAYS_PER_YEAR,
 60 |                 0.,
 61 |             ]),
 62 |             mass: 2.85885980666130812e-04 * SOLAR_MASS,
 63 |         },
 64 |         // uranus:
 65 |         Body {
 66 |             x: f64x4::from_array([
 67 |                 1.28943695621391310e+01,
 68 |                 -1.51111514016986312e+01,
 69 |                 -2.23307578892655734e-01,
 70 |                 0.,
 71 |             ]),
 72 |             v: f64x4::from_array([
 73 |                 2.96460137564761618e-03 * DAYS_PER_YEAR,
 74 |                 2.37847173959480950e-03 * DAYS_PER_YEAR,
 75 |                 -2.96589568540237556e-05 * DAYS_PER_YEAR,
 76 |                 0.,
 77 |             ]),
 78 |             mass: 4.36624404335156298e-05 * SOLAR_MASS,
 79 |         },
 80 |         // neptune:
 81 |         Body {
 82 |             x: f64x4::from_array([
 83 |                 1.53796971148509165e+01,
 84 |                 -2.59193146099879641e+01,
 85 |                 1.79258772950371181e-01,
 86 |                 0.,
 87 |             ]),
 88 |             v: f64x4::from_array([
 89 |                 2.68067772490389322e-03 * DAYS_PER_YEAR,
 90 |                 1.62824170038242295e-03 * DAYS_PER_YEAR,
 91 |                 -9.51592254519715870e-05 * DAYS_PER_YEAR,
 92 |                 0.,
 93 |             ]),
 94 |             mass: 5.15138902046611451e-05 * SOLAR_MASS,
 95 |         },
 96 |     ];
 97 | 
 98 |     fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
 99 |         // The first entry is the sun; it absorbs the mass-weighted velocity of every other body.
100 |         let (head, planets) = bodies.split_at_mut(1);
101 |         let sun = &mut head[0];
102 |         for planet in planets {
103 |             sun.v -= planet.v * Simd::splat(planet.mass / SOLAR_MASS);
104 |         }
105 |     }
106 | 
107 |     fn energy(bodies: &[Body; N_BODIES]) -> f64 {
108 |         // Adds `m * |v|^2 / 2` per body and subtracts `m_i * m_j / distance` per unordered pair.
109 |         let mut total = 0.;
110 |         for (i, body) in bodies.iter().enumerate() {
111 |             total += body.mass * (body.v * body.v).reduce_sum() * 0.5;
112 |             for other in &bodies[i + 1..] {
113 |                 let delta = body.x - other.x;
114 |                 total -= body.mass * other.mass / (delta * delta).reduce_sum().sqrt();
115 |             }
116 |         }
117 |         total
118 |     }
119 | 
120 |     fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
121 |         // Number of unordered pairs of bodies.
122 |         const N: usize = N_BODIES * (N_BODIES - 1) / 2;
123 | 
124 |         // Position difference of every pair (j, k) with j < k, stored in iteration order.
125 |         let mut r = [f64x4::splat(0.); N];
126 |         let mut pair = 0;
127 |         for j in 0..N_BODIES {
128 |             for k in j + 1..N_BODIES {
129 |                 r[pair] = bodies[j].x - bodies[k].x;
130 |                 pair += 1;
131 |             }
132 |         }
133 | 
134 |         // Per-pair factor `dt / distance^3`, two pairs per step (indexing `i + 1` relies on `N` being even).
135 |         let mut mag = [0.0; N];
136 |         for i in (0..N).step_by(2) {
137 |             let d2s = f64x2::from_array([
138 |                 (r[i] * r[i]).reduce_sum(),
139 |                 (r[i + 1] * r[i + 1]).reduce_sum(),
140 |             ]);
141 |             let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
142 |             mag[i] = dmags[0];
143 |             mag[i + 1] = dmags[1];
144 |         }
145 | 
146 |         let mut pair = 0;
147 |         for j in 0..N_BODIES {
148 |             for k in j + 1..N_BODIES {
149 |                 let f = r[pair] * Simd::splat(mag[pair]);
150 |                 bodies[j].v -= f * Simd::splat(bodies[k].mass);
151 |                 bodies[k].v += f * Simd::splat(bodies[j].mass);
152 |                 pair += 1;
153 |             }
154 |         }
155 |         for body in bodies {
156 |             body.x += Simd::splat(dt) * body.v;
157 |         }
158 |     }
159 | 
160 |     /// Advances the system `n` times and returns its energy as `(before, after)`.
161 |     pub fn run(n: usize) -> (f64, f64) {
162 |         let mut system = BODIES;
163 |         offset_momentum(&mut system);
164 |         let initial = energy(&system);
165 |         // Every step uses the same fixed time step of 0.01.
166 |         (0..n).for_each(|_| advance(&mut system, 0.01));
167 |         let last = energy(&system);
168 | 
169 |         (initial, last)
170 |     }
171 | }
172 | 
173 | #[cfg(test)]
174 | mod tests {
175 |     // Absolute-tolerance comparison: good enough for demonstration purposes, not going for strictness here.
176 |     fn approx_eq_f64(a: f64, b: f64) -> bool {
177 |         (a - b).abs() < 0.00001
178 |     }
179 |     #[test]
180 |     fn test() {
181 |         const OUTPUT: [f64; 2] = [-0.169075164, -0.169087605]; // expected energy before / after the run
182 |         let (energy_before, energy_after) = super::nbody::run(1000);
183 |         assert!(approx_eq_f64(energy_before, OUTPUT[0]));
184 |         assert!(approx_eq_f64(energy_after, OUTPUT[1]));
185 |     }
186 | }
187 | 
188 | fn main() {
189 |     // Run 1000 steps and print the energy measured before and after them.
190 |     let (energy_before, energy_after) = nbody::run(1000);
191 | 
192 |     println!("Energy before: {energy_before}");
193 |     println!("Energy after:  {energy_after}");
194 | }
195 | 


--------------------------------------------------------------------------------
/crates/core_simd/examples/spectral_norm.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | 
 3 | use core_simd::simd::prelude::*;
 4 | 
 5 | fn a(i: usize, j: usize) -> f64 {
 6 |     (i + 1 + (i + j) * (i + j + 1) / 2) as f64 // the callers in this file divide by this value
 7 | }
 8 | 
 9 | /// Computes `out[i] = sum over j of v[j] / a(i, j)`, two values of `j` per SIMD step.
10 | fn mult_av(v: &[f64], out: &mut [f64]) {
11 |     assert!(v.len() == out.len());
12 |     assert!(v.len() % 2 == 0);
13 | 
14 |     for (row, slot) in out.iter_mut().enumerate() {
15 |         let mut acc = f64x2::splat(0.0);
16 | 
17 |         // The even length asserted above means `v[col..]` always holds at least two elements.
18 |         for col in (0..v.len()).step_by(2) {
19 |             let numer = f64x2::from_slice(&v[col..]);
20 |             let denom = f64x2::from_array([a(row, col), a(row, col + 1)]);
21 |             acc += numer / denom;
22 |         }
23 |         *slot = acc.reduce_sum();
24 |     }
25 | }
26 | 
27 | /// Computes `out[i] = sum over j of v[j] / a(j, i)`, two values of `j` per SIMD step.
28 | fn mult_atv(v: &[f64], out: &mut [f64]) {
29 |     assert!(v.len() == out.len());
30 |     assert!(v.len() % 2 == 0);
31 | 
32 |     for (col, slot) in out.iter_mut().enumerate() {
33 |         let mut acc = f64x2::splat(0.0);
34 | 
35 |         // The even length asserted above means `v[row..]` always holds at least two elements.
36 |         for row in (0..v.len()).step_by(2) {
37 |             let numer = f64x2::from_slice(&v[row..]);
38 |             let denom = f64x2::from_array([a(row, col), a(row + 1, col)]);
39 |             acc += numer / denom;
40 |         }
41 |         *slot = acc.reduce_sum();
42 |     }
43 | }
44 | 
45 | fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
46 |     mult_av(v, tmp); // tmp = result of `mult_av` applied to `v`
47 |     mult_atv(tmp, out); // out = result of `mult_atv` applied to that intermediate
48 | }
49 | 
50 | /// Returns `sqrt((u . v) / (v . v))` after 10 rounds of alternating `mult_atav` updates; `n` must be even.
51 | pub fn spectral_norm(n: usize) -> f64 {
52 |     assert!(n % 2 == 0, "only even lengths are accepted");
53 |     let mut u = vec![1.0; n];
54 |     let mut v = vec![1.0; n];
55 |     let mut scratch = vec![1.0; n];
56 |     // Each round maps `u` into `v` and then `v` back into `u`.
57 |     for _ in 0..10 {
58 |         mult_atav(&u, &mut v, &mut scratch);
59 |         mult_atav(&v, &mut u, &mut scratch);
60 |     }
61 |     (dot(&u, &v) / dot(&v, &v)).sqrt()
62 | }
63 | 
64 | fn dot(x: &[f64], y: &[f64]) -> f64 {
65 |     // Plain scalar code, left to the compiler's auto-vectorization:
66 |     std::iter::zip(x, y).map(|(lhs, rhs)| lhs * rhs).sum()
67 | }
68 | 
69 | // Pins the result for n = 100 to 9 decimal places.
70 | #[test]
71 | fn test() {
72 |     assert_eq!(format!("{:.9}", spectral_norm(100)), "1.274219991");
73 | }
74 | 
75 | fn main() {
76 |     // Intentionally empty: cargo needs a `main` here; the code is exercised by the `#[test]` above.
77 | }
78 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/alias.rs:
--------------------------------------------------------------------------------
  1 | macro_rules! number { // turns an element count into the text used in the generated alias docs
  2 |     { 1 } => { "one" }; // 1, 2, 4 and 8 are spelled out as words
  3 |     { 2 } => { "two" };
  4 |     { 4 } => { "four" };
  5 |     { 8 } => { "eight" };
  6 |     { $x:literal } => { stringify!($x) }; // any other count is rendered as its literal digits
  7 | }
  8 | 
  9 | macro_rules! plural { // suffix appended to the word "element" in the generated alias docs
 10 |     { 1 } => { "" }; // singular: "one element"
 11 |     { $x:literal } => { "s" }; // any other count: "... elements"
 12 | }
 13 | 
 14 | macro_rules! alias { // emits one documented `pub type` alias of `Simd` per `name count` pair
 15 |     {
 16 |         $(
 17 |             $element_ty:ty = {
 18 |                 $($alias:ident $num_elements:tt)* // e.g. `f32x4 4`
 19 |             }
 20 |         )*
 21 |     } => {
 22 |         $(
 23 |             $(
 24 |             #[doc = concat!("A SIMD vector with ", number!($num_elements), " element", plural!($num_elements), " of type [`", stringify!($element_ty), "`].")]
 25 |             #[allow(non_camel_case_types)] // alias names are lowercase, like the element types
 26 |             pub type $alias = $crate::simd::Simd<$element_ty, $num_elements>;
 27 |             )*
 28 |         )*
 29 |     }
 30 | }
 31 | 
 32 | macro_rules! mask_alias { // emits one documented `pub type` alias of `Mask` per `name count` pair
 33 |     {
 34 |         $(
 35 |             $element_ty:ty : $size:literal = { // `$size` is the description used in the docs, e.g. "8-bit"
 36 |                 $($alias:ident $num_elements:tt)* // e.g. `mask8x16 16`
 37 |             }
 38 |         )*
 39 |     } => {
 40 |         $(
 41 |             $(
 42 |             #[doc = concat!("A SIMD mask with ", number!($num_elements), " element", plural!($num_elements), " for vectors with ", $size, " element types.")]
 43 |             ///
 44 |             #[doc = concat!(
 45 |                 "The layout of this type is unspecified, and may change between platforms and/or Rust versions, and code should not assume that it is equivalent to `[",
 46 |                 stringify!($element_ty), "; ", $num_elements, "]`."
 47 |             )]
 48 |             #[allow(non_camel_case_types)] // alias names are lowercase
 49 |             pub type $alias = $crate::simd::Mask<$element_ty, $num_elements>;
 50 |             )*
 51 |         )*
 52 |     }
 53 | }
 54 | 
 55 | alias! {
 56 |     i8 = {
 57 |         i8x1 1
 58 |         i8x2 2
 59 |         i8x4 4
 60 |         i8x8 8
 61 |         i8x16 16
 62 |         i8x32 32
 63 |         i8x64 64
 64 |     }
 65 | 
 66 |     i16 = {
 67 |         i16x1 1
 68 |         i16x2 2
 69 |         i16x4 4
 70 |         i16x8 8
 71 |         i16x16 16
 72 |         i16x32 32
 73 |         i16x64 64
 74 |     }
 75 | 
 76 |     i32 = {
 77 |         i32x1 1
 78 |         i32x2 2
 79 |         i32x4 4
 80 |         i32x8 8
 81 |         i32x16 16
 82 |         i32x32 32
 83 |         i32x64 64
 84 |     }
 85 | 
 86 |     i64 = {
 87 |         i64x1 1
 88 |         i64x2 2
 89 |         i64x4 4
 90 |         i64x8 8
 91 |         i64x16 16
 92 |         i64x32 32
 93 |         i64x64 64
 94 |     }
 95 | 
 96 |     isize = {
 97 |         isizex1 1
 98 |         isizex2 2
 99 |         isizex4 4
100 |         isizex8 8
101 |         isizex16 16
102 |         isizex32 32
103 |         isizex64 64
104 |     }
105 | 
106 |     u8 = {
107 |         u8x1 1
108 |         u8x2 2
109 |         u8x4 4
110 |         u8x8 8
111 |         u8x16 16
112 |         u8x32 32
113 |         u8x64 64
114 |     }
115 | 
116 |     u16 = {
117 |         u16x1 1
118 |         u16x2 2
119 |         u16x4 4
120 |         u16x8 8
121 |         u16x16 16
122 |         u16x32 32
123 |         u16x64 64
124 |     }
125 | 
126 |     u32 = {
127 |         u32x1 1
128 |         u32x2 2
129 |         u32x4 4
130 |         u32x8 8
131 |         u32x16 16
132 |         u32x32 32
133 |         u32x64 64
134 |     }
135 | 
136 |     u64 = {
137 |         u64x1 1
138 |         u64x2 2
139 |         u64x4 4
140 |         u64x8 8
141 |         u64x16 16
142 |         u64x32 32
143 |         u64x64 64
144 |     }
145 | 
146 |     usize = {
147 |         usizex1 1
148 |         usizex2 2
149 |         usizex4 4
150 |         usizex8 8
151 |         usizex16 16
152 |         usizex32 32
153 |         usizex64 64
154 |     }
155 | 
156 |     f32 = {
157 |         f32x1 1
158 |         f32x2 2
159 |         f32x4 4
160 |         f32x8 8
161 |         f32x16 16
162 |         f32x32 32
163 |         f32x64 64
164 |     }
165 | 
166 |     f64 = {
167 |         f64x1 1
168 |         f64x2 2
169 |         f64x4 4
170 |         f64x8 8
171 |         f64x16 16
172 |         f64x32 32
173 |         f64x64 64
174 |     }
175 | }
176 | 
177 | mask_alias! {
178 |     i8 : "8-bit" = {
179 |         mask8x1 1
180 |         mask8x2 2
181 |         mask8x4 4
182 |         mask8x8 8
183 |         mask8x16 16
184 |         mask8x32 32
185 |         mask8x64 64
186 |     }
187 | 
188 |     i16 : "16-bit" = {
189 |         mask16x1 1
190 |         mask16x2 2
191 |         mask16x4 4
192 |         mask16x8 8
193 |         mask16x16 16
194 |         mask16x32 32
195 |         mask16x64 64
196 |     }
197 | 
198 |     i32 : "32-bit" = {
199 |         mask32x1 1
200 |         mask32x2 2
201 |         mask32x4 4
202 |         mask32x8 8
203 |         mask32x16 16
204 |         mask32x32 32
205 |         mask32x64 64
206 |     }
207 | 
208 |     i64 : "64-bit" = {
209 |         mask64x1 1
210 |         mask64x2 2
211 |         mask64x4 4
212 |         mask64x8 8
213 |         mask64x16 16
214 |         mask64x32 32
215 |         mask64x64 64
216 |     }
217 | 
218 |     isize : "pointer-sized" = {
219 |         masksizex1 1
220 |         masksizex2 2
221 |         masksizex4 4
222 |         masksizex8 8
223 |         masksizex16 16
224 |         masksizex32 32
225 |         masksizex64 64
226 |     }
227 | }
228 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/cast.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::SimdElement;
 2 | 
 3 | mod sealed {
 4 |     /// Cast vector elements to other types.
 5 |     ///
 6 |     /// # Safety
 7 |     /// Implementing this trait asserts that the type is a valid vector element for the `simd_cast`
 8 |     /// or `simd_as` intrinsics.
 9 |     pub unsafe trait Sealed {}
10 | }
11 | use sealed::Sealed;
12 | 
13 | /// Supporting trait for `Simd::cast`, sealed through the private `sealed::Sealed` supertrait. Typically doesn't need to be used directly.
14 | pub trait SimdCast: Sealed + SimdElement {}
15 | 
16 | // Generates the `Sealed` + `SimdCast` impl pair for every listed primitive number type.
17 | macro_rules! impl_simd_cast {
18 |     { $($number:ty),+ $(,)? } => {
19 |         $(
20 |             // Safety: primitive number types can be cast to other primitive number types
21 |             unsafe impl Sealed for $number {}
22 |             impl SimdCast for $number {}
23 |         )+
24 |     };
25 | }
26 | 
27 | impl_simd_cast! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, f32, f64 }
28 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/core_simd_docs.md:
--------------------------------------------------------------------------------
 1 | Portable SIMD module.
 2 | 
 3 | This module offers a portable abstraction for SIMD operations
 4 | that is not bound to any particular hardware architecture.
 5 | 
 6 | # What is "portable"?
 7 | 
 8 | This module provides a SIMD implementation that is fast and predictable on any target.
 9 | 
10 | ### Portable SIMD works on every target
11 | 
12 | Unlike target-specific SIMD in `std::arch`, portable SIMD compiles for every target.
13 | In this regard, it is just like "regular" Rust.
14 | 
15 | ### Portable SIMD is consistent between targets
16 | 
17 | A program using portable SIMD can expect identical behavior on any target.
18 | In most regards, [`Simd<T, N>`] can be thought of as a parallelized `[T; N]` and operates like a sequence of `T`.
19 | 
20 | This has one notable exception: a handful of older architectures (e.g. `armv7` and `powerpc`) flush [subnormal](`f32::is_subnormal`) `f32` values to zero.
21 | On these architectures, subnormal `f32` input values are replaced with zeros, and any operation producing subnormal `f32` values produces zeros instead.
22 | This doesn't affect most architectures or programs.
23 | 
24 | ### Operations use the best instructions available
25 | 
26 | Operations provided by this module compile to the best available SIMD instructions.
27 | 
28 | Portable SIMD is not a low-level vendor library, and operations in portable SIMD _do not_ necessarily map to a single instruction.
29 | Instead, they map to a reasonable implementation of the operation for the target.
30 | 
31 | Consistency between targets is not compromised to use faster or fewer instructions.
32 | In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent.
33 | For example, `_mm_min_ps`[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`].
34 | When necessary, [`Simd<T, N>`] can be converted to the types provided by `std::arch` to make use of target-specific functions.
35 | 
36 | Many targets simply don't have SIMD, or don't support SIMD for a particular element type.
37 | In those cases, regular scalar operations are generated instead.
38 | 
39 | [^1]: `_mm_min_ps(x, y)` is equivalent to `x.simd_lt(y).select(x, y)`
40 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/fmt.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 2 | use core::fmt;
 3 | 
 4 | impl<T, const N: usize> fmt::Debug for Simd<T, N>
 5 | where
 6 |     LaneCount<N>: SupportedLaneCount,
 7 |     T: SimdElement + fmt::Debug,
 8 | {
 9 |     /// Formats a `Simd<T, N>` exactly like the slice `[T]` of its lanes:
10 |     /// ```
11 |     /// # #![feature(portable_simd)]
12 |     /// # #[cfg(feature = "as_crate")] use core_simd::simd::Simd;
13 |     /// # #[cfg(not(feature = "as_crate"))] use core::simd::Simd;
14 |     /// let floats = Simd::<f32, 4>::splat(-1.0);
15 |     /// assert_eq!(format!("{:?}", [-1.0; 4]), format!("{:?}", floats));
16 |     /// ```
17 |     #[inline]
18 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
19 |         fmt::Debug::fmt(self.as_array().as_slice(), f)
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/iter.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::{LaneCount, Simd, SupportedLaneCount};
 2 | use core::{
 3 |     iter::{Product, Sum},
 4 |     ops::{Add, Mul},
 5 | };
 6 | 
 7 | macro_rules! impl_traits {
 8 |     { $($elem:ty),+ $(,)? } => {
 9 |         $(
10 |             // Lane-wise sum of owned vectors, starting from an all-zero vector.
11 |             impl<const N: usize> Sum<Self> for Simd<$elem, N>
12 |             where
13 |                 LaneCount<N>: SupportedLaneCount,
14 |             {
15 |                 #[inline]
16 |                 fn sum<It: Iterator<Item = Self>>(iter: It) -> Self {
17 |                     iter.fold(Self::splat(0 as $elem), Add::add)
18 |                 }
19 |             }
20 | 
21 |             // Lane-wise product of owned vectors, starting from an all-one vector.
22 |             impl<const N: usize> Product<Self> for Simd<$elem, N>
23 |             where
24 |                 LaneCount<N>: SupportedLaneCount,
25 |             {
26 |                 #[inline]
27 |                 fn product<It: Iterator<Item = Self>>(iter: It) -> Self {
28 |                     iter.fold(Self::splat(1 as $elem), Mul::mul)
29 |                 }
30 |             }
31 | 
32 |             // Same as the owned `Sum`, but consuming borrowed vectors.
33 |             impl<'a, const N: usize> Sum<&'a Self> for Simd<$elem, N>
34 |             where
35 |                 LaneCount<N>: SupportedLaneCount,
36 |             {
37 |                 #[inline]
38 |                 fn sum<It: Iterator<Item = &'a Self>>(iter: It) -> Self {
39 |                     iter.fold(Self::splat(0 as $elem), Add::add)
40 |                 }
41 |             }
42 | 
43 |             // Same as the owned `Product`, but consuming borrowed vectors.
44 |             impl<'a, const N: usize> Product<&'a Self> for Simd<$elem, N>
45 |             where
46 |                 LaneCount<N>: SupportedLaneCount,
47 |             {
48 |                 #[inline]
49 |                 fn product<It: Iterator<Item = &'a Self>>(iter: It) -> Self {
50 |                     iter.fold(Self::splat(1 as $elem), Mul::mul)
51 |                 }
52 |             }
53 |         )+
54 |     };
55 | }
56 | 
57 | impl_traits! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
58 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/lane_count.rs:
--------------------------------------------------------------------------------
 1 | mod sealed { // private module: `Sealed` cannot be named from outside this file
 2 |     pub trait Sealed {}
 3 | }
 4 | use sealed::Sealed;
 5 | 
 6 | /// Specifies the number of lanes in a SIMD vector as a type.
 7 | pub struct LaneCount<const N: usize>;
 8 | 
 9 | impl<const N: usize> LaneCount<N> {
10 |     /// The number of bytes in a bitmask with this many lanes (`N / 8` rounded up, without overflow).
11 |     pub const BITMASK_LEN: usize = N.div_ceil(8);
12 | }
13 | 
14 | /// Statically guarantees that a lane count is marked as supported.
15 | ///
16 | /// This trait is *sealed*: the implementors listed in this file are the complete set.
17 | /// Users do not have the ability to mark additional `LaneCount<N>` values as supported.
18 | /// Only SIMD vectors with supported lane counts are constructable.
19 | pub trait SupportedLaneCount: Sealed {
20 |     #[doc(hidden)]
21 |     type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; // byte storage viewed as `[u8]`; the impls in this file use a `[u8; _]` array
22 | }
23 | 
24 | impl<const N: usize> Sealed for LaneCount<N> {} // every `LaneCount<N>` is `Sealed`; support is gated by the `SupportedLaneCount` impls
25 | 
26 | macro_rules! supported_lane_count {
27 |     ($($n:literal),+) => {
28 |         $(
29 |             impl SupportedLaneCount for LaneCount<$n> {
30 |                 type BitMask = [u8; LaneCount::<$n>::BITMASK_LEN];
31 |             }
32 |         )+
33 |     };
34 | }
35 | 
36 | supported_lane_count!(
37 |     1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
38 |     27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
39 |     51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
40 | );
41 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/lib.rs:
--------------------------------------------------------------------------------
 1 | #![no_std] // the crate builds without `std`
 2 | #![feature( // unstable language/library features this crate opts into
 3 |     const_eval_select,
 4 |     convert_float_to_int,
 5 |     core_intrinsics,
 6 |     decl_macro,
 7 |     intra_doc_pointers,
 8 |     repr_simd,
 9 |     simd_ffi,
10 |     staged_api,
11 |     prelude_import,
12 |     ptr_metadata
13 | )]
14 | #![cfg_attr( // ARM-family targets with `v6` (non-`mclass`) or `mclass` + `dsp` additionally enable `stdarch_arm_dsp`
15 |     all(
16 |         any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",),
17 |         any(
18 |             all(target_feature = "v6", not(target_feature = "mclass")),
19 |             all(target_feature = "mclass", target_feature = "dsp"),
20 |         )
21 |     ),
22 |     feature(stdarch_arm_dsp)
23 | )]
24 | #![cfg_attr( // 32-bit ARM with `v7` enables `stdarch_arm_neon_intrinsics`
25 |     all(target_arch = "arm", target_feature = "v7"),
26 |     feature(stdarch_arm_neon_intrinsics)
27 | )]
28 | #![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
29 | #![cfg_attr( // both PowerPC widths enable `stdarch_powerpc`
30 |     any(target_arch = "powerpc", target_arch = "powerpc64"),
31 |     feature(stdarch_powerpc)
32 | )]
33 | #![cfg_attr( // x86_64 built with `avx512f` enables `stdarch_x86_avx512`
34 |     all(target_arch = "x86_64", target_feature = "avx512f"),
35 |     feature(stdarch_x86_avx512)
36 | )]
37 | #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
38 | #![deny( // hard errors: unsafe hygiene and accidental `pub` on unreachable items
39 |     unsafe_op_in_unsafe_fn,
40 |     unreachable_pub,
41 |     clippy::undocumented_unsafe_blocks
42 | )]
43 | #![doc(test(attr(deny(warnings))))] // doctests are compiled with warnings denied
44 | #![allow(internal_features)]
45 | #![unstable(feature = "portable_simd", issue = "86656")]
46 | //! Portable SIMD module.
47 | 
48 | #[path = "mod.rs"] // the `core_simd` module body is loaded from `mod.rs`
49 | mod core_simd;
50 | pub use self::core_simd::simd; // the public surface of the crate is the `simd` module


--------------------------------------------------------------------------------
/crates/core_simd/src/masks/bitmask.rs:
--------------------------------------------------------------------------------
  1 | #![allow(unused_imports)]
  2 | use super::MaskElement;
  3 | use crate::simd::{LaneCount, Simd, SupportedLaneCount};
  4 | use core::marker::PhantomData;
  5 | 
  6 | /// A mask where each lane is represented by a single bit, packed into a byte array.
  7 | #[repr(transparent)]
  8 | pub(crate) struct Mask<T, const N: usize>(
  9 |     <LaneCount<N> as SupportedLaneCount>::BitMask, // the methods below address lane `i` as byte `i / 8`, bit `i % 8`
 10 |     PhantomData<T>, // records the element type without storing a `T`
 11 | )
 12 | where
 13 |     T: MaskElement,
 14 |     LaneCount<N>: SupportedLaneCount;
 15 | 
 16 | impl<T, const N: usize> Copy for Mask<T, N>
 17 | where
 18 |     T: MaskElement,
 19 |     LaneCount<N>: SupportedLaneCount,
 20 | { // marker impl: `SupportedLaneCount::BitMask` is itself bounded by `Copy`
 21 | }
 22 | 
 23 | impl<T, const N: usize> Clone for Mask<T, N>
 24 | where
 25 |     T: MaskElement,
 26 |     LaneCount<N>: SupportedLaneCount,
 27 | {
 28 |     #[inline]
 29 |     fn clone(&self) -> Self {
 30 |         *self // `Mask` is `Copy`, so cloning is a plain copy
 31 |     }
 32 | }
 33 | 
 34 | impl<T, const N: usize> PartialEq for Mask<T, N>
 35 | where
 36 |     T: MaskElement,
 37 |     LaneCount<N>: SupportedLaneCount,
 38 | {
 39 |     #[inline]
 40 |     fn eq(&self, rhs: &Self) -> bool {
 41 |         self.0.as_ref().eq(rhs.0.as_ref()) // byte-wise comparison of the packed bits
 42 |     }
 43 | }
 44 | 
 45 | impl<T, const N: usize> PartialOrd for Mask<T, N>
 46 | where
 47 |     T: MaskElement,
 48 |     LaneCount<N>: SupportedLaneCount,
 49 | {
 50 |     #[inline]
 51 |     fn partial_cmp(&self, rhs: &Self) -> Option<core::cmp::Ordering> {
 52 |         Some(self.cmp(rhs)) // byte slices are totally ordered, so defer to `Ord`
 53 |     }
 54 | }
 55 | 
 56 | impl<T, const N: usize> Eq for Mask<T, N>
 57 | where
 58 |     T: MaskElement,
 59 |     LaneCount<N>: SupportedLaneCount,
 60 | { // marker impl: equality is the byte-slice comparison from `PartialEq`
 61 | }
 62 | 
 63 | impl<T, const N: usize> Ord for Mask<T, N>
 64 | where
 65 |     T: MaskElement,
 66 |     LaneCount<N>: SupportedLaneCount,
 67 | {
 68 |     #[inline]
 69 |     fn cmp(&self, rhs: &Self) -> core::cmp::Ordering {
 70 |         Ord::cmp(self.0.as_ref(), rhs.0.as_ref()) // lexicographic over the packed bytes
 71 |     }
 72 | }
 73 | 
 74 | impl<T, const N: usize> Mask<T, N>
 75 | where
 76 |     T: MaskElement,
 77 |     LaneCount<N>: SupportedLaneCount,
 78 | {
 79 |     /// Builds a mask with every lane set to `value`; padding bits in the last byte stay clear.
 80 |     #[inline]
 81 |     #[must_use = "method returns a new mask and does not mutate the original value"]
 82 |     pub(crate) fn splat(value: bool) -> Self {
 83 |         let mut bits = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
 84 |         bits.as_mut().fill(if value { u8::MAX } else { u8::MIN });
 85 |         // Zero the bits above lane `N - 1` when `N` is not a multiple of 8.
 86 |         if N % 8 > 0 {
 87 |             *bits.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
 88 |         }
 89 |         Self(bits, PhantomData)
 90 |     }
 91 | 
 92 |     /// Reads the bit of `lane` (byte `lane / 8`, bit `lane % 8`); the byte index is bounds-checked.
 93 |     #[inline]
 94 |     #[must_use = "method returns a new bool and does not mutate the original value"]
 95 |     pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool {
 96 |         let byte = self.0.as_ref()[lane / 8];
 97 |         (byte >> (lane % 8)) & 0x1 > 0
 98 |     }
 99 | 
100 |     #[inline]
101 |     pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
102 |         // Safety: the caller's obligations for this function are forwarded to `test_unchecked`.
103 |         let flip = (value ^ unsafe { self.test_unchecked(lane) }) as u8;
104 |         self.0.as_mut()[lane / 8] ^= flip << (lane % 8)
105 |     }
106 | 
107 |     #[inline]
108 |     #[must_use = "method returns a new vector and does not mutate the original value"]
109 |     pub(crate) fn to_int(self) -> Simd<T, N> {
110 |         let (on, off) = (Simd::splat(T::TRUE), Simd::splat(T::FALSE));
111 |         // Safety: `self.0` is the `BitMask` for `N` lanes and both choices are `Simd<T, N>`.
112 |         unsafe { core::intrinsics::simd::simd_select_bitmask(self.0, on, off) }
113 |     }
114 | 
115 |     /// Packs an integer mask vector into one bit per lane.
116 |     ///
117 |     /// # Safety
118 |     /// NOTE(review): presumably every lane of `value` must be `T::TRUE` or `T::FALSE` — confirm.
119 |     #[inline]
120 |     #[must_use = "method returns a new mask and does not mutate the original value"]
121 |     pub(crate) unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
122 |         // Safety: the caller upholds this function's contract for `value`.
123 |         let packed = unsafe { core::intrinsics::simd::simd_bitmask(value) };
124 |         Self(packed, PhantomData)
125 |     }
126 | 
127 |     #[inline]
128 |     pub(crate) fn to_bitmask_integer(self) -> u64 {
129 |         let mut wide = [0u8; 8];
130 |         wide[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
131 |         u64::from_ne_bytes(wide)
132 |     }
133 | 
134 |     #[inline]
135 |     pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
136 |         let mut packed = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
137 |         let dst = packed.as_mut();
138 |         dst.copy_from_slice(&bitmask.to_ne_bytes()[..dst.len()]);
139 |         Self(packed, PhantomData)
140 |     }
141 | 
142 |     #[inline]
143 |     #[must_use = "method returns a new mask and does not mutate the original value"]
144 |     pub(crate) fn convert<U>(self) -> Mask<U, N>
145 |     where
146 |         U: MaskElement,
147 |     {
148 |         // The bit storage does not depend on the element type, so only the marker changes.
149 |         Mask(self.0, PhantomData)
150 |     }
151 | 
152 |     #[inline]
153 |     #[must_use = "method returns a new bool and does not mutate the original value"]
154 |     pub(crate) fn any(self) -> bool {
155 |         self.0.as_ref().iter().any(|&byte| byte != 0)
156 |     }
157 | 
158 |     #[inline]
159 |     #[must_use = "method returns a new bool and does not mutate the original value"]
160 |     pub(crate) fn all(self) -> bool {
161 |         self.0.as_ref() == Self::splat(true).0.as_ref()
162 |     }
163 | }
164 | 
165 | impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
166 | where
167 |     T: MaskElement,
168 |     LaneCount<N>: SupportedLaneCount,
169 |     <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
170 | {
171 |     type Output = Self;
172 |     #[inline]
173 |     #[must_use = "method returns a new mask and does not mutate the original value"]
174 |     fn bitand(mut self, rhs: Self) -> Self {
175 |         let bytes = self.0.as_mut().iter_mut();
176 |         // AND the packed bytes pairwise, writing into `self`.
177 |         bytes.zip(rhs.0.as_ref()).for_each(|(dst, src)| *dst &= src);
178 |         self
179 |     }
180 | }
181 | 
182 | impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
183 | where
184 |     T: MaskElement,
185 |     LaneCount<N>: SupportedLaneCount,
186 |     <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
187 | {
188 |     type Output = Self;
189 |     #[inline]
190 |     #[must_use = "method returns a new mask and does not mutate the original value"]
191 |     fn bitor(mut self, rhs: Self) -> Self {
192 |         let bytes = self.0.as_mut().iter_mut();
193 |         // OR the packed bytes pairwise, writing into `self`.
194 |         bytes.zip(rhs.0.as_ref()).for_each(|(dst, src)| *dst |= src);
195 |         self
196 |     }
197 | }
198 | 
199 | impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
200 | where
201 |     T: MaskElement,
202 |     LaneCount<N>: SupportedLaneCount,
203 | {
204 |     type Output = Self;
205 |     #[inline]
206 |     #[must_use = "method returns a new mask and does not mutate the original value"]
207 |     fn bitxor(mut self, rhs: Self) -> Self::Output {
208 |         let bytes = self.0.as_mut().iter_mut();
209 |         // XOR the packed bytes pairwise, writing into `self`.
210 |         bytes.zip(rhs.0.as_ref()).for_each(|(dst, src)| *dst ^= src);
211 |         self
212 |     }
213 | }
214 | 
215 | impl<T, const N: usize> core::ops::Not for Mask<T, N>
216 | where
217 |     T: MaskElement,
218 |     LaneCount<N>: SupportedLaneCount,
219 | {
220 |     type Output = Self;
221 |     #[inline]
222 |     #[must_use = "method returns a new mask and does not mutate the original value"]
223 |     fn not(mut self) -> Self::Output {
224 |         self.0.as_mut().iter_mut().for_each(|byte| *byte = !*byte);
225 |         // Inverting also sets the padding bits above lane `N - 1`; clear them again.
226 |         let tail = N % 8;
227 |         if tail > 0 {
228 |             *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - tail);
229 |         }
230 |         self
231 |     }
232 | }
233 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/masks/full_masks.rs:
--------------------------------------------------------------------------------
  1 | //! Masks that take up full SIMD vector registers.
  2 | 
  3 | use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};
  4 | 
  5 | #[repr(transparent)] // same layout as the wrapped `Simd<T, N>`
  6 | pub(crate) struct Mask<T, const N: usize>(Simd<T, N>) // each lane holds `T::TRUE` or `T::FALSE` (see `splat`)
  7 | where
  8 |     T: MaskElement,
  9 |     LaneCount<N>: SupportedLaneCount;
 10 | 
 11 | impl<T, const N: usize> Copy for Mask<T, N>
 12 | where
 13 |     T: MaskElement,
 14 |     LaneCount<N>: SupportedLaneCount,
 15 | { // marker impl: copying a mask copies the wrapped vector
 16 | }
 17 | 
 18 | impl<T, const N: usize> Clone for Mask<T, N>
 19 | where
 20 |     T: MaskElement,
 21 |     LaneCount<N>: SupportedLaneCount,
 22 | {
 23 |     #[inline]
 24 |     fn clone(&self) -> Self {
 25 |         *self // `Mask` is `Copy`, so cloning is a plain copy
 26 |     }
 27 | }
 28 | 
 29 | impl<T, const N: usize> PartialEq for Mask<T, N>
 30 | where
 31 |     T: MaskElement + PartialEq,
 32 |     LaneCount<N>: SupportedLaneCount,
 33 | {
 34 |     #[inline]
 35 |     fn eq(&self, rhs: &Self) -> bool {
 36 |         self.0 == rhs.0 // delegate to the wrapped vector's equality
 37 |     }
 38 | }
 39 | 
 40 | impl<T, const N: usize> PartialOrd for Mask<T, N>
 41 | where
 42 |     T: MaskElement + PartialOrd,
 43 |     LaneCount<N>: SupportedLaneCount,
 44 | {
 45 |     #[inline]
 46 |     fn partial_cmp(&self, rhs: &Self) -> Option<core::cmp::Ordering> {
 47 |         PartialOrd::partial_cmp(&self.0, &rhs.0) // delegate to the wrapped vector's ordering
 48 |     }
 49 | }
 50 | 
 51 | impl<T, const N: usize> Eq for Mask<T, N>
 52 | where
 53 |     T: MaskElement + Eq,
 54 |     LaneCount<N>: SupportedLaneCount,
 55 | { // marker impl: equality is the wrapped vector's `PartialEq`
 56 | }
 57 | 
 58 | impl<T, const N: usize> Ord for Mask<T, N>
 59 | where
 60 |     T: MaskElement + Ord,
 61 |     LaneCount<N>: SupportedLaneCount,
 62 | {
 63 |     #[inline]
 64 |     fn cmp(&self, rhs: &Self) -> core::cmp::Ordering {
 65 |         Ord::cmp(&self.0, &rhs.0) // delegate to the wrapped vector's total order
 66 |     }
 67 | }
 68 | 
 69 | // Helper for the big-endian bit-order workaround in the bitmask conversions.
 70 | pub(crate) trait ReverseBits {
 71 |     // Mirror the lowest `n` bits of `self`, leaving the result in the lowest `n` bits.
 72 |     // (All bits above the lowest `n` must be 0 on input.)
 73 |     fn reverse_bits(self, n: usize) -> Self;
 74 | }
 75 | 
 76 | macro_rules! impl_reverse_bits {
 77 |     { $($uint:ty),* } => {
 78 |         $(
 79 |         impl ReverseBits for $uint {
 80 |             #[inline(always)]
 81 |             fn reverse_bits(self, n: usize) -> Self {
 82 |                 let width = <$uint>::BITS as usize;
 83 |                 let flipped = <$uint>::reverse_bits(self);
 84 |                 // A full-width reversal leaves the `n` interesting bits at the top;
 85 |                 // shift them back down unless they already fill the whole integer.
 86 |                 if n >= width {
 87 |                     return flipped;
 88 |                 }
 89 |                 flipped >> (width - n)
 90 |             }
 91 |         }
 92 |         )*
 93 |     }
 94 | }
 95 | 
 96 | impl_reverse_bits! { u8, u16, u32, u64 }
 97 | 
 98 | impl<T, const N: usize> Mask<T, N>
 99 | where
100 |     T: MaskElement,
101 |     LaneCount<N>: SupportedLaneCount,
102 | {
103 |     #[inline]
104 |     #[must_use = "method returns a new mask and does not mutate the original value"]
105 |     pub(crate) fn splat(value: bool) -> Self {
106 |         Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) // every lane gets the same element
107 |     }
108 | 
109 |     #[inline]
110 |     #[must_use = "method returns a new bool and does not mutate the original value"]
111 |     pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool {
112 |         T::eq(self.0[lane], T::TRUE) // note: `self.0[lane]` goes through the bounds-checked `Index` impl
113 |     }
114 | 
115 |     #[inline]
116 |     pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
117 |         self.0[lane] = if value { T::TRUE } else { T::FALSE } // likewise bounds-checked via `IndexMut`
118 |     }
119 | 
120 |     #[inline]
121 |     #[must_use = "method returns a new vector and does not mutate the original value"]
122 |     pub(crate) fn to_int(self) -> Simd<T, N> {
123 |         self.0 // the mask already is the integer vector
124 |     }
125 | 
126 |     #[inline]
127 |     #[must_use = "method returns a new mask and does not mutate the original value"]
128 |     pub(crate) unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
129 |         Self(value) // no validation here; NOTE(review): presumably lanes must be `T::TRUE`/`T::FALSE` — confirm
130 |     }
131 | 
132 |     #[inline]
133 |     #[must_use = "method returns a new mask and does not mutate the original value"]
134 |     pub(crate) fn convert<U>(self) -> Mask<U, N>
135 |     where
136 |         U: MaskElement,
137 |     {
138 |         // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
139 |         unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) }
140 |     }
141 | 
142 |     #[inline]
143 |     unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
144 |     where
145 |         LaneCount<M>: SupportedLaneCount,
146 |     {
147 |         let resized = self.to_int().resize::<M>(T::FALSE); // presumably any added lanes are `T::FALSE` — see `Simd::resize`
148 | 
149 |         // Safety: `resized` is an integer vector with `M` lanes; callers pair `M` with a `U` of `M` bits
150 |         let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) };
151 | 
152 |         // LLVM assumes bit order should match endianness
153 |         if cfg!(target_endian = "big") {
154 |             bitmask.reverse_bits(M)
155 |         } else {
156 |             bitmask
157 |         }
158 |     }
159 | 
160 |     #[inline]
161 |     unsafe fn from_bitmask_impl<U: ReverseBits, const M: usize>(bitmask: U) -> Self
162 |     where
163 |         LaneCount<M>: SupportedLaneCount,
164 |     {
165 |         // LLVM assumes bit order should match endianness
166 |         let bitmask = if cfg!(target_endian = "big") {
167 |             bitmask.reverse_bits(M)
168 |         } else {
169 |             bitmask
170 |         };
171 | 
172 |         // SAFETY: `bitmask` is a `U`, which callers pair with a lane count `M` equal to its bit width
173 |         let mask: Simd<T, M> = unsafe {
174 |             core::intrinsics::simd::simd_select_bitmask(
175 |                 bitmask,
176 |                 Simd::<T, M>::splat(T::TRUE),
177 |                 Simd::<T, M>::splat(T::FALSE),
178 |             )
179 |         };
180 | 
181 |         // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
182 |         unsafe { Self::from_int_unchecked(mask.resize::<N>(T::FALSE)) }
183 |     }
184 | 
185 |     #[inline]
186 |     pub(crate) fn to_bitmask_integer(self) -> u64 {
187 |         // TODO modify simd_bitmask to zero-extend output, making this unnecessary
188 |         if N <= 8 { // pick the narrowest of u8/u16/u32/u64 that holds `N` bits, then widen to u64
189 |             // Safety: bitmask matches length
190 |             unsafe { self.to_bitmask_impl::<u8, 8>() as u64 }
191 |         } else if N <= 16 {
192 |             // Safety: bitmask matches length
193 |             unsafe { self.to_bitmask_impl::<u16, 16>() as u64 }
194 |         } else if N <= 32 {
195 |             // Safety: bitmask matches length
196 |             unsafe { self.to_bitmask_impl::<u32, 32>() as u64 }
197 |         } else {
198 |             // Safety: bitmask matches length
199 |             unsafe { self.to_bitmask_impl::<u64, 64>() }
200 |         }
201 |     }
202 | 
203 |     #[inline]
204 |     pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
205 |         // TODO modify simd_bitmask_select to truncate input, making this unnecessary
206 |         if N <= 8 { // same width selection as `to_bitmask_integer`; the `as` casts drop the unused high bits
207 |             // Safety: bitmask matches length
208 |             unsafe { Self::from_bitmask_impl::<u8, 8>(bitmask as u8) }
209 |         } else if N <= 16 {
210 |             // Safety: bitmask matches length
211 |             unsafe { Self::from_bitmask_impl::<u16, 16>(bitmask as u16) }
212 |         } else if N <= 32 {
213 |             // Safety: bitmask matches length
214 |             unsafe { Self::from_bitmask_impl::<u32, 32>(bitmask as u32) }
215 |         } else {
216 |             // Safety: bitmask matches length
217 |             unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) }
218 |         }
219 |     }
220 | 
221 |     #[inline]
222 |     #[must_use = "method returns a new bool and does not mutate the original value"]
223 |     pub(crate) fn any(self) -> bool {
224 |         // Safety: use `self` as an integer vector
225 |         unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) }
226 |     }
227 | 
228 |     #[inline]
229 |     #[must_use = "method returns a new bool and does not mutate the original value"]
230 |     pub(crate) fn all(self) -> bool {
231 |         // Safety: use `self` as an integer vector
232 |         unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }
233 |     }
234 | }
235 | 
236 | impl<T, const N: usize> From<Mask<T, N>> for Simd<T, N>
237 | where
238 |     T: MaskElement,
239 |     LaneCount<N>: SupportedLaneCount,
240 | {
241 |     #[inline]
242 |     fn from(mask: Mask<T, N>) -> Self {
243 |         mask.to_int() // unwraps the integer vector stored inside the mask
244 |     }
245 | }
246 | 
247 | impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
248 | where
249 |     T: MaskElement,
250 |     LaneCount<N>: SupportedLaneCount,
251 | {
252 |     type Output = Self;
253 |     #[inline]
254 |     fn bitand(self, other: Self) -> Self {
255 |         // Safety: both operands wrap integer vectors of the same type `Simd<T, N>`
256 |         Self(unsafe { core::intrinsics::simd::simd_and(self.0, other.0) })
257 |     }
258 | }
259 | 
260 | impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
261 | where
262 |     T: MaskElement,
263 |     LaneCount<N>: SupportedLaneCount,
264 | {
265 |     type Output = Self;
266 |     #[inline]
267 |     fn bitor(self, other: Self) -> Self {
268 |         // Safety: both operands wrap integer vectors of the same type `Simd<T, N>`
269 |         Self(unsafe { core::intrinsics::simd::simd_or(self.0, other.0) })
270 |     }
271 | }
272 | 
273 | impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
274 | where
275 |     T: MaskElement,
276 |     LaneCount<N>: SupportedLaneCount,
277 | {
278 |     type Output = Self;
279 |     #[inline]
280 |     fn bitxor(self, other: Self) -> Self {
281 |         // Safety: both operands wrap integer vectors of the same type `Simd<T, N>`
282 |         Self(unsafe { core::intrinsics::simd::simd_xor(self.0, other.0) })
283 |     }
284 | }
285 | 
286 | impl<T, const N: usize> core::ops::Not for Mask<T, N>
287 | where
288 |     T: MaskElement,
289 |     LaneCount<N>: SupportedLaneCount,
290 | {
291 |     type Output = Self;
292 |     #[inline]
293 |     fn not(self) -> Self::Output {
294 |         self ^ Self::splat(true) // XOR with an all-true mask flips every lane
295 |     }
296 | }
297 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/mod.rs:
--------------------------------------------------------------------------------
 1 | #[macro_use] // declared first so macros from `swizzle` are in scope for the modules below
 2 | mod swizzle;
 3 | 
 4 | mod alias;
 5 | mod cast;
 6 | mod fmt;
 7 | mod iter;
 8 | mod lane_count;
 9 | mod masks;
10 | mod ops;
11 | mod select;
12 | mod swizzle_dyn;
13 | mod to_bytes;
14 | mod vector;
15 | mod vendor;
16 | 
17 | pub mod simd { // the public module; its docs come from `core_simd_docs.md`
18 |     #![doc = include_str!("core_simd_docs.md")]
19 | 
20 |     pub mod prelude;
21 | 
22 |     pub mod num;
23 | 
24 |     pub mod ptr;
25 | 
26 |     pub mod cmp;
27 | 
28 |     pub use crate::core_simd::alias::*; // re-export items from the private modules declared above
29 |     pub use crate::core_simd::cast::*;
30 |     pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
31 |     pub use crate::core_simd::masks::*;
32 |     pub use crate::core_simd::swizzle::*;
33 |     pub use crate::core_simd::to_bytes::ToBytes;
34 |     pub use crate::core_simd::vector::*;
35 | }
36 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/ops.rs:
--------------------------------------------------------------------------------
  1 | use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
  2 | use core::ops::{Add, Mul};
  3 | use core::ops::{BitAnd, BitOr, BitXor};
  4 | use core::ops::{Div, Rem, Sub};
  5 | use core::ops::{Shl, Shr};
  6 | 
  7 | mod assign;
  8 | mod deref;
  9 | mod shift_scalar;
 10 | mod unary;
 11 | 
 12 | impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
 13 | where
 14 |     T: SimdElement,
 15 |     LaneCount<N>: SupportedLaneCount,
 16 |     I: core::slice::SliceIndex<[T]>,
 17 | {
 18 |     type Output = I::Output;
 19 |     #[inline]
 20 |     fn index(&self, idx: I) -> &Self::Output {
 21 |         &self.as_array().as_slice()[idx] // index the lanes as a slice, with slice bounds checks
 22 |     }
 23 | }
 24 | 
 25 | impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
 26 | where
 27 |     T: SimdElement,
 28 |     LaneCount<N>: SupportedLaneCount,
 29 |     I: core::slice::SliceIndex<[T]>,
 30 | {
 31 |     #[inline]
 32 |     fn index_mut(&mut self, idx: I) -> &mut Self::Output {
 33 |         &mut self.as_mut_array().as_mut_slice()[idx] // mutable counterpart of `index`
 34 |     }
 35 | }
 36 | 
 37 | macro_rules! unsafe_base {
 38 |     ($a:ident, $b:ident, {$intrinsic:ident}, $($_:tt)*) => {
 39 |         // Safety: $a and $b are vectors
 40 |         unsafe { core::intrinsics::simd::$intrinsic($a, $b) }
 41 |     };
 42 | }
 43 | 
 44 | /// SAFETY: This macro must only be used for Shl or Shr, and must be given the matching shift intrinsic.
 45 | /// Besides calling the shift intrinsic, it first masks the shift amount with a bitand, so that the
 46 | /// result is well-defined: LLVM may return poison for shl, lshr, or ashr when `rhs >= <Int>::BITS`.
 47 | /// At worst, this will maybe add another instruction and cycle,
 48 | /// at best, it may open up more optimization opportunities,
 49 | /// or simply be elided entirely, especially for SIMD ISAs which default to this.
 50 | ///
 51 | // FIXME: Consider implementing this in cg_llvm instead?
 52 | // cg_clif defaults to this, and scalar MIR shifts also default to wrapping
 53 | macro_rules! wrap_bitshift {
 54 |     ($value:ident, $amount:ident, {$shift:ident}, $scalar:ident) => {
 55 |         #[allow(clippy::suspicious_arithmetic_impl)]
 56 |         // Safety: $value and the bitand result are vectors
 57 |         unsafe {
 58 |             core::intrinsics::simd::$shift(
 59 |                 $value,
 60 |                 $amount.bitand(Simd::splat((<$scalar>::BITS - 1) as $scalar)),
 61 |             )
 62 |         }
 63 |     };
 64 | }
 65 | 
 66 | /// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
 67 | /// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
 68 | /// thus guaranteeing that a Rust-defined result (or a panic) is produced instead.
 69 | ///
 70 | /// |                  | LLVM | Rust
 71 | /// | :--------------: | :--- | :----------
 72 | /// | N {/,%} 0        | UB   | panic!()
 73 | /// | <$int>::MIN / -1 | UB   | <$int>::MIN
 74 | /// | <$int>::MIN % -1 | UB   | 0
 75 | ///
 76 | macro_rules! int_divrem_guard {
 77 |     (   $lhs:ident,
 78 |         $rhs:ident,
 79 |         {   const PANIC_ZERO: &'static str = $zero:literal;
 80 |             $simd_call:ident, $op:tt
 81 |         },
 82 |         $int:ident ) => {
 83 |         if $rhs.simd_eq(Simd::splat(0 as _)).any() { // a zero in any divisor lane panics the whole operation
 84 |             panic!($zero);
 85 |         } else {
 86 |             // Prevent otherwise-UB overflow on the MIN / -1 case.
 87 |             let rhs = if <$int>::MIN != 0 { // MIN is 0 for unsigned element types, so they skip the fix-up
 88 |                 // This should, at worst, optimize to a few branchless logical ops
 89 |                 // Ideally, this entire conditional should evaporate
 90 |                 // Fire LLVM and implement those manually if it doesn't get the hint
 91 |                 ($lhs.simd_eq(Simd::splat(<$int>::MIN))
 92 |                 // type inference can break here, so spell -1 as an i64 and cast it down to the element type
 93 |                 & $rhs.simd_eq(Simd::splat(-1i64 as _)))
 94 |                 .select(Simd::splat(1 as _), $rhs) // overflowing lanes use a divisor of 1: MIN / 1 == MIN, MIN % 1 == 0
 95 |             } else {
 96 |                 // Nice base case to make it easy to const-fold away the other branch.
 97 |                 $rhs
 98 |             };
 99 | 
100 |             // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
101 |             // these operations aren't vectorized on aarch64 anyway
102 |             #[cfg(target_arch = "aarch64")]
103 |             {
104 |                 let mut out = Simd::splat(0 as _);
105 |                 for i in 0..Self::LEN { // per-lane fallback using the plain `$op` operator on the guarded divisor
106 |                     out[i] = $lhs[i] $op rhs[i];
107 |                 }
108 |                 out
109 |             }
110 | 
111 |             #[cfg(not(target_arch = "aarch64"))]
112 |             {
113 |                 // Safety: $lhs and rhs are vectors
114 |                 unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
115 |             }
116 |         }
117 |     };
118 | }
119 | 
120 | macro_rules! for_base_types {
121 |     (   T = ($($elem:ident),*);
122 |         type Lhs = Simd<T, N>;
123 |         type Rhs = Simd<T, N>;
124 |         type Output = $output:ty;
125 | 
126 |         impl $op_trait:ident::$op_fn:ident {
127 |             $body_macro:ident $body_args:tt
128 |         }) => {
129 |             $(
130 |                 impl<const N: usize> $op_trait<Self> for Simd<$elem, N>
131 |                 where
132 |                     $elem: SimdElement,
133 |                     LaneCount<N>: SupportedLaneCount,
134 |                 {
135 |                     type Output = $output;
136 | 
137 |                     #[inline]
138 |                     // TODO: the attribute below is only useful for int Div::div, but we hope
139 |                     // that this will essentially always get inlined anyway.
140 |                     #[track_caller]
141 |                     fn $op_fn(self, rhs: Self) -> Self::Output {
142 |                         $body_macro!(self, rhs, $body_args, $elem)
143 |                     }
144 |                 }
145 |             )*
146 |     }
147 | }
148 | 
149 | // A "TokenTree muncher": takes a set of scalar types `T = (...);`,
150 | // type parameters for the ops it implements, `Op::fn` names,
151 | // and a macro that expands into an expr, substituting in an intrinsic.
152 | // It passes that to for_base_types, which expands an impl for the types,
153 | // using the expanded expr in the function, and recurses with itself.
154 | // The recursion ends in the catch-all arm once no `impl Op::fn { .. }` entries remain.
155 | // tl;dr impls a set of ops::{Traits} for a set of types
156 | macro_rules! for_base_ops {
157 |     (
158 |         T = $types:tt;
159 |         type Lhs = Simd<T, N>;
160 |         type Rhs = Simd<T, N>;
161 |         type Output = $out:ident;
162 |         impl $op:ident::$call:ident
163 |             $inner:tt
164 |         $($rest:tt)*
165 |     ) => {
166 |         for_base_types! { // emit the impls for the first `impl Op::fn { .. }` entry
167 |             T = $types;
168 |             type Lhs = Simd<T, N>;
169 |             type Rhs = Simd<T, N>;
170 |             type Output = $out;
171 |             impl $op::$call
172 |                 $inner
173 |         }
174 |         for_base_ops! { // recurse on the remaining entries with the same header
175 |             T = $types;
176 |             type Lhs = Simd<T, N>;
177 |             type Rhs = Simd<T, N>;
178 |             type Output = $out;
179 |             $($rest)*
180 |         }
181 |     };
182 |     ($($done:tt)*) => { // catch-all arm: matches whatever is left and expands to nothing
183 |         // Done.
184 |     }
185 | }
186 | 
187 | // Integers can always accept add, mul, sub, bitand, bitor, and bitxor:
188 | // for those operations, simd_* intrinsics apply wrapping logic.
189 | for_base_ops! {
190 |     T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
191 |     type Lhs = Simd<T, N>;
192 |     type Rhs = Simd<T, N>;
193 |     type Output = Self;
194 | 
195 |     impl Add::add {
196 |         unsafe_base { simd_add }
197 |     }
198 | 
199 |     impl Mul::mul {
200 |         unsafe_base { simd_mul }
201 |     }
202 | 
203 |     impl Sub::sub {
204 |         unsafe_base { simd_sub }
205 |     }
206 | 
207 |     impl BitAnd::bitand {
208 |         unsafe_base { simd_and }
209 |     }
210 | 
211 |     impl BitOr::bitor {
212 |         unsafe_base { simd_or }
213 |     }
214 | 
215 |     impl BitXor::bitxor {
216 |         unsafe_base { simd_xor }
217 |     }
218 | 
219 |     impl Div::div { // not a bare intrinsic call: goes through int_divrem_guard
220 |         int_divrem_guard {
221 |             const PANIC_ZERO: &'static str = "attempt to divide by zero";
222 |             simd_div, /
223 |         }
224 |     }
225 | 
226 |     impl Rem::rem { // not a bare intrinsic call: goes through int_divrem_guard
227 |         int_divrem_guard {
228 |             const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
229 |             simd_rem, %
230 |         }
231 |     }
232 | 
233 |     // The only question is how to handle shifts >= <Int>::BITS?
234 |     // Our current solution uses wrapping logic (wrap_bitshift masks the shift amount).
235 |     impl Shl::shl {
236 |         wrap_bitshift { simd_shl }
237 |     }
238 | 
239 |     impl Shr::shr {
240 |         wrap_bitshift {
241 |             // This automatically monomorphizes to lshr or ashr, depending,
242 |             // so it's fine to use it for both UInts and SInts.
243 |             simd_shr
244 |         }
245 |     }
246 | }
247 | 
248 | // No special precautions are needed for floats:
249 | // they always accept arithmetic ops, but may become NaN.
250 | for_base_ops! {
251 |     T = (f32, f64);
252 |     type Lhs = Simd<T, N>;
253 |     type Rhs = Simd<T, N>;
254 |     type Output = Self;
255 | 
256 |     impl Add::add {
257 |         unsafe_base { simd_add }
258 |     }
259 | 
260 |     impl Sub::sub {
261 |         unsafe_base { simd_sub }
262 |     }
263 | 
264 |     impl Mul::mul {
265 |         unsafe_base { simd_mul }
266 |     }
267 | 
268 |     impl Div::div {
269 |         unsafe_base { simd_div }
270 |     }
271 | 
272 |     impl Rem::rem {
273 |         unsafe_base { simd_rem }
274 |     }
275 | }
276 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/ops/assign.rs:
--------------------------------------------------------------------------------
  1 | //! Assignment operators
  2 | 
  3 | use super::*;
  4 | use core::ops::{AddAssign, MulAssign}; // commutative binary op-assignment
  5 | use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; // commutative bit binary op-assignment
  6 | use core::ops::{DivAssign, RemAssign, SubAssign}; // non-commutative binary op-assignment
  7 | use core::ops::{ShlAssign, ShrAssign}; // non-commutative bit binary op-assignment
  8 | 
  9 | // Arithmetic
 10 | 
 11 | macro_rules! assign_ops {
 12 |     ($(impl<T, U, const N: usize> $assign_trait:ident<U> for Simd<T, N>
 13 |         where
 14 |             Self: $op_trait:ident,
 15 |         {
 16 |             fn $assign_fn:ident(rhs: U) {
 17 |                 $op_fn:ident
 18 |             }
 19 |         })*) => {
 20 |         $(impl<T, U, const N: usize> $assign_trait<U> for Simd<T, N>
 21 |         where
 22 |             T: SimdElement,
 23 |             LaneCount<N>: SupportedLaneCount,
 24 |             Self: $op_trait<U, Output = Self>,
 25 |         {
 26 |             #[inline]
 27 |             fn $assign_fn(&mut self, rhs: U) {
 28 |                 *self = self.$op_fn(rhs); // run the by-value operator, then store the result back
 29 |             }
 30 |         })*
 31 |     }
 32 | }
 33 | 
 34 | assign_ops! {
 35 |     // Arithmetic: each entry becomes an `OpAssign<U>` impl that stores `self.op(rhs)` back into `self`
 36 |     impl<T, U, const N: usize> AddAssign<U> for Simd<T, N>
 37 |     where
 38 |         Self: Add,
 39 |     {
 40 |         fn add_assign(rhs: U) {
 41 |             add
 42 |         }
 43 |     }
 44 | 
 45 |     impl<T, U, const N: usize> MulAssign<U> for Simd<T, N>
 46 |     where
 47 |         Self: Mul,
 48 |     {
 49 |         fn mul_assign(rhs: U) {
 50 |             mul
 51 |         }
 52 |     }
 53 | 
 54 |     impl<T, U, const N: usize> SubAssign<U> for Simd<T, N>
 55 |     where
 56 |         Self: Sub,
 57 |     {
 58 |         fn sub_assign(rhs: U) {
 59 |             sub
 60 |         }
 61 |     }
 62 | 
 63 |     impl<T, U, const N: usize> DivAssign<U> for Simd<T, N>
 64 |     where
 65 |         Self: Div,
 66 |     {
 67 |         fn div_assign(rhs: U) {
 68 |             div
 69 |         }
 70 |     }
 71 |     impl<T, U, const N: usize> RemAssign<U> for Simd<T, N>
 72 |     where
 73 |         Self: Rem,
 74 |     {
 75 |         fn rem_assign(rhs: U) {
 76 |             rem
 77 |         }
 78 |     }
 79 | 
 80 |     // Bitops (the shift assignments are listed here as well)
 81 |     impl<T, U, const N: usize> BitAndAssign<U> for Simd<T, N>
 82 |     where
 83 |         Self: BitAnd,
 84 |     {
 85 |         fn bitand_assign(rhs: U) {
 86 |             bitand
 87 |         }
 88 |     }
 89 | 
 90 |     impl<T, U, const N: usize> BitOrAssign<U> for Simd<T, N>
 91 |     where
 92 |         Self: BitOr,
 93 |     {
 94 |         fn bitor_assign(rhs: U) {
 95 |             bitor
 96 |         }
 97 |     }
 98 | 
 99 |     impl<T, U, const N: usize> BitXorAssign<U> for Simd<T, N>
100 |     where
101 |         Self: BitXor,
102 |     {
103 |         fn bitxor_assign(rhs: U) {
104 |             bitxor
105 |         }
106 |     }
107 | 
108 |     impl<T, U, const N: usize> ShlAssign<U> for Simd<T, N>
109 |     where
110 |         Self: Shl,
111 |     {
112 |         fn shl_assign(rhs: U) {
113 |             shl
114 |         }
115 |     }
116 | 
117 |     impl<T, U, const N: usize> ShrAssign<U> for Simd<T, N>
118 |     where
119 |         Self: Shr,
120 |     {
121 |         fn shr_assign(rhs: U) {
122 |             shr
123 |         }
124 |     }
125 | }
126 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/ops/deref.rs:
--------------------------------------------------------------------------------
  1 | //! This module hacks in "implicit deref" for Simd's operators.
  2 | //! Ideally, Rust would take care of this itself,
  3 | //! and method calls usually handle the LHS implicitly.
  4 | //! But this is not the case with arithmetic ops.
  5 | 
  6 | use super::*;
  7 | 
  8 | macro_rules! deref_lhs {
  9 |     (impl<T, const N: usize> $op:ident for $vec:ty {
 10 |             fn $op_fn:ident
 11 |         }) => {
 12 |         impl<T, const N: usize> $op<$vec> for &$vec
 13 |         where
 14 |             T: SimdElement,
 15 |             LaneCount<N>: SupportedLaneCount,
 16 |             $vec: $op<$vec, Output = $vec>,
 17 |         {
 18 |             type Output = Simd<T, N>;
 19 | 
 20 |             #[inline]
 21 |             fn $op_fn(self, rhs: $vec) -> Self::Output {
 22 |                 (*self).$op_fn(rhs) // copy the left operand out of the reference, then use the by-value op
 23 |             }
 24 |         }
 25 |     };
 26 | }
 27 | 
 28 | macro_rules! deref_rhs {
 29 |     (impl<T, const N: usize> $op:ident for $vec:ty {
 30 |             fn $op_fn:ident
 31 |         }) => {
 32 |         impl<T, const N: usize> $op<&$vec> for $vec
 33 |         where
 34 |             T: SimdElement,
 35 |             LaneCount<N>: SupportedLaneCount,
 36 |             $vec: $op<$vec, Output = $vec>,
 37 |         {
 38 |             type Output = Simd<T, N>;
 39 | 
 40 |             #[inline]
 41 |             fn $op_fn(self, rhs: &$vec) -> Self::Output {
 42 |                 self.$op_fn(*rhs) // copy the right operand out of the reference, then use the by-value op
 43 |             }
 44 |         }
 45 |     };
 46 | }
 47 | 
 48 | macro_rules! deref_ops {
 49 |     ($(impl<T, const N: usize> $op:ident for $vec:ty {
 50 |             fn $op_fn:ident
 51 |         })*) => {
 52 |         $(
 53 |             deref_lhs! {
 54 |                 impl<T, const N: usize> $op for $vec {
 55 |                     fn $op_fn
 56 |                 }
 57 |             }
 58 |             deref_rhs! {
 59 |                 impl<T, const N: usize> $op for $vec {
 60 |                     fn $op_fn
 61 |                 }
 62 |             }
 63 |             impl<'a, 'b, T, const N: usize> $op<&'b $vec> for &'a $vec
 64 |             where
 65 |                 T: SimdElement,
 66 |                 LaneCount<N>: SupportedLaneCount,
 67 |                 $vec: $op<$vec, Output = $vec>,
 68 |             {
 69 |                 type Output = $vec;
 70 | 
 71 |                 #[inline]
 72 |                 fn $op_fn(self, rhs: &'b $vec) -> Self::Output {
 73 |                     (*self).$op_fn(*rhs) // both operands by reference: copy each out, then use the by-value op
 74 |                 }
 75 |             }
 76 |         )*
 77 |     }
 78 | }
 79 | 
 80 | deref_ops! {
 81 |     // Arithmetic (each entry yields the `&a op b`, `a op &b`, and `&a op &b` impls)
 82 |     impl<T, const N: usize> Add for Simd<T, N> {
 83 |         fn add
 84 |     }
 85 | 
 86 |     impl<T, const N: usize> Mul for Simd<T, N> {
 87 |         fn mul
 88 |     }
 89 | 
 90 |     impl<T, const N: usize> Sub for Simd<T, N> {
 91 |         fn sub
 92 |     }
 93 | 
 94 |     impl<T, const N: usize> Div for Simd<T, N> {
 95 |         fn div
 96 |     }
 97 | 
 98 |     impl<T, const N: usize> Rem for Simd<T, N> {
 99 |         fn rem
100 |     }
101 | 
102 |     // Bitops (the shifts are listed here as well)
103 |     impl<T, const N: usize> BitAnd for Simd<T, N> {
104 |         fn bitand
105 |     }
106 | 
107 |     impl<T, const N: usize> BitOr for Simd<T, N> {
108 |         fn bitor
109 |     }
110 | 
111 |     impl<T, const N: usize> BitXor for Simd<T, N> {
112 |         fn bitxor
113 |     }
114 | 
115 |     impl<T, const N: usize> Shl for Simd<T, N> {
116 |         fn shl
117 |     }
118 | 
119 |     impl<T, const N: usize> Shr for Simd<T, N> {
120 |         fn shr
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/ops/shift_scalar.rs:
--------------------------------------------------------------------------------
 1 | // Shift operations are unusual in that they typically take a scalar on the right-hand side.
 2 | // Here, we implement shifts for scalar RHS arguments by splatting the scalar into a vector.
 3 | 
 4 | use crate::simd::{LaneCount, Simd, SupportedLaneCount};
 5 | 
 6 | macro_rules! impl_splatted_shifts {
 7 |     { impl $shift:ident :: $shift_fn:ident for $elem:ty } => {
 8 |         impl<const N: usize> core::ops::$shift<$elem> for Simd<$elem, N> // vector by scalar
 9 |         where
10 |             LaneCount<N>: SupportedLaneCount,
11 |         {
12 |             type Output = Self;
13 |             #[inline]
14 |             fn $shift_fn(self, rhs: $elem) -> Self::Output {
15 |                 self.$shift_fn(Simd::splat(rhs))
16 |             }
17 |         }
18 | 
19 |         impl<const N: usize> core::ops::$shift<&$elem> for Simd<$elem, N> // vector by &scalar
20 |         where
21 |             LaneCount<N>: SupportedLaneCount,
22 |         {
23 |             type Output = Self;
24 |             #[inline]
25 |             fn $shift_fn(self, rhs: &$elem) -> Self::Output {
26 |                 self.$shift_fn(Simd::splat(*rhs))
27 |             }
28 |         }
29 | 
30 |         impl<'a, const N: usize> core::ops::$shift<$elem> for &'a Simd<$elem, N> // &vector by scalar
31 |         where
32 |             LaneCount<N>: SupportedLaneCount,
33 |         {
34 |             type Output = Simd<$elem, N>;
35 |             #[inline]
36 |             fn $shift_fn(self, rhs: $elem) -> Self::Output {
37 |                 self.$shift_fn(Simd::splat(rhs))
38 |             }
39 |         }
40 | 
41 |         impl<'a, const N: usize> core::ops::$shift<&$elem> for &'a Simd<$elem, N> // &vector by &scalar
42 |         where
43 |             LaneCount<N>: SupportedLaneCount,
44 |         {
45 |             type Output = Simd<$elem, N>;
46 |             #[inline]
47 |             fn $shift_fn(self, rhs: &$elem) -> Self::Output {
48 |                 self.$shift_fn(Simd::splat(*rhs))
49 |             }
50 |         }
51 |     };
52 |     { $($elem:ty),* } => {
53 |         $(
54 |         impl_splatted_shifts! { impl Shl::shl for $elem }
55 |         impl_splatted_shifts! { impl Shr::shr for $elem }
56 |         )*
57 |     }
58 | }
59 | 
60 | // Generically splatting the argument caused inference issues in the past,
61 | // so the element types are enumerated instead.
62 | impl_splatted_shifts! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }
63 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/ops/unary.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 2 | use core::ops::{Neg, Not}; // unary ops
 3 | 
 4 | macro_rules! neg {
 5 |     ($(impl<const N: usize> Neg for Simd<$elem:ty, N>)*) => {
 6 |         $(impl<const N: usize> Neg for Simd<$elem, N>
 7 |         where
 8 |             LaneCount<N>: SupportedLaneCount,
 9 |             $elem: SimdElement,
10 |         {
11 |             type Output = Self;
12 | 
13 |             #[inline]
14 |             fn neg(self) -> Self::Output {
15 |                 // Safety: `self` is a vector of floats or signed integers
16 |                 unsafe { core::intrinsics::simd::simd_neg(self) }
17 |             }
18 |         })*
19 |     }
20 | }
21 | 
22 | neg! {
23 |     impl<const N: usize> Neg for Simd<f32, N>
24 | 
25 |     impl<const N: usize> Neg for Simd<f64, N>
26 | 
27 |     impl<const N: usize> Neg for Simd<i8, N>
28 | 
29 |     impl<const N: usize> Neg for Simd<i16, N>
30 | 
31 |     impl<const N: usize> Neg for Simd<i32, N>
32 | 
33 |     impl<const N: usize> Neg for Simd<i64, N>
34 | 
35 |     impl<const N: usize> Neg for Simd<isize, N>
36 | }
37 | 
38 | macro_rules! not {
39 |     ($(impl<const N: usize> Not for Simd<$elem:ty, N>)*) => {
40 |         $(impl<const N: usize> Not for Simd<$elem, N>
41 |         where
42 |             LaneCount<N>: SupportedLaneCount,
43 |             $elem: SimdElement,
44 |         {
45 |             type Output = Self;
46 | 
47 |             #[inline]
48 |             fn not(self) -> Self::Output {
49 |                 self ^ Simd::splat(!(0 as $elem)) // xor with an all-ones splat flips every bit
50 |             }
51 |         })*
52 |     }
53 | }
54 | 
55 | not! {
56 |     impl<const N: usize> Not for Simd<i8, N>
57 | 
58 |     impl<const N: usize> Not for Simd<i16, N>
59 | 
60 |     impl<const N: usize> Not for Simd<i32, N>
61 | 
62 |     impl<const N: usize> Not for Simd<i64, N>
63 | 
64 |     impl<const N: usize> Not for Simd<isize, N>
65 | 
66 |     impl<const N: usize> Not for Simd<u8, N>
67 | 
68 |     impl<const N: usize> Not for Simd<u16, N>
69 | 
70 |     impl<const N: usize> Not for Simd<u32, N>
71 | 
72 |     impl<const N: usize> Not for Simd<u64, N>
73 | 
74 |     impl<const N: usize> Not for Simd<usize, N>
75 | }
76 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/select.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
 2 | 
 3 | impl<T, const N: usize> Mask<T, N>
 4 | where
 5 |     T: MaskElement,
 6 |     LaneCount<N>: SupportedLaneCount,
 7 | {
 8 |     /// Picks each element from one of two vectors.
 9 |     ///
10 |     /// Where an element of the mask is true the result takes the matching element of
11 |     /// `true_values`; where it is false it takes the matching element of `false_values`.
12 |     ///
13 |     /// # Examples
14 |     /// ```
15 |     /// # #![feature(portable_simd)]
16 |     /// # use core::simd::{Simd, Mask};
17 |     /// let a = Simd::from_array([0, 1, 2, 3]);
18 |     /// let b = Simd::from_array([4, 5, 6, 7]);
19 |     /// let mask = Mask::from_array([true, false, false, true]);
20 |     /// let c = mask.select(a, b);
21 |     /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
22 |     /// ```
23 |     #[inline]
24 |     #[must_use = "method returns a new vector and does not mutate the original inputs"]
25 |     pub fn select<U>(self, true_values: Simd<U, N>, false_values: Simd<U, N>) -> Simd<U, N>
26 |     where
27 |         U: SimdElement<Mask = T>,
28 |     {
29 |         let lanes = self.to_int();
30 |         // Safety: `lanes` is the mask cast to a vector of integers, and both operands are `Simd<U, N>`.
31 |         unsafe { core::intrinsics::simd::simd_select(lanes, true_values, false_values) }
32 |     }
33 | 
34 |     /// Picks each element from one of two masks.
35 |     ///
36 |     /// Where an element of this mask is true the result takes the matching element of
37 |     /// `true_values`; where it is false it takes the matching element of `false_values`.
38 |     ///
39 |     /// # Examples
40 |     /// ```
41 |     /// # #![feature(portable_simd)]
42 |     /// # use core::simd::Mask;
43 |     /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
44 |     /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
45 |     /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
46 |     /// let c = mask.select_mask(a, b);
47 |     /// assert_eq!(c.to_array(), [true, false, true, false]);
48 |     /// ```
49 |     #[inline]
50 |     #[must_use = "method returns a new mask and does not mutate the original inputs"]
51 |     pub fn select_mask(self, true_values: Self, false_values: Self) -> Self {
52 |         (self & true_values) | (!self & false_values)
53 |     }
54 | }
55 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/cmp.rs:
--------------------------------------------------------------------------------
1 | //! Traits for comparing and ordering vectors.
2 | 
3 | mod eq;
4 | mod ord;
5 | 
6 | pub use eq::*;
7 | pub use ord::*;
8 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/cmp/eq.rs:
--------------------------------------------------------------------------------
  1 | use crate::simd::{
  2 |     LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
  3 |     ptr::{SimdConstPtr, SimdMutPtr},
  4 | };
  5 | 
  6 | /// Parallel `PartialEq`: element-wise equality tests whose result is a mask.
  7 | pub trait SimdPartialEq {
  8 |     /// The mask type returned by each comparison.
  9 |     type Mask;
 10 | 
 11 |     /// Test if each element is equal to the corresponding element in `other`.
 12 |     #[must_use = "method returns a new mask and does not mutate the original value"]
 13 |     fn simd_eq(self, other: Self) -> Self::Mask;
 14 | 
 15 |     /// Test if each element is not equal to the corresponding element in `other`.
 16 |     #[must_use = "method returns a new mask and does not mutate the original value"]
 17 |     fn simd_ne(self, other: Self) -> Self::Mask;
 18 | }
 19 | 
 20 | macro_rules! impl_number {
 21 |     { $($elem:ty),* } => {
 22 |         $(
 23 |         impl<const N: usize> SimdPartialEq for Simd<$elem, N>
 24 |         where
 25 |             LaneCount<N>: SupportedLaneCount,
 26 |         {
 27 |             type Mask = Mask<<$elem as SimdElement>::Mask, N>;
 28 | 
 29 |             #[inline]
 30 |             fn simd_eq(self, other: Self) -> Self::Mask {
 31 |                 // Safety: both operands are vectors, and a comparison result
 32 |                 // is always a valid mask.
 33 |                 unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) }
 34 |             }
 35 | 
 36 |             #[inline]
 37 |             fn simd_ne(self, other: Self) -> Self::Mask {
 38 |                 // Safety: both operands are vectors, and a comparison result
 39 |                 // is always a valid mask.
 40 |                 unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) }
 41 |             }
 42 |         }
 43 |         )*
 44 |     }
 45 | }
 46 | 
 47 | impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
 48 | 
 49 | macro_rules! impl_mask {
 50 |     { $($elem:ty),* } => {
 51 |         $(
 52 |         impl<const N: usize> SimdPartialEq for Mask<$elem, N>
 53 |         where
 54 |             LaneCount<N>: SupportedLaneCount,
 55 |         {
 56 |             type Mask = Self;
 57 | 
 58 |             #[inline]
 59 |             fn simd_eq(self, other: Self) -> Self::Mask {
 60 |                 // Safety: `to_int` turns both masks into vectors, and a comparison
 61 |                 // result is always a valid mask.
 62 |                 unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) }
 63 |             }
 64 | 
 65 |             #[inline]
 66 |             fn simd_ne(self, other: Self) -> Self::Mask {
 67 |                 // Safety: `to_int` turns both masks into vectors, and a comparison
 68 |                 // result is always a valid mask.
 69 |                 unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) }
 70 |             }
 71 |         }
 72 |         )*
 73 |     }
 74 | }
 75 | 
 76 | impl_mask! { i8, i16, i32, i64, isize }
 77 | 
 78 | impl<Pointee, const N: usize> SimdPartialEq for Simd<*const Pointee, N>
 79 | where
 80 |     LaneCount<N>: SupportedLaneCount,
 81 | {
 82 |     type Mask = Mask<isize, N>;
 83 | 
 84 |     #[inline]
 85 |     fn simd_eq(self, other: Self) -> Self::Mask {
 86 |         SimdPartialEq::simd_eq(self.addr(), other.addr()) // compare the pointers by address
 87 |     }
 88 | 
 89 |     #[inline]
 90 |     fn simd_ne(self, other: Self) -> Self::Mask {
 91 |         SimdPartialEq::simd_ne(self.addr(), other.addr()) // compare the pointers by address
 92 |     }
 93 | }
 94 | 
 95 | impl<Pointee, const N: usize> SimdPartialEq for Simd<*mut Pointee, N>
 96 | where
 97 |     LaneCount<N>: SupportedLaneCount,
 98 | {
 99 |     type Mask = Mask<isize, N>;
100 | 
101 |     #[inline]
102 |     fn simd_eq(self, other: Self) -> Self::Mask {
103 |         SimdPartialEq::simd_eq(self.addr(), other.addr()) // compare the pointers by address
104 |     }
105 | 
106 |     #[inline]
107 |     fn simd_ne(self, other: Self) -> Self::Mask {
108 |         SimdPartialEq::simd_ne(self.addr(), other.addr()) // compare the pointers by address
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/num.rs:
--------------------------------------------------------------------------------
 1 | //! Traits for vectors with numeric elements.
 2 | 
 3 | mod float;
 4 | mod int;
 5 | mod uint;
 6 | 
 7 | mod sealed { // private module; presumably seals the numeric traits — confirm in float.rs/int.rs/uint.rs
 8 |     pub trait Sealed {} // empty marker trait
 9 | }
10 | 
11 | pub use float::*;
12 | pub use int::*;
13 | pub use uint::*;
14 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/prelude.rs:
--------------------------------------------------------------------------------
 1 | //! The portable SIMD prelude.
 2 | //!
 3 | //! Includes important traits and types to be imported with a glob:
 4 | //! ```ignore
 5 | //! use std::simd::prelude::*;
 6 | //! ```
 7 | 
 8 | #[doc(no_inline)]
 9 | pub use super::{
10 |     Mask, Simd,
11 |     cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd},
12 |     num::{SimdFloat, SimdInt, SimdUint},
13 |     ptr::{SimdConstPtr, SimdMutPtr},
14 |     simd_swizzle,
15 | };
16 | 
17 | #[rustfmt::skip]
18 | #[doc(no_inline)]
19 | pub use super::{f32x1, f32x2, f32x4, f32x8, f32x16, f32x32, f32x64};
20 | 
21 | #[rustfmt::skip]
22 | #[doc(no_inline)]
23 | pub use super::{f64x1, f64x2, f64x4, f64x8, f64x16, f64x32, f64x64};
24 | 
25 | #[rustfmt::skip]
26 | #[doc(no_inline)]
27 | pub use super::{i8x1, i8x2, i8x4, i8x8, i8x16, i8x32, i8x64};
28 | 
29 | #[rustfmt::skip]
30 | #[doc(no_inline)]
31 | pub use super::{i16x1, i16x2, i16x4, i16x8, i16x16, i16x32, i16x64};
32 | 
33 | #[rustfmt::skip]
34 | #[doc(no_inline)]
35 | pub use super::{i32x1, i32x2, i32x4, i32x8, i32x16, i32x32, i32x64};
36 | 
37 | #[rustfmt::skip]
38 | #[doc(no_inline)]
39 | pub use super::{i64x1, i64x2, i64x4, i64x8, i64x16, i64x32, i64x64};
40 | 
41 | #[rustfmt::skip]
42 | #[doc(no_inline)]
43 | pub use super::{isizex1, isizex2, isizex4, isizex8, isizex16, isizex32, isizex64};
44 | 
45 | #[rustfmt::skip]
46 | #[doc(no_inline)]
47 | pub use super::{u8x1, u8x2, u8x4, u8x8, u8x16, u8x32, u8x64};
48 | 
49 | #[rustfmt::skip]
50 | #[doc(no_inline)]
51 | pub use super::{u16x1, u16x2, u16x4, u16x8, u16x16, u16x32, u16x64};
52 | 
53 | #[rustfmt::skip]
54 | #[doc(no_inline)]
55 | pub use super::{u32x1, u32x2, u32x4, u32x8, u32x16, u32x32, u32x64};
56 | 
57 | #[rustfmt::skip]
58 | #[doc(no_inline)]
59 | pub use super::{u64x1, u64x2, u64x4, u64x8, u64x16, u64x32, u64x64};
60 | 
61 | #[rustfmt::skip]
62 | #[doc(no_inline)]
63 | pub use super::{usizex1, usizex2, usizex4, usizex8, usizex16, usizex32, usizex64};
64 | 
65 | #[rustfmt::skip]
66 | #[doc(no_inline)]
67 | pub use super::{mask8x1, mask8x2, mask8x4, mask8x8, mask8x16, mask8x32, mask8x64};
68 | 
69 | #[rustfmt::skip]
70 | #[doc(no_inline)]
71 | pub use super::{mask16x1, mask16x2, mask16x4, mask16x8, mask16x16, mask16x32, mask16x64};
72 | 
73 | #[rustfmt::skip]
74 | #[doc(no_inline)]
75 | pub use super::{mask32x1, mask32x2, mask32x4, mask32x8, mask32x16, mask32x32, mask32x64};
76 | 
77 | #[rustfmt::skip]
78 | #[doc(no_inline)]
79 | pub use super::{mask64x1, mask64x2, mask64x4, mask64x8, mask64x16, mask64x32, mask64x64};
80 | 
81 | #[rustfmt::skip]
82 | #[doc(no_inline)]
83 | pub use super::{masksizex1, masksizex2, masksizex4, masksizex8, masksizex16, masksizex32, masksizex64};
84 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/ptr.rs:
--------------------------------------------------------------------------------
 1 | //! Traits for vectors of pointers.
 2 | 
 3 | mod const_ptr;
 4 | mod mut_ptr;
 5 | 
 6 | mod sealed { // private module, so `Sealed` is not nameable outside `ptr` unless re-exported
 7 |     pub trait Sealed {} // empty marker trait; `SimdConstPtr` in const_ptr.rs requires it as a supertrait
 8 | }
 9 | 
10 | pub use const_ptr::*;
11 | pub use mut_ptr::*;
12 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/ptr/const_ptr.rs:
--------------------------------------------------------------------------------
  1 | use super::sealed::Sealed;
  2 | use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint};
  3 | 
  4 | /// Operations on SIMD vectors of constant pointers.
  5 | pub trait SimdConstPtr: Copy + Sealed {
  6 |     /// Vector of `usize` with the same number of elements.
  7 |     type Usize;
  8 | 
  9 |     /// Vector of `isize` with the same number of elements.
 10 |     type Isize;
 11 | 
 12 |     /// Vector of const pointers to `T` with the same number of elements.
 13 |     type CastPtr<T>;
 14 | 
 15 |     /// Vector of mutable pointers to the same type.
 16 |     type MutPtr;
 17 | 
 18 |     /// Mask type used for manipulating this SIMD vector type.
 19 |     type Mask;
 20 | 
 21 |     /// Returns `true` for each element that is null.
 22 |     fn is_null(self) -> Self::Mask;
 23 | 
 24 |     /// Casts to a pointer of another type.
 25 |     ///
 26 |     /// Equivalent to calling [`pointer::cast`] on each element.
 27 |     fn cast<T>(self) -> Self::CastPtr<T>;
 28 | 
 29 |     /// Changes constness without changing the type.
 30 |     ///
 31 |     /// Equivalent to calling [`pointer::cast_mut`] on each element.
 32 |     fn cast_mut(self) -> Self::MutPtr;
 33 | 
 34 |     /// Gets the "address" portion of the pointer.
 35 |     ///
 36 |     /// This method discards pointer semantic metadata, so the result cannot be
 37 |     /// directly cast into a valid pointer.
 38 |     ///
 39 |     /// This method semantically discards *provenance* and
 40 |     /// *address-space* information. To properly restore that information, use [`Self::with_addr`].
 41 |     ///
 42 |     /// Equivalent to calling [`pointer::addr`] on each element.
 43 |     fn addr(self) -> Self::Usize;
 44 | 
 45 |     /// Converts an address to a pointer without giving it any provenance.
 46 |     ///
 47 |     /// Without provenance, this pointer is not associated with any actual allocation. Such a
 48 |     /// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
 49 |     /// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
 50 |     /// are little more than a usize address in disguise.
 51 |     ///
 52 |     /// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
 53 |     /// previously exposed provenance.
 54 |     ///
 55 |     /// Equivalent to calling [`core::ptr::without_provenance`] on each element.
 56 |     fn without_provenance(addr: Self::Usize) -> Self;
 57 | 
 58 |     /// Creates a new pointer with the given address.
 59 |     ///
 60 |     /// This performs the same operation as a cast, but copies the *address-space* and
 61 |     /// *provenance* of `self` to the new pointer.
 62 |     ///
 63 |     /// Equivalent to calling [`pointer::with_addr`] on each element.
 64 |     fn with_addr(self, addr: Self::Usize) -> Self;
 65 | 
 66 |     /// Exposes the "provenance" part of the pointer for future use in
 67 |     /// [`Self::with_exposed_provenance`] and returns the "address" portion.
 68 |     fn expose_provenance(self) -> Self::Usize;
 69 | 
 70 |     /// Converts an address back to a pointer, picking up a previously "exposed" provenance.
 71 |     ///
 72 |     /// Equivalent to calling [`core::ptr::with_exposed_provenance`] on each element.
 73 |     fn with_exposed_provenance(addr: Self::Usize) -> Self;
 74 | 
 75 |     /// Applies a signed offset to each pointer using wrapping arithmetic.
 76 |     ///
 77 |     /// Equivalent to calling [`pointer::wrapping_offset`] on each element.
 78 |     fn wrapping_offset(self, offset: Self::Isize) -> Self;
 79 | 
 80 |     /// Adds an unsigned offset to each pointer using wrapping arithmetic.
 81 |     ///
 82 |     /// Equivalent to calling [`pointer::wrapping_add`] on each element.
 83 |     fn wrapping_add(self, count: Self::Usize) -> Self;
 84 | 
 85 |     /// Subtracts an unsigned offset from each pointer using wrapping arithmetic.
 86 |     ///
 87 |     /// Equivalent to calling [`pointer::wrapping_sub`] on each element.
 88 |     fn wrapping_sub(self, count: Self::Usize) -> Self;
 89 | }
 90 | // Implements `Sealed`, which `SimdConstPtr` requires as a supertrait, for const pointer vectors.
 91 | impl<T, const N: usize> Sealed for Simd<*const T, N> where LaneCount<N>: SupportedLaneCount {}
 92 | 
 93 | impl<T, const N: usize> SimdConstPtr for Simd<*const T, N>
 94 | where
 95 |     LaneCount<N>: SupportedLaneCount,
 96 | {
 97 |     type Usize = Simd<usize, N>;
 98 |     type Isize = Simd<isize, N>;
 99 |     type CastPtr<U> = Simd<*const U, N>;
100 |     type MutPtr = Simd<*mut T, N>;
101 |     type Mask = Mask<isize, N>;
102 | 
103 |     #[inline]
104 |     fn is_null(self) -> Self::Mask {
105 |         Simd::splat(core::ptr::null()).simd_eq(self)
106 |     }
107 | 
108 |     #[inline]
109 |     fn cast<U>(self) -> Self::CastPtr<U> {
110 |         // SimdElement currently requires zero-sized metadata, so this should never fail.
111 |         // If this ever changes, `simd_cast_ptr` should produce a post-mono error.
112 |         use core::ptr::Pointee;
113 |         assert_eq!(size_of::<<T as Pointee>::Metadata>(), 0);
114 |         assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);
115 | 
116 |         // Safety: pointers can be cast
117 |         unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
118 |     }
119 | 
120 |     #[inline]
121 |     fn cast_mut(self) -> Self::MutPtr {
122 |         // Safety: pointers can be cast; the result is `Simd<*mut T, N>`, same pointee type
123 |         unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
124 |     }
125 | 
126 |     #[inline]
127 |     fn addr(self) -> Self::Usize {
128 |         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
129 |         // SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the
130 |         // provenance).
131 |         unsafe { core::mem::transmute_copy(&self) }
132 |     }
133 | 
134 |     #[inline]
135 |     fn without_provenance(addr: Self::Usize) -> Self {
136 |         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
137 |         // SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
138 |         // provenance).
139 |         unsafe { core::mem::transmute_copy(&addr) }
140 |     }
141 | 
142 |     #[inline]
143 |     fn with_addr(self, addr: Self::Usize) -> Self {
144 |         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
145 |         //
146 |         // In the mean-time, this operation is defined to be "as if" it was
147 |         // a wrapping_offset, so we can emulate it as such. This should properly
148 |         // restore pointer provenance even under today's compiler.
149 |         self.cast::<u8>()
150 |             .wrapping_offset(addr.cast::<isize>() - self.addr().cast::<isize>())
151 |             .cast()
152 |     }
153 | 
154 |     #[inline]
155 |     fn expose_provenance(self) -> Self::Usize {
156 |         // Safety: `self` is a pointer vector
157 |         unsafe { core::intrinsics::simd::simd_expose_provenance(self) }
158 |     }
159 | 
160 |     #[inline]
161 |     fn with_exposed_provenance(addr: Self::Usize) -> Self {
162 |         // Safety: `Self` is a pointer vector and `addr` is a `usize` vector of the same length
163 |         unsafe { core::intrinsics::simd::simd_with_exposed_provenance(addr) }
164 |     }
165 | 
166 |     #[inline]
167 |     fn wrapping_offset(self, count: Self::Isize) -> Self {
168 |         // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets
169 |         unsafe { core::intrinsics::simd::simd_arith_offset(self, count) }
170 |     }
171 | 
172 |     #[inline]
173 |     fn wrapping_add(self, count: Self::Usize) -> Self {
174 |         self.wrapping_offset(count.cast())
175 |     }
176 | 
177 |     #[inline]
178 |     fn wrapping_sub(self, count: Self::Usize) -> Self {
179 |         self.wrapping_offset(-count.cast::<isize>())
180 |     }
181 | }
182 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/simd/ptr/mut_ptr.rs:
--------------------------------------------------------------------------------
  1 | use super::sealed::Sealed;
  2 | use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint};
  3 | 
  4 | /// Operations on SIMD vectors of mutable pointers.
  5 | pub trait SimdMutPtr: Copy + Sealed {
  6 |     /// Vector of `usize` with the same number of elements.
  7 |     type Usize;
  8 | 
  9 |     /// Vector of `isize` with the same number of elements.
 10 |     type Isize;
 11 | 
 12 |     /// Vector of mutable pointers to `T` with the same number of elements.
 13 |     type CastPtr<T>;
 14 | 
 15 |     /// Vector of constant pointers to the same type.
 16 |     type ConstPtr;
 17 | 
 18 |     /// Mask type used for manipulating this SIMD vector type.
 19 |     type Mask;
 20 | 
 21 |     /// Returns `true` for each element that is null.
 22 |     fn is_null(self) -> Self::Mask;
 23 | 
 24 |     /// Casts to a pointer of another type.
 25 |     ///
 26 |     /// Equivalent to calling [`pointer::cast`] on each element.
 27 |     fn cast<T>(self) -> Self::CastPtr<T>;
 28 | 
 29 |     /// Changes constness without changing the type.
 30 |     ///
 31 |     /// Equivalent to calling [`pointer::cast_const`] on each element.
 32 |     fn cast_const(self) -> Self::ConstPtr;
 33 | 
 34 |     /// Gets the "address" portion of the pointer.
 35 |     ///
 36 |     /// This method discards pointer semantic metadata, so the result cannot be
 37 |     /// directly cast into a valid pointer.
 38 |     ///
 39 |     /// Equivalent to calling [`pointer::addr`] on each element.
 40 |     fn addr(self) -> Self::Usize;
 41 | 
 42 |     /// Converts an address to a pointer without giving it any provenance.
 43 |     ///
 44 |     /// Without provenance, this pointer is not associated with any actual allocation. Such a
 45 |     /// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
 46 |     /// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
 47 |     /// are little more than a usize address in disguise.
 48 |     ///
 49 |     /// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
 50 |     /// previously exposed provenance.
 51 |     ///
 52 |     /// Equivalent to calling [`core::ptr::without_provenance`] on each element.
 53 |     fn without_provenance(addr: Self::Usize) -> Self;
 54 | 
 55 |     /// Creates a new pointer with the given address.
 56 |     ///
 57 |     /// This performs the same operation as a cast, but copies the *address-space* and
 58 |     /// *provenance* of `self` to the new pointer.
 59 |     ///
 60 |     /// Equivalent to calling [`pointer::with_addr`] on each element.
 61 |     fn with_addr(self, addr: Self::Usize) -> Self;
 62 | 
 63 |     /// Exposes the "provenance" part of the pointer for future use in
 64 |     /// [`Self::with_exposed_provenance`] and returns the "address" portion.
 65 |     fn expose_provenance(self) -> Self::Usize;
 66 | 
 67 |     /// Converts an address back to a pointer, picking up a previously "exposed" provenance.
 68 |     ///
 69 |     /// Equivalent to calling [`core::ptr::with_exposed_provenance_mut`] on each element.
 70 |     fn with_exposed_provenance(addr: Self::Usize) -> Self;
 71 | 
 72 |     /// Applies a signed offset to each pointer using wrapping arithmetic.
 73 |     ///
 74 |     /// Equivalent to calling [`pointer::wrapping_offset`] on each element.
 75 |     fn wrapping_offset(self, offset: Self::Isize) -> Self;
 76 | 
 77 |     /// Adds an unsigned offset to each pointer using wrapping arithmetic.
 78 |     ///
 79 |     /// Equivalent to calling [`pointer::wrapping_add`] on each element.
 80 |     fn wrapping_add(self, count: Self::Usize) -> Self;
 81 | 
 82 |     /// Subtracts an unsigned offset from each pointer using wrapping arithmetic.
 83 |     ///
 84 |     /// Equivalent to calling [`pointer::wrapping_sub`] on each element.
 85 |     fn wrapping_sub(self, count: Self::Usize) -> Self;
 86 | }
 87 | // Implements `Sealed`, which `SimdMutPtr` requires as a supertrait, for mutable pointer vectors.
 88 | impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount {}
 89 | 
 90 | impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N>
 91 | where
 92 |     LaneCount<N>: SupportedLaneCount,
 93 | {
 94 |     type Usize = Simd<usize, N>;
 95 |     type Isize = Simd<isize, N>;
 96 |     type CastPtr<U> = Simd<*mut U, N>;
 97 |     type ConstPtr = Simd<*const T, N>;
 98 |     type Mask = Mask<isize, N>;
 99 | 
100 |     #[inline]
101 |     fn is_null(self) -> Self::Mask {
102 |         Simd::splat(core::ptr::null_mut()).simd_eq(self)
103 |     }
104 | 
105 |     #[inline]
106 |     fn cast<U>(self) -> Self::CastPtr<U> {
107 |         // SimdElement currently requires zero-sized metadata, so this should never fail.
108 |         // If this ever changes, `simd_cast_ptr` should produce a post-mono error.
109 |         use core::ptr::Pointee;
110 |         assert_eq!(size_of::<<T as Pointee>::Metadata>(), 0);
111 |         assert_eq!(size_of::<<U as Pointee>::Metadata>(), 0);
112 | 
113 |         // Safety: pointers can be cast
114 |         unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
115 |     }
116 | 
117 |     #[inline]
118 |     fn cast_const(self) -> Self::ConstPtr {
119 |         // Safety: pointers can be cast; the result is `Simd<*const T, N>`, same pointee type
120 |         unsafe { core::intrinsics::simd::simd_cast_ptr(self) }
121 |     }
122 | 
123 |     #[inline]
124 |     fn addr(self) -> Self::Usize {
125 |         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
126 |         // SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the
127 |         // provenance).
128 |         unsafe { core::mem::transmute_copy(&self) }
129 |     }
130 | 
131 |     #[inline]
132 |     fn without_provenance(addr: Self::Usize) -> Self {
133 |         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
134 |         // SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
135 |         // provenance).
136 |         unsafe { core::mem::transmute_copy(&addr) }
137 |     }
138 | 
139 |     #[inline]
140 |     fn with_addr(self, addr: Self::Usize) -> Self {
141 |         // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
142 |         //
143 |         // In the mean-time, this operation is defined to be "as if" it was
144 |         // a wrapping_offset, so we can emulate it as such. This should properly
145 |         // restore pointer provenance even under today's compiler.
146 |         self.cast::<u8>()
147 |             .wrapping_offset(addr.cast::<isize>() - self.addr().cast::<isize>())
148 |             .cast()
149 |     }
150 | 
151 |     #[inline]
152 |     fn expose_provenance(self) -> Self::Usize {
153 |         // Safety: `self` is a pointer vector
154 |         unsafe { core::intrinsics::simd::simd_expose_provenance(self) }
155 |     }
156 | 
157 |     #[inline]
158 |     fn with_exposed_provenance(addr: Self::Usize) -> Self {
159 |         // Safety: `Self` is a pointer vector and `addr` is a `usize` vector of the same length
160 |         unsafe { core::intrinsics::simd::simd_with_exposed_provenance(addr) }
161 |     }
162 | 
163 |     #[inline]
164 |     fn wrapping_offset(self, count: Self::Isize) -> Self {
165 |         // Safety: simd_arith_offset takes a vector of pointers and a vector of offsets
166 |         unsafe { core::intrinsics::simd::simd_arith_offset(self, count) }
167 |     }
168 | 
169 |     #[inline]
170 |     fn wrapping_add(self, count: Self::Usize) -> Self {
171 |         self.wrapping_offset(count.cast())
172 |     }
173 | 
174 |     #[inline]
175 |     fn wrapping_sub(self, count: Self::Usize) -> Self {
176 |         self.wrapping_offset(-count.cast::<isize>())
177 |     }
178 | }
179 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/swizzle_dyn.rs:
--------------------------------------------------------------------------------
  1 | use crate::simd::{LaneCount, Simd, SupportedLaneCount};
  2 | use core::mem;
  3 | 
  4 | impl<const N: usize> Simd<u8, N>
  5 | where
  6 |     LaneCount<N>: SupportedLaneCount,
  7 | {
  8 |     /// Swizzle a vector of bytes according to the index vector.
  9 |     /// Each index less than `N` selects the byte of `self` at that position.
 10 |     /// Indices "out of bounds" (greater than or equal to `N`) instead select 0.
 11 |     ///
 12 |     /// Note that the current implementation is selected during build-time
 13 |     /// of the standard library, so `cargo build -Zbuild-std` may be necessary
 14 |     /// to unlock better performance, especially for larger vectors.
 15 |     /// A planned compiler improvement will enable using `#[target_feature]` instead.
 16 |     #[inline]
 17 |     pub fn swizzle_dyn(self, idxs: Simd<u8, N>) -> Self {
 18 |         #![allow(unused_imports, unused_unsafe)]
 19 |         #[cfg(all(
 20 |             any(target_arch = "aarch64", target_arch = "arm64ec"),
 21 |             target_endian = "little"
 22 |         ))]
 23 |         use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8};
 24 |         #[cfg(all(
 25 |             target_arch = "arm",
 26 |             target_feature = "v7",
 27 |             target_feature = "neon",
 28 |             target_endian = "little"
 29 |         ))]
 30 |         use core::arch::arm::{uint8x8_t, vtbl1_u8};
 31 |         #[cfg(target_arch = "wasm32")]
 32 |         use core::arch::wasm32 as wasm;
 33 |         #[cfg(target_arch = "wasm64")]
 34 |         use core::arch::wasm64 as wasm;
 35 |         #[cfg(target_arch = "x86")]
 36 |         use core::arch::x86;
 37 |         #[cfg(target_arch = "x86_64")]
 38 |         use core::arch::x86_64 as x86;
 39 |         // SAFETY: Intrinsics covered by cfg
 40 |         unsafe {
 41 |             match N {
 42 |                 #[cfg(all(
 43 |                     any(
 44 |                         target_arch = "aarch64",
 45 |                         target_arch = "arm64ec",
 46 |                         all(target_arch = "arm", target_feature = "v7")
 47 |                     ),
 48 |                     target_feature = "neon",
 49 |                     target_endian = "little"
 50 |                 ))]
 51 |                 8 => transize(vtbl1_u8, self, idxs),
 52 |                 #[cfg(target_feature = "ssse3")]
 53 |                 16 => transize(x86::_mm_shuffle_epi8, self, zeroing_idxs(idxs)),
 54 |                 #[cfg(target_feature = "simd128")]
 55 |                 16 => transize(wasm::i8x16_swizzle, self, idxs),
 56 |                 #[cfg(all(
 57 |                     any(target_arch = "aarch64", target_arch = "arm64ec"),
 58 |                     target_feature = "neon",
 59 |                     target_endian = "little"
 60 |                 ))]
 61 |                 16 => transize(vqtbl1q_u8, self, idxs),
 62 |                 #[cfg(all(
 63 |                     target_arch = "arm",
 64 |                     target_feature = "v7",
 65 |                     target_feature = "neon",
 66 |                     target_endian = "little"
 67 |                 ))]
 68 |                 16 => transize(armv7_neon_swizzle_u8x16, self, idxs),
 69 |                 #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
 70 |                 32 => transize(avx2_pshufb, self, idxs),
 71 |                 #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
 72 |                 32 => {
 73 |                     // Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
 74 |                     let swizzler = |bytes, idxs| {
 75 |                         let mask = x86::_mm256_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
 76 |                             idxs,
 77 |                             Simd::<u8, 32>::splat(N as u8).into(),
 78 |                         );
 79 |                         x86::_mm256_maskz_permutexvar_epi8(mask, idxs, bytes)
 80 |                     };
 81 |                     transize(swizzler, self, idxs)
 82 |                 }
 83 |                 // Notable absence: avx512bw pshufb shuffle
 84 |                 #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
 85 |                 64 => {
 86 |                     // Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
 87 |                     let swizzler = |bytes, idxs| {
 88 |                         let mask = x86::_mm512_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
 89 |                             idxs,
 90 |                             Simd::<u8, 64>::splat(N as u8).into(),
 91 |                         );
 92 |                         x86::_mm512_maskz_permutexvar_epi8(mask, idxs, bytes)
 93 |                     };
 94 |                     transize(swizzler, self, idxs)
 95 |                 }
 96 |                 _ => {
 97 |                     let mut array = [0; N]; // lanes keep this 0 unless overwritten below
 98 |                     for (i, k) in idxs.to_array().into_iter().enumerate() {
 99 |                         if (k as usize) < N {
100 |                             array[i] = self[k as usize];
101 |                         };
102 |                     }
103 |                     array.into()
104 |                 }
105 |             }
106 |         }
107 |     }
108 | }
109 | 
110 | /// Swizzles a `u8x16` on armv7 neon as two `u8x8` lookups (one per half of `idxs`)
111 | /// into a `u8x8x2` lookup table made of both halves of `bytes`.
112 | /// The two 8-lane results are then recombined into the 16-lane output.
113 | ///
114 | /// # Safety
115 | /// This requires armv7 neon to work
116 | #[cfg(all(
117 |     target_arch = "arm",
118 |     target_feature = "v7",
119 |     target_feature = "neon",
120 |     target_endian = "little"
121 | ))]
122 | unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> Simd<u8, 16> {
123 |     use core::arch::arm::{uint8x8x2_t, vcombine_u8, vget_high_u8, vget_low_u8, vtbl2_u8};
124 |     // SAFETY: Caller promised arm neon support
125 |     unsafe {
126 |         let (idx_lo, idx_hi) = (vget_low_u8(idxs.into()), vget_high_u8(idxs.into()));
127 |         let table = uint8x8x2_t(vget_low_u8(bytes.into()), vget_high_u8(bytes.into()));
128 |         vcombine_u8(vtbl2_u8(table, idx_lo), vtbl2_u8(table, idx_hi)).into()
129 |     }
130 | }
131 | 
132 | /// "vpshufb like it was meant to be" on AVX2: a full 32-byte swizzle where indices >= 32 select 0
133 | ///
134 | /// # Safety
135 | /// This requires AVX2 to work
136 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
137 | #[target_feature(enable = "avx2")]
138 | #[allow(unused)]
139 | #[inline]
140 | #[allow(clippy::let_and_return)]
141 | unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
142 |     use crate::simd::cmp::SimdPartialOrd;
143 |     #[cfg(target_arch = "x86")]
144 |     use core::arch::x86;
145 |     #[cfg(target_arch = "x86_64")]
146 |     use core::arch::x86_64 as x86;
147 |     use x86::_mm256_permute2x128_si256 as avx2_cross_shuffle;
148 |     use x86::_mm256_shuffle_epi8 as avx2_half_pshufb;
149 |     let mid = Simd::splat(16u8);
150 |     let high = mid + mid;
151 |     // SAFETY: Caller promised AVX2
152 |     unsafe {
153 |         // This is ordering sensitive, and LLVM will order these how you put them.
154 |         // Most AVX2 impls use ~5 "ports", and only 1 or 2 are capable of permutes.
155 |         // But the "compose" step will lower to ops that can also use at least 1 other port.
156 |         // So this tries to break up permutes so composition flows through "open" ports.
157 |         // Comparative benches should be done on multiple AVX2 CPUs before reordering this
158 | 
159 |         let hihi = avx2_cross_shuffle::<0x11>(bytes.into(), bytes.into());
160 |         let hi_shuf = Simd::from(avx2_half_pshufb(
161 |             hihi,        // duplicate the vector's top half
162 |             idxs.into(), // so that using only 4 bits of an index still picks bytes 16-31
163 |         ));
164 |         // A zero-fill during the compose step gives the "all-Neon-like" OOB-is-0 semantics
165 |         let compose = idxs.simd_lt(high).select(hi_shuf, Simd::splat(0));
166 |         let lolo = avx2_cross_shuffle::<0x00>(bytes.into(), bytes.into());
167 |         let lo_shuf = Simd::from(avx2_half_pshufb(lolo, idxs.into()));
168 |         // Repeat, then pick indices < 16, overwriting indices 0-15 from previous compose step
169 |         let compose = idxs.simd_lt(mid).select(lo_shuf, compose);
170 |         compose
171 |     }
172 | }
173 | 
174 | /// Invokes the architecture-specific function `f` on `a` and `b`, bit-copying each
175 | /// `Simd<u8, N>` into a `T` and back, because matching on `N` does not convince rustc.
176 | ///
177 | /// # Safety
178 | /// The correctness of this function hinges on the sizes agreeing in actuality.
179 | #[allow(dead_code)]
180 | #[inline(always)]
181 | unsafe fn transize<T, const N: usize>(
182 |     f: unsafe fn(T, T) -> T,
183 |     a: Simd<u8, N>,
184 |     b: Simd<u8, N>,
185 | ) -> Simd<u8, N>
186 | where
187 |     LaneCount<N>: SupportedLaneCount,
188 | {
189 |     // SAFETY: Same obligation to use this function as to use mem::transmute_copy.
190 |     let (lhs, rhs): (T, T) = unsafe { (mem::transmute_copy(&a), mem::transmute_copy(&b)) };
191 |     // SAFETY: Same obligation again, for calling `f` and copying its result back out.
192 |     unsafe { mem::transmute_copy(&f(lhs, rhs)) }
193 | }
194 | 
195 | /// Replaces every index that is not below `N` with `u8::MAX`, so that it yields 0 on x86
196 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
197 | #[allow(unused)]
198 | #[inline(always)]
199 | fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
200 | where
201 |     LaneCount<N>: SupportedLaneCount,
202 | {
203 |     use crate::simd::cmp::SimdPartialOrd;
204 |     let in_bounds = idxs.simd_lt(Simd::splat(N as u8));
205 |     in_bounds.select(idxs, Simd::splat(u8::MAX))
206 | }
207 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/to_bytes.rs:
--------------------------------------------------------------------------------
  1 | use crate::simd::{
  2 |     LaneCount, Simd, SimdElement, SupportedLaneCount,
  3 |     num::{SimdFloat, SimdInt, SimdUint},
  4 | };
  5 | // `Sealed` is implemented for every `Simd<T, N>` inside a private module; `ToBytes` requires it.
  6 | mod sealed {
  7 |     use super::*;
  8 |     pub trait Sealed {}
  9 |     impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> where LaneCount<N>: SupportedLaneCount {}
 10 | }
 11 | use sealed::Sealed;
 12 | 
 13 | /// Converts SIMD vectors to and from vectors of bytes
 14 | pub trait ToBytes: Sealed {
 15 |     /// This type, reinterpreted as bytes.
 16 |     type Bytes: Copy
 17 |         + Unpin
 18 |         + Send
 19 |         + Sync
 20 |         + AsRef<[u8]>
 21 |         + AsMut<[u8]>
 22 |         + SimdUint<Scalar = u8>
 23 |         + 'static;
 24 | 
 25 |     /// Returns the memory representation of this vector as a vector of bytes in native byte
 26 |     /// order.
 27 |     fn to_ne_bytes(self) -> Self::Bytes;
 28 | 
 29 |     /// Returns the memory representation of this vector as a vector of bytes, with each
 30 |     /// element in big-endian (network) byte order.
 31 |     fn to_be_bytes(self) -> Self::Bytes;
 32 | 
 33 |     /// Returns the memory representation of this vector as a vector of bytes, with each
 34 |     /// element in little-endian byte order.
 35 |     fn to_le_bytes(self) -> Self::Bytes;
 36 | 
 37 |     /// Creates a vector from its memory representation as a vector of bytes
 38 |     /// in native endianness.
 39 |     fn from_ne_bytes(bytes: Self::Bytes) -> Self;
 40 | 
 41 |     /// Creates a vector from a vector of bytes holding each element in big endian.
 42 |     fn from_be_bytes(bytes: Self::Bytes) -> Self;
 43 | 
 44 |     /// Creates a vector from a vector of bytes holding each element in little endian.
 45 |     fn from_le_bytes(bytes: Self::Bytes) -> Self;
 46 | }
 47 | // Byte-swaps each element of `$x`; `f32`/`f64` vectors are swapped through their bit representation.
 48 | macro_rules! swap_bytes {
 49 |     { f32, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) };
 50 |     { f64, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) };
 51 |     { $ty:ty, $x:expr } => { $x.swap_bytes() }
 52 | }
 53 | // Implements `ToBytes` for vectors of `$ty`; the second argument is the element size in bytes.
 54 | macro_rules! impl_to_bytes {
 55 |     { $ty:tt, 1  } => { impl_to_bytes! { $ty, 1  * [1, 2, 4, 8, 16, 32, 64] } };
 56 |     { $ty:tt, 2  } => { impl_to_bytes! { $ty, 2  * [1, 2, 4, 8, 16, 32] } };
 57 |     { $ty:tt, 4  } => { impl_to_bytes! { $ty, 4  * [1, 2, 4, 8, 16] } };
 58 |     { $ty:tt, 8  } => { impl_to_bytes! { $ty, 8  * [1, 2, 4, 8] } };
 59 |     { $ty:tt, 16 } => { impl_to_bytes! { $ty, 16 * [1, 2, 4] } };
 60 |     { $ty:tt, 32 } => { impl_to_bytes! { $ty, 32 * [1, 2] } };
 61 |     { $ty:tt, 64 } => { impl_to_bytes! { $ty, 64 * [1] } };
 62 |     // The shorthand arms above list lane counts such that `$size * $elems` is at most 64.
 63 |     { $ty:tt, $size:literal * [$($elems:literal),*] } => {
 64 |         $(
 65 |         impl ToBytes for Simd<$ty, $elems> {
 66 |             type Bytes = Simd<u8, { $size * $elems }>;
 67 | 
 68 |             #[inline]
 69 |             fn to_ne_bytes(self) -> Self::Bytes {
 70 |                 // Safety: transmuting between vectors is safe
 71 |                 unsafe {
 72 |                     #![allow(clippy::useless_transmute)]
 73 |                     core::mem::transmute(self)
 74 |                 }
 75 |             }
 76 | 
 77 |             #[inline]
 78 |             fn to_be_bytes(mut self) -> Self::Bytes {
 79 |                 if !cfg!(target_endian = "big") {
 80 |                     self = swap_bytes!($ty, self);
 81 |                 }
 82 |                 self.to_ne_bytes()
 83 |             }
 84 | 
 85 |             #[inline]
 86 |             fn to_le_bytes(mut self) -> Self::Bytes {
 87 |                 if !cfg!(target_endian = "little") {
 88 |                     self = swap_bytes!($ty, self);
 89 |                 }
 90 |                 self.to_ne_bytes()
 91 |             }
 92 | 
 93 |             #[inline]
 94 |             fn from_ne_bytes(bytes: Self::Bytes) -> Self {
 95 |                 // Safety: transmuting between vectors is safe
 96 |                 unsafe {
 97 |                     #![allow(clippy::useless_transmute)]
 98 |                     core::mem::transmute(bytes)
 99 |                 }
100 |             }
101 | 
102 |             #[inline]
103 |             fn from_be_bytes(bytes: Self::Bytes) -> Self {
104 |                 let ret = Self::from_ne_bytes(bytes);
105 |                 if cfg!(target_endian = "big") {
106 |                     ret
107 |                 } else {
108 |                     swap_bytes!($ty, ret)
109 |                 }
110 |             }
111 | 
112 |             #[inline]
113 |             fn from_le_bytes(bytes: Self::Bytes) -> Self {
114 |                 let ret = Self::from_ne_bytes(bytes);
115 |                 if cfg!(target_endian = "little") {
116 |                     ret
117 |                 } else {
118 |                     swap_bytes!($ty, ret)
119 |                 }
120 |             }
121 |         }
122 |         )*
123 |     }
124 | }
125 | 
126 | impl_to_bytes! { u8, 1 }
127 | impl_to_bytes! { u16, 2 }
128 | impl_to_bytes! { u32, 4 }
129 | impl_to_bytes! { u64, 8 }
130 | #[cfg(target_pointer_width = "32")] // `usize` elements are given as 4 bytes here
131 | impl_to_bytes! { usize, 4 }
132 | #[cfg(target_pointer_width = "64")] // 8 bytes here; other pointer widths get no `usize` impl
133 | impl_to_bytes! { usize, 8 }
134 | 
135 | impl_to_bytes! { i8, 1 }
136 | impl_to_bytes! { i16, 2 }
137 | impl_to_bytes! { i32, 4 }
138 | impl_to_bytes! { i64, 8 }
139 | #[cfg(target_pointer_width = "32")] // `isize` elements are given as 4 bytes here
140 | impl_to_bytes! { isize, 4 }
141 | #[cfg(target_pointer_width = "64")] // 8 bytes here; other pointer widths get no `isize` impl
142 | impl_to_bytes! { isize, 8 }
143 | 
144 | impl_to_bytes! { f32, 4 }
145 | impl_to_bytes! { f64, 8 }
146 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/vendor.rs:
--------------------------------------------------------------------------------
 1 | /// Provides implementations of `From<$a> for $b` and `From<$b> for $a` that transmute the value.
 2 | #[allow(unused)]
 3 | macro_rules! from_transmute {
 4 |     { unsafe $a:ty => $b:ty } => {
 5 |         from_transmute!{ @impl $a => $b }
 6 |         from_transmute!{ @impl $b => $a }
 7 |     };
 8 |     { @impl $from:ty => $to:ty } => {
 9 |         impl core::convert::From<$from> for $to {
10 |             #[inline]
11 |             fn from(value: $from) -> $to {
12 |                 // Safety: transmuting between vectors is safe, but the caller of this macro
13 |                 // checks the invariants
14 |                 unsafe { core::mem::transmute(value) }
15 |             }
16 |         }
17 |     };
18 | }
19 | 
20 | /// Conversions between the portable vector types and x86's SIMD types.
21 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
22 | mod x86;
23 | 
24 | #[cfg(target_arch = "wasm32")]
25 | mod wasm32; // conversions to and from WebAssembly's `v128`
26 | 
27 | #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",))]
28 | mod arm; // conversions to and from the `core::arch::arm` / `core::arch::aarch64` vector types
29 | 
30 | #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
31 | mod powerpc; // conversions to and from PowerPC's `vector_*` types
32 | 
33 | #[cfg(target_arch = "loongarch64")]
34 | mod loongarch64; // conversions to and from the `core::arch::loongarch64` vector types
35 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/vendor/arm.rs:
--------------------------------------------------------------------------------
 1 | #![allow(unused)]
 2 | use crate::simd::*;
 3 | 
 4 | #[cfg(target_arch = "arm")]
 5 | use core::arch::arm::*;
 6 | 
 7 | #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
 8 | use core::arch::aarch64::*;
 9 | 
10 | #[cfg(all(
11 |     any(
12 |         target_arch = "aarch64",
13 |         target_arch = "arm64ec",
14 |         all(target_arch = "arm", target_feature = "v7"), // 32-bit Arm only with the `v7` feature
15 |     ),
16 |     target_endian = "little" // big-endian targets get none of these impls
17 | ))]
18 | mod neon { // `From` impls in both directions between portable vectors and the `core::arch` vector types
19 |     use super::*;
20 | 
21 |     from_transmute! { unsafe f32x2 => float32x2_t }
22 |     from_transmute! { unsafe f32x4 => float32x4_t }
23 | 
24 |     from_transmute! { unsafe u8x8 => uint8x8_t }
25 |     from_transmute! { unsafe u8x16 => uint8x16_t }
26 |     from_transmute! { unsafe i8x8 => int8x8_t }
27 |     from_transmute! { unsafe i8x16 => int8x16_t }
28 |     from_transmute! { unsafe u8x8 => poly8x8_t } // `poly*` types pair with the unsigned portable vector
29 |     from_transmute! { unsafe u8x16 => poly8x16_t }
30 | 
31 |     from_transmute! { unsafe u16x4 => uint16x4_t }
32 |     from_transmute! { unsafe u16x8 => uint16x8_t }
33 |     from_transmute! { unsafe i16x4 => int16x4_t }
34 |     from_transmute! { unsafe i16x8 => int16x8_t }
35 |     from_transmute! { unsafe u16x4 => poly16x4_t }
36 |     from_transmute! { unsafe u16x8 => poly16x8_t }
37 | 
38 |     from_transmute! { unsafe u32x2 => uint32x2_t }
39 |     from_transmute! { unsafe u32x4 => uint32x4_t }
40 |     from_transmute! { unsafe i32x2 => int32x2_t }
41 |     from_transmute! { unsafe i32x4 => int32x4_t }
42 | 
43 |     from_transmute! { unsafe Simd<u64, 1> => uint64x1_t } // single-lane vectors are spelled `Simd<T, 1>`
44 |     from_transmute! { unsafe u64x2 => uint64x2_t }
45 |     from_transmute! { unsafe Simd<i64, 1> => int64x1_t }
46 |     from_transmute! { unsafe i64x2 => int64x2_t }
47 |     from_transmute! { unsafe Simd<u64, 1> => poly64x1_t }
48 |     from_transmute! { unsafe u64x2 => poly64x2_t }
49 | }
50 | 
51 | #[cfg(any( // NOTE(review): no `target_arch`/`target_endian` test here, unlike `neon` and `aarch64` — confirm
52 |     all(target_feature = "v6", not(target_feature = "mclass")), // `v6` without `mclass`
53 |     all(target_feature = "mclass", target_feature = "dsp"), // `mclass` together with `dsp`
54 | ))]
55 | mod simd32 { // 32-bit packed vectors: four 8-bit lanes or two 16-bit lanes
56 |     use super::*;
57 | 
58 |     from_transmute! { unsafe Simd<u8, 4> => uint8x4_t }
59 |     from_transmute! { unsafe Simd<i8, 4> => int8x4_t }
60 |     from_transmute! { unsafe Simd<u16, 2> => uint16x2_t }
61 |     from_transmute! { unsafe Simd<i16, 2> => int16x2_t }
62 | }
63 | 
64 | #[cfg(all(
65 |     any(target_arch = "aarch64", target_arch = "arm64ec"), // 64-bit Arm only
66 |     target_endian = "little" // same little-endian restriction as `neon`
67 | ))]
68 | mod aarch64 { // the f64 vector conversions, which this file provides only for the 64-bit Arm targets
69 |     use super::neon::*;
70 |     use super::*;
71 | 
72 |     from_transmute! { unsafe Simd<f64, 1> => float64x1_t }
73 |     from_transmute! { unsafe f64x2 => float64x2_t }
74 | }
75 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/vendor/loongarch64.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::*;
 2 | use core::arch::loongarch64::*;
 3 | 
 4 | from_transmute! { unsafe u8x16 => v16u8 } // 8-bit lanes: 16 lanes = 128 bits, 32 lanes = 256 bits
 5 | from_transmute! { unsafe u8x32 => v32u8 }
 6 | from_transmute! { unsafe i8x16 => v16i8 }
 7 | from_transmute! { unsafe i8x32 => v32i8 }
 8 | 
 9 | from_transmute! { unsafe u16x8 => v8u16 } // 16-bit lanes
10 | from_transmute! { unsafe u16x16 => v16u16 }
11 | from_transmute! { unsafe i16x8 => v8i16 }
12 | from_transmute! { unsafe i16x16 => v16i16 }
13 | 
14 | from_transmute! { unsafe u32x4 => v4u32 } // 32-bit lanes, integer and float
15 | from_transmute! { unsafe u32x8 => v8u32 }
16 | from_transmute! { unsafe i32x4 => v4i32 }
17 | from_transmute! { unsafe i32x8 => v8i32 }
18 | from_transmute! { unsafe f32x4 => v4f32 }
19 | from_transmute! { unsafe f32x8 => v8f32 }
20 | 
21 | from_transmute! { unsafe u64x2 => v2u64 } // 64-bit lanes, integer and float
22 | from_transmute! { unsafe u64x4 => v4u64 }
23 | from_transmute! { unsafe i64x2 => v2i64 }
24 | from_transmute! { unsafe i64x4 => v4i64 }
25 | from_transmute! { unsafe f64x2 => v2f64 }
26 | from_transmute! { unsafe f64x4 => v4f64 }
27 | 
28 | from_transmute! { unsafe usizex2 => v2u64 } // pointer-sized lanes reuse the 64-bit vendor types (no pointer-width cfg here)
29 | from_transmute! { unsafe usizex4 => v4u64 }
30 | from_transmute! { unsafe isizex2 => v2i64 }
31 | from_transmute! { unsafe isizex4 => v4i64 }
32 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/vendor/powerpc.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::*;
 2 | 
 3 | #[cfg(target_arch = "powerpc")]
 4 | use core::arch::powerpc::*;
 5 | 
 6 | #[cfg(target_arch = "powerpc64")]
 7 | use core::arch::powerpc64::*;
 8 | 
 9 | from_transmute! { unsafe f64x2 => vector_double } // only the two-lane, 64-bit-element vectors are converted here
10 | from_transmute! { unsafe i64x2 => vector_signed_long }
11 | from_transmute! { unsafe u64x2 => vector_unsigned_long }
12 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/vendor/wasm32.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::*;
 2 | use core::arch::wasm32::v128;
 3 | 
 4 | from_transmute! { unsafe u8x16 => v128 } // every 128-bit portable vector converts to and from the single `v128` type
 5 | from_transmute! { unsafe i8x16 => v128 }
 6 | 
 7 | from_transmute! { unsafe u16x8 => v128 }
 8 | from_transmute! { unsafe i16x8 => v128 }
 9 | 
10 | from_transmute! { unsafe u32x4 => v128 }
11 | from_transmute! { unsafe i32x4 => v128 }
12 | from_transmute! { unsafe f32x4 => v128 }
13 | 
14 | from_transmute! { unsafe u64x2 => v128 }
15 | from_transmute! { unsafe i64x2 => v128 }
16 | from_transmute! { unsafe f64x2 => v128 }
17 | 
18 | #[cfg(target_pointer_width = "32")] // 32-bit pointers: four pointer-sized lanes fill 128 bits
19 | mod p32 {
20 |     use super::*;
21 |     from_transmute! { unsafe usizex4 => v128 }
22 |     from_transmute! { unsafe isizex4 => v128 }
23 | }
24 | 
25 | #[cfg(target_pointer_width = "64")] // 64-bit pointers: two pointer-sized lanes fill 128 bits
26 | mod p64 {
27 |     use super::*;
28 |     from_transmute! { unsafe usizex2 => v128 }
29 |     from_transmute! { unsafe isizex2 => v128 }
30 | }
31 | 


--------------------------------------------------------------------------------
/crates/core_simd/src/vendor/x86.rs:
--------------------------------------------------------------------------------
 1 | use crate::simd::*;
 2 | 
 3 | #[cfg(target_arch = "x86")]
 4 | use core::arch::x86::*;
 5 | 
 6 | #[cfg(target_arch = "x86_64")]
 7 | use core::arch::x86_64::*;
 8 | 
 9 | from_transmute! { unsafe u8x16 => __m128i } // 8-bit lanes <-> the 128/256/512-bit integer vendor types
10 | from_transmute! { unsafe u8x32 => __m256i }
11 | from_transmute! { unsafe u8x64 => __m512i }
12 | from_transmute! { unsafe i8x16 => __m128i }
13 | from_transmute! { unsafe i8x32 => __m256i }
14 | from_transmute! { unsafe i8x64 => __m512i }
15 | 
16 | from_transmute! { unsafe u16x8 => __m128i } // 16-bit lanes
17 | from_transmute! { unsafe u16x16 => __m256i }
18 | from_transmute! { unsafe u16x32 => __m512i }
19 | from_transmute! { unsafe i16x8 => __m128i }
20 | from_transmute! { unsafe i16x16 => __m256i }
21 | from_transmute! { unsafe i16x32 => __m512i }
22 | 
23 | from_transmute! { unsafe u32x4 => __m128i } // 32-bit lanes
24 | from_transmute! { unsafe u32x8 => __m256i }
25 | from_transmute! { unsafe u32x16 => __m512i }
26 | from_transmute! { unsafe i32x4 => __m128i }
27 | from_transmute! { unsafe i32x8 => __m256i }
28 | from_transmute! { unsafe i32x16 => __m512i }
29 | from_transmute! { unsafe f32x4 => __m128 } // f32 lanes map to the suffix-less float types
30 | from_transmute! { unsafe f32x8 => __m256 }
31 | from_transmute! { unsafe f32x16 => __m512 }
32 | 
33 | from_transmute! { unsafe u64x2 => __m128i } // 64-bit lanes
34 | from_transmute! { unsafe u64x4 => __m256i }
35 | from_transmute! { unsafe u64x8 => __m512i }
36 | from_transmute! { unsafe i64x2 => __m128i }
37 | from_transmute! { unsafe i64x4 => __m256i }
38 | from_transmute! { unsafe i64x8 => __m512i }
39 | from_transmute! { unsafe f64x2 => __m128d } // f64 lanes map to the `d`-suffixed types
40 | from_transmute! { unsafe f64x4 => __m256d }
41 | from_transmute! { unsafe f64x8 => __m512d }
42 | 
43 | #[cfg(target_pointer_width = "32")] // 32-bit pointers: 4, 8 and 16 lanes fill 128, 256 and 512 bits
44 | mod p32 {
45 |     use super::*;
46 |     from_transmute! { unsafe usizex4 => __m128i }
47 |     from_transmute! { unsafe usizex8 => __m256i }
48 |     from_transmute! { unsafe Simd<usize, 16> => __m512i } // 16-lane vector written as `Simd<_, 16>` rather than an alias
49 |     from_transmute! { unsafe isizex4 => __m128i }
50 |     from_transmute! { unsafe isizex8 => __m256i }
51 |     from_transmute! { unsafe Simd<isize, 16> => __m512i }
52 | }
53 | 
54 | #[cfg(target_pointer_width = "64")] // 64-bit pointers: 2, 4 and 8 lanes fill 128, 256 and 512 bits
55 | mod p64 {
56 |     use super::*;
57 |     from_transmute! { unsafe usizex2 => __m128i }
58 |     from_transmute! { unsafe usizex4 => __m256i }
59 |     from_transmute! { unsafe usizex8 => __m512i }
60 |     from_transmute! { unsafe isizex2 => __m128i }
61 |     from_transmute! { unsafe isizex4 => __m256i }
62 |     from_transmute! { unsafe isizex8 => __m512i }
63 | }
64 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/autoderef.rs:
--------------------------------------------------------------------------------
 1 | // Test that we handle all our "auto-deref" cases correctly.
 2 | #![feature(portable_simd)]
 3 | use core_simd::simd::f32x4;
 4 | 
 5 | #[cfg(target_arch = "wasm32")]
 6 | use wasm_bindgen_test::*;
 7 | 
 8 | #[cfg(target_arch = "wasm32")]
 9 | wasm_bindgen_test_configure!(run_in_browser);
10 | 
11 | #[test]
12 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
13 | fn deref() {
14 |     // `+` must accept every owned/borrowed pairing of its two operands.
15 |     let (one, two) = (f32x4::splat(1.0), f32x4::splat(2.0));
16 |     let (one_ref, two_ref) = (&one, &two);
17 |     let three = f32x4::splat(3.0);
18 |     assert_eq!(three, one + two); // value + value
19 |     assert_eq!(three, one + two_ref); // value + reference
20 |     assert_eq!(three, one_ref + two); // reference + value
21 |     assert_eq!(three, one_ref + two_ref); // reference + reference
22 | }
23 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/cast.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | macro_rules! cast_types { // for source element type `$start`, emits one nested test module per `$target`
 3 |     ($start:ident, $($target:ident),*) => {
 4 |         mod $start {
 5 |             #[allow(unused)]
 6 |             use core_simd::simd::prelude::*;
 7 |             type Vector<const N: usize> = Simd<$start, N>; // vector of the source element type
 8 |             $(
 9 |                 mod $target {
10 |                     use super::*;
11 |                     test_helpers::test_lanes! {
12 |                         fn cast_as<const N: usize>() {
13 |                             test_helpers::test_unary_elementwise(
14 |                                 &Vector::<N>::cast::<$target>, // vector operation under test
15 |                                 &|x| x as $target, // scalar reference: Rust's `as` conversion
16 |                                 &|_| true, // presumably an input filter that accepts everything — confirm in test_helpers
17 |                             )
18 |                         }
19 |                     }
20 |                 }
21 |             )*
22 |         }
23 |     };
24 | }
25 | 
26 | // The hypothesis is that widening conversions aren't terribly interesting.
27 | cast_types!(f32, f64, i8, u8, usize, isize); // first type is the source element, the rest are cast targets
28 | cast_types!(f64, f32, i8, u8, usize, isize);
29 | cast_types!(i8, u8, f32);
30 | cast_types!(u8, i8, f32);
31 | cast_types!(i16, u16, i8, u8, f32);
32 | cast_types!(u16, i16, i8, u8, f32);
33 | cast_types!(i32, u32, i8, u8, f32, f64);
34 | cast_types!(u32, i32, i8, u8, f32, f64);
35 | cast_types!(i64, u64, i8, u8, isize, usize, f32, f64);
36 | cast_types!(u64, i64, i8, u8, isize, usize, f32, f64);
37 | cast_types!(isize, usize, i8, u8, f32, f64);
38 | cast_types!(usize, isize, i8, u8, f32, f64);
39 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/f32_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_float_tests! { f32, i32 } // float test suite for f32 (macro comes in via `#[macro_use] mod ops_macros`); `i32` is presumably the same-width integer — confirm
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/f64_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_float_tests! { f64, i64 } // float test suite for f64 (macro comes in via `#[macro_use] mod ops_macros`); `i64` is presumably the same-width integer — confirm
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/i16_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_signed_tests! { i16 } // signed-integer test suite for i16 (macro comes in via `#[macro_use] mod ops_macros`)
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/i32_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_signed_tests! { i32 } // signed-integer test suite for i32 (macro comes in via `#[macro_use] mod ops_macros`)
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/i64_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_signed_tests! { i64 } // signed-integer test suite for i64 (macro comes in via `#[macro_use] mod ops_macros`)
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/i8_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_signed_tests! { i8 } // signed-integer test suite for i8 (macro comes in via `#[macro_use] mod ops_macros`)
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/isize_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_signed_tests! { isize } // signed-integer test suite for isize (macro comes in via `#[macro_use] mod ops_macros`)
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/layout.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | 
 3 | macro_rules! layout_tests { // for each `(module name, element type)` pair, emits a size check of `Simd` against an array
 4 |     { $($mod:ident, $ty:ty,)* } => {
 5 |         $(
 6 |         mod $mod {
 7 |             test_helpers::test_lanes! {
 8 |                 fn no_padding<const LANES: usize>() {
 9 |                     assert_eq!(
10 |                         size_of::<core_simd::simd::Simd::<$ty, LANES>>(), // the vector type...
11 |                         size_of::<[$ty; LANES]>(), // ...must be exactly as large as an array of the same elements
12 |                     );
13 |                 }
14 |             }
15 |         }
16 |         )*
17 |     }
18 | }
19 | 
20 | layout_tests! { // each row: test module name, element type
21 |     i8, i8,
22 |     i16, i16,
23 |     i32, i32,
24 |     i64, i64,
25 |     isize, isize,
26 |     u8, u8,
27 |     u16, u16,
28 |     u32, u32,
29 |     u64, u64,
30 |     usize, usize,
31 |     f32, f32,
32 |     f64, f64,
33 |     mut_ptr, *mut (), // pointer element types are not identifiers, so the module name is given separately
34 |     const_ptr, *const (),
35 | }
36 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | mod mask_ops_impl;
4 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/mask16.rs:
--------------------------------------------------------------------------------
1 | mask_tests! { mask16x4, 4 } // arguments: mask alias under test, then its lane count
2 | mask_tests! { mask16x8, 8 }
3 | mask_tests! { mask16x16, 16 }
4 | mask_tests! { mask16x32, 32 }
5 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/mask32.rs:
--------------------------------------------------------------------------------
1 | mask_tests! { mask32x2, 2 } // arguments: mask alias under test, then its lane count
2 | mask_tests! { mask32x4, 4 }
3 | mask_tests! { mask32x8, 8 }
4 | mask_tests! { mask32x16, 16 }
5 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/mask64.rs:
--------------------------------------------------------------------------------
1 | mask_tests! { mask64x2, 2 } // arguments: mask alias under test, then its lane count
2 | mask_tests! { mask64x4, 4 }
3 | mask_tests! { mask64x8, 8 }
4 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/mask8.rs:
--------------------------------------------------------------------------------
1 | mask_tests! { mask8x8, 8 } // arguments: mask alias under test, then its lane count
2 | mask_tests! { mask8x16, 16 }
3 | mask_tests! { mask8x32, 32 }
4 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/mask_macros.rs:
--------------------------------------------------------------------------------
  1 | macro_rules! mask_tests { // emits a test module named after `$vector`, a mask alias with `$lanes` lanes
  2 |     { $vector:ident, $lanes:literal } => {
  3 |         #[cfg(test)]
  4 |         mod $vector {
  5 |             use core_simd::simd::$vector as Vector;
  6 |             const LANES: usize = $lanes;
  7 | 
  8 |             #[cfg(target_arch = "wasm32")]
  9 |             use wasm_bindgen_test::*;
 10 | 
 11 |             #[cfg(target_arch = "wasm32")]
 12 |             wasm_bindgen_test_configure!(run_in_browser);
 13 |             // Builds a mask whose lane `i` is `slice[i]`, reading at most `LANES` entries; other lanes keep the default.
 14 |             fn from_slice(slice: &[bool]) -> Vector {
 15 |                 let mut value = Vector::default();
 16 |                 for (i, b) in slice.iter().take(LANES).enumerate() {
 17 |                     value.set(i, *b);
 18 |                 }
 19 |                 value
 20 |             }
 21 |             // Scalar reference for unary operators: applies `f` to each lane of `x`, one lane at a time.
 22 |             fn apply_unary_lanewise(x: Vector, f: impl Fn(bool) -> bool) -> Vector {
 23 |                 let mut value = Vector::default();
 24 |                 for i in 0..LANES {
 25 |                     value.set(i, f(x.test(i)));
 26 |                 }
 27 |                 value
 28 |             }
 29 |             // Scalar reference for binary operators: lane `i` of the result is `f(x[i], y[i])`.
 30 |             fn apply_binary_lanewise(x: Vector, y: Vector, f: impl Fn(bool, bool) -> bool) -> Vector {
 31 |                 let mut value = Vector::default();
 32 |                 for i in 0..LANES {
 33 |                     value.set(i, f(x.test(i), y.test(i)));
 34 |                 }
 35 |                 value
 36 |             }
 37 |             // Scalar-on-the-left reference: overwrites each lane of `y` with `f(x, y[i])` and returns it.
 38 |             fn apply_binary_scalar_lhs_lanewise(x: bool, mut y: Vector, f: impl Fn(bool, bool) -> bool) -> Vector {
 39 |                 for i in 0..LANES {
 40 |                     y.set(i, f(x, y.test(i)));
 41 |                 }
 42 |                 y
 43 |             }
 44 |             // Scalar-on-the-right reference: overwrites each lane of `x` with `f(x[i], y)` and returns it.
 45 |             fn apply_binary_scalar_rhs_lanewise(mut x: Vector, y: bool, f: impl Fn(bool, bool) -> bool) -> Vector {
 46 |                 for i in 0..LANES {
 47 |                     x.set(i, f(x.test(i), y));
 48 |                 }
 49 |                 x
 50 |             }
 51 |             // Inputs: each aligned group of four lanes of (A, B) covers all four bool pairs; masks with fewer lanes see only a prefix.
 52 |             const A: [bool; 64] = [
 53 |                 false, true, false, true, false, false, true, true,
 54 |                 false, true, false, true, false, false, true, true,
 55 |                 false, true, false, true, false, false, true, true,
 56 |                 false, true, false, true, false, false, true, true,
 57 |                 false, true, false, true, false, false, true, true,
 58 |                 false, true, false, true, false, false, true, true,
 59 |                 false, true, false, true, false, false, true, true,
 60 |                 false, true, false, true, false, false, true, true,
 61 |             ];
 62 |             const B: [bool; 64] = [
 63 |                 false, false, true, true, false, true, false, true,
 64 |                 false, false, true, true, false, true, false, true,
 65 |                 false, false, true, true, false, true, false, true,
 66 |                 false, false, true, true, false, true, false, true,
 67 |                 false, false, true, true, false, true, false, true,
 68 |                 false, false, true, true, false, true, false, true,
 69 |                 false, false, true, true, false, true, false, true,
 70 |                 false, false, true, true, false, true, false, true,
 71 |             ];
 72 | 
 73 |             #[test]
 74 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 75 |             fn bitand() {
 76 |                 let a = from_slice(&A);
 77 |                 let b = from_slice(&B);
 78 |                 let expected = apply_binary_lanewise(a, b, core::ops::BitAnd::bitand);
 79 |                 assert_eq!(a & b, expected);
 80 |             }
 81 | 
 82 |             #[test]
 83 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 84 |             fn bitand_assign() {
 85 |                 let mut a = from_slice(&A);
 86 |                 let b = from_slice(&B);
 87 |                 let expected = apply_binary_lanewise(a, b, core::ops::BitAnd::bitand);
 88 |                 a &= b;
 89 |                 assert_eq!(a, expected);
 90 |             }
 91 | 
 92 |             #[test]
 93 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 94 |             fn bitand_scalar_rhs() {
 95 |                 let a = from_slice(&A);
 96 |                 let expected = a; // AND with `true` must leave every lane unchanged
 97 |                 assert_eq!(a & true, expected);
 98 |                 assert_eq!(a & false, Vector::splat(false));
 99 |             }
100 | 
101 |             #[test]
102 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
103 |             fn bitand_scalar_lhs() {
104 |                 let a = from_slice(&A);
105 |                 let expected = a;
106 |                 assert_eq!(true & a, expected);
107 |                 assert_eq!(false & a, Vector::splat(false));
108 |             }
109 | 
110 |             #[test]
111 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
112 |             fn bitand_assign_scalar() {
113 |                 let mut a = from_slice(&A);
114 |                 let expected = a;
115 |                 a &= true;
116 |                 assert_eq!(a, expected);
117 |                 a &= false;
118 |                 assert_eq!(a, Vector::splat(false));
119 |             }
120 | 
121 |             #[test]
122 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
123 |             fn bitor() {
124 |                 let a = from_slice(&A);
125 |                 let b = from_slice(&B);
126 |                 let expected = apply_binary_lanewise(a, b, core::ops::BitOr::bitor);
127 |                 assert_eq!(a | b, expected);
128 |             }
129 | 
130 |             #[test]
131 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
132 |             fn bitor_assign() {
133 |                 let mut a = from_slice(&A);
134 |                 let b = from_slice(&B);
135 |                 let expected = apply_binary_lanewise(a, b, core::ops::BitOr::bitor);
136 |                 a |= b;
137 |                 assert_eq!(a, expected);
138 |             }
139 | 
140 |             #[test]
141 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
142 |             fn bitor_scalar_rhs() {
143 |                 let a = from_slice(&A);
144 |                 assert_eq!(a | false, a); // OR with `false` must leave every lane unchanged
145 |                 assert_eq!(a | true, Vector::splat(true));
146 |             }
147 | 
148 |             #[test]
149 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
150 |             fn bitor_scalar_lhs() {
151 |                 let a = from_slice(&A);
152 |                 assert_eq!(false | a, a);
153 |                 assert_eq!(true | a, Vector::splat(true));
154 |             }
155 | 
156 |             #[test]
157 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
158 |             fn bitor_assign_scalar() {
159 |                 let mut a = from_slice(&A);
160 |                 let expected = a;
161 |                 a |= false;
162 |                 assert_eq!(a, expected);
163 |                 a |= true;
164 |                 assert_eq!(a, Vector::splat(true));
165 |             }
166 | 
167 |             #[test]
168 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
169 |             fn bitxor() {
170 |                 let a = from_slice(&A);
171 |                 let b = from_slice(&B);
172 |                 let expected = apply_binary_lanewise(a, b, core::ops::BitXor::bitxor);
173 |                 assert_eq!(a ^ b, expected);
174 |             }
175 | 
176 |             #[test]
177 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
178 |             fn bitxor_assign() {
179 |                 let mut a = from_slice(&A);
180 |                 let b = from_slice(&B);
181 |                 let expected = apply_binary_lanewise(a, b, core::ops::BitXor::bitxor);
182 |                 a ^= b;
183 |                 assert_eq!(a, expected);
184 |             }
185 | 
186 |             #[test]
187 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
188 |             fn bitxor_scalar_rhs() {
189 |                 let a = from_slice(&A);
190 |                 let expected = apply_binary_scalar_rhs_lanewise(a, true, core::ops::BitXor::bitxor); // lane-by-lane XOR with `true`
191 |                 assert_eq!(a ^ false, a);
192 |                 assert_eq!(a ^ true, expected);
193 |             }
194 | 
195 |             #[test]
196 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
197 |             fn bitxor_scalar_lhs() {
198 |                 let a = from_slice(&A);
199 |                 let expected = apply_binary_scalar_lhs_lanewise(true, a, core::ops::BitXor::bitxor);
200 |                 assert_eq!(false ^ a, a);
201 |                 assert_eq!(true ^ a, expected);
202 |             }
203 | 
204 |             #[test]
205 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
206 |             fn bitxor_assign_scalar() {
207 |                 let mut a = from_slice(&A);
208 |                 let expected_unset = a; // expected after `^= false`
209 |                 let expected_set = apply_binary_scalar_rhs_lanewise(a, true, core::ops::BitXor::bitxor); // expected after `^= true`
210 |                 a ^= false;
211 |                 assert_eq!(a, expected_unset);
212 |                 a ^= true;
213 |                 assert_eq!(a, expected_set);
214 |             }
215 | 
216 |             #[test]
217 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
218 |             fn not() {
219 |                 let v = from_slice(&A);
220 |                 let expected = apply_unary_lanewise(v, core::ops::Not::not);
221 |                 assert_eq!(!v, expected);
222 |             }
223 |         }
224 |     }
225 | }
226 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/masksize.rs:
--------------------------------------------------------------------------------
1 | mask_tests! { masksizex2, 2 } // arguments: mask alias under test, then its lane count
2 | mask_tests! { masksizex4, 4 }
3 | mask_tests! { masksizex8, 8 }
4 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/mask_ops_impl/mod.rs:
--------------------------------------------------------------------------------
 1 | #[macro_use] // declared first so the `mask_tests!` macro is in scope for the modules below
 2 | mod mask_macros;
 3 | 
 4 | #[rustfmt::skip] // applies to `mod mask8;` only; presumably keeps rustfmt from re-sorting it after `mask64` — confirm
 5 | mod mask8;
 6 | mod mask16;
 7 | mod mask32;
 8 | mod mask64;
 9 | mod masksize;
10 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/masked_load_store.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | use core_simd::simd::prelude::*;
 3 | 
 4 | #[cfg(target_arch = "wasm32")]
 5 | use wasm_bindgen_test::*;
 6 | 
 7 | #[cfg(target_arch = "wasm32")]
 8 | wasm_bindgen_test_configure!(run_in_browser);
 9 | 
10 | #[test]
11 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
12 | fn masked_load_store() {
13 |     let mut arr = [u8::MAX; 7]; // valid indices are 0..=6
14 | 
15 |     u8x4::splat(0).store_select(&mut arr[5..], Mask::from_array([false, true, false, true]));
16 |     // lanes map to indices 5..=8; of the selected lanes (6 and 8), the write to index 8 is OOB and dropped
17 |     assert_eq!(arr, [255u8, 255, 255, 255, 255, 255, 0]);
18 | 
19 |     u8x4::from_array([0, 1, 2, 3]).store_select(&mut arr[1..], Mask::splat(true)); // indices 1..=4, all in bounds
20 |     assert_eq!(arr, [255u8, 0, 1, 2, 3, 255, 0]);
21 | 
22 |     // lanes map to indices 4..=7; the read from index 7 is OOB and dropped, so that lane keeps the fallback 42
23 |     assert_eq!(
24 |         u8x4::load_or(&arr[4..], u8x4::splat(42)),
25 |         u8x4::from_array([3, 255, 0, 42])
26 |     );
27 |     assert_eq!(
28 |         u8x4::load_select(
29 |             &arr[4..],
30 |             Mask::from_array([true, false, true, true]), // lane 1 is masked off; lane 3 is selected but OOB
31 |             u8x4::splat(42)
32 |         ),
33 |         u8x4::from_array([3, 42, 0, 42])
34 |     );
35 | }
36 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/masks.rs:
--------------------------------------------------------------------------------
  1 | #![feature(portable_simd)]
  2 | 
  3 | #[cfg(target_arch = "wasm32")]
  4 | use wasm_bindgen_test::*;
  5 | 
  6 | #[cfg(target_arch = "wasm32")]
  7 | wasm_bindgen_test_configure!(run_in_browser);
  8 | 
  9 | macro_rules! test_mask_api {
 10 |     { $type:ident } => {
 11 |         #[allow(non_snake_case)]
 12 |         mod $type {
 13 |             #[cfg(target_arch = "wasm32")]
 14 |             use wasm_bindgen_test::*;
 15 | 
 16 |             use core_simd::simd::Mask;
 17 | 
 18 |             #[test]
 19 |             #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 20 |             fn set_and_test() {
 21 |                 let values = [true, false, false, true, false, false, true, false];
 22 |                 let mut mask = Mask::<$type, 8>::splat(false);
 23 |                 for (lane, value) in values.iter().copied().enumerate() {
 24 |                     mask.set(lane, value);
 25 |                 }
 26 |                 for (lane, value) in values.iter().copied().enumerate() {
 27 |                     assert_eq!(mask.test(lane), value);
 28 |                 }
 29 |             }
 30 | 
 31 |             #[test]
 32 |             #[should_panic]
 33 |             fn set_invalid_lane() {
 34 |                 let mut mask = Mask::<$type, 8>::splat(false);
 35 |                 mask.set(8, true);
 36 |                 let _ = mask;
 37 |             }
 38 | 
 39 |             #[test]
 40 |             #[should_panic]
 41 |             fn test_invalid_lane() {
 42 |                 let mask = Mask::<$type, 8>::splat(false);
 43 |                 let _ = mask.test(8);
 44 |             }
 45 | 
 46 |             #[test]
 47 |             fn any() {
 48 |                 assert!(!Mask::<$type, 8>::splat(false).any());
 49 |                 assert!(Mask::<$type, 8>::splat(true).any());
 50 |                 let mut v = Mask::<$type, 8>::splat(false);
 51 |                 v.set(2, true);
 52 |                 assert!(v.any());
 53 |             }
 54 | 
 55 |             #[test]
 56 |             fn all() {
 57 |                 assert!(!Mask::<$type, 8>::splat(false).all());
 58 |                 assert!(Mask::<$type, 8>::splat(true).all());
 59 |                 let mut v = Mask::<$type, 8>::splat(false);
 60 |                 v.set(2, true);
 61 |                 assert!(!v.all());
 62 |             }
 63 | 
 64 |             #[test]
 65 |             fn roundtrip_int_conversion() {
 66 |                 let values = [true, false, false, true, false, false, true, false];
 67 |                 let mask = Mask::<$type, 8>::from_array(values);
 68 |                 let int = mask.to_int();
 69 |                 assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
 70 |                 assert_eq!(Mask::<$type, 8>::from_int(int), mask);
 71 |             }
 72 | 
 73 |             #[test]
 74 |             fn roundtrip_bitmask_conversion() {
 75 |                 let values = [
 76 |                     true, false, false, true, false, false, true, false,
 77 |                     true, true, false, false, false, false, false, true,
 78 |                 ];
 79 |                 let mask = Mask::<$type, 16>::from_array(values);
 80 |                 let bitmask = mask.to_bitmask();
 81 |                 assert_eq!(bitmask, 0b1000001101001001);
 82 |                 assert_eq!(Mask::<$type, 16>::from_bitmask(bitmask), mask);
 83 |             }
 84 | 
 85 |             #[test]
 86 |             fn roundtrip_bitmask_conversion_short() {
 87 |                 let values = [
 88 |                     false, false, false, true,
 89 |                 ];
 90 |                 let mask = Mask::<$type, 4>::from_array(values);
 91 |                 let bitmask = mask.to_bitmask();
 92 |                 assert_eq!(bitmask, 0b1000);
 93 |                 assert_eq!(Mask::<$type, 4>::from_bitmask(bitmask), mask);
 94 | 
 95 |                 let values = [true, false];
 96 |                 let mask = Mask::<$type, 2>::from_array(values);
 97 |                 let bitmask = mask.to_bitmask();
 98 |                 assert_eq!(bitmask, 0b01);
 99 |                 assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
100 |             }
101 | 
102 |             #[test]
103 |             fn roundtrip_bitmask_conversion_odd() {
104 |                 let values = [
105 |                     true, false, true, false, true, true, false, false, false, true, true,
106 |                 ];
107 |                 let mask = Mask::<$type, 11>::from_array(values);
108 |                 let bitmask = mask.to_bitmask();
109 |                 assert_eq!(bitmask, 0b11000110101);
110 |                 assert_eq!(Mask::<$type, 11>::from_bitmask(bitmask), mask);
111 |             }
112 | 
113 | 
114 |             #[test]
115 |             fn cast() {
116 |                 fn cast_impl<T: core_simd::simd::MaskElement>()
117 |                 where
118 |                     Mask<$type, 8>: Into<Mask<T, 8>>,
119 |                 {
120 |                     let values = [true, false, false, true, false, false, true, false];
121 |                     let mask = Mask::<$type, 8>::from_array(values);
122 | 
123 |                     let cast_mask = mask.cast::<T>();
124 |                     assert_eq!(values, cast_mask.to_array());
125 | 
126 |                     let into_mask: Mask<T, 8> = mask.into();
127 |                     assert_eq!(values, into_mask.to_array());
128 |                 }
129 | 
130 |                 cast_impl::<i8>();
131 |                 cast_impl::<i16>();
132 |                 cast_impl::<i32>();
133 |                 cast_impl::<i64>();
134 |                 cast_impl::<isize>();
135 |             }
136 |         }
137 |     }
138 | }
139 | 
140 | mod mask_api {
141 |     test_mask_api! { i8 }
142 |     test_mask_api! { i16 }
143 |     test_mask_api! { i32 }
144 |     test_mask_api! { i64 }
145 |     test_mask_api! { isize }
146 | }
147 | 
148 | #[test]
149 | fn convert() {
150 |     use core_simd::simd::Mask;
151 |     let values = [true, false, false, true, false, false, true, false];
152 |     assert_eq!(
153 |         Mask::<i8, 8>::from_array(values),
154 |         Mask::<i32, 8>::from_array(values).into()
155 |     );
156 | }
157 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/pointers.rs:
--------------------------------------------------------------------------------
  1 | #![feature(portable_simd)]
  2 | 
  3 | use core_simd::simd::{
  4 |     Simd,
  5 |     ptr::{SimdConstPtr, SimdMutPtr},
  6 | };
  7 | 
  8 | macro_rules! common_tests {
  9 |     { $constness:ident } => {
 10 |         test_helpers::test_lanes! {
 11 |             fn is_null<const LANES: usize>() {
 12 |                 test_helpers::test_unary_mask_elementwise(
 13 |                     &Simd::<*$constness u32, LANES>::is_null,
 14 |                     &<*$constness u32>::is_null,
 15 |                     &|_| true,
 16 |                 );
 17 |             }
 18 | 
 19 |             fn addr<const LANES: usize>() {
 20 |                 test_helpers::test_unary_elementwise(
 21 |                     &Simd::<*$constness u32, LANES>::addr,
 22 |                     &<*$constness u32>::addr,
 23 |                     &|_| true,
 24 |                 );
 25 |             }
 26 | 
 27 |             fn with_addr<const LANES: usize>() {
 28 |                 test_helpers::test_binary_elementwise(
 29 |                     &Simd::<*$constness u32, LANES>::with_addr,
 30 |                     &<*$constness u32>::with_addr,
 31 |                     &|_, _| true,
 32 |                 );
 33 |             }
 34 | 
 35 |             fn expose_provenance<const LANES: usize>() {
 36 |                 test_helpers::test_unary_elementwise(
 37 |                     &Simd::<*$constness u32, LANES>::expose_provenance,
 38 |                     &<*$constness u32>::expose_provenance,
 39 |                     &|_| true,
 40 |                 );
 41 |             }
 42 | 
 43 |             fn wrapping_offset<const LANES: usize>() {
 44 |                 test_helpers::test_binary_elementwise(
 45 |                     &Simd::<*$constness u32, LANES>::wrapping_offset,
 46 |                     &<*$constness u32>::wrapping_offset,
 47 |                     &|_, _| true,
 48 |                 );
 49 |             }
 50 | 
 51 |             fn wrapping_add<const LANES: usize>() {
 52 |                 test_helpers::test_binary_elementwise(
 53 |                     &Simd::<*$constness u32, LANES>::wrapping_add,
 54 |                     &<*$constness u32>::wrapping_add,
 55 |                     &|_, _| true,
 56 |                 );
 57 |             }
 58 | 
 59 |             fn wrapping_sub<const LANES: usize>() {
 60 |                 test_helpers::test_binary_elementwise(
 61 |                     &Simd::<*$constness u32, LANES>::wrapping_sub,
 62 |                     &<*$constness u32>::wrapping_sub,
 63 |                     &|_, _| true,
 64 |                 );
 65 |             }
 66 |         }
 67 |     }
 68 | }
 69 | 
 70 | mod const_ptr {
 71 |     use super::*;
 72 |     common_tests! { const }
 73 | 
 74 |     test_helpers::test_lanes! {
 75 |         fn cast_mut<const LANES: usize>() {
 76 |             test_helpers::test_unary_elementwise(
 77 |                 &Simd::<*const u32, LANES>::cast_mut,
 78 |                 &<*const u32>::cast_mut,
 79 |                 &|_| true,
 80 |             );
 81 |         }
 82 | 
 83 |         fn with_exposed_provenance<const LANES: usize>() {
 84 |             test_helpers::test_unary_elementwise(
 85 |                 &Simd::<*const u32, LANES>::with_exposed_provenance,
 86 |                 &core::ptr::with_exposed_provenance::<u32>,
 87 |                 &|_| true,
 88 |             );
 89 |         }
 90 |     }
 91 | }
 92 | 
 93 | mod mut_ptr {
 94 |     use super::*;
 95 |     common_tests! { mut }
 96 | 
 97 |     test_helpers::test_lanes! {
 98 |         fn cast_const<const LANES: usize>() {
 99 |             test_helpers::test_unary_elementwise(
100 |                 &Simd::<*mut u32, LANES>::cast_const,
101 |                 &<*mut u32>::cast_const,
102 |                 &|_| true,
103 |             );
104 |         }
105 | 
106 |         fn with_exposed_provenance<const LANES: usize>() {
107 |             test_helpers::test_unary_elementwise(
108 |                 &Simd::<*mut u32, LANES>::with_exposed_provenance,
109 |                 &core::ptr::with_exposed_provenance_mut::<u32>,
110 |                 &|_| true,
111 |             );
112 |         }
113 |     }
114 | }
115 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/round.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | 
 3 | macro_rules! float_rounding_test {
 4 |     { $scalar:tt, $int_scalar:tt } => {
 5 |         mod $scalar {
 6 |             use std_float::StdFloat;
 7 | 
 8 |             type Vector<const LANES: usize> = core_simd::simd::Simd<$scalar, LANES>;
 9 |             type Scalar = $scalar;
10 |             type IntScalar = $int_scalar;
11 | 
12 |             test_helpers::test_lanes! {
13 |                 fn ceil<const LANES: usize>() {
14 |                     test_helpers::test_unary_elementwise(
15 |                         &Vector::<LANES>::ceil,
16 |                         &Scalar::ceil,
17 |                         &|_| true,
18 |                     )
19 |                 }
20 | 
21 |                 fn floor<const LANES: usize>() {
22 |                     test_helpers::test_unary_elementwise(
23 |                         &Vector::<LANES>::floor,
24 |                         &Scalar::floor,
25 |                         &|_| true,
26 |                     )
27 |                 }
28 | 
29 |                 fn round<const LANES: usize>() {
30 |                     test_helpers::test_unary_elementwise(
31 |                         &Vector::<LANES>::round,
32 |                         &Scalar::round,
33 |                         &|_| true,
34 |                     )
35 |                 }
36 | 
37 |                 fn trunc<const LANES: usize>() {
38 |                     test_helpers::test_unary_elementwise(
39 |                         &Vector::<LANES>::trunc,
40 |                         &Scalar::trunc,
41 |                         &|_| true,
42 |                     )
43 |                 }
44 | 
45 |                 fn fract<const LANES: usize>() {
46 |                     test_helpers::test_unary_elementwise_flush_subnormals(
47 |                         &Vector::<LANES>::fract,
48 |                         &Scalar::fract,
49 |                         &|_| true,
50 |                     )
51 |                 }
52 |             }
53 | 
54 |             test_helpers::test_lanes! {
55 |                 fn to_int_unchecked<const LANES: usize>() {
56 |                     use core_simd::simd::num::SimdFloat;
57 |                     // The largest value of the equivalently sized signed integer that the float can represent
58 |                     // exactly has all of the mantissa digits set to 1, shifted up to just below the sign bit.
59 |                     const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);
60 |                     const MAX_REPRESENTABLE_VALUE: Scalar =
61 |                         (ALL_MANTISSA_BITS << (size_of::<Scalar>() * 8 - <Scalar>::MANTISSA_DIGITS as usize - 1)) as Scalar;
62 | 
63 |                     let mut runner = test_helpers::make_runner();
64 |                     runner.run(
65 |                         &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
66 |                         |x| {
67 |                             let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
68 |                             let result_2 = {
69 |                                 let mut result: [IntScalar; LANES] = [0; LANES];
70 |                                 for (i, o) in x.iter().zip(result.iter_mut()) {
71 |                                     *o = unsafe { i.to_int_unchecked::<IntScalar>() };
72 |                                 }
73 |                                 result
74 |                             };
75 |                             test_helpers::prop_assert_biteq!(result_1, result_2);
76 |                             Ok(())
77 |                         },
78 |                     ).unwrap();
79 |                 }
80 |             }
81 |         }
82 |     }
83 | }
84 | 
85 | float_rounding_test! { f32, i32 }
86 | float_rounding_test! { f64, i64 }
87 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/swizzle.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | use core_simd::simd::{Simd, Swizzle};
 3 | 
 4 | #[cfg(target_arch = "wasm32")]
 5 | use wasm_bindgen_test::*;
 6 | 
 7 | #[cfg(target_arch = "wasm32")]
 8 | wasm_bindgen_test_configure!(run_in_browser);
 9 | 
10 | #[test]
11 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
12 | fn swizzle() {
13 |     struct Index;
14 |     impl Swizzle<4> for Index {
15 |         const INDEX: [usize; 4] = [2, 1, 3, 0];
16 |     }
17 |     impl Swizzle<2> for Index {
18 |         const INDEX: [usize; 2] = [1, 1];
19 |     }
20 | 
21 |     let vector = Simd::from_array([2, 4, 1, 9]);
22 |     assert_eq!(Index::swizzle(vector).to_array(), [1, 4, 9, 2]);
23 |     assert_eq!(Index::swizzle(vector).to_array(), [4, 4]);
24 | }
25 | 
26 | #[test]
27 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
28 | fn reverse() {
29 |     let a = Simd::from_array([1, 2, 3, 4]);
30 |     assert_eq!(a.reverse().to_array(), [4, 3, 2, 1]);
31 | }
32 | 
33 | #[test]
34 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
35 | fn rotate() {
36 |     let a = Simd::from_array([1, 2, 3, 4]);
37 |     assert_eq!(a.rotate_elements_left::<0>().to_array(), [1, 2, 3, 4]);
38 |     assert_eq!(a.rotate_elements_left::<1>().to_array(), [2, 3, 4, 1]);
39 |     assert_eq!(a.rotate_elements_left::<2>().to_array(), [3, 4, 1, 2]);
40 |     assert_eq!(a.rotate_elements_left::<3>().to_array(), [4, 1, 2, 3]);
41 |     assert_eq!(a.rotate_elements_left::<4>().to_array(), [1, 2, 3, 4]);
42 |     assert_eq!(a.rotate_elements_left::<5>().to_array(), [2, 3, 4, 1]);
43 |     assert_eq!(a.rotate_elements_right::<0>().to_array(), [1, 2, 3, 4]);
44 |     assert_eq!(a.rotate_elements_right::<1>().to_array(), [4, 1, 2, 3]);
45 |     assert_eq!(a.rotate_elements_right::<2>().to_array(), [3, 4, 1, 2]);
46 |     assert_eq!(a.rotate_elements_right::<3>().to_array(), [2, 3, 4, 1]);
47 |     assert_eq!(a.rotate_elements_right::<4>().to_array(), [1, 2, 3, 4]);
48 |     assert_eq!(a.rotate_elements_right::<5>().to_array(), [4, 1, 2, 3]);
49 | }
50 | 
51 | #[test]
52 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
53 | fn shift() {
54 |     let a = Simd::from_array([1, 2, 3, 4]);
55 |     assert_eq!(a.shift_elements_left::<0>(0).to_array(), [1, 2, 3, 4]);
56 |     assert_eq!(a.shift_elements_left::<1>(0).to_array(), [2, 3, 4, 0]);
57 |     assert_eq!(a.shift_elements_left::<2>(9).to_array(), [3, 4, 9, 9]);
58 |     assert_eq!(a.shift_elements_left::<3>(8).to_array(), [4, 8, 8, 8]);
59 |     assert_eq!(a.shift_elements_left::<4>(7).to_array(), [7, 7, 7, 7]);
60 |     assert_eq!(a.shift_elements_left::<5>(6).to_array(), [6, 6, 6, 6]);
61 |     assert_eq!(a.shift_elements_right::<0>(0).to_array(), [1, 2, 3, 4]);
62 |     assert_eq!(a.shift_elements_right::<1>(0).to_array(), [0, 1, 2, 3]);
63 |     assert_eq!(a.shift_elements_right::<2>(-1).to_array(), [-1, -1, 1, 2]);
64 |     assert_eq!(a.shift_elements_right::<3>(-2).to_array(), [-2, -2, -2, 1]);
65 |     assert_eq!(a.shift_elements_right::<4>(-3).to_array(), [-3, -3, -3, -3]);
66 |     assert_eq!(a.shift_elements_right::<5>(-4).to_array(), [-4, -4, -4, -4]);
67 | }
68 | 
69 | #[test]
70 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
71 | fn interleave() {
72 |     let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]);
73 |     let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]);
74 |     let (lo, hi) = a.interleave(b);
75 |     assert_eq!(lo.to_array(), [0, 8, 1, 9, 2, 10, 3, 11]);
76 |     assert_eq!(hi.to_array(), [4, 12, 5, 13, 6, 14, 7, 15]);
77 |     let (even, odd) = lo.deinterleave(hi);
78 |     assert_eq!(even, a);
79 |     assert_eq!(odd, b);
80 | }
81 | 
82 | // portable-simd#298
83 | #[test]
84 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
85 | fn interleave_one() {
86 |     let a = Simd::from_array([0]);
87 |     let b = Simd::from_array([1]);
88 |     let (lo, hi) = a.interleave(b);
89 |     assert_eq!(lo.to_array(), [0]);
90 |     assert_eq!(hi.to_array(), [1]);
91 |     let (even, odd) = lo.deinterleave(hi);
92 |     assert_eq!(even, a);
93 |     assert_eq!(odd, b);
94 | }
95 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/swizzle_dyn.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | use core::{fmt, ops::RangeInclusive};
 3 | use test_helpers::{biteq, make_runner, prop_assert_biteq};
 4 | 
 5 | fn swizzle_dyn_scalar_ver<const N: usize>(values: [u8; N], idxs: [u8; N]) -> [u8; N] {
 6 |     let mut array = [0; N];
 7 |     for (i, k) in idxs.into_iter().enumerate() {
 8 |         if (k as usize) < N {
 9 |             array[i] = values[k as usize];
10 |         };
11 |     }
12 |     array
13 | }
14 | 
15 | test_helpers::test_lanes! {
16 |     fn swizzle_dyn<const N: usize>() {
17 |         match_simd_with_fallback(
18 |             &core_simd::simd::Simd::<u8, N>::swizzle_dyn,
19 |             &swizzle_dyn_scalar_ver,
20 |             &|_, _| true,
21 |         );
22 |     }
23 | }
24 | 
25 | fn match_simd_with_fallback<Scalar, ScalarResult, Vector, VectorResult, const N: usize>(
26 |     fv: &dyn Fn(Vector, Vector) -> VectorResult,
27 |     fs: &dyn Fn([Scalar; N], [Scalar; N]) -> [ScalarResult; N],
28 |     check: &dyn Fn([Scalar; N], [Scalar; N]) -> bool,
29 | ) where
30 |     Scalar: Copy + fmt::Debug + SwizzleStrategy,
31 |     ScalarResult: Copy + biteq::BitEq + fmt::Debug + SwizzleStrategy,
32 |     Vector: Into<[Scalar; N]> + From<[Scalar; N]> + Copy,
33 |     VectorResult: Into<[ScalarResult; N]> + From<[ScalarResult; N]> + Copy,
34 | {
35 |     test_swizzles_2(&|x: [Scalar; N], y: [Scalar; N]| {
36 |         proptest::prop_assume!(check(x, y));
37 |         let result_v: [ScalarResult; N] = fv(x.into(), y.into()).into();
38 |         let result_s: [ScalarResult; N] = fs(x, y);
39 |         crate::prop_assert_biteq!(result_v, result_s);
40 |         Ok(())
41 |     });
42 | }
43 | 
44 | fn test_swizzles_2<A: fmt::Debug + SwizzleStrategy, B: fmt::Debug + SwizzleStrategy>(
45 |     f: &dyn Fn(A, B) -> proptest::test_runner::TestCaseResult,
46 | ) {
47 |     let mut runner = make_runner();
48 |     runner
49 |         .run(
50 |             &(A::swizzled_strategy(), B::swizzled_strategy()),
51 |             |(a, b)| f(a, b),
52 |         )
53 |         .unwrap();
54 | }
55 | 
56 | pub trait SwizzleStrategy {
57 |     type Strategy: proptest::strategy::Strategy<Value = Self>;
58 |     fn swizzled_strategy() -> Self::Strategy;
59 | }
60 | 
61 | impl SwizzleStrategy for u8 {
62 |     type Strategy = RangeInclusive<u8>;
63 |     fn swizzled_strategy() -> Self::Strategy {
64 |         0..=64
65 |     }
66 | }
67 | 
68 | impl<T: fmt::Debug + SwizzleStrategy, const N: usize> SwizzleStrategy for [T; N] {
69 |     type Strategy = test_helpers::array::UniformArrayStrategy<T::Strategy, Self>;
70 |     fn swizzled_strategy() -> Self::Strategy {
71 |         Self::Strategy::new(T::swizzled_strategy())
72 |     }
73 | }
74 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/to_bytes.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | 
 3 | use core_simd::simd::{Simd, ToBytes};
 4 | 
 5 | #[test]
 6 | fn byte_convert() {
 7 |     let int = Simd::<u32, 2>::from_array([0xdeadbeef, 0x8badf00d]);
 8 |     let ne_bytes = int.to_ne_bytes();
 9 |     let be_bytes = int.to_be_bytes();
10 |     let le_bytes = int.to_le_bytes();
11 |     assert_eq!(int[0].to_ne_bytes(), ne_bytes[..4]);
12 |     assert_eq!(int[1].to_ne_bytes(), ne_bytes[4..]);
13 |     assert_eq!(int[0].to_be_bytes(), be_bytes[..4]);
14 |     assert_eq!(int[1].to_be_bytes(), be_bytes[4..]);
15 |     assert_eq!(int[0].to_le_bytes(), le_bytes[..4]);
16 |     assert_eq!(int[1].to_le_bytes(), le_bytes[4..]);
17 |     assert_eq!(Simd::<u32, 2>::from_ne_bytes(ne_bytes), int);
18 |     assert_eq!(Simd::<u32, 2>::from_be_bytes(be_bytes), int);
19 |     assert_eq!(Simd::<u32, 2>::from_le_bytes(le_bytes), int);
20 | }
21 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/try_from_slice.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | 
 3 | #[cfg(target_arch = "wasm32")]
 4 | use wasm_bindgen_test::*;
 5 | 
 6 | #[cfg(target_arch = "wasm32")]
 7 | wasm_bindgen_test_configure!(run_in_browser);
 8 | 
 9 | use core_simd::simd::i32x4;
10 | 
11 | #[test]
12 | #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
13 | fn try_from_slice() {
14 |     // Equal length
15 |     assert_eq!(
16 |         i32x4::try_from([1, 2, 3, 4].as_slice()).unwrap(),
17 |         i32x4::from_array([1, 2, 3, 4])
18 |     );
19 | 
20 |     // Slice length > vector length
21 |     assert!(i32x4::try_from([1, 2, 3, 4, 5].as_slice()).is_err());
22 | 
23 |     // Slice length < vector length
24 |     assert!(i32x4::try_from([1, 2, 3].as_slice()).is_err());
25 | }
26 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/u16_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_unsigned_tests! { u16 }
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/u32_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_unsigned_tests! { u32 }
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/u64_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_unsigned_tests! { u64 }
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/u8_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_unsigned_tests! { u8 }
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/tests/usize_ops.rs:
--------------------------------------------------------------------------------
1 | #![feature(portable_simd)]
2 | 
3 | #[macro_use]
4 | mod ops_macros;
5 | impl_unsigned_tests! { usize }
6 | 


--------------------------------------------------------------------------------
/crates/core_simd/webdriver.json:
--------------------------------------------------------------------------------
1 | {
2 |     "goog:chromeOptions": {
3 |         "args": [
4 |             "--enable-features=WebAssemblySimd"
5 |         ]
6 |     }
7 | }
8 | 


--------------------------------------------------------------------------------
/crates/std_float/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "std_float"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | core_simd = { path = "../core_simd", default-features = false }
10 | 
11 | [dev-dependencies.test_helpers]
12 | path = "../test_helpers"
13 | 
14 | [target.'cfg(target_arch = "wasm32")'.dev-dependencies]
15 | wasm-bindgen = "0.2"
16 | wasm-bindgen-test = "0.3"
17 | 
18 | [features]
19 | default = ["as_crate"]
20 | as_crate = []
21 | 


--------------------------------------------------------------------------------
/crates/std_float/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #![cfg_attr(
  2 |     feature = "as_crate",
  3 |     feature(core_intrinsics),
  4 |     feature(portable_simd),
  5 |     allow(internal_features)
  6 | )]
  7 | #[cfg(not(feature = "as_crate"))]
  8 | use core::simd;
  9 | #[cfg(feature = "as_crate")]
 10 | use core_simd::simd;
 11 | 
 12 | use core::intrinsics::simd as intrinsics;
 13 | 
 14 | use simd::{LaneCount, Simd, SupportedLaneCount};
 15 | 
 16 | #[cfg(feature = "as_crate")]
 17 | mod experimental {
 18 |     pub trait Sealed {}
 19 | }
 20 | 
 21 | #[cfg(feature = "as_crate")]
 22 | use experimental as sealed;
 23 | 
 24 | use crate::sealed::Sealed;
 25 | 
 26 | /// This trait provides a possibly-temporary implementation of float functions
 27 | /// that may, in the absence of hardware support, canonicalize to calling an
 28 | /// operating system's `math.h` dynamically-loaded library (also known as a
 29 | /// shared object). As these conditionally require runtime support, they
 30 | /// should only appear in binaries built assuming OS support: `std`.
 31 | ///
 32 | /// However, there is no reason SIMD types, in general, need OS support,
 33 | /// as for many architectures an embedded binary may simply configure that
 34 | /// support itself. This means these types must be visible in `core`
 35 | /// but have these functions available in `std`.
 36 | ///
 37 | /// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but
 38 | /// due to compiler limitations, it is harder to implement this approach for
 39 | /// abstract data types like [`Simd`]. From that need, this trait is born.
 40 | ///
 41 | /// It is possible this trait will be replaced in some manner in the future,
 42 | /// when either the compiler or its supporting runtime functions are improved.
 43 | /// For now this trait is available to permit experimentation with SIMD float
 44 | /// operations that may lack hardware support, such as `mul_add`.
 45 | pub trait StdFloat: Sealed + Sized {
 46 |     /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error,
 47 |     /// yielding a more accurate result than an unfused multiply-add.
 48 |     ///
 49 |     /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
 50 |     /// architecture has a dedicated `fma` CPU instruction.  However, this is not always
 51 |     /// true, and will be heavily dependent on designing algorithms with specific target
 52 |     /// hardware in mind.
 53 |     #[inline]
 54 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 55 |     fn mul_add(self, a: Self, b: Self) -> Self {
 56 |         unsafe { intrinsics::simd_fma(self, a, b) }
 57 |     }
 58 | 
 59 |     /// Produces a vector where every element has the square root value
 60 |     /// of the equivalently-indexed element in `self`
 61 |     #[inline]
 62 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 63 |     fn sqrt(self) -> Self {
 64 |         unsafe { intrinsics::simd_fsqrt(self) }
 65 |     }
 66 | 
 67 |     /// Produces a vector where every element has the sine of the value
 68 |     /// in the equivalently-indexed element in `self`.
 69 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 70 |     fn sin(self) -> Self;
 71 | 
 72 |     /// Produces a vector where every element has the cosine of the value
 73 |     /// in the equivalently-indexed element in `self`.
 74 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 75 |     fn cos(self) -> Self;
 76 | 
 77 |     /// Produces a vector where every element has the exponential (base e) of the value
 78 |     /// in the equivalently-indexed element in `self`.
 79 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 80 |     fn exp(self) -> Self;
 81 | 
 82 |     /// Produces a vector where every element has the exponential (base 2) of the value
 83 |     /// in the equivalently-indexed element in `self`.
 84 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 85 |     fn exp2(self) -> Self;
 86 | 
 87 |     /// Produces a vector where every element has the natural logarithm of the value
 88 |     /// in the equivalently-indexed element in `self`.
 89 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 90 |     fn ln(self) -> Self;
 91 | 
 92 |     /// Produces a vector where every element has the logarithm of the equivalently-indexed
 93 |     /// element in `self`, taken with respect to the equivalently-indexed element in `base`.
 94 |     #[inline]
 95 |     #[must_use = "method returns a new vector and does not mutate the original value"]
 96 |     fn log(self, base: Self) -> Self {
 97 |         unsafe { intrinsics::simd_div(self.ln(), base.ln()) }
 98 |     }
 99 | 
100 |     /// Produces a vector where every element has the base-2 logarithm of the value
101 |     /// in the equivalently-indexed element in `self`.
102 |     #[must_use = "method returns a new vector and does not mutate the original value"]
103 |     fn log2(self) -> Self;
104 | 
105 |     /// Produces a vector where every element has the base-10 logarithm of the value
106 |     /// in the equivalently-indexed element in `self`.
107 |     #[must_use = "method returns a new vector and does not mutate the original value"]
108 |     fn log10(self) -> Self;
109 | 
110 |     /// Returns the smallest integer greater than or equal to each element.
111 |     #[must_use = "method returns a new vector and does not mutate the original value"]
112 |     #[inline]
113 |     fn ceil(self) -> Self {
114 |         unsafe { intrinsics::simd_ceil(self) }
115 |     }
116 | 
117 |     /// Returns the largest integer value less than or equal to each element.
118 |     #[must_use = "method returns a new vector and does not mutate the original value"]
119 |     #[inline]
120 |     fn floor(self) -> Self {
121 |         unsafe { intrinsics::simd_floor(self) }
122 |     }
123 | 
124 |     /// Rounds to the nearest integer value. Ties round away from zero.
125 |     #[must_use = "method returns a new vector and does not mutate the original value"]
126 |     #[inline]
127 |     fn round(self) -> Self {
128 |         unsafe { intrinsics::simd_round(self) }
129 |     }
130 | 
131 |     /// Returns the floating point's integer value, with its fractional part removed.
132 |     #[must_use = "method returns a new vector and does not mutate the original value"]
133 |     #[inline]
134 |     fn trunc(self) -> Self {
135 |         unsafe { intrinsics::simd_trunc(self) }
136 |     }
137 | 
138 |     /// Returns the floating point's fractional value, with its integer part removed.
139 |     #[must_use = "method returns a new vector and does not mutate the original value"]
140 |     fn fract(self) -> Self;
141 | }
142 | 
143 | impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
144 | impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
145 | 
146 | macro_rules! impl_float {
147 |     {
148 |         $($fn:ident: $intrinsic:ident,)*
149 |     } => {
150 |         // `f32` lanes: every listed function maps directly onto its intrinsic.
151 |         impl<const N: usize> StdFloat for Simd<f32, N>
152 |         where
153 |             LaneCount<N>: SupportedLaneCount,
154 |         {
155 |             #[inline]
156 |             fn fract(self) -> Self {
157 |                 let whole = self.trunc();
158 |                 self - whole
159 |             }
160 | 
161 |             $(
162 |             #[inline]
163 |             fn $fn(self) -> Self {
164 |                 unsafe { intrinsics::$intrinsic(self) }
165 |             }
166 |             )*
167 |         }
168 | 
169 |         // `f64` lanes: as above, but aarch64 falls back to the scalar `f64` methods.
170 |         impl<const N: usize> StdFloat for Simd<f64, N>
171 |         where
172 |             LaneCount<N>: SupportedLaneCount,
173 |         {
174 |             #[inline]
175 |             fn fract(self) -> Self {
176 |                 let whole = self.trunc();
177 |                 self - whole
178 |             }
179 | 
180 |             $(
181 |             #[inline]
182 |             fn $fn(self) -> Self {
183 |                 // https://github.com/llvm/llvm-project/issues/83729
184 |                 #[cfg(target_arch = "aarch64")]
185 |                 {
186 |                     Self::from_array(self.to_array().map(|lane| lane.$fn()))
187 |                 }
188 | 
189 |                 #[cfg(not(target_arch = "aarch64"))]
190 |                 {
191 |                     unsafe { intrinsics::$intrinsic(self) }
192 |                 }
193 |             }
194 |             )*
195 |         }
196 |     }
197 | }
198 | 
199 | impl_float! {
200 |     sin: simd_fsin,
201 |     cos: simd_fcos,
202 |     exp: simd_fexp,
203 |     exp2: simd_fexp2,
204 |     ln: simd_flog,
205 |     log2: simd_flog2,
206 |     log10: simd_flog10,
207 | }
208 | 


--------------------------------------------------------------------------------
/crates/std_float/tests/float.rs:
--------------------------------------------------------------------------------
 1 | #![feature(portable_simd)]
 2 | 
 3 | macro_rules! unary_test {
 4 |     { $scalar:tt, $($func:tt),+ } => {
 5 |         test_helpers::test_lanes! {
 6 |             $(
 7 |             fn $func<const LANES: usize>() {
 8 |                 test_helpers::test_unary_elementwise(
 9 |                     &core_simd::simd::Simd::<$scalar, LANES>::$func,
10 |                     &$scalar::$func,
11 |                     &|_| true,
12 |                 )
13 |             }
14 |             )*
15 |         }
16 |     }
17 | }
18 | 
19 | macro_rules! binary_test {
20 |     { $scalar:tt, $($func:tt),+ } => {
21 |         test_helpers::test_lanes! {
22 |             $(
23 |             fn $func<const LANES: usize>() {
24 |                 test_helpers::test_binary_elementwise(
25 |                     &core_simd::simd::Simd::<$scalar, LANES>::$func,
26 |                     &$scalar::$func,
27 |                     &|_, _| true,
28 |                 )
29 |             }
30 |             )*
31 |         }
32 |     }
33 | }
34 | 
35 | macro_rules! ternary_test {
36 |     { $scalar:tt, $($func:tt),+ } => {
37 |         test_helpers::test_lanes! {
38 |             $(
39 |             fn $func<const LANES: usize>() {
40 |                 test_helpers::test_ternary_elementwise(
41 |                     &core_simd::simd::Simd::<$scalar, LANES>::$func,
42 |                     &$scalar::$func,
43 |                     &|_, _, _| true,
44 |                 )
45 |             }
46 |             )*
47 |         }
48 |     }
49 | }
50 | 
51 | macro_rules! impl_tests {
52 |     { $scalar:tt } => {
53 |         mod $scalar {
54 |             use std_float::StdFloat;
55 | 
56 |             unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc }
57 |             binary_test! { $scalar, log }
58 |             ternary_test! { $scalar, mul_add }
59 | 
60 |             test_helpers::test_lanes! {
61 |                 fn fract<const LANES: usize>() {
62 |                     test_helpers::test_unary_elementwise_flush_subnormals(
63 |                         &core_simd::simd::Simd::<$scalar, LANES>::fract,
64 |                         &$scalar::fract,
65 |                         &|_| true,
66 |                     )
67 |                 }
68 |             }
69 |         }
70 |     }
71 | }
72 | 
73 | impl_tests! { f32 }
74 | impl_tests! { f64 }
75 | 


--------------------------------------------------------------------------------
/crates/test_helpers/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "test_helpers"
3 | version = "0.1.0"
4 | edition = "2021"
5 | publish = false
6 | 
7 | [dependencies]
8 | proptest = { version = "0.10", default-features = false, features = ["alloc"] }
9 | 


--------------------------------------------------------------------------------
/crates/test_helpers/src/array.rs:
--------------------------------------------------------------------------------
  1 | //! Generic-length array strategy.
  2 | 
  3 | // Adapted from proptest's array code
  4 | // Copyright 2017 Jason Lingle
  5 | 
  6 | use core::{marker::PhantomData, mem::MaybeUninit};
  7 | use proptest::{
  8 |     strategy::{NewTree, Strategy, ValueTree},
  9 |     test_runner::TestRunner,
 10 | };
 11 | 
 12 | #[must_use = "strategies do nothing unless used"]
 13 | #[derive(Clone, Copy, Debug)]
 14 | pub struct UniformArrayStrategy<S, T> {
 15 |     strategy: S,
 16 |     _marker: PhantomData<T>,
 17 | }
 18 | 
 19 | impl<S, T> UniformArrayStrategy<S, T> {
 20 |     pub const fn new(strategy: S) -> Self {
 21 |         Self {
 22 |             strategy,
 23 |             _marker: PhantomData,
 24 |         }
 25 |     }
 26 | }
 27 | 
 28 | pub struct ArrayValueTree<T> {
 29 |     tree: T,
 30 |     shrinker: usize,
 31 |     last_shrinker: Option<usize>,
 32 | }
 33 | 
 34 | impl<T, S, const LANES: usize> Strategy for UniformArrayStrategy<S, [T; LANES]>
 35 | where
 36 |     T: core::fmt::Debug,
 37 |     S: Strategy<Value = T>,
 38 | {
 39 |     type Tree = ArrayValueTree<[S::Tree; LANES]>;
 40 |     type Value = [T; LANES];
 41 | 
 42 |     fn new_tree(&self, runner: &mut TestRunner) -> NewTree<Self> {
 43 |         let mut lanes = [const { MaybeUninit::<S::Tree>::uninit() }; LANES];
 44 |         for lane in &mut lanes {
 45 |             // NOTE: if this `?` returns early, the trees written so far are never dropped,
 46 |             // because `MaybeUninit` does not run destructors.
 47 |             lane.write(self.strategy.new_tree(runner)?);
 48 |         }
 49 |         // SAFETY: every lane was initialized by the loop above.
 50 |         let tree: [S::Tree; LANES] = unsafe { core::mem::transmute_copy(&lanes) };
 51 |         Ok(ArrayValueTree {
 52 |             tree,
 53 |             shrinker: 0,
 54 |             last_shrinker: None,
 55 |         })
 56 |     }
 57 | }
 58 | 
 59 | impl<T: ValueTree, const LANES: usize> ValueTree for ArrayValueTree<[T; LANES]> {
 60 |     type Value = [T::Value; LANES];
 61 | 
 62 |     /// Returns the current value of every lane, in lane order.
 63 |     fn current(&self) -> Self::Value {
 64 |         // `from_fn` builds the array lane by lane in safe code, so no uninitialized
 65 |         // scratch array or `transmute_copy` is needed.
 66 |         core::array::from_fn(|lane| self.tree[lane].current())
 67 |     }
 68 | 
 69 |     /// Simplifies the first lane at or after `shrinker` that can still shrink and
 70 |     /// records it in `last_shrinker` for `complicate`.
 71 |     ///
 72 |     /// Returns `false` once no remaining lane can be simplified.
 73 |     fn simplify(&mut self) -> bool {
 74 |         while self.shrinker < LANES {
 75 |             if self.tree[self.shrinker].simplify() {
 76 |                 self.last_shrinker = Some(self.shrinker);
 77 |                 return true;
 78 |             }
 79 |             // This lane is exhausted; try the next one.
 80 |             self.shrinker += 1;
 81 |         }
 82 | 
 83 |         false
 84 |     }
 85 | 
 86 |     /// Complicates the lane touched by the last successful `simplify`, if any.
 87 |     fn complicate(&mut self) -> bool {
 88 |         let Some(lane) = self.last_shrinker else {
 89 |             return false;
 90 |         };
 91 |         self.shrinker = lane;
 92 |         if self.tree[lane].complicate() {
 93 |             return true;
 94 |         }
 95 |         // That lane cannot be complicated further, so forget it.
 96 |         self.last_shrinker = None;
 97 |         false
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/crates/test_helpers/src/biteq.rs:
--------------------------------------------------------------------------------
  1 | //! Compare numeric types by exact bit value.
  2 | 
  3 | pub trait BitEq {
  4 |     fn biteq(&self, other: &Self) -> bool;
  5 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result;
  6 | }
  7 | 
  8 | impl BitEq for bool {
  9 |     fn biteq(&self, other: &Self) -> bool {
 10 |         self == other
 11 |     }
 12 | 
 13 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 14 |         write!(f, "{:?}", self)
 15 |     }
 16 | }
 17 | 
 18 | macro_rules! impl_integer_biteq {
 19 |     { $($type:ty),* } => {
 20 |         $(
 21 |         impl BitEq for $type {
 22 |             fn biteq(&self, other: &Self) -> bool {
 23 |                 self == other
 24 |             }
 25 | 
 26 |             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 27 |                 write!(f, "{:?} ({:x})", self, self)
 28 |             }
 29 |         }
 30 |         )*
 31 |     };
 32 | }
 33 | 
 34 | impl_integer_biteq! { u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize }
 35 | 
 36 | macro_rules! impl_float_biteq {
 37 |     { $($type:ty),* } => {
 38 |         $(
 39 |         impl BitEq for $type {
 40 |             /// Two floats are bit-equal when their bit patterns match exactly
 41 |             /// (so `0.0` and `-0.0` differ) or when both are NaN, whatever the
 42 |             /// NaN payload.
 43 |             fn biteq(&self, other: &Self) -> bool {
 44 |                 let both_nan = self.is_nan() && other.is_nan();
 45 |                 both_nan || self.to_bits() == other.to_bits()
 46 |             }
 47 | 
 48 |             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 49 |                 write!(f, "{:?} ({:x})", self, self.to_bits())
 50 |             }
 51 |         }
 52 |         )*
 53 |     };
 54 | }
 55 | 
 56 | impl_float_biteq! { f32, f64 }
 57 | 
 58 | impl<T> BitEq for *const T {
 59 |     fn biteq(&self, other: &Self) -> bool {
 60 |         self == other
 61 |     }
 62 | 
 63 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 64 |         write!(f, "{:?}", self)
 65 |     }
 66 | }
 67 | 
 68 | impl<T> BitEq for *mut T {
 69 |     fn biteq(&self, other: &Self) -> bool {
 70 |         self == other
 71 |     }
 72 | 
 73 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 74 |         write!(f, "{:?}", self)
 75 |     }
 76 | }
 77 | 
 78 | impl<T: BitEq, const N: usize> BitEq for [T; N] {
 79 |     /// Arrays are bit-equal when every pair of corresponding elements is.
 80 |     fn biteq(&self, other: &Self) -> bool {
 81 |         self.iter().zip(other).all(|(lhs, rhs)| lhs.biteq(rhs))
 82 |     }
 83 | 
 84 |     /// Formats the array as a debug list, rendering each element with its own
 85 |     /// `BitEq::fmt`.
 86 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 87 |         // Adapter that lets `debug_list` reach `BitEq::fmt` through `Debug`.
 88 |         #[repr(transparent)]
 89 |         struct Element<'a, T: BitEq>(&'a T);
 90 | 
 91 |         impl<T: BitEq> core::fmt::Debug for Element<'_, T> {
 92 |             fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
 93 |                 self.0.fmt(f)
 94 |             }
 95 |         }
 96 | 
 97 |         f.debug_list().entries(self.iter().map(Element)).finish()
 98 |     }
 99 | }
100 | 
101 | #[doc(hidden)]
102 | pub struct BitEqWrapper<'a, T>(pub &'a T);
103 | 
104 | impl<T: BitEq> PartialEq for BitEqWrapper<'_, T> {
105 |     fn eq(&self, other: &Self) -> bool {
106 |         self.0.biteq(other.0)
107 |     }
108 | }
109 | 
110 | impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
111 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
112 |         self.0.fmt(f)
113 |     }
114 | }
115 | 
116 | #[doc(hidden)]
117 | pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);
118 | 
119 | impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
120 |     fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
121 |         self.0.biteq(other.0) || self.0.biteq(other.1)
122 |     }
123 | }
124 | 
125 | impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
126 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
127 |         if self.0.biteq(self.1) {
128 |             self.0.fmt(f)
129 |         } else {
130 |             self.0.fmt(f)?;
131 |             write!(f, " or ")?;
132 |             self.1.fmt(f)
133 |         }
134 |     }
135 | }
136 | 
137 | #[macro_export]
138 | macro_rules! prop_assert_biteq {
139 |     { $a:expr, $b:expr $(,)? } => {
140 |         {
141 |             use $crate::biteq::BitEqWrapper;
142 |             let a = $a;
143 |             let b = $b;
144 |             proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
145 |         }
146 |     };
147 |     { $a:expr, $b:expr, $c:expr $(,)? } => {
148 |         {
149 |             use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
150 |             let a = $a;
151 |             let b = $b;
152 |             let c = $c;
153 |             proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
154 |         }
155 |     };
156 | }
157 | 


--------------------------------------------------------------------------------
/crates/test_helpers/src/subnormals.rs:
--------------------------------------------------------------------------------
 1 | pub trait FlushSubnormals: Sized {
 2 |     fn flush(self) -> Self {
 3 |         self
 4 |     }
 5 | }
 6 | 
 7 | impl<T> FlushSubnormals for *const T {}
 8 | impl<T> FlushSubnormals for *mut T {}
 9 | 
10 | macro_rules! impl_float {
11 |     { $($ty:ty),* } => {
12 |         $(
13 |         impl FlushSubnormals for $ty {
14 |             fn flush(self) -> Self {
15 |                 // Only 4-byte floats are flushed, and only on NEON Arm or AltiVec-without-VSX PowerPC.
16 |                 let target_flushes = cfg!(any(
17 |                     all(target_arch = "arm", target_feature = "neon"),
18 |                     all(
19 |                         any(target_arch = "powerpc", all(target_arch = "powerpc64", target_endian = "big")),
20 |                         target_feature = "altivec",
21 |                         not(target_feature = "vsx"),
22 |                     ),
23 |                 ));
24 |                 if target_flushes && size_of::<Self>() == 4 && self.is_subnormal() {
25 |                     return <$ty>::copysign(0., self);
26 |                 }
27 |                 self
28 |             }
29 |         }
30 |         )*
31 |     }
32 | }
33 | 
34 | macro_rules! impl_else {
35 |     { $($ty:ty),* } => {
36 |         $(
37 |         impl FlushSubnormals for $ty {}
38 |         )*
39 |     }
40 | }
41 | 
42 | impl_float! { f32, f64 }
43 | impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }
44 | 
45 | /// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs; this probes for that once.
46 | /// See <https://gitlab.com/qemu-project/qemu/-/issues/1779>.
47 | #[cfg(all(
48 |     any(target_arch = "powerpc", target_arch = "powerpc64"),
49 |     target_feature = "altivec"
50 | ))]
51 | fn in_buggy_qemu() -> bool {
52 |     use std::sync::OnceLock;
53 |     static BUGGY: OnceLock<bool> = OnceLock::new();
54 | 
55 |     fn add(x: f32, y: f32) -> f32 {
56 |         #[cfg(target_arch = "powerpc")]
57 |         use core::arch::powerpc::*;
58 |         #[cfg(target_arch = "powerpc64")]
59 |         use core::arch::powerpc64::*;
60 | 
61 |         let array: [f32; 4] =
62 |             unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) };
63 |         array[0]
64 |     }
65 | 
66 |     *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative()) // -1.0857398e-38 is subnormal: a negative sum means the input was not flushed before the add
67 | }
68 | 
69 | #[cfg(all(
70 |     any(target_arch = "powerpc", target_arch = "powerpc64"),
71 |     target_feature = "altivec"
72 | ))]
73 | pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
74 |     if in_buggy_qemu() {
75 |         x
76 |     } else {
77 |         x.flush()
78 |     }
79 | }
80 | 
81 | #[cfg(not(all(
82 |     any(target_arch = "powerpc", target_arch = "powerpc64"),
83 |     target_feature = "altivec"
84 | )))]
85 | pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
86 |     x.flush()
87 | }
88 | 
89 | pub fn flush<T: FlushSubnormals>(x: T) -> T {
90 |     x.flush()
91 | }
92 | 


--------------------------------------------------------------------------------
/crates/test_helpers/src/wasm.rs:
--------------------------------------------------------------------------------
 1 | //! Strategies for `u128` and `i128`, since proptest doesn't provide them for the wasm target.
 2 | 
 3 | macro_rules! impl_num {
 4 |     { $name:ident } => {
 5 |         pub(crate) mod $name {
 6 |             type InnerStrategy = crate::array::UniformArrayStrategy<proptest::num::u64::Any, [u64; 2]>;
 7 |             use proptest::strategy::{Strategy, ValueTree, NewTree};
 8 | 
 9 | 
10 |             #[must_use = "strategies do nothing unless used"]
11 |             #[derive(Clone, Copy, Debug)]
12 |             pub struct Any {
13 |                 strategy: InnerStrategy,
14 |             }
15 | 
16 |             pub struct BinarySearch {
17 |                 inner: <InnerStrategy as Strategy>::Tree,
18 |             }
19 | 
20 |             impl ValueTree for BinarySearch {
21 |                 type Value = $name;
22 | 
23 |                 fn current(&self) -> $name {
24 |                     unsafe { core::mem::transmute(self.inner.current()) } // SAFETY: `[u64; 2]` and `$name` (`u128`/`i128`) are both 16 bytes wide, and every bit pattern is a valid integer
25 |                 }
26 | 
27 |                 fn simplify(&mut self) -> bool {
28 |                     self.inner.simplify()
29 |                 }
30 | 
31 |                 fn complicate(&mut self) -> bool {
32 |                     self.inner.complicate()
33 |                 }
34 |             }
35 | 
36 |             impl Strategy for Any {
37 |                 type Tree = BinarySearch;
38 |                 type Value = $name;
39 | 
40 |                 fn new_tree(&self, runner: &mut proptest::test_runner::TestRunner) -> NewTree<Self> {
41 |                     Ok(BinarySearch { inner: self.strategy.new_tree(runner)? })
42 |                 }
43 |             }
44 | 
45 |             pub const ANY: Any = Any { strategy: InnerStrategy::new(proptest::num::u64::ANY) };
46 |         }
47 |     }
48 | }
49 | 
50 | impl_num! { u128 }
51 | impl_num! { i128 }
52 | 


--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "nightly-2025-01-16"
3 | components = ["rustfmt", "clippy", "miri", "rust-src"]
4 | 


--------------------------------------------------------------------------------
/subtree-sync.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -eou pipefail
 4 | 
 5 | git fetch origin
 6 | pushd $2
 7 | git fetch origin
 8 | popd
 9 | 
10 | if [ "$(git rev-parse --show-prefix)" != "" ]; then
11 |     echo "Run this script from the git root" >&2
12 |     exit 1
13 | fi
14 | 
15 | if [ "$(git rev-parse HEAD)" != "$(git rev-parse origin/master)" ]; then
16 |     echo "$(pwd) is not at origin/master" >&2
17 |     exit 1
18 | fi
19 | 
20 | if [ ! -f library/portable-simd/git-subtree.sh ]; then
21 |     curl -sS https://raw.githubusercontent.com/bjorn3/git/tqc-subtree-portable/contrib/subtree/git-subtree.sh -o library/portable-simd/git-subtree.sh
22 |     chmod +x library/portable-simd/git-subtree.sh
23 | fi
24 | 
25 | today=$(date +%Y-%m-%d)
26 | 
27 | case $1 in
28 |     "push")
29 |         upstream=rust-upstream-$today
30 |         merge=sync-from-rust-$today
31 | 
32 |         pushd $2
33 |         git checkout master
34 |         git pull
35 |         popd
36 | 
37 |         library/portable-simd/git-subtree.sh push -P library/portable-simd $2 $upstream
38 | 
39 |         pushd $2
40 |         git checkout -B $merge origin/master
41 |         git merge $upstream
42 |         popd
43 |         echo "Branch \`$merge\` created in \`$2\`. You may need to resolve merge conflicts."
44 |         ;;
45 |     "pull")
46 |         branch=sync-from-portable-simd-$today
47 | 
48 |         git checkout -B $branch
49 |         echo "Creating branch \`$branch\`... You may need to resolve merge conflicts."
50 |         library/portable-simd/git-subtree.sh pull -P library/portable-simd $2 origin/master
51 |         ;;
52 | esac
53 | 


--------------------------------------------------------------------------------