├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── generic-simd-macros ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md └── src │ └── lib.rs ├── generic-simd-test ├── Cargo.toml ├── src │ └── lib.rs └── webdriver.json └── generic-simd ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md └── src ├── alignment.rs ├── arch ├── arm │ ├── complex.rs │ └── mod.rs ├── generic.rs ├── mod.rs ├── wasm │ ├── complex.rs │ └── mod.rs └── x86 │ ├── complex.rs │ └── mod.rs ├── implementation.rs ├── lib.rs ├── pointer.rs ├── scalar.rs ├── shim ├── mod.rs ├── token.rs └── width.rs ├── slice.rs └── vector ├── mod.rs └── width.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | strategy: 8 | matrix: 9 | version: 10 | - 1.42.0 11 | - stable 12 | - beta 13 | - nightly 14 | os: 15 | - macos-latest 16 | - ubuntu-latest 17 | - windows-latest 18 | features: 19 | - [] 20 | - [alloc] 21 | - [complex] 22 | - [std] 23 | - [std, alloc] 24 | - [std, complex] 25 | 26 | runs-on: ${{ matrix.os }} 27 | 28 | steps: 29 | - uses: actions/checkout@v1 30 | - name: Install toolchain 31 | uses: actions-rs/toolchain@v1 32 | with: 33 | toolchain: ${{ matrix.version }} 34 | override: true 35 | profile: minimal 36 | - name: Run tests 37 | shell: bash 38 | run: | 39 | FEATURES_FLAG=$(echo '${{ toJson(matrix.features) }}' | jq -r 'map("--features \(.)") | join(" ")') 40 | cargo test --verbose --no-default-features $FEATURES_FLAG --manifest-path generic-simd/Cargo.toml 41 | cargo test --verbose --no-default-features $FEATURES_FLAG --manifest-path generic-simd-test/Cargo.toml 42 | 43 | wasm: 44 | strategy: 45 | matrix: 46 | rustflags: 47 | - "-Ctarget-feature=-simd128" 48 | - "-Ctarget-feature=+simd128" 49 | features: 50 | - --no-default-features 51 | - --no-default-features 
--features complex 52 | - --no-default-features --features nightly 53 | - --no-default-features --features nightly --features complex 54 | 55 | runs-on: ubuntu-latest 56 | 57 | steps: 58 | - uses: actions/checkout@v1 59 | - name: Install wasm-pack 60 | run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh 61 | - name: Install nightly rust 62 | uses: actions-rs/toolchain@v1 63 | with: 64 | toolchain: nightly 65 | override: true 66 | profile: minimal 67 | - name: Run tests 68 | env: 69 | RUSTFLAGS: ${{ matrix.rustflags }} 70 | run: wasm-pack test --chrome --headless generic-simd-test -- ${{ matrix.features }} 71 | 72 | 73 | cross: 74 | strategy: 75 | matrix: 76 | target: 77 | - aarch64-unknown-linux-gnu 78 | - armv7-unknown-linux-gnueabihf 79 | - arm-unknown-linux-gnueabihf 80 | features: 81 | - --no-default-features 82 | - --no-default-features --features complex 83 | - --no-default-features --features nightly 84 | - --no-default-features --features nightly --features complex 85 | 86 | runs-on: ubuntu-latest 87 | 88 | steps: 89 | - uses: actions/checkout@v1 90 | - name: Install cross 91 | run: cargo install cross 92 | - name: Run tests 93 | run: | 94 | cross +nightly test --target ${{ matrix.target }} --verbose ${{ matrix.features }} --manifest-path generic-simd/Cargo.toml 95 | cross +nightly test --target ${{ matrix.target }} --verbose ${{ matrix.features }} --manifest-path generic-simd-test/Cargo.toml 96 | 97 | 98 | lint: 99 | runs-on: ubuntu-latest 100 | steps: 101 | - uses: actions/checkout@v1 102 | - name: Install toolchain 103 | uses: actions-rs/toolchain@v1 104 | with: 105 | toolchain: stable 106 | override: true 107 | profile: minimal 108 | components: clippy, rustfmt 109 | - name: Clippy lint 110 | run: cargo clippy --all-targets --manifest-path generic-simd/Cargo.toml -- -D warnings 111 | - name: Check formatting 112 | run: cargo fmt -- --check 113 | - name: Deadlinks 114 | run: | 115 | cargo install
cargo-deadlinks 116 | cargo doc 117 | cargo deadlinks -v --check-http 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | *.swp 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [0.1.0] - 2020-09-07 10 | ### Added 11 | - Initial release 12 | 13 | [Unreleased]: https://github.com/calebzulawski/generic-simd/compare/0.1.0...HEAD 14 | [0.1.0]: https://github.com/calebzulawski/generic-simd/releases/tag/0.1.0 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "generic-simd", 5 | "generic-simd-macros", 6 | "generic-simd-test", 7 | ] 8 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright 2020 Caleb Zulawski 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | generic-simd 2 | ============ 3 | [![Build Status](https://github.com/calebzulawski/generic-simd/workflows/Build/badge.svg?branch=master)](https://github.com/calebzulawski/generic-simd/actions) 4 | ![Rustc Version 1.42+](https://img.shields.io/badge/rustc-1.42+-lightgray.svg) 5 | [![License](https://img.shields.io/crates/l/generic-simd)](https://crates.io/crates/generic-simd) 6 | [![Crates.io](https://img.shields.io/crates/v/generic-simd)](https://crates.io/crates/generic-simd) 7 | [![Rust Documentation](https://img.shields.io/badge/api-rustdoc-blue.svg)](https://docs.rs/generic-simd) 8 | 9 | NOTE: This project has been archived. Please use `std::simd` instead. 10 | 11 | generic-simd provides safe and idiomatic zero-cost abstractions for writing explicit cross-platform SIMD operations. 12 | 13 | ## License 14 | generic-simd is distributed under the terms of both the MIT license and the Apache License (Version 2.0). 15 | 16 | See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) for details. 
17 | -------------------------------------------------------------------------------- /generic-simd-macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "generic-simd-macros" 3 | version = "0.1.0" 4 | authors = ["Caleb Zulawski "] 5 | license = "MIT OR Apache-2.0" 6 | description = "Implementation crate for generic-simd" 7 | repository = "https://github.com/calebzulawski/generic-simd" 8 | categories = [] 9 | readme = "README.md" 10 | include = [ 11 | "/Cargo.toml", 12 | "/LICENSE-APACHE", 13 | "/LICENSE-MIT", 14 | "/README.md", 15 | "/src/**", 16 | "/tests/**", 17 | ] 18 | edition = "2018" 19 | 20 | [features] 21 | default = [] 22 | nightly = [] 23 | 24 | [lib] 25 | proc-macro = true 26 | 27 | [dependencies] 28 | syn = { version = "1", features = ["full"] } 29 | quote = "1" 30 | -------------------------------------------------------------------------------- /generic-simd-macros/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /generic-simd-macros/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /generic-simd-macros/README.md: -------------------------------------------------------------------------------- 1 | Implementation crate for [`generic-simd`](https://docs.rs/generic-simd). 
2 | -------------------------------------------------------------------------------- /generic-simd-macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate proc_macro; 2 | use proc_macro::TokenStream; 3 | use quote::quote; 4 | use syn::{parse_macro_input, Ident, ItemFn}; 5 | 6 | #[proc_macro_attribute] 7 | pub fn dispatch(args: TokenStream, input: TokenStream) -> TokenStream { 8 | let ItemFn { 9 | attrs, 10 | vis, 11 | sig, 12 | block, 13 | } = parse_macro_input!(input as ItemFn); 14 | let feature = parse_macro_input!(args as Ident); 15 | 16 | let build_fn = |wasm| { 17 | let nightly = cfg!(feature = "nightly"); 18 | let clone_wasm = if nightly && wasm { 19 | Some(quote! { #[clone(target = "wasm32+simd128")] }) 20 | } else { 21 | None 22 | }; 23 | let clone_arm = if nightly { 24 | Some(quote! { #[clone(target = "aarch64+neon")] }) 25 | } else { 26 | None 27 | }; 28 | quote! { 29 | #[generic_simd::multiversion::multiversion] 30 | #[clone(target = "[x86|x86_64]+avx")] 31 | #[clone(target = "[x86|x86_64]+sse4.1")] 32 | #clone_wasm 33 | #clone_arm 34 | #[crate_path(path = "generic_simd::multiversion")] 35 | #(#attrs)* 36 | #vis 37 | #sig 38 | { 39 | #[target_cfg(target = "[x86|x86_64]+sse4.1")] 40 | let #feature = unsafe { <generic_simd::arch::x86::Sse as generic_simd::arch::Token>::new_unchecked() }; 41 | 42 | #[target_cfg(target = "[x86|x86_64]+avx")] 43 | let #feature = unsafe { <generic_simd::arch::x86::Avx as generic_simd::arch::Token>::new_unchecked() }; 44 | 45 | #[target_cfg(target = "wasm32+simd128")] 46 | let #feature = unsafe { <generic_simd::arch::wasm::Simd128 as generic_simd::arch::Token>::new_unchecked() }; 47 | 48 | #[target_cfg(target = "[arm|aarch64]+neon")] 49 | let #feature = unsafe { <generic_simd::arch::arm::Neon as generic_simd::arch::Token>::new_unchecked() }; 50 | 51 | #[target_cfg(not(any( 52 | target = "[x86|x86_64]+sse4.1", 53 | target = "[x86|x86_64]+avx", 54 | target = "[arm|aarch64]+neon", 55 | target = "wasm32+simd128", 56 | )))] 57 | let #feature = <generic_simd::arch::generic::Generic as generic_simd::arch::Token>::new().unwrap(); 58 | 59 | #block 60 | } 61 | } 62 | }; 63 | let normal = build_fn(false); 64 | let with_wasm = build_fn(true); 65 | let output = quote!
{ 66 | #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] 67 | #with_wasm 68 | 69 | #[cfg(not(all(target_arch = "wasm32", target_feature = "simd128"),))] 70 | #normal 71 | }; 72 | output.into() 73 | } 74 | -------------------------------------------------------------------------------- /generic-simd-test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "generic-simd-test" 3 | version = "0.1.0" 4 | authors = ["Caleb Zulawski "] 5 | edition = "2018" 6 | publish = false 7 | 8 | [features] 9 | default = ["std", "complex"] 10 | std = ["generic-simd/std"] 11 | complex = ["generic-simd/complex"] 12 | alloc = ["generic-simd/alloc"] 13 | nightly = ["generic-simd/nightly"] 14 | 15 | [dependencies] 16 | generic-simd = { path = "../generic-simd", default-features = false } 17 | num-complex = { version = "0.3", default-features = false, features = ["rand"] } 18 | num-traits = "0.2" 19 | rand = "0.7" 20 | rand_pcg = "0.2" 21 | paste = "1" 22 | wasm-bindgen-test = "0.3" 23 | 24 | [dev-dependencies] 25 | -------------------------------------------------------------------------------- /generic-simd-test/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr( 2 | all(feature = "nightly", target_arch = "wasm32"), 3 | feature(wasm_simd, wasm_target_feature) 4 | )] 5 | #![cfg_attr( 6 | all(feature = "nightly", target_arch = "aarch64"), 7 | feature(stdsimd, aarch64_target_feature) 8 | )] 9 | #![cfg_attr( 10 | all(feature = "nightly", target_arch = "arm"), 11 | feature(stdsimd, arm_target_feature) 12 | )] 13 | 14 | use generic_simd::{dispatch, scalar::ScalarExt, vector::Signed}; 15 | use num_traits::Num; 16 | use rand::distributions::Standard; 17 | use rand::prelude::*; 18 | use rand::SeedableRng; 19 | 20 | wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); 21 | 22 | #[cfg(feature = "complex")] 23 | use num_complex::{Complex, 
ComplexDistribution}; 24 | 25 | #[inline] 26 | fn unary_op_impl(distribution: D, mut vector: V, vfunc: VFunc, sfunc: SFunc) 27 | where 28 | V::Scalar: Num + core::ops::Neg + core::fmt::Debug + Copy, 29 | D: rand::distributions::Distribution + Copy, 30 | V: Signed, 31 | VFunc: Fn(V) -> V, 32 | SFunc: Fn(V::Scalar) -> V::Scalar, 33 | { 34 | let mut rng = rand_pcg::Pcg32::seed_from_u64(999); 35 | for x in vector.as_slice_mut() { 36 | *x = rng.sample(distribution); 37 | } 38 | 39 | let output = vfunc(vector); 40 | for i in 0..V::width() { 41 | assert_eq!(output[i], sfunc(vector[i])) 42 | } 43 | } 44 | 45 | #[inline] 46 | fn binary_op_impl( 47 | distribution: D, 48 | (mut a, mut b): (V, V), 49 | vfunc: VFunc, 50 | sfunc: SFunc, 51 | ) where 52 | V::Scalar: Num + core::ops::Neg + core::fmt::Debug + Copy, 53 | D: rand::distributions::Distribution + Copy, 54 | V: Signed, 55 | VFunc: Fn(V, V) -> V, 56 | SFunc: Fn(V::Scalar, V::Scalar) -> V::Scalar, 57 | { 58 | let mut rng = rand_pcg::Pcg32::seed_from_u64(999); 59 | for x in a.as_slice_mut() { 60 | *x = rng.sample(distribution); 61 | } 62 | for x in b.as_slice_mut() { 63 | *x = rng.sample(distribution); 64 | } 65 | 66 | let output = vfunc(a, b); 67 | for i in 0..V::width() { 68 | assert_eq!(output[i], sfunc(a[i], b[i])) 69 | } 70 | } 71 | 72 | #[inline] 73 | fn binary_scalar_op_impl(distribution: D, mut a: V, vfunc: VFunc, sfunc: SFunc) 74 | where 75 | V::Scalar: Num + core::ops::Neg + core::fmt::Debug + Copy, 76 | D: rand::distributions::Distribution + Copy, 77 | V: Signed, 78 | VFunc: Fn(V, V::Scalar) -> V, 79 | SFunc: Fn(V::Scalar, V::Scalar) -> V::Scalar, 80 | { 81 | let mut rng = rand_pcg::Pcg32::seed_from_u64(999); 82 | let b = rng.sample(distribution); 83 | for x in a.as_slice_mut() { 84 | *x = rng.sample(distribution); 85 | } 86 | 87 | let output = vfunc(a, b); 88 | for i in 0..V::width() { 89 | assert_eq!(output[i], sfunc(a[i], b)) 90 | } 91 | } 92 | 93 | #[inline] 94 | fn assign_op_impl( 95 | distribution: D, 96 | 
(mut a, mut b): (V, V), 97 | vfunc: VFunc, 98 | sfunc: SFunc, 99 | ) where 100 | V::Scalar: Num + core::ops::Neg + core::fmt::Debug + Copy, 101 | D: rand::distributions::Distribution + Copy, 102 | V: Signed, 103 | VFunc: Fn(&mut V, V), 104 | SFunc: Fn(&mut V::Scalar, V::Scalar), 105 | { 106 | let mut rng = rand_pcg::Pcg32::seed_from_u64(999); 107 | for x in a.as_slice_mut() { 108 | *x = rng.sample(distribution); 109 | } 110 | for x in b.as_slice_mut() { 111 | *x = rng.sample(distribution); 112 | } 113 | 114 | let mut output: V = a; 115 | vfunc(&mut output, b); 116 | for i in 0..V::width() { 117 | sfunc(&mut a[i], b[i]); 118 | assert_eq!(output[i], a[i]) 119 | } 120 | } 121 | 122 | #[inline] 123 | fn assign_scalar_op_impl(distribution: D, mut a: V, vfunc: VFunc, sfunc: SFunc) 124 | where 125 | V::Scalar: Num + core::ops::Neg + core::fmt::Debug + Copy, 126 | D: rand::distributions::Distribution + Copy, 127 | V: Signed, 128 | VFunc: Fn(&mut V, V::Scalar), 129 | SFunc: Fn(&mut V::Scalar, V::Scalar), 130 | { 131 | let mut rng = rand_pcg::Pcg32::seed_from_u64(999); 132 | let b = rng.sample(distribution); 133 | for x in a.as_slice_mut() { 134 | *x = rng.sample(distribution); 135 | } 136 | 137 | let mut output: V = a; 138 | vfunc(&mut output, b); 139 | for i in 0..V::width() { 140 | sfunc(&mut a[i], b); 141 | assert_eq!(output[i], a[i]) 142 | } 143 | } 144 | 145 | macro_rules! ops_test { 146 | { 147 | $token:ident, $type:ty 148 | } => { 149 | pub mod width_native { 150 | use super::*; 151 | ops_test! { @wrapper $token, $type, zeroed_native } 152 | } 153 | pub mod width_1 { 154 | use super::*; 155 | ops_test! { @wrapper $token, $type, zeroed1 } 156 | } 157 | pub mod width_2 { 158 | use super::*; 159 | ops_test! { @wrapper $token, $type, zeroed2 } 160 | } 161 | pub mod width_4 { 162 | use super::*; 163 | ops_test! { @wrapper $token, $type, zeroed4 } 164 | } 165 | pub mod width_8 { 166 | use super::*; 167 | ops_test! 
{ @wrapper $token, $type, zeroed8 } 168 | } 169 | }; 170 | { 171 | @wrapper $token:ident, $type:ty, $init:ident 172 | } => { 173 | ops_test! { @impl $type, $init, add, binary_op_impl, $token, core::ops::Add::add } 174 | ops_test! { @impl $type, $init, sub, binary_op_impl, $token, core::ops::Sub::sub } 175 | ops_test! { @impl $type, $init, mul, binary_op_impl, $token, core::ops::Mul::mul } 176 | ops_test! { @impl $type, $init, div, binary_op_impl, $token, core::ops::Div::div } 177 | ops_test! { @impl $type, $init, add_scalar, binary_scalar_op_impl, $token, core::ops::Add::add } 178 | ops_test! { @impl $type, $init, sub_scalar, binary_scalar_op_impl, $token, core::ops::Sub::sub } 179 | ops_test! { @impl $type, $init, mul_scalar, binary_scalar_op_impl, $token, core::ops::Mul::mul } 180 | ops_test! { @impl $type, $init, div_scalar, binary_scalar_op_impl, $token, core::ops::Div::div } 181 | ops_test! { @impl $type, $init, add_assign, assign_op_impl, $token, core::ops::AddAssign::add_assign } 182 | ops_test! { @impl $type, $init, sub_assign, assign_op_impl, $token, core::ops::SubAssign::sub_assign } 183 | ops_test! { @impl $type, $init, mul_assign, assign_op_impl, $token, core::ops::MulAssign::mul_assign } 184 | ops_test! { @impl $type, $init, div_assign, assign_op_impl, $token, core::ops::DivAssign::div_assign } 185 | ops_test! { @impl $type, $init, add_assign_scalar, assign_scalar_op_impl, $token, core::ops::AddAssign::add_assign } 186 | ops_test! { @impl $type, $init, sub_assign_scalar, assign_scalar_op_impl, $token, core::ops::SubAssign::sub_assign } 187 | ops_test! { @impl $type, $init, mul_assign_scalar, assign_scalar_op_impl, $token, core::ops::MulAssign::mul_assign } 188 | ops_test! { @impl $type, $init, div_assign_scalar, assign_scalar_op_impl, $token, core::ops::DivAssign::div_assign } 189 | ops_test! 
{ @impl $type, $init, neg, unary_op_impl, $token, core::ops::Neg::neg } 190 | }; 191 | { @distribution f32 } => { Standard }; 192 | { @distribution f64 } => { Standard }; 193 | { @distribution Complex } => { ComplexDistribution::new(Standard, Standard) }; 194 | { @distribution Complex } => { ComplexDistribution::new(Standard, Standard) }; 195 | { 196 | @init unary_op_impl, $type:ty, $token:ident, $init:ident 197 | } => { 198 | <$type>::$init($token) 199 | }; 200 | { 201 | @init binary_op_impl, $type:ty, $token:ident, $init:ident 202 | } => { 203 | (<$type>::$init($token), <$type>::$init($token)) 204 | }; 205 | { 206 | @init binary_scalar_op_impl, $type:ty, $token:ident, $init:ident 207 | } => { 208 | <$type>::$init($token) 209 | }; 210 | { 211 | @init assign_op_impl, $type:ty, $token:ident, $init:ident 212 | } => { 213 | (<$type>::$init($token), <$type>::$init($token)) 214 | }; 215 | { 216 | @init assign_scalar_op_impl, $type:ty, $token:ident, $init:ident 217 | } => { 218 | <$type>::$init($token) 219 | }; 220 | { 221 | @impl $type:ty, $init:ident, $name:ident, $test:ident, $token:ident, $func:path 222 | } => { 223 | paste::paste! 
{ 224 | #[dispatch($token)] 225 | pub fn [<$name _dispatch>]() { 226 | $test(ops_test!(@distribution $type), ops_test!(@init $test, $type, $token, $init), $func, $func); 227 | } 228 | 229 | #[test] 230 | #[wasm_bindgen_test::wasm_bindgen_test] 231 | pub fn [<$name _generic>]() { 232 | [<$name _dispatch_default_version>]() 233 | } 234 | 235 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 236 | #[test] 237 | pub fn [<$name _sse>]() { 238 | use generic_simd::arch::Token as _; 239 | if generic_simd::arch::x86::Sse::new().is_some() { 240 | unsafe { [<$name _dispatch_sse41_version>]() } 241 | } 242 | } 243 | 244 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 245 | #[test] 246 | pub fn [<$name _avx>]() { 247 | use generic_simd::arch::Token as _; 248 | if generic_simd::arch::x86::Avx::new().is_some() { 249 | unsafe { [<$name _dispatch_avx_version>]() } 250 | } 251 | } 252 | 253 | #[cfg(all(feature = "nightly", target_arch = "aarch64"))] 254 | #[test] 255 | pub fn [<$name _neon>]() { 256 | use generic_simd::arch::Token as _; 257 | if generic_simd::arch::arm::Neon::new().is_some() { 258 | unsafe { [<$name _dispatch_neon_version>]() } 259 | } 260 | } 261 | 262 | #[cfg(all(feature = "nightly", target_arch = "wasm32", target_feature = "simd128"))] 263 | #[wasm_bindgen_test::wasm_bindgen_test] 264 | pub fn [<$name _simd128>]() { 265 | use generic_simd::arch::Token as _; 266 | assert!(generic_simd::arch::wasm::Simd128::new().is_some()); 267 | unsafe { [<$name _dispatch_simd128_version>]() } 268 | } 269 | } 270 | }; 271 | } 272 | 273 | pub mod r#f32 { 274 | use super::*; 275 | ops_test! { token, f32 } 276 | } 277 | 278 | pub mod r#f64 { 279 | use super::*; 280 | ops_test! { token, f64 } 281 | } 282 | 283 | #[cfg(feature = "complex")] 284 | pub mod complex_f32 { 285 | use super::*; 286 | ops_test! { token, Complex } 287 | } 288 | 289 | #[cfg(feature = "complex")] 290 | pub mod complex_f64 { 291 | use super::*; 292 | ops_test! 
{ token, Complex } 293 | } 294 | -------------------------------------------------------------------------------- /generic-simd-test/webdriver.json: -------------------------------------------------------------------------------- 1 | { 2 | "goog:chromeOptions": { 3 | "args": [ 4 | "--enable-features=WebAssemblySimd" 5 | ] 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /generic-simd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "generic-simd" 3 | version = "0.1.0" 4 | authors = ["Caleb Zulawski "] 5 | license = "MIT OR Apache-2.0" 6 | description = "safe and idiomatic zero-cost abstractions for writing explicit cross-platform SIMD operations" 7 | repository = "https://github.com/calebzulawski/generic-simd" 8 | categories = ["no-std", "api-bindings", "hardware-support"] 9 | readme = "README.md" 10 | include = [ 11 | "/Cargo.toml", 12 | "/LICENSE-APACHE", 13 | "/LICENSE-MIT", 14 | "/README.md", 15 | "/src/**", 16 | "/tests/**", 17 | ] 18 | edition = "2018" 19 | 20 | [features] 21 | default = ["std", "complex"] 22 | std = ["multiversion/std"] 23 | complex = ["num-complex"] 24 | alloc = [] 25 | nightly = ["generic-simd-macros/nightly"] 26 | 27 | [dependencies] 28 | num-complex = { version = "0.3", default-features = false, optional = true } 29 | generic-simd-macros = { version = "0.1", default-features = false, path = "../generic-simd-macros" } 30 | multiversion = { version = "0.6.1", default-features = false } 31 | 32 | [package.metadata.docs.rs] 33 | features = ["nightly", "complex", "alloc"] 34 | no-default-features = true 35 | default-target = "x86_64-unknown-linux-gnu" 36 | targets = [ 37 | "x86_64-unknown-linux-gnu", 38 | "i686-unknown-linux-gnu", 39 | "aarch64-unknown-linux-gnu", 40 | "armv7-unknown-linux-gnueabihf", 41 | "wasm32-unknown-unknown", 42 | ] 43 | rustdoc-args = ["-Ctarget-feature=+simd128"] 44 | 
-------------------------------------------------------------------------------- /generic-simd/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /generic-simd/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /generic-simd/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /generic-simd/src/alignment.rs: -------------------------------------------------------------------------------- 1 | //! Alignment helpers. 2 | 3 | #[cfg(all(feature = "alloc", not(feature = "std")))] 4 | extern crate alloc; 5 | 6 | #[cfg(all(feature = "alloc", not(feature = "std")))] 7 | use alloc::{ 8 | alloc::{alloc, Layout}, 9 | boxed::Box, 10 | }; 11 | 12 | #[cfg(feature = "std")] 13 | use std::alloc::{alloc, Layout}; 14 | 15 | use crate::{ 16 | arch, scalar, 17 | vector::{width, VectorOf}, 18 | }; 19 | 20 | #[repr(C)] 21 | #[derive(Copy, Clone)] 22 | struct Vectors>( 23 | VectorOf, 24 | VectorOf, 25 | VectorOf, 26 | VectorOf, 27 | ); 28 | 29 | macro_rules! max_alignment { 30 | { $first:path, $($rest:path,)* } => { 31 | 32 | #[doc(hidden)] 33 | #[repr(C)] 34 | #[derive(Copy, Clone)] 35 | pub struct AllVectors $(+ scalar::ScalarExt<$rest>)*>( 36 | Vectors<$first, Scalar>, 37 | $( 38 | Vectors<$rest, Scalar>, 39 | )* 40 | ); 41 | 42 | /// Allocate a boxed slice of scalars with maximum possible vector alignment for a 43 | /// particular scalar on the current architecture. 44 | /// 45 | /// # Panics 46 | /// Panics if `count` is 0 or memory allocation fails. 
47 | #[cfg(any(feature = "std", feature = "alloc"))] 48 | pub fn allocate_max_aligned_slice $(+ scalar::ScalarExt<$rest>)*>(count: usize) -> Box<[Scalar]> { 49 | allocate_aligned_slice::, Scalar>(count) 50 | } 51 | } 52 | } 53 | 54 | crate::call_macro_with_tokens! { max_alignment } 55 | 56 | /// Aligns a value to another type's alignment. 57 | #[repr(C)] 58 | pub struct Aligned { 59 | alignment: [AlignTo; 0], 60 | value: T, 61 | } 62 | 63 | impl Aligned { 64 | pub fn new(value: T) -> Self { 65 | Self { 66 | alignment: [], 67 | value, 68 | } 69 | } 70 | } 71 | 72 | impl core::ops::Deref for Aligned { 73 | type Target = T; 74 | 75 | fn deref(&self) -> &Self::Target { 76 | &self.value 77 | } 78 | } 79 | 80 | impl core::ops::DerefMut for Aligned { 81 | fn deref_mut(&mut self) -> &mut Self::Target { 82 | &mut self.value 83 | } 84 | } 85 | 86 | impl Copy for Aligned {} 87 | 88 | impl Clone for Aligned { 89 | fn clone(&self) -> Self { 90 | Self::new(self.value.clone()) 91 | } 92 | } 93 | 94 | impl Default for Aligned { 95 | fn default() -> Self { 96 | Self::new(T::default()) 97 | } 98 | } 99 | 100 | impl core::fmt::Debug for Aligned { 101 | #[inline] 102 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 103 | f.debug_tuple("Aligned").field(&self.value).finish() 104 | } 105 | } 106 | 107 | impl core::cmp::PartialEq for Aligned { 108 | #[inline] 109 | fn eq(&self, other: &Self) -> bool { 110 | self.value.eq(&other.value) 111 | } 112 | } 113 | 114 | impl core::cmp::Eq for Aligned {} 115 | 116 | impl core::cmp::PartialOrd for Aligned { 117 | #[inline] 118 | fn partial_cmp(&self, other: &Self) -> Option { 119 | self.value.partial_cmp(&other.value) 120 | } 121 | } 122 | 123 | impl core::cmp::Ord for Aligned { 124 | #[inline] 125 | fn cmp(&self, other: &Self) -> core::cmp::Ordering { 126 | self.value.cmp(&other.value) 127 | } 128 | } 129 | 130 | impl core::hash::Hash for Aligned { 131 | #[inline] 132 | fn hash(&self, hasher: &mut H) { 133 | 
self.value.hash(hasher) 134 | } 135 | } 136 | 137 | /// Allocate a boxed slice of `count` `T`s aligned to the `AlignTo` type. 138 | /// 139 | /// # Panics 140 | /// Panics if `count` is 0, if `T` is zero-sized, if `count * size_of::<T>()` overflows `usize`, or if memory allocation fails. 141 | #[cfg(any(feature = "std", feature = "alloc"))] 142 | pub fn allocate_aligned_slice<AlignTo, T: Default>(count: usize) -> Box<[T]> { 143 | assert!(count > 0 && core::mem::size_of::<T>() > 0, "size must be nonzero"); // `alloc` must never be called with a zero-size layout 144 | let layout = Layout::from_size_align( 145 | count.checked_mul(core::mem::size_of::<T>()).expect("allocation size overflows usize"), // a wrapped product would under-allocate and the write loop below would run out of bounds 146 | core::cmp::max(core::mem::align_of::<AlignTo>(), core::mem::align_of::<T>()), 147 | ) 148 | .unwrap(); 149 | unsafe { 150 | let ptr = alloc(layout) as *mut T; 151 | assert!(!ptr.is_null()); 152 | for i in 0..count { 153 | ptr.add(i).write(T::default()); 154 | } 155 | Box::from_raw(core::ptr::slice_from_raw_parts_mut(ptr, count)) // NOTE(review): the Box presumably frees with the layout of `[T]`, whose alignment can be smaller than `layout`'s -- confirm this is sound for the allocator in use 156 | } 157 | } 158 | 159 | /// Aligns a type to the maximum possible vector alignment for a particular scalar on the current 160 | /// architecture. 161 | pub type MaxAligned = Aligned, T>; 162 | 163 | #[cfg(test)] 164 | mod test { 165 | use super::*; 166 | 167 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 168 | #[test] 169 | fn check_x86() { 170 | type Foo = [f32; 8]; 171 | type AlignedFoo = MaxAligned; 172 | assert_eq!(core::mem::align_of::(), 32); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /generic-simd/src/arch/arm/complex.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | arch::{arm::Neon, Token}, 3 | scalar::Scalar, 4 | shim::{Shim2, Shim4, Shim8}, 5 | vector::{width, Native, Vector}, 6 | }; 7 | use num_complex::Complex; 8 | 9 | #[cfg(target_arch = "aarch64")] 10 | use core::arch::aarch64::*; 11 | #[cfg(target_arch = "arm")] 12 | use core::arch::arm::*; 13 | 14 | impl Native for Complex { 15 | type Width = width::W2; 16 | } 17 | 18 | impl Native for Complex { 19 | type Width = width::W1; 20 | } 21 | 22 | /// A NEON vector of `Complex`s.
23 | /// 24 | /// Requires feature `"complex"`. 25 | #[derive(Clone, Copy, Debug)] 26 | #[repr(transparent)] 27 | #[allow(non_camel_case_types)] 28 | pub struct cf32x1(float32x2_t); 29 | 30 | /// A NEON vector of `Complex`s. 31 | /// 32 | /// Requires feature `"complex"`. 33 | #[derive(Clone, Copy, Debug)] 34 | #[repr(transparent)] 35 | #[allow(non_camel_case_types)] 36 | pub struct cf32x2(float32x4_t); 37 | 38 | /// A NEON vector of `Complex`s. 39 | /// 40 | /// Requires feature `"complex"`. 41 | #[cfg(target_arch = "aarch64")] 42 | #[derive(Clone, Copy, Debug)] 43 | #[repr(transparent)] 44 | #[allow(non_camel_case_types)] 45 | pub struct cf64x1(float64x2_t); 46 | 47 | impl Scalar for Complex { 48 | type Vector = cf32x1; 49 | } 50 | 51 | impl Scalar for Complex { 52 | type Vector = cf32x2; 53 | } 54 | 55 | impl Scalar for Complex { 56 | type Vector = Shim2>; 57 | } 58 | 59 | impl Scalar for Complex { 60 | type Vector = Shim4>; 61 | } 62 | 63 | #[cfg(target_arch = "arm")] 64 | impl Scalar for Complex { 65 | type Vector = crate::arch::generic::cf64x1; 66 | } 67 | 68 | #[cfg(target_arch = "aarch64")] 69 | impl Scalar for Complex { 70 | type Vector = cf64x1; 71 | } 72 | 73 | impl Scalar for Complex { 74 | type Vector = Shim2<>::Vector, Complex>; 75 | } 76 | 77 | impl Scalar for Complex { 78 | type Vector = Shim4<>::Vector, Complex>; 79 | } 80 | 81 | impl Scalar for Complex { 82 | type Vector = Shim8<>::Vector, Complex>; 83 | } 84 | 85 | arithmetic_ops! { 86 | feature: Neon::new_unchecked(), 87 | for cf32x1: 88 | add -> (vadd_f32), 89 | sub -> (vsub_f32), 90 | mul -> (), 91 | div -> () 92 | } 93 | 94 | arithmetic_ops! { 95 | feature: Neon::new_unchecked(), 96 | for cf32x2: 97 | add -> (vaddq_f32), 98 | sub -> (vsubq_f32), 99 | mul -> (), 100 | div -> () 101 | } 102 | 103 | #[cfg(target_arch = "aarch64")] 104 | arithmetic_ops! 
{ 105 | feature: Neon::new_unchecked(), 106 | for cf64x1: 107 | add -> (vaddq_f64), 108 | sub -> (vsubq_f64), 109 | mul -> (), 110 | div -> () 111 | } 112 | 113 | impl core::ops::Neg for cf32x1 { 114 | type Output = Self; 115 | 116 | #[inline] 117 | fn neg(mut self) -> Self { 118 | for v in self.as_slice_mut() { 119 | *v = -*v; 120 | } 121 | self 122 | } 123 | } 124 | 125 | impl core::ops::Neg for cf32x2 { 126 | type Output = Self; 127 | 128 | #[inline] 129 | fn neg(mut self) -> Self { 130 | for v in self.as_slice_mut() { 131 | *v = -*v; 132 | } 133 | self 134 | } 135 | } 136 | 137 | #[cfg(target_arch = "aarch64")] 138 | impl core::ops::Neg for cf64x1 { 139 | type Output = Self; 140 | 141 | #[inline] 142 | fn neg(mut self) -> Self { 143 | for v in self.as_slice_mut() { 144 | *v = -*v; 145 | } 146 | self 147 | } 148 | } 149 | 150 | as_slice! { cf32x1 } 151 | as_slice! { cf32x2 } 152 | #[cfg(target_arch = "aarch64")] 153 | as_slice! { cf64x1 } 154 | 155 | unsafe impl Vector for cf32x1 { 156 | type Scalar = Complex; 157 | 158 | type Token = Neon; 159 | 160 | type Width = crate::vector::width::W1; 161 | 162 | type Underlying = float32x2_t; 163 | 164 | #[inline] 165 | fn zeroed(_: Self::Token) -> Self { 166 | // TODO use vdup 167 | Self(unsafe { core::mem::zeroed() }) 168 | } 169 | 170 | #[inline] 171 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 172 | // TODO use vdup 173 | let mut v: Self = unsafe { core::mem::zeroed() }; 174 | v[0] = from; 175 | v 176 | } 177 | } 178 | 179 | unsafe impl Vector for cf32x2 { 180 | type Scalar = Complex; 181 | 182 | type Token = Neon; 183 | 184 | type Width = crate::vector::width::W2; 185 | 186 | type Underlying = float32x4_t; 187 | 188 | #[inline] 189 | fn zeroed(_: Self::Token) -> Self { 190 | // TODO use vdup 191 | Self(unsafe { core::mem::zeroed() }) 192 | } 193 | 194 | #[inline] 195 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 196 | // TODO use vdup 197 | let mut v: Self = unsafe { core::mem::zeroed() }; 198 
| v[0] = from; 199 | v[1] = from; 200 | v 201 | } 202 | } 203 | 204 | #[cfg(target_arch = "aarch64")] 205 | unsafe impl Vector for cf64x1 { 206 | type Scalar = Complex; 207 | 208 | type Token = Neon; 209 | 210 | type Width = crate::vector::width::W1; 211 | 212 | type Underlying = float64x2_t; 213 | 214 | #[inline] 215 | fn zeroed(_: Self::Token) -> Self { 216 | // TODO use vdup 217 | Self(unsafe { core::mem::zeroed() }) 218 | } 219 | 220 | #[inline] 221 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 222 | // TODO use vdup 223 | let mut v: Self = unsafe { core::mem::zeroed() }; 224 | v[0] = from; 225 | v 226 | } 227 | } 228 | 229 | impl crate::vector::Complex for cf32x1 { 230 | type RealScalar = f32; 231 | 232 | #[inline] 233 | fn conj(mut self) -> Self { 234 | for v in self.as_slice_mut() { 235 | *v = v.conj(); 236 | } 237 | self 238 | } 239 | 240 | #[inline] 241 | fn mul_i(mut self) -> Self { 242 | for v in self.as_slice_mut() { 243 | *v = Complex::new(-v.im, v.re); 244 | } 245 | self 246 | } 247 | 248 | #[inline] 249 | fn mul_neg_i(mut self) -> Self { 250 | for v in self.as_slice_mut() { 251 | *v = Complex::new(v.im, -v.re); 252 | } 253 | self 254 | } 255 | } 256 | 257 | impl crate::vector::Complex for cf32x2 { 258 | type RealScalar = f32; 259 | 260 | #[inline] 261 | fn conj(mut self) -> Self { 262 | for v in self.as_slice_mut() { 263 | *v = v.conj(); 264 | } 265 | self 266 | } 267 | 268 | #[inline] 269 | fn mul_i(mut self) -> Self { 270 | for v in self.as_slice_mut() { 271 | *v = Complex::new(-v.im, v.re); 272 | } 273 | self 274 | } 275 | 276 | #[inline] 277 | fn mul_neg_i(mut self) -> Self { 278 | for v in self.as_slice_mut() { 279 | *v = Complex::new(v.im, -v.re); 280 | } 281 | self 282 | } 283 | } 284 | 285 | #[cfg(target_arch = "aarch64")] 286 | impl crate::vector::Complex for cf64x1 { 287 | type RealScalar = f64; // cf64x1 is backed by float64x2_t, so its real component type is f64, not f32 288 | 289 | #[inline] 290 | fn conj(mut self) -> Self { 291 | for v in self.as_slice_mut() { 292 | *v = v.conj(); 293 | } 294 | self 295
| } 296 | 297 | #[inline] 298 | fn mul_i(mut self) -> Self { 299 | for v in self.as_slice_mut() { 300 | *v = Complex::new(-v.im, v.re); 301 | } 302 | self 303 | } 304 | 305 | #[inline] 306 | fn mul_neg_i(mut self) -> Self { 307 | for v in self.as_slice_mut() { 308 | *v = Complex::new(v.im, -v.re); 309 | } 310 | self 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /generic-simd/src/arch/arm/mod.rs: -------------------------------------------------------------------------------- 1 | //! arm/aarch64 vector types. 2 | 3 | #[cfg(feature = "complex")] 4 | mod complex; 5 | #[cfg(feature = "complex")] 6 | pub use complex::*; 7 | 8 | use crate::{ 9 | arch::{generic, Token}, 10 | scalar::Scalar, 11 | shim::{Shim2, Shim4, ShimToken}, 12 | vector::{width, Native, Vector}, 13 | }; 14 | 15 | #[cfg(target_arch = "aarch64")] 16 | use core::arch::aarch64::*; 17 | #[cfg(target_arch = "arm")] 18 | use core::arch::arm::*; 19 | 20 | /// NEON instruction set token. 21 | #[derive(Copy, Clone, Debug)] 22 | pub struct Neon(()); 23 | 24 | impl_token! { Neon => "neon" } 25 | 26 | impl Native for f32 { 27 | type Width = width::W4; 28 | } 29 | 30 | impl Native for f64 { 31 | type Width = width::W2; 32 | } 33 | 34 | /// A NEON vector of 2 `f32`s. 35 | #[derive(Clone, Copy, Debug)] 36 | #[repr(transparent)] 37 | #[allow(non_camel_case_types)] 38 | pub struct f32x2(float32x2_t); 39 | 40 | /// A NEON vector of 4 `f32`s. 41 | #[derive(Clone, Copy, Debug)] 42 | #[repr(transparent)] 43 | #[allow(non_camel_case_types)] 44 | pub struct f32x4(float32x4_t); 45 | 46 | /// A NEON vector of 2 `f64`s. 
47 | #[cfg(target_arch = "aarch64")] 48 | #[derive(Clone, Copy, Debug)] 49 | #[repr(transparent)] 50 | #[allow(non_camel_case_types)] 51 | pub struct f64x2(float64x2_t); 52 | 53 | impl Scalar for f32 { 54 | type Vector = ShimToken; 55 | } 56 | 57 | impl Scalar for f32 { 58 | type Vector = f32x2; 59 | } 60 | 61 | impl Scalar for f32 { 62 | type Vector = f32x4; 63 | } 64 | 65 | impl Scalar for f32 { 66 | type Vector = Shim2; 67 | } 68 | 69 | impl Scalar for f64 { 70 | type Vector = ShimToken; 71 | } 72 | 73 | #[cfg(target_arch = "arm")] 74 | impl Scalar for f64 { 75 | type Vector = Shim2, Self>; 76 | } 77 | 78 | #[cfg(target_arch = "aarch64")] 79 | impl Scalar for f64 { 80 | type Vector = f64x2; 81 | } 82 | 83 | impl Scalar for f64 { 84 | type Vector = Shim2<>::Vector, Self>; 85 | } 86 | 87 | impl Scalar for f64 { 88 | type Vector = Shim4<>::Vector, Self>; 89 | } 90 | 91 | arithmetic_ops! { 92 | feature: Neon::new_unchecked(), 93 | for f32x2: 94 | add -> (vadd_f32), 95 | sub -> (vsub_f32), 96 | mul -> (vmul_f32), 97 | div -> () 98 | } 99 | 100 | arithmetic_ops! { 101 | feature: Neon::new_unchecked(), 102 | for f32x4: 103 | add -> (vaddq_f32), 104 | sub -> (vsubq_f32), 105 | mul -> (vmulq_f32), 106 | div -> () 107 | } 108 | 109 | #[cfg(target_arch = "aarch64")] 110 | arithmetic_ops! 
{ 111 | feature: Neon::new_unchecked(), 112 | for f64x2: 113 | add -> (vaddq_f64), 114 | sub -> (vsubq_f64), 115 | mul -> (vmulq_f64), 116 | div -> () 117 | } 118 | 119 | impl core::ops::Neg for f32x2 { 120 | type Output = Self; 121 | 122 | #[inline] 123 | fn neg(mut self) -> Self { 124 | for v in self.as_slice_mut() { 125 | *v = -*v; 126 | } 127 | self 128 | } 129 | } 130 | 131 | impl core::ops::Neg for f32x4 { 132 | type Output = Self; 133 | 134 | #[inline] 135 | fn neg(mut self) -> Self { 136 | for v in self.as_slice_mut() { 137 | *v = -*v; 138 | } 139 | self 140 | } 141 | } 142 | 143 | #[cfg(target_arch = "aarch64")] 144 | impl core::ops::Neg for f64x2 { 145 | type Output = Self; 146 | 147 | #[inline] 148 | fn neg(mut self) -> Self { 149 | for v in self.as_slice_mut() { 150 | *v = -*v; 151 | } 152 | self 153 | } 154 | } 155 | 156 | as_slice! { f32x2 } 157 | as_slice! { f32x4 } 158 | 159 | #[cfg(target_arch = "aarch64")] 160 | as_slice! { f64x2 } 161 | 162 | unsafe impl Vector for f32x2 { 163 | type Scalar = f32; 164 | 165 | type Token = Neon; 166 | 167 | type Width = crate::vector::width::W2; 168 | 169 | type Underlying = float32x2_t; 170 | 171 | #[inline] 172 | fn zeroed(_: Self::Token) -> Self { 173 | // TODO use vdup 174 | Self(unsafe { core::mem::zeroed() }) 175 | } 176 | 177 | #[inline] 178 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 179 | // TODO use vdup 180 | let mut v: Self = unsafe { core::mem::zeroed() }; 181 | v[0] = from; 182 | v[1] = from; 183 | v 184 | } 185 | } 186 | 187 | unsafe impl Vector for f32x4 { 188 | type Scalar = f32; 189 | 190 | type Token = Neon; 191 | 192 | type Width = crate::vector::width::W4; 193 | 194 | type Underlying = float32x4_t; 195 | 196 | #[inline] 197 | fn zeroed(_: Self::Token) -> Self { 198 | // TODO use vdup 199 | Self(unsafe { core::mem::zeroed() }) 200 | } 201 | 202 | #[inline] 203 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 204 | // TODO use vdup 205 | let mut v: Self = unsafe { 
core::mem::zeroed() }; 206 | v[0] = from; 207 | v[1] = from; 208 | v[2] = from; 209 | v[3] = from; 210 | v 211 | } 212 | } 213 | 214 | #[cfg(target_arch = "aarch64")] 215 | unsafe impl Vector for f64x2 { 216 | type Scalar = f64; 217 | 218 | type Token = Neon; 219 | 220 | type Width = crate::vector::width::W2; 221 | 222 | type Underlying = float64x2_t; 223 | 224 | #[inline] 225 | fn zeroed(_: Self::Token) -> Self { 226 | // TODO use vdup 227 | Self(unsafe { core::mem::zeroed() }) 228 | } 229 | 230 | #[inline] 231 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 232 | // TODO use vdup 233 | let mut v: Self = unsafe { core::mem::zeroed() }; 234 | v[0] = from; 235 | v[1] = from; 236 | v 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /generic-simd/src/arch/generic.rs: -------------------------------------------------------------------------------- 1 | //! Generic vector types for any platform. 2 | 3 | use crate::{ 4 | arch::Token, 5 | scalar::Scalar, 6 | shim::{Shim2, Shim4, Shim8}, 7 | vector::{width, Native, Vector}, 8 | }; 9 | 10 | #[cfg(feature = "complex")] 11 | use num_complex::Complex; 12 | 13 | /// Generic instruction set token. 14 | #[derive(Copy, Clone, Debug)] 15 | pub struct Generic; 16 | 17 | unsafe impl Token for Generic { 18 | #[inline] 19 | fn new() -> Option { 20 | Some(Self) 21 | } 22 | 23 | #[inline] 24 | unsafe fn new_unchecked() -> Self { 25 | Self 26 | } 27 | } 28 | 29 | /// A generic vector of one `f32`. 30 | #[derive(Clone, Copy, Debug)] 31 | #[repr(transparent)] 32 | #[allow(non_camel_case_types)] 33 | pub struct f32x1(f32); 34 | 35 | /// A generic vector of one `f64`. 36 | #[derive(Clone, Copy, Debug)] 37 | #[repr(transparent)] 38 | #[allow(non_camel_case_types)] 39 | pub struct f64x1(f64); 40 | 41 | /// A generic vector of one `Complex`. 42 | /// 43 | /// Requires feature `"complex"`. 
44 | #[cfg(feature = "complex")] 45 | #[derive(Clone, Copy, Debug)] 46 | #[repr(transparent)] 47 | #[allow(non_camel_case_types)] 48 | pub struct cf32x1(Complex); 49 | 50 | /// A generic vector of one `Complex`. 51 | /// 52 | /// Requires feature `"complex"`. 53 | #[cfg(feature = "complex")] 54 | #[derive(Clone, Copy, Debug)] 55 | #[repr(transparent)] 56 | #[allow(non_camel_case_types)] 57 | pub struct cf64x1(Complex); 58 | 59 | macro_rules! implement { 60 | { 61 | $vector:ty, $scalar:ty 62 | } => { 63 | impl Scalar for $scalar { 64 | type Vector = $vector; 65 | } 66 | 67 | impl Scalar for $scalar { 68 | type Vector = Shim2<$vector, $scalar>; 69 | } 70 | 71 | impl Scalar for $scalar { 72 | type Vector = Shim4<$vector, $scalar>; 73 | } 74 | 75 | impl Scalar for $scalar { 76 | type Vector = Shim8<$vector, $scalar>; 77 | } 78 | 79 | impl Native for $scalar { 80 | type Width = width::W1; 81 | } 82 | } 83 | } 84 | 85 | implement! { f32x1, f32 } 86 | implement! { f64x1, f64 } 87 | 88 | #[cfg(feature = "complex")] 89 | implement! { cf32x1, Complex } 90 | #[cfg(feature = "complex")] 91 | implement! { cf64x1, Complex } 92 | 93 | macro_rules! implement { 94 | { 95 | $vector:ty, $scalar:ty 96 | } => { 97 | arithmetic_ops! { 98 | feature: Generic::new_unchecked(), 99 | for $vector: 100 | add -> (), 101 | sub -> (), 102 | mul -> (), 103 | div -> () 104 | } 105 | 106 | impl core::ops::Neg for $vector { 107 | type Output = Self; 108 | 109 | #[inline] 110 | fn neg(self) -> Self { 111 | Self(-self.0) 112 | } 113 | } 114 | 115 | as_slice! 
{ $vector } 116 | 117 | unsafe impl Vector for $vector { 118 | type Scalar = $scalar; 119 | 120 | type Token = Generic; 121 | 122 | type Width = crate::vector::width::W1; 123 | 124 | type Underlying = $scalar; 125 | 126 | #[inline] 127 | fn zeroed(_: Self::Token) -> Self { 128 | Self(<$scalar>::default()) 129 | } 130 | 131 | #[inline] 132 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 133 | Self(from) 134 | } 135 | } 136 | } 137 | } 138 | 139 | implement! { f32x1, f32 } 140 | implement! { f64x1, f64 } 141 | 142 | #[cfg(feature = "complex")] 143 | implement! { cf32x1, Complex } 144 | #[cfg(feature = "complex")] 145 | implement! { cf64x1, Complex } 146 | 147 | #[cfg(feature = "complex")] 148 | macro_rules! implement_complex { 149 | { 150 | $vector:ty, $real:ty 151 | } => { 152 | impl crate::vector::Complex for $vector { 153 | type RealScalar = $real; 154 | 155 | #[inline] 156 | fn conj(self) -> Self { 157 | Self(Complex::new(self.0.re, -self.0.im)) 158 | } 159 | 160 | #[inline] 161 | fn mul_i(self) -> Self { 162 | Self(Complex::new(-self.0.im, self.0.re)) 163 | } 164 | 165 | #[inline] 166 | fn mul_neg_i(self) -> Self { 167 | Self(Complex::new(self.0.im, -self.0.re)) 168 | } 169 | } 170 | } 171 | } 172 | 173 | #[cfg(feature = "complex")] 174 | implement_complex! { cf32x1, f32 } 175 | #[cfg(feature = "complex")] 176 | implement_complex! { cf64x1, f64 } 177 | -------------------------------------------------------------------------------- /generic-simd/src/arch/mod.rs: -------------------------------------------------------------------------------- 1 | //! Architecture-specific types. 2 | 3 | /// Indicates support for a particular CPU feature. 4 | /// 5 | /// # Safety 6 | /// Implementing `Token` for a type indicates that the type is only constructible when the 7 | /// associated CPU features are supported. 8 | pub unsafe trait Token: Copy + From + Into { 9 | /// Detects whether the required CPU features are supported. 
10 | fn new() -> Option; 11 | 12 | /// Creates the token without detecting if the CPU features are supported. 13 | /// 14 | /// # Safety 15 | /// Calling this function causes undefined behavior if the required CPU features are not 16 | /// supported. 17 | unsafe fn new_unchecked() -> Self; 18 | } 19 | 20 | #[allow(unused_macros)] 21 | macro_rules! impl_token { 22 | { $name:ident => $($features:tt),+ } => { 23 | unsafe impl $crate::arch::Token for $name { 24 | #[inline] 25 | fn new() -> Option { 26 | if multiversion::are_cpu_features_detected!($($features),*) { 27 | Some(Self(())) 28 | } else { 29 | None 30 | } 31 | } 32 | 33 | #[inline] 34 | unsafe fn new_unchecked() -> Self { 35 | Self(()) 36 | } 37 | } 38 | 39 | impl core::convert::From<$name> for $crate::arch::generic::Generic { 40 | #[inline] 41 | fn from(_: $name) -> Self { 42 | Self 43 | } 44 | } 45 | } 46 | } 47 | 48 | pub mod generic; 49 | 50 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 51 | pub mod x86; 52 | 53 | #[cfg(all(feature = "nightly", target_arch = "aarch64"))] 54 | pub mod arm; 55 | 56 | #[cfg(all( 57 | target_arch = "wasm32", 58 | target_feature = "simd128", 59 | feature = "nightly", 60 | ))] 61 | pub mod wasm; 62 | 63 | /// Invokes a macro with the supported token types. 64 | /// 65 | /// Invokes the macro with the list of [`Token`] types as arguments in priority order, delimited 66 | /// by commas (including a trailing comma). 67 | /// 68 | /// The following example creates a `SupportedScalar` supertrait that implements [`ScalarExt`] for 69 | /// each token: 70 | /// ``` 71 | /// use generic_simd::{call_macro_with_tokens, scalar::ScalarExt}; 72 | /// 73 | /// macro_rules! 
supported_scalars { 74 | /// { $($token:ty,)+ } => { 75 | /// trait SupportedScalar: Copy $(+ ScalarExt<$token>)* {} 76 | /// } 77 | /// } 78 | /// 79 | /// call_macro_with_tokens!{ supported_scalars } 80 | /// ``` 81 | /// 82 | /// [`Token`]: arch/trait.Token.html 83 | /// [`ScalarExt`]: scalar/trait.ScalarExt.html 84 | #[macro_export] 85 | macro_rules! call_macro_with_tokens { 86 | { $mac:ident } => { $crate::call_macro_with_tokens_impl! { $mac } } 87 | } 88 | 89 | #[cfg(not(any( 90 | target_arch = "x86", 91 | target_arch = "x86_64", 92 | all(target_arch = "aarch64", feature = "nightly"), 93 | all( 94 | target_arch = "wasm32", 95 | target_feature = "simd128", 96 | feature = "nightly", 97 | ), 98 | )))] 99 | #[doc(hidden)] 100 | #[macro_export] 101 | macro_rules! call_macro_with_tokens_impl { 102 | { $mac:ident } => { 103 | $mac! { 104 | $crate::arch::generic::Generic, 105 | } 106 | } 107 | } 108 | 109 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 110 | #[doc(hidden)] 111 | #[macro_export] 112 | macro_rules! call_macro_with_tokens_impl { 113 | { $mac:ident } => { 114 | $mac! { 115 | $crate::arch::x86::Avx, 116 | $crate::arch::x86::Sse, 117 | $crate::arch::generic::Generic, 118 | } 119 | } 120 | } 121 | 122 | #[cfg(all(feature = "nightly", target_arch = "aarch64"))] 123 | #[doc(hidden)] 124 | #[macro_export] 125 | macro_rules! call_macro_with_tokens_impl { 126 | { $mac:ident } => { 127 | $mac! { 128 | $crate::arch::arm::Neon, 129 | $crate::arch::generic::Generic, 130 | } 131 | } 132 | } 133 | 134 | #[cfg(all( 135 | target_arch = "wasm32", 136 | target_feature = "simd128", 137 | feature = "nightly", 138 | ))] 139 | #[doc(hidden)] 140 | #[macro_export] 141 | macro_rules! call_macro_with_tokens_impl { 142 | { $mac:ident } => { 143 | $mac! 
{ 144 | $crate::arch::wasm::Simd128, 145 | $crate::arch::generic::Generic, 146 | } 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /generic-simd/src/arch/wasm/complex.rs: -------------------------------------------------------------------------------- 1 | use core::arch::wasm32::*; 2 | 3 | use crate::{ 4 | arch::{generic, wasm::*, Token}, 5 | scalar::Scalar, 6 | shim::{Shim2, Shim4, Shim8, ShimToken}, 7 | vector::{width, Native, Vector}, 8 | }; 9 | use num_complex::Complex; 10 | 11 | impl Native for Complex { 12 | type Width = width::W2; 13 | } 14 | 15 | impl Native for Complex { 16 | type Width = width::W1; 17 | } 18 | 19 | /// A simd128 vector of `Complex`s. 20 | /// 21 | /// Requires feature `"complex"`. 22 | #[derive(Clone, Copy, Debug)] 23 | #[repr(transparent)] 24 | #[allow(non_camel_case_types)] 25 | pub struct cf32x2(v128); 26 | 27 | /// A simd128 vector of `Complex`s. 28 | /// 29 | /// Requires feature `"complex"`. 30 | #[derive(Clone, Copy, Debug)] 31 | #[repr(transparent)] 32 | #[allow(non_camel_case_types)] 33 | pub struct cf64x1(v128); 34 | 35 | impl Scalar for Complex { 36 | type Vector = ShimToken; 37 | } 38 | 39 | impl Scalar for Complex { 40 | type Vector = cf32x2; 41 | } 42 | 43 | impl Scalar for Complex { 44 | type Vector = Shim2>; 45 | } 46 | 47 | impl Scalar for Complex { 48 | type Vector = Shim4>; 49 | } 50 | 51 | impl Scalar for Complex { 52 | type Vector = cf64x1; 53 | } 54 | 55 | impl Scalar for Complex { 56 | type Vector = Shim2>; 57 | } 58 | 59 | impl Scalar for Complex { 60 | type Vector = Shim4>; 61 | } 62 | 63 | impl Scalar for Complex { 64 | type Vector = Shim8>; 65 | } 66 | 67 | as_slice! { cf32x2 } 68 | as_slice! { cf64x1 } 69 | 70 | unsafe impl Vector for cf32x2 { 71 | type Scalar = Complex; 72 | type Token = Simd128; 73 | type Width = width::W2; 74 | type Underlying = v128; 75 | 76 | #[inline] 77 | fn zeroed(_: Self::Token) -> Self { 78 | Self(unsafe { f32x4_splat(0.) 
}) 79 | } 80 | 81 | #[inline] 82 | fn splat(_: Self::Token, value: Self::Scalar) -> Self { 83 | Self(unsafe { f32x4_const(value.re, value.im, value.re, value.im) }) 84 | } 85 | } 86 | 87 | unsafe impl Vector for cf64x1 { 88 | type Scalar = Complex; 89 | type Token = Simd128; 90 | type Width = width::W1; 91 | type Underlying = v128; 92 | 93 | #[inline] 94 | fn zeroed(_: Self::Token) -> Self { 95 | Self(unsafe { f64x2_splat(0.) }) 96 | } 97 | 98 | #[inline] 99 | fn splat(_: Self::Token, value: Self::Scalar) -> Self { 100 | Self(unsafe { f64x2_const(value.re, value.im) }) 101 | } 102 | } 103 | 104 | arithmetic_ops! { 105 | feature: Simd128::new_unchecked(), 106 | for cf32x2: 107 | add -> (f32x4_add), 108 | sub -> (f32x4_sub), 109 | mul -> (cf32x2_mul), 110 | div -> (cf32x2_div) 111 | } 112 | 113 | arithmetic_ops! { 114 | feature: Simd128::new_unchecked(), 115 | for cf64x1: 116 | add -> (f64x2_add), 117 | sub -> (f64x2_sub), 118 | mul -> (cf64x1_mul), 119 | div -> (cf64x1_div) 120 | } 121 | 122 | #[target_feature(enable = "simd128")] 123 | #[inline] 124 | unsafe fn f32x4_ldup(x: v128) -> v128 { 125 | v32x4_shuffle::<0, 0, 2, 2>(x, x) 126 | } 127 | 128 | #[target_feature(enable = "simd128")] 129 | #[inline] 130 | unsafe fn f32x4_hdup(x: v128) -> v128 { 131 | v32x4_shuffle::<1, 1, 3, 3>(x, x) 132 | } 133 | 134 | #[target_feature(enable = "simd128")] 135 | #[inline] 136 | unsafe fn f64x2_ldup(x: v128) -> v128 { 137 | v64x2_shuffle::<0, 0>(x, x) 138 | } 139 | 140 | #[target_feature(enable = "simd128")] 141 | #[inline] 142 | unsafe fn f64x2_hdup(x: v128) -> v128 { 143 | v64x2_shuffle::<1, 1>(x, x) 144 | } 145 | 146 | #[target_feature(enable = "simd128")] 147 | #[inline] 148 | unsafe fn f32x4_addsub(a: v128, b: v128) -> v128 { 149 | let add = f32x4_add(a, b); 150 | let sub = f32x4_sub(a, b); 151 | v32x4_shuffle::<0, 5, 2, 7>(sub, add) 152 | } 153 | 154 | #[target_feature(enable = "simd128")] 155 | #[inline] 156 | unsafe fn f64x2_addsub(a: v128, b: v128) -> v128 { 157 | let 
add = f64x2_add(a, b); 158 | let sub = f64x2_sub(a, b); 159 | v64x2_shuffle::<0, 3>(sub, add) 160 | } 161 | 162 | #[target_feature(enable = "simd128")] 163 | #[inline] 164 | unsafe fn cf32x2_mul(a: v128, b: v128) -> v128 { 165 | let re = f32x4_ldup(a); 166 | let im = f32x4_hdup(a); 167 | let sh = v32x4_shuffle::<1, 0, 3, 2>(b, b); 168 | f32x4_addsub(f32x4_mul(re, b), f32x4_mul(im, sh)) 169 | } 170 | 171 | #[target_feature(enable = "simd128")] 172 | #[inline] 173 | unsafe fn cf64x1_mul(a: v128, b: v128) -> v128 { 174 | let re = f64x2_ldup(a); 175 | let im = f64x2_hdup(a); 176 | let sh = v64x2_shuffle::<1, 0>(b, b); 177 | f64x2_addsub(f64x2_mul(re, b), f64x2_mul(im, sh)) 178 | } 179 | 180 | #[target_feature(enable = "simd128")] 181 | #[inline] 182 | unsafe fn cf32x2_div(a: v128, b: v128) -> v128 { 183 | let b_re = f32x4_ldup(b); 184 | let b_im = f32x4_hdup(b); 185 | let a_flip = v32x4_shuffle::<1, 0, 3, 2>(a, a); 186 | let norm_sqr = f32x4_add(f32x4_mul(b_re, b_re), f32x4_mul(b_im, b_im)); 187 | f32x4_div( 188 | f32x4_addsub(f32x4_mul(a, b_re), f32x4_neg(f32x4_mul(a_flip, b_im))), 189 | norm_sqr, 190 | ) 191 | } 192 | 193 | #[target_feature(enable = "simd128")] 194 | #[inline] 195 | unsafe fn cf64x1_div(a: v128, b: v128) -> v128 { 196 | let b_re = f64x2_ldup(b); 197 | let b_im = f64x2_hdup(b); 198 | let a_flip = v64x2_shuffle::<1, 0>(a, a); 199 | let norm_sqr = f64x2_add(f64x2_mul(b_re, b_re), f64x2_mul(b_im, b_im)); 200 | f64x2_div( 201 | f64x2_addsub(f64x2_mul(a, b_re), f64x2_neg(f64x2_mul(a_flip, b_im))), 202 | norm_sqr, 203 | ) 204 | } 205 | 206 | impl core::ops::Neg for cf32x2 { 207 | type Output = Self; 208 | 209 | #[inline] 210 | fn neg(self) -> Self { 211 | Self(unsafe { f32x4_neg(self.0) }) 212 | } 213 | } 214 | 215 | impl core::ops::Neg for cf64x1 { 216 | type Output = Self; 217 | 218 | #[inline] 219 | fn neg(self) -> Self { 220 | Self(unsafe { f64x2_neg(self.0) }) 221 | } 222 | } 223 | 
-------------------------------------------------------------------------------- /generic-simd/src/arch/wasm/mod.rs: -------------------------------------------------------------------------------- 1 | //! WebAssembly vector types. 2 | 3 | #[cfg(feature = "complex")] 4 | mod complex; 5 | #[cfg(feature = "complex")] 6 | pub use complex::*; 7 | 8 | use crate::{ 9 | arch::{generic, Token}, 10 | scalar::Scalar, 11 | shim::{Shim2, Shim4, ShimToken}, 12 | vector::{width, Native, Vector}, 13 | }; 14 | use core::arch::wasm32::*; 15 | 16 | /// simd128 instruction set token. 17 | #[derive(Copy, Clone, Debug)] 18 | pub struct Simd128(()); 19 | 20 | impl_token! { Simd128 => "simd128" } 21 | 22 | impl Native for f32 { 23 | type Width = width::W4; 24 | } 25 | 26 | impl Native for f64 { 27 | type Width = width::W2; 28 | } 29 | 30 | /// A simd128 vector of `f32`s. 31 | #[derive(Clone, Copy, Debug)] 32 | #[repr(transparent)] 33 | #[allow(non_camel_case_types)] 34 | pub struct f32x4(v128); 35 | 36 | /// A simd128 vector of `f64`s. 37 | #[derive(Clone, Copy, Debug)] 38 | #[repr(transparent)] 39 | #[allow(non_camel_case_types)] 40 | pub struct f64x2(v128); 41 | 42 | impl Scalar for f32 { 43 | type Vector = ShimToken; 44 | } 45 | 46 | impl Scalar for f32 { 47 | type Vector = ShimToken, Self, Simd128>; 48 | } 49 | 50 | impl Scalar for f32 { 51 | type Vector = f32x4; 52 | } 53 | 54 | impl Scalar for f32 { 55 | type Vector = Shim2; 56 | } 57 | 58 | impl Scalar for f64 { 59 | type Vector = ShimToken; 60 | } 61 | 62 | impl Scalar for f64 { 63 | type Vector = f64x2; 64 | } 65 | 66 | impl Scalar for f64 { 67 | type Vector = Shim2; 68 | } 69 | 70 | impl Scalar for f64 { 71 | type Vector = Shim4; 72 | } 73 | 74 | as_slice! { f32x4 } 75 | as_slice! 
{ f64x2 } 76 | 77 | unsafe impl Vector for f32x4 { 78 | type Scalar = f32; 79 | type Token = Simd128; 80 | type Width = width::W4; 81 | type Underlying = v128; 82 | 83 | #[inline] 84 | fn zeroed(_: Self::Token) -> Self { 85 | Self(unsafe { f32x4_splat(0.) }) 86 | } 87 | 88 | #[inline] 89 | fn splat(_: Self::Token, value: Self::Scalar) -> Self { 90 | Self(unsafe { f32x4_splat(value) }) 91 | } 92 | } 93 | 94 | unsafe impl Vector for f64x2 { 95 | type Scalar = f64; 96 | type Token = Simd128; 97 | type Width = width::W2; 98 | type Underlying = v128; 99 | 100 | #[inline] 101 | fn zeroed(_: Self::Token) -> Self { 102 | Self(unsafe { f64x2_splat(0.) }) 103 | } 104 | 105 | #[inline] 106 | fn splat(_: Self::Token, value: Self::Scalar) -> Self { 107 | Self(unsafe { f64x2_splat(value) }) 108 | } 109 | } 110 | 111 | arithmetic_ops! { 112 | feature: Simd128::new_unchecked(), 113 | for f32x4: 114 | add -> (f32x4_add), 115 | sub -> (f32x4_sub), 116 | mul -> (f32x4_mul), 117 | div -> (f32x4_div) 118 | } 119 | 120 | arithmetic_ops! 
{ 121 | feature: Simd128::new_unchecked(), 122 | for f64x2: 123 | add -> (f64x2_add), 124 | sub -> (f64x2_sub), 125 | mul -> (f64x2_mul), 126 | div -> (f64x2_div) 127 | } 128 | 129 | impl core::ops::Neg for f32x4 { 130 | type Output = Self; 131 | 132 | #[inline] 133 | fn neg(self) -> Self { 134 | Self(unsafe { f32x4_neg(self.0) }) 135 | } 136 | } 137 | 138 | impl core::ops::Neg for f64x2 { 139 | type Output = Self; 140 | 141 | #[inline] 142 | fn neg(self) -> Self { 143 | Self(unsafe { f64x2_neg(self.0) }) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /generic-simd/src/arch/x86/complex.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_arch = "x86")] 2 | use core::arch::x86::*; 3 | #[cfg(target_arch = "x86_64")] 4 | use core::arch::x86_64::*; 5 | 6 | use crate::{ 7 | arch::{generic, x86::*, Token}, 8 | scalar::Scalar, 9 | shim::{Shim2, Shim4, Shim8, ShimToken}, 10 | vector::{width, Native, Vector}, 11 | }; 12 | use num_complex::Complex; 13 | 14 | impl Native for Complex { 15 | type Width = width::W2; 16 | } 17 | 18 | impl Native for Complex { 19 | type Width = width::W1; 20 | } 21 | 22 | impl Native for Complex { 23 | type Width = width::W4; 24 | } 25 | 26 | impl Native for Complex { 27 | type Width = width::W2; 28 | } 29 | 30 | /// An SSE vector of `Complex`s. 31 | /// 32 | /// Requires feature `"complex"`. 33 | #[derive(Clone, Copy, Debug)] 34 | #[repr(transparent)] 35 | #[allow(non_camel_case_types)] 36 | pub struct cf32x2(__m128); 37 | 38 | /// An SSE vector of `Complex`s. 39 | /// 40 | /// Requires feature `"complex"`. 41 | #[derive(Clone, Copy, Debug)] 42 | #[repr(transparent)] 43 | #[allow(non_camel_case_types)] 44 | pub struct cf64x1(__m128d); 45 | 46 | /// An AVX vector of `Complex`s. 47 | /// 48 | /// Requires feature `"complex"`. 
49 | #[derive(Clone, Copy, Debug)] 50 | #[repr(transparent)] 51 | #[allow(non_camel_case_types)] 52 | pub struct cf32x4(__m256); 53 | 54 | /// An AVX vector of `Complex`s. 55 | /// 56 | /// Requires feature `"complex"`. 57 | #[derive(Clone, Copy, Debug)] 58 | #[repr(transparent)] 59 | #[allow(non_camel_case_types)] 60 | pub struct cf64x2(__m256d); 61 | 62 | impl Scalar for Complex { 63 | type Vector = ShimToken; 64 | } 65 | 66 | impl Scalar for Complex { 67 | type Vector = cf32x2; 68 | } 69 | 70 | impl Scalar for Complex { 71 | type Vector = Shim2>; 72 | } 73 | 74 | impl Scalar for Complex { 75 | type Vector = Shim4>; 76 | } 77 | 78 | impl Scalar for Complex { 79 | type Vector = cf64x1; 80 | } 81 | 82 | impl Scalar for Complex { 83 | type Vector = Shim2>; 84 | } 85 | 86 | impl Scalar for Complex { 87 | type Vector = Shim4>; 88 | } 89 | 90 | impl Scalar for Complex { 91 | type Vector = Shim8; 92 | } 93 | 94 | impl Scalar for Complex { 95 | type Vector = ShimToken; 96 | } 97 | 98 | impl Scalar for Complex { 99 | type Vector = ShimToken; 100 | } 101 | 102 | impl Scalar for Complex { 103 | type Vector = cf32x4; 104 | } 105 | 106 | impl Scalar for Complex { 107 | type Vector = Shim2>; 108 | } 109 | 110 | impl Scalar for Complex { 111 | type Vector = ShimToken; 112 | } 113 | 114 | impl Scalar for Complex { 115 | type Vector = cf64x2; 116 | } 117 | 118 | impl Scalar for Complex { 119 | type Vector = Shim2>; 120 | } 121 | 122 | impl Scalar for Complex { 123 | type Vector = Shim4>; 124 | } 125 | 126 | arithmetic_ops! { 127 | feature: Sse::new_unchecked(), 128 | for cf32x2: 129 | add -> (_mm_add_ps), 130 | sub -> (_mm_sub_ps), 131 | mul -> (mul_cf32x2), 132 | div -> (div_cf32x2) 133 | } 134 | 135 | arithmetic_ops! { 136 | feature: Sse::new_unchecked(), 137 | for cf64x1: 138 | add -> (_mm_add_pd), 139 | sub -> (_mm_sub_pd), 140 | mul -> (mul_cf64x1), 141 | div -> (div_cf64x1) 142 | } 143 | 144 | arithmetic_ops! 
{ 145 | feature: Avx::new_unchecked(), 146 | for cf32x4: 147 | add -> (_mm256_add_ps), 148 | sub -> (_mm256_sub_ps), 149 | mul -> (mul_cf32x4), 150 | div -> (div_cf32x4) 151 | } 152 | 153 | arithmetic_ops! { 154 | feature: Avx::new_unchecked(), 155 | for cf64x2: 156 | add -> (_mm256_add_pd), 157 | sub -> (_mm256_sub_pd), 158 | mul -> (mul_cf64x2), 159 | div -> (div_cf64x2) 160 | } 161 | 162 | #[target_feature(enable = "sse3")] 163 | #[inline] 164 | unsafe fn mul_cf32x2(a: __m128, b: __m128) -> __m128 { 165 | let re = _mm_moveldup_ps(a); 166 | let im = _mm_movehdup_ps(a); 167 | let sh = _mm_shuffle_ps(b, b, 0xb1); 168 | _mm_addsub_ps(_mm_mul_ps(re, b), _mm_mul_ps(im, sh)) 169 | } 170 | 171 | #[target_feature(enable = "sse3")] 172 | #[inline] 173 | unsafe fn mul_cf64x1(a: __m128d, b: __m128d) -> __m128d { 174 | let re = _mm_shuffle_pd(a, a, 0x00); 175 | let im = _mm_shuffle_pd(a, a, 0x03); 176 | let sh = _mm_shuffle_pd(b, b, 0x01); 177 | _mm_addsub_pd(_mm_mul_pd(re, b), _mm_mul_pd(im, sh)) 178 | } 179 | 180 | // [(a.re * b.re + a.im * b.im) / (b.re * b.re + b.im * b.im)] + i [(a.im * b.re - a.re * b.im) / (b.re * b.re + b.im * b.im)] 181 | #[target_feature(enable = "sse3")] 182 | #[inline] 183 | unsafe fn div_cf32x2(a: __m128, b: __m128) -> __m128 { 184 | let b_re = _mm_moveldup_ps(b); 185 | let b_im = _mm_movehdup_ps(b); 186 | let a_flip = _mm_shuffle_ps(a, a, 0xb1); 187 | let norm_sqr = _mm_add_ps(_mm_mul_ps(b_re, b_re), _mm_mul_ps(b_im, b_im)); 188 | _mm_div_ps( 189 | _mm_addsub_ps( 190 | _mm_mul_ps(a, b_re), 191 | _mm_xor_ps(_mm_mul_ps(a_flip, b_im), _mm_set1_ps(-0.)), 192 | ), 193 | norm_sqr, 194 | ) 195 | } 196 | 197 | #[target_feature(enable = "sse3")] 198 | #[inline] 199 | unsafe fn div_cf64x1(a: __m128d, b: __m128d) -> __m128d { 200 | let b_re = _mm_shuffle_pd(b, b, 0x00); 201 | let b_im = _mm_shuffle_pd(b, b, 0x03); 202 | let a_flip = _mm_shuffle_pd(a, a, 0x01); 203 | let norm_sqr = _mm_add_pd(_mm_mul_pd(b_re, b_re), _mm_mul_pd(b_im, b_im)); 204 | 
_mm_div_pd( 205 | _mm_addsub_pd( 206 | _mm_mul_pd(a, b_re), 207 | _mm_xor_pd(_mm_mul_pd(a_flip, b_im), _mm_set1_pd(-0.)), 208 | ), 209 | norm_sqr, 210 | ) 211 | } 212 | 213 | #[target_feature(enable = "avx")] 214 | #[inline] 215 | unsafe fn mul_cf32x4(a: __m256, b: __m256) -> __m256 { 216 | let re = _mm256_moveldup_ps(a); 217 | let im = _mm256_movehdup_ps(a); 218 | let sh = _mm256_shuffle_ps(b, b, 0xb1); 219 | _mm256_addsub_ps(_mm256_mul_ps(re, b), _mm256_mul_ps(im, sh)) 220 | } 221 | 222 | #[target_feature(enable = "avx")] 223 | #[inline] 224 | unsafe fn mul_cf64x2(a: __m256d, b: __m256d) -> __m256d { 225 | let re = _mm256_unpacklo_pd(a, a); 226 | let im = _mm256_unpackhi_pd(a, a); 227 | let sh = _mm256_shuffle_pd(b, b, 0x5); 228 | _mm256_addsub_pd(_mm256_mul_pd(re, b), _mm256_mul_pd(im, sh)) 229 | } 230 | 231 | // [(a.re * b.re + a.im * b.im) / (b.re * b.re + b.im * b.im)] + i [(a.im * b.re - a.re * b.im) / (b.re * b.re + b.im * b.im)] 232 | #[target_feature(enable = "avx")] 233 | #[inline] 234 | unsafe fn div_cf32x4(a: __m256, b: __m256) -> __m256 { 235 | let b_re = _mm256_moveldup_ps(b); 236 | let b_im = _mm256_movehdup_ps(b); 237 | let a_flip = _mm256_shuffle_ps(a, a, 0xb1); 238 | let norm_sqr = _mm256_add_ps(_mm256_mul_ps(b_re, b_re), _mm256_mul_ps(b_im, b_im)); 239 | _mm256_div_ps( 240 | _mm256_addsub_ps( 241 | _mm256_mul_ps(a, b_re), 242 | _mm256_xor_ps(_mm256_mul_ps(a_flip, b_im), _mm256_set1_ps(-0.)), 243 | ), 244 | norm_sqr, 245 | ) 246 | } 247 | 248 | #[target_feature(enable = "avx")] 249 | #[inline] 250 | unsafe fn div_cf64x2(a: __m256d, b: __m256d) -> __m256d { 251 | let b_re = _mm256_unpacklo_pd(b, b); 252 | let b_im = _mm256_unpackhi_pd(b, b); 253 | let a_flip = _mm256_shuffle_pd(a, a, 0x5); 254 | let norm_sqr = _mm256_add_pd(_mm256_mul_pd(b_re, b_re), _mm256_mul_pd(b_im, b_im)); 255 | _mm256_div_pd( 256 | _mm256_addsub_pd( 257 | _mm256_mul_pd(a, b_re), 258 | _mm256_xor_pd(_mm256_mul_pd(a_flip, b_im), _mm256_set1_pd(-0.)), 259 | ), 260 | norm_sqr, 
261 | ) 262 | } 263 | 264 | impl core::ops::Neg for cf32x2 { 265 | type Output = Self; 266 | 267 | #[inline] 268 | fn neg(self) -> Self { 269 | Self(unsafe { _mm_xor_ps(self.0, _mm_set1_ps(-0.)) }) 270 | } 271 | } 272 | 273 | impl core::ops::Neg for cf64x1 { 274 | type Output = Self; 275 | 276 | #[inline] 277 | fn neg(self) -> Self { 278 | Self(unsafe { _mm_xor_pd(self.0, _mm_set1_pd(-0.)) }) 279 | } 280 | } 281 | 282 | impl core::ops::Neg for cf32x4 { 283 | type Output = Self; 284 | 285 | #[inline] 286 | fn neg(self) -> Self { 287 | Self(unsafe { _mm256_xor_ps(self.0, _mm256_set1_ps(-0.)) }) 288 | } 289 | } 290 | 291 | impl core::ops::Neg for cf64x2 { 292 | type Output = Self; 293 | 294 | #[inline] 295 | fn neg(self) -> Self { 296 | Self(unsafe { _mm256_xor_pd(self.0, _mm256_set1_pd(-0.)) }) 297 | } 298 | } 299 | 300 | as_slice! { cf32x2 } 301 | as_slice! { cf32x4 } 302 | as_slice! { cf64x1 } 303 | as_slice! { cf64x2 } 304 | 305 | unsafe impl Vector for cf32x2 { 306 | type Scalar = Complex; 307 | 308 | type Token = Sse; 309 | 310 | type Width = crate::vector::width::W2; 311 | 312 | type Underlying = __m128; 313 | 314 | #[inline] 315 | fn zeroed(_: Self::Token) -> Self { 316 | Self(unsafe { _mm_setzero_ps() }) 317 | } 318 | 319 | #[inline] 320 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 321 | Self(unsafe { _mm_set_ps(from.im, from.re, from.im, from.re) }) 322 | } 323 | } 324 | 325 | unsafe impl Vector for cf64x1 { 326 | type Scalar = Complex; 327 | 328 | type Token = Sse; 329 | 330 | type Width = crate::vector::width::W1; 331 | 332 | type Underlying = __m128d; 333 | 334 | #[inline] 335 | fn zeroed(_: Self::Token) -> Self { 336 | Self(unsafe { _mm_setzero_pd() }) 337 | } 338 | 339 | #[inline] 340 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 341 | Self(unsafe { _mm_set_pd(from.im, from.re) }) 342 | } 343 | } 344 | 345 | unsafe impl Vector for cf32x4 { 346 | type Scalar = Complex; 347 | 348 | type Token = Avx; 349 | 350 | type Width = 
crate::vector::width::W4; 351 | 352 | type Underlying = __m256; 353 | 354 | #[inline] 355 | fn zeroed(_: Self::Token) -> Self { 356 | Self(unsafe { _mm256_setzero_ps() }) 357 | } 358 | 359 | #[inline] 360 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 361 | unsafe { 362 | Self(_mm256_setr_ps( 363 | from.re, from.im, from.re, from.im, from.re, from.im, from.re, from.im, 364 | )) 365 | } 366 | } 367 | } 368 | 369 | unsafe impl Vector for cf64x2 { 370 | type Scalar = Complex; 371 | 372 | type Token = Avx; 373 | 374 | type Width = crate::vector::width::W2; 375 | 376 | type Underlying = __m256d; 377 | 378 | #[inline] 379 | fn zeroed(_: Self::Token) -> Self { 380 | Self(unsafe { _mm256_setzero_pd() }) 381 | } 382 | 383 | #[inline] 384 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 385 | Self(unsafe { _mm256_setr_pd(from.re, from.im, from.re, from.im) }) 386 | } 387 | } 388 | 389 | impl crate::vector::Complex for cf32x2 { 390 | type RealScalar = f32; 391 | 392 | #[inline] 393 | fn conj(self) -> Self { 394 | Self(unsafe { _mm_xor_ps(self.0, _mm_set_ps(-0., 0., -0., 0.)) }) 395 | } 396 | 397 | #[inline] 398 | fn mul_i(self) -> Self { 399 | Self(unsafe { _mm_addsub_ps(_mm_setzero_ps(), _mm_shuffle_ps(self.0, self.0, 0xb1)) }) 400 | } 401 | 402 | #[inline] 403 | fn mul_neg_i(self) -> Self { 404 | unsafe { 405 | let neg = _mm_addsub_ps(_mm_setzero_ps(), self.0); 406 | Self(_mm_shuffle_ps(neg, neg, 0xb1)) 407 | } 408 | } 409 | } 410 | 411 | impl crate::vector::Complex for cf64x1 { 412 | type RealScalar = f64; 413 | 414 | #[inline] 415 | fn conj(self) -> Self { 416 | Self(unsafe { _mm_xor_pd(self.0, _mm_set_pd(-0., 0.)) }) 417 | } 418 | 419 | #[inline] 420 | fn mul_i(self) -> Self { 421 | Self(unsafe { _mm_addsub_pd(_mm_setzero_pd(), _mm_shuffle_pd(self.0, self.0, 0x1)) }) 422 | } 423 | 424 | #[inline] 425 | fn mul_neg_i(self) -> Self { 426 | unsafe { 427 | let neg = _mm_addsub_pd(_mm_setzero_pd(), self.0); 428 | Self(_mm_shuffle_pd(neg, neg, 0x1)) 429 | } 
430 | } 431 | } 432 | 433 | impl crate::vector::Complex for cf32x4 { 434 | type RealScalar = f32; 435 | 436 | #[inline] 437 | fn conj(self) -> Self { 438 | Self(unsafe { _mm256_xor_ps(self.0, _mm256_set_ps(-0., 0., -0., 0., -0., 0., -0., 0.)) }) 439 | } 440 | 441 | #[inline] 442 | fn mul_i(self) -> Self { 443 | Self(unsafe { 444 | _mm256_addsub_ps(_mm256_setzero_ps(), _mm256_shuffle_ps(self.0, self.0, 0xb1)) 445 | }) 446 | } 447 | 448 | #[inline] 449 | fn mul_neg_i(self) -> Self { 450 | unsafe { 451 | let neg = _mm256_addsub_ps(_mm256_setzero_ps(), self.0); 452 | Self(_mm256_shuffle_ps(neg, neg, 0xb1)) 453 | } 454 | } 455 | } 456 | 457 | impl crate::vector::Complex for cf64x2 { 458 | type RealScalar = f64; 459 | 460 | #[inline] 461 | fn conj(self) -> Self { 462 | Self(unsafe { _mm256_xor_pd(self.0, _mm256_set_pd(-0., 0., -0., 0.)) }) 463 | } 464 | 465 | #[inline] 466 | fn mul_i(self) -> Self { 467 | Self(unsafe { 468 | _mm256_addsub_pd(_mm256_setzero_pd(), _mm256_shuffle_pd(self.0, self.0, 0x5)) 469 | }) 470 | } 471 | 472 | #[inline] 473 | fn mul_neg_i(self) -> Self { 474 | unsafe { 475 | let neg = _mm256_addsub_pd(_mm256_setzero_pd(), self.0); 476 | Self(_mm256_shuffle_pd(neg, neg, 0x5)) 477 | } 478 | } 479 | } 480 | -------------------------------------------------------------------------------- /generic-simd/src/arch/x86/mod.rs: -------------------------------------------------------------------------------- 1 | //! x86/x86-64 vector types. 2 | 3 | #[cfg(feature = "complex")] 4 | mod complex; 5 | #[cfg(feature = "complex")] 6 | pub use complex::*; 7 | 8 | use crate::{ 9 | arch::{generic, Token}, 10 | scalar::Scalar, 11 | shim::{Shim2, Shim4, ShimToken}, 12 | vector::{width, Native, Vector}, 13 | }; 14 | 15 | #[cfg(target_arch = "x86")] 16 | use core::arch::x86::*; 17 | #[cfg(target_arch = "x86_64")] 18 | use core::arch::x86_64::*; 19 | 20 | /// SSE4.1 instruction set token. 
21 | #[derive(Copy, Clone, Debug)] 22 | pub struct Sse(()); 23 | 24 | /// AVX instruction set token. 25 | #[derive(Copy, Clone, Debug)] 26 | pub struct Avx(()); 27 | 28 | impl_token! { Sse => "sse4.1" } 29 | impl_token! { Avx => "avx" } 30 | 31 | impl core::convert::From for Sse { 32 | #[inline] 33 | fn from(_: Avx) -> Sse { 34 | unsafe { Sse::new_unchecked() } 35 | } 36 | } 37 | 38 | impl Native for f32 { 39 | type Width = width::W4; 40 | } 41 | 42 | impl Native for f64 { 43 | type Width = width::W2; 44 | } 45 | 46 | impl Native for f32 { 47 | type Width = width::W8; 48 | } 49 | 50 | impl Native for f64 { 51 | type Width = width::W4; 52 | } 53 | 54 | /// An SSE vector of `f32`s. 55 | #[derive(Clone, Copy, Debug)] 56 | #[repr(transparent)] 57 | #[allow(non_camel_case_types)] 58 | pub struct f32x4(__m128); 59 | 60 | /// An SSE vector of `f64`s. 61 | #[derive(Clone, Copy, Debug)] 62 | #[repr(transparent)] 63 | #[allow(non_camel_case_types)] 64 | pub struct f64x2(__m128d); 65 | 66 | /// An AVX vector of `f32`s. 67 | #[derive(Clone, Copy, Debug)] 68 | #[repr(transparent)] 69 | #[allow(non_camel_case_types)] 70 | pub struct f32x8(__m256); 71 | 72 | /// An AVX vector of `f64`s. 
73 | #[derive(Clone, Copy, Debug)] 74 | #[repr(transparent)] 75 | #[allow(non_camel_case_types)] 76 | pub struct f64x4(__m256d); 77 | 78 | impl Scalar for f32 { 79 | type Vector = ShimToken; 80 | } 81 | 82 | impl Scalar for f32 { 83 | type Vector = ShimToken, Self, Sse>; 84 | } 85 | 86 | impl Scalar for f32 { 87 | type Vector = f32x4; 88 | } 89 | 90 | impl Scalar for f32 { 91 | type Vector = Shim2; 92 | } 93 | 94 | impl Scalar for f64 { 95 | type Vector = ShimToken; 96 | } 97 | 98 | impl Scalar for f64 { 99 | type Vector = f64x2; 100 | } 101 | 102 | impl Scalar for f64 { 103 | type Vector = Shim2; 104 | } 105 | 106 | impl Scalar for f64 { 107 | type Vector = Shim4; 108 | } 109 | 110 | impl Scalar for f32 { 111 | type Vector = ShimToken; 112 | } 113 | 114 | impl Scalar for f32 { 115 | type Vector = ShimToken, Self, Avx>; 116 | } 117 | 118 | impl Scalar for f32 { 119 | type Vector = ShimToken; 120 | } 121 | 122 | impl Scalar for f32 { 123 | type Vector = f32x8; 124 | } 125 | 126 | impl Scalar for f64 { 127 | type Vector = ShimToken; 128 | } 129 | 130 | impl Scalar for f64 { 131 | type Vector = ShimToken; 132 | } 133 | 134 | impl Scalar for f64 { 135 | type Vector = f64x4; 136 | } 137 | 138 | impl Scalar for f64 { 139 | type Vector = Shim2; 140 | } 141 | 142 | arithmetic_ops! { 143 | feature: Sse::new_unchecked(), 144 | for f32x4: 145 | add -> (_mm_add_ps), 146 | sub -> (_mm_sub_ps), 147 | mul -> (_mm_mul_ps), 148 | div -> (_mm_div_ps) 149 | } 150 | 151 | arithmetic_ops! { 152 | feature: Sse::new_unchecked(), 153 | for f64x2: 154 | add -> (_mm_add_pd), 155 | sub -> (_mm_sub_pd), 156 | mul -> (_mm_mul_pd), 157 | div -> (_mm_div_pd) 158 | } 159 | 160 | arithmetic_ops! { 161 | feature: Avx::new_unchecked(), 162 | for f32x8: 163 | add -> (_mm256_add_ps), 164 | sub -> (_mm256_sub_ps), 165 | mul -> (_mm256_mul_ps), 166 | div -> (_mm256_div_ps) 167 | } 168 | 169 | arithmetic_ops! 
{ 170 | feature: Avx::new_unchecked(), 171 | for f64x4: 172 | add -> (_mm256_add_pd), 173 | sub -> (_mm256_sub_pd), 174 | mul -> (_mm256_mul_pd), 175 | div -> (_mm256_div_pd) 176 | } 177 | 178 | impl core::ops::Neg for f32x4 { 179 | type Output = Self; 180 | 181 | #[inline] 182 | fn neg(self) -> Self { 183 | Self(unsafe { _mm_xor_ps(self.0, _mm_set1_ps(-0.)) }) 184 | } 185 | } 186 | 187 | impl core::ops::Neg for f64x2 { 188 | type Output = Self; 189 | 190 | #[inline] 191 | fn neg(self) -> Self { 192 | Self(unsafe { _mm_xor_pd(self.0, _mm_set1_pd(-0.)) }) 193 | } 194 | } 195 | 196 | impl core::ops::Neg for f32x8 { 197 | type Output = Self; 198 | 199 | #[inline] 200 | fn neg(self) -> Self { 201 | Self(unsafe { _mm256_xor_ps(self.0, _mm256_set1_ps(-0.)) }) 202 | } 203 | } 204 | 205 | impl core::ops::Neg for f64x4 { 206 | type Output = Self; 207 | 208 | #[inline] 209 | fn neg(self) -> Self { 210 | Self(unsafe { _mm256_xor_pd(self.0, _mm256_set1_pd(-0.)) }) 211 | } 212 | } 213 | 214 | as_slice! { f32x4 } 215 | as_slice! { f32x8 } 216 | as_slice! { f64x2 } 217 | as_slice! 
{ f64x4 } 218 | 219 | unsafe impl Vector for f32x4 { 220 | type Scalar = f32; 221 | 222 | type Token = Sse; 223 | 224 | type Width = crate::vector::width::W4; 225 | 226 | type Underlying = __m128; 227 | 228 | #[inline] 229 | fn zeroed(_: Self::Token) -> Self { 230 | Self(unsafe { _mm_setzero_ps() }) 231 | } 232 | 233 | #[inline] 234 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 235 | Self(unsafe { _mm_set1_ps(from) }) 236 | } 237 | } 238 | 239 | unsafe impl Vector for f64x2 { 240 | type Scalar = f64; 241 | 242 | type Token = Sse; 243 | 244 | type Width = crate::vector::width::W2; 245 | 246 | type Underlying = __m128d; 247 | 248 | #[inline] 249 | fn zeroed(_: Self::Token) -> Self { 250 | Self(unsafe { _mm_setzero_pd() }) 251 | } 252 | 253 | #[inline] 254 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 255 | Self(unsafe { _mm_set1_pd(from) }) 256 | } 257 | } 258 | 259 | unsafe impl Vector for f32x8 { 260 | type Scalar = f32; 261 | 262 | type Token = Avx; 263 | 264 | type Width = crate::vector::width::W8; 265 | 266 | type Underlying = __m256; 267 | 268 | #[inline] 269 | fn zeroed(_: Self::Token) -> Self { 270 | Self(unsafe { _mm256_setzero_ps() }) 271 | } 272 | 273 | #[inline] 274 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 275 | Self(unsafe { _mm256_set1_ps(from) }) 276 | } 277 | } 278 | 279 | unsafe impl Vector for f64x4 { 280 | type Scalar = f64; 281 | 282 | type Token = Avx; 283 | 284 | type Width = crate::vector::width::W4; 285 | 286 | type Underlying = __m256d; 287 | 288 | #[inline] 289 | fn zeroed(_: Self::Token) -> Self { 290 | Self(unsafe { _mm256_setzero_pd() }) 291 | } 292 | 293 | #[inline] 294 | fn splat(_: Self::Token, from: Self::Scalar) -> Self { 295 | Self(unsafe { _mm256_set1_pd(from) }) 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /generic-simd/src/implementation.rs: -------------------------------------------------------------------------------- 1 | 
macro_rules! arithmetic_ops { 2 | { 3 | @new $type:ty, $feature:expr, $trait:ident, $func:ident, () 4 | } => { 5 | impl core::ops::$trait<$type> for $type { 6 | type Output = Self; 7 | #[allow(unused_unsafe)] 8 | #[inline] 9 | fn $func(mut self, rhs: Self) -> Self { 10 | for (a, b) in self.iter_mut().zip(rhs.iter()) { 11 | *a = core::ops::$trait::$func(*a, b); 12 | } 13 | self 14 | } 15 | } 16 | 17 | impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type { 18 | type Output = Self; 19 | #[inline] 20 | fn $func(mut self, rhs: <$type as $crate::vector::Vector>::Scalar) -> Self { 21 | for a in self.iter_mut() { 22 | *a = core::ops::$trait::$func(*a, rhs); 23 | } 24 | self 25 | } 26 | } 27 | }; 28 | { 29 | @assign $type:ty, $feature:expr, $trait:ident, $func:ident, () 30 | } => { 31 | impl core::ops::$trait<$type> for $type { 32 | #[allow(unused_unsafe)] 33 | #[inline] 34 | fn $func(&mut self, rhs: Self) { 35 | for (a, b) in self.iter_mut().zip(rhs.iter()) { 36 | core::ops::$trait::$func(a, b); 37 | } 38 | } 39 | } 40 | 41 | impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type { 42 | #[inline] 43 | fn $func(&mut self, rhs: <$type as $crate::vector::Vector>::Scalar) { 44 | for a in self.iter_mut() { 45 | core::ops::$trait::$func(a, rhs); 46 | } 47 | } 48 | } 49 | }; 50 | { 51 | @new $type:ty, $feature:expr, $trait:ident, $func:ident, ($op:path) 52 | } => { 53 | impl core::ops::$trait<$type> for $type { 54 | type Output = Self; 55 | #[allow(unused_unsafe)] 56 | #[inline] 57 | fn $func(self, rhs: Self) -> Self { 58 | Self(unsafe { $op(self.0, rhs.0) }) 59 | } 60 | } 61 | 62 | impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type { 63 | type Output = Self; 64 | #[inline] 65 | fn $func(self, rhs: <$type as $crate::vector::Vector>::Scalar) -> Self { 66 | self.$func(<$type>::splat(unsafe { $feature }, rhs)) 67 | } 68 | } 69 | }; 70 | { 71 | @assign $type:ty, $feature:expr, $trait:ident, $func:ident, ($op:path) 
72 | } => { 73 | impl core::ops::$trait<$type> for $type { 74 | #[allow(unused_unsafe)] 75 | #[inline] 76 | fn $func(&mut self, rhs: Self) { 77 | self.0 = unsafe { $op(self.0, rhs.0) }; 78 | } 79 | } 80 | 81 | impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type { 82 | #[inline] 83 | fn $func(&mut self, rhs: <$type as $crate::vector::Vector>::Scalar) { 84 | self.$func(<$type>::splat(unsafe { $feature }, rhs)) 85 | } 86 | } 87 | }; 88 | { 89 | feature: $feature:expr, 90 | for $type:ty: 91 | add -> $add_expr:tt, 92 | sub -> $sub_expr:tt, 93 | mul -> $mul_expr:tt, 94 | div -> $div_expr:tt 95 | } => { 96 | impl core::iter::Sum<$type> for Option<$type> { // lane-wise sum of vectors; `None` when the iterator is empty 97 | #[inline] 98 | fn sum<I>(mut iter: I) -> Self 99 | where 100 | I: Iterator<Item = $type>, 101 | { 102 | if let Some(mut sum) = iter.next() { 103 | while let Some(v) = iter.next() { 104 | sum += v; 105 | } 106 | Some(sum) 107 | } else { 108 | None 109 | } 110 | } 111 | } 112 | 113 | impl core::iter::Sum<$type> for <$type as $crate::vector::Vector>::Scalar { // sum of every lane of every vector 114 | #[inline] 115 | fn sum<I>(iter: I) -> Self 116 | where 117 | I: Iterator<Item = $type>, 118 | { 119 | if let Some(sums) = iter.sum::<Option<$type>>() { 120 | sums.iter().sum() 121 | } else { 122 | Default::default() 123 | } 124 | } 125 | } 126 | 127 | impl core::iter::Product<$type> for Option<$type> { // lane-wise product of vectors; `None` when the iterator is empty 128 | #[inline] 129 | fn product<I>(mut iter: I) -> Self 130 | where 131 | I: Iterator<Item = $type>, 132 | { 133 | if let Some(mut sum) = iter.next() { 134 | while let Some(v) = iter.next() { 135 | sum *= v; 136 | } 137 | Some(sum) 138 | } else { 139 | None 140 | } 141 | } 142 | } 143 | 144 | impl core::iter::Product<$type> for <$type as $crate::vector::Vector>::Scalar { // product of every lane of every vector 145 | #[inline] 146 | fn product<I>(iter: I) -> Self 147 | where 148 | I: Iterator<Item = $type>, 149 | { 150 | if let Some(products) = iter.product::<Option<$type>>() { // multiply the vectors lane-wise (not add them) before reducing the lanes 151 | products.iter().product() 152 | } else { 153 | core::iter::empty::<&Self>().product() // no vectors: the empty product is the multiplicative identity, not zero 154 | } 155 | } 156 | } 157 | 158 | arithmetic_ops!{@new $type, $feature, Add, add, $add_expr} 159 | arithmetic_ops!{@new $type,
$feature, Sub, sub, $sub_expr} 160 | arithmetic_ops!{@new $type, $feature, Mul, mul, $mul_expr} 161 | arithmetic_ops!{@new $type, $feature, Div, div, $div_expr} 162 | arithmetic_ops!{@assign $type, $feature, AddAssign, add_assign, $add_expr} 163 | arithmetic_ops!{@assign $type, $feature, SubAssign, sub_assign, $sub_expr} 164 | arithmetic_ops!{@assign $type, $feature, MulAssign, mul_assign, $mul_expr} 165 | arithmetic_ops!{@assign $type, $feature, DivAssign, div_assign, $div_expr} 166 | }; 167 | } 168 | 169 | macro_rules! as_slice { 170 | { 171 | $type:ty 172 | } => { 173 | impl AsRef<[<$type as crate::vector::Vector>::Scalar]> for $type { 174 | #[inline] 175 | fn as_ref(&self) -> &[<$type as crate::vector::Vector>::Scalar] { 176 | use crate::vector::Vector; 177 | self.as_slice() 178 | } 179 | } 180 | 181 | impl AsMut<[<$type as crate::vector::Vector>::Scalar]> for $type { 182 | #[inline] 183 | fn as_mut(&mut self) -> &mut [<$type as crate::vector::Vector>::Scalar] { 184 | use crate::vector::Vector; 185 | self.as_slice_mut() 186 | } 187 | } 188 | 189 | impl core::ops::Deref for $type { 190 | type Target = [::Scalar]; 191 | #[inline] 192 | fn deref(&self) -> &Self::Target { 193 | self.as_slice() 194 | } 195 | } 196 | 197 | impl core::ops::DerefMut for $type { 198 | #[inline] 199 | fn deref_mut(&mut self) -> &mut ::Target { 200 | self.as_slice_mut() 201 | } 202 | } 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /generic-simd/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(not(feature = "std"), no_std)] 2 | #![cfg_attr( 3 | all(feature = "nightly", target_arch = "wasm32"), 4 | feature(wasm_simd, wasm_target_feature) 5 | )] 6 | #![cfg_attr( 7 | all(feature = "nightly", target_arch = "aarch64"), 8 | feature(stdsimd, aarch64_target_feature) 9 | )] 10 | //! `generic-simd` provides safe and idiomatic zero-cost abstractions for writing explicit 11 | //! 
cross-platform SIMD operations. 12 | //! 13 | //! # Supported architectures 14 | //! All architectures are supported via scalar fallbacks, but the following instruction sets are 15 | //! also supported: 16 | //! * SSE4.1 (x86/x86-64) 17 | //! * AVX (x86/x86-64) 18 | //! * NEON (aarch64, with `nightly` cargo feature) 19 | //! * SIMD128 (wasm32, with `nightly` cargo feature and `simd128` target feature) 20 | //! 21 | //! The various architecture-specific types are available in the [`arch`](arch/index.html) module. 22 | //! 23 | //! # Abstractions 24 | //! Vector abstractions are provided via the traits in the [`vector`](vector/index.html) module. 25 | //! Generics that use these traits are able to utilize any of the supported instruction sets. 26 | //! 27 | //! The following example performs a vector-accelerated sum of an input slice: 28 | //! ``` 29 | //! use generic_simd::{ 30 | //! arch::Token, 31 | //! dispatch, 32 | //! scalar::ScalarExt, 33 | //! slice::SliceExt, 34 | //! vector::NativeVector, 35 | //! }; 36 | //! 37 | //! // This function provides a generic implementation for any instruction set. 38 | //! // Here we use the "native" vector type, i.e. the widest vector directly supported by the 39 | //! // architecture. 40 | //! #[inline] 41 | //! fn sum_impl(token: T, input: &[f32]) -> f32 42 | //! where 43 | //! T: Token, 44 | //! f32: ScalarExt + core::iter::Sum>, 45 | //! { 46 | //! // Use aligned loads in this example, which may be better on some architectures. 47 | //! let (start, vectors, end) = input.align_native(token); 48 | //! 49 | //! // Sum across the vector lanes, plus the unaligned portions 50 | //! vectors.iter().copied().sum::() + start.iter().chain(end).sum::() 51 | //! } 52 | //! 53 | //! // This function selects the best instruction set at runtime. 54 | //! // The "dispatch" macro compiles this function for each supported architecture. 55 | //! #[dispatch(token)] 56 | //! fn sum(input: &[f32]) -> f32 { 57 | //! 
sum_impl(token, input) 58 | //! } 59 | //! 60 | //! assert_eq!(sum(&[1f32; 10]), 10.); 61 | //! ``` 62 | //! 63 | //! # Vector shims 64 | //! Various instruction sets provide vectors with different widths, so shims are provided to 65 | //! create vectors of particular widths regardless of architecture. These are available in the 66 | //! [`shim`](shim/index.html) module. 67 | //! 68 | //! For example, the following function performs an [Array of Structures of Arrays](https://en.wikipedia.org/wiki/AoS_and_SoA) 69 | //! operation using arrays of 4 `f64`s regardless of instruction set: 70 | //! ``` 71 | //! use generic_simd::{ 72 | //! arch::Token, 73 | //! dispatch, 74 | //! scalar::Scalar, 75 | //! slice::Slice, 76 | //! vector::{Signed, Vector, width}, 77 | //! }; 78 | //! 79 | //! // Equivalent to an array of 4 2-dimensional coordinates, 80 | //! // but with a vectorizable memory layout. 81 | //! struct Coordinates { 82 | //! x: [f64; 4], 83 | //! y: [f64; 4], 84 | //! } 85 | //! 86 | //! // A generic mean implementation for any instruction set. 87 | //! fn mean_impl(token: T, input: &[Coordinates]) -> (f64, f64) 88 | //! where 89 | //! T: Token, 90 | //! f64: Scalar, 91 | //! >::Vector: Signed, 92 | //! { 93 | //! let mut xsum = f64::zeroed(token); 94 | //! let mut ysum = f64::zeroed(token); 95 | //! 96 | //! for Coordinates { x, y } in input { 97 | //! // read the arrays into vectors 98 | //! xsum += x.read(token); 99 | //! ysum += y.read(token); 100 | //! } 101 | //! 102 | //! // sum across the vector lanes 103 | //! ( 104 | //! xsum.iter().sum::() / (input.len() * 4) as f64, 105 | //! ysum.iter().sum::() / (input.len() * 4) as f64, 106 | //! ) 107 | //! } 108 | //! 109 | //! // Selects the best instruction set at runtime. 110 | //! #[dispatch(token)] 111 | //! fn mean(input: &[Coordinates]) -> (f64, f64) { 112 | //! mean_impl(token, input) 113 | //! } 114 | //! ``` 115 | 116 | // Re-export for use from macros. 
117 | #[doc(hidden)] 118 | pub use multiversion; 119 | 120 | /// Multiversions a function over all supported instruction sets. 121 | /// 122 | /// Tagging a function with `#[dispatch(token)]` creates a version of the function for each 123 | /// supported instruction set and provides its token as `token`. 124 | /// The best supported function variant is selected at runtime. 125 | /// 126 | /// # Implementation 127 | /// This attribute is a wrapper for [`multiversion`] and supports all of its 128 | /// conditional compilation and static dispatch features. 129 | /// 130 | /// # Example 131 | /// ``` 132 | /// use generic_simd::slice::SliceExt; 133 | /// 134 | /// #[generic_simd::dispatch(token)] 135 | /// pub fn add_one(x: &mut [f32]) { 136 | /// let (start, vecs, end) = x.align_native_mut(token); 137 | /// for s in start.iter_mut().chain(end.iter_mut()) { 138 | /// *s += 1.; 139 | /// } 140 | /// 141 | /// for v in vecs { 142 | /// *v += 1.; 143 | /// } 144 | /// } 145 | /// 146 | /// #[generic_simd::dispatch(_token)] 147 | /// pub fn add_two(x: &mut [f32]) { 148 | /// // Static dispatching provided by `multiversion`. 149 | /// // This does not perform runtime feature selection and allows inlining. 150 | /// dispatch!(add_one(x)); 151 | /// dispatch!(add_one(x)); 152 | /// } 153 | /// ``` 154 | /// 155 | /// [Abstractions]: index.html#abstractions 156 | /// [Vector shims]: index.html#vector-shims 157 | /// [`multiversion`]: ../multiversion/attr.multiversion.html 158 | pub use generic_simd_macros::dispatch; 159 | 160 | #[macro_use] 161 | mod implementation; 162 | 163 | pub mod alignment; 164 | pub mod arch; 165 | pub mod pointer; 166 | pub mod scalar; 167 | pub mod shim; 168 | pub mod slice; 169 | pub mod vector; 170 | -------------------------------------------------------------------------------- /generic-simd/src/pointer.rs: -------------------------------------------------------------------------------- 1 | //! Extensions for pointers to vectors. 
2 | 3 | use crate::{ 4 | scalar::Scalar, 5 | vector::{width, Native, NativeWidth, Vector}, 6 | }; 7 | 8 | /// A pointer to a vector. 9 | pub trait Pointer: Copy 10 | where 11 | Token: crate::arch::Token, 12 | Width: width::Width, 13 | { 14 | type Vector: Vector; 15 | 16 | /// Read a vector from a pointer. 17 | /// 18 | /// # Safety 19 | /// See [`read_ptr`](../vector/trait.Vector.html#method.read_ptr). 20 | unsafe fn vector_read(self, token: Token) -> Self::Vector; 21 | 22 | /// Read a vector from a vector-aligned pointer. 23 | /// 24 | /// # Safety 25 | /// See [`read_aligned_ptr`](../vector/trait.Vector.html#method.read_aligned_ptr). 26 | unsafe fn vector_read_aligned(self, token: Token) -> Self::Vector; 27 | } 28 | 29 | impl Pointer for *const T 30 | where 31 | T: Scalar, 32 | Token: crate::arch::Token, 33 | Width: width::Width, 34 | { 35 | type Vector = T::Vector; 36 | 37 | #[inline] 38 | unsafe fn vector_read(self, token: Token) -> Self::Vector { 39 | Self::Vector::read_ptr(token, self) 40 | } 41 | 42 | #[inline] 43 | unsafe fn vector_read_aligned(self, token: Token) -> Self::Vector { 44 | Self::Vector::read_aligned_ptr(token, self) 45 | } 46 | } 47 | 48 | impl Pointer for *mut T 49 | where 50 | T: Scalar, 51 | Token: crate::arch::Token, 52 | Width: width::Width, 53 | { 54 | type Vector = T::Vector; 55 | 56 | #[inline] 57 | unsafe fn vector_read(self, token: Token) -> Self::Vector { 58 | Self::Vector::read_ptr(token, self) 59 | } 60 | 61 | #[inline] 62 | unsafe fn vector_read_aligned(self, token: Token) -> Self::Vector { 63 | Self::Vector::read_aligned_ptr(token, self) 64 | } 65 | } 66 | 67 | macro_rules! 
pointer_impl { 68 | { 69 | $width:literal, 70 | $width_type:ty, 71 | $read_unaligned:ident, 72 | $read_aligned:ident 73 | } => { 74 | #[doc = "Read a vector with "] 75 | #[doc = $width] 76 | #[doc = " from a pointer.\n\n# Safety\nSee [`read_ptr`](../vector/trait.Vector.html#method.read_ptr)."] 77 | #[inline] 78 | unsafe fn $read_unaligned(self, token: Token) -> >::Vector { 79 | >::vector_read(self, token) 80 | } 81 | 82 | #[doc = "Read a vector with "] 83 | #[doc = $width] 84 | #[doc = " from a vector-aligned pointer.\n\n# Safety\nSee [`read_aligned_ptr`](../vector/trait.Vector.html#method.read_aligned_ptr)."] 85 | #[inline] 86 | unsafe fn $read_aligned(self, token: Token) -> >::Vector { 87 | >::vector_read_aligned(self, token) 88 | } 89 | } 90 | } 91 | 92 | /// A pointer to a vector, supporting all vector widths. 93 | pub trait PointerExt: 94 | Native 95 | + Pointer 96 | + Pointer 97 | + Pointer 98 | + Pointer 99 | + Pointer> 100 | where 101 | Token: crate::arch::Token, 102 | { 103 | pointer_impl! { "the native number of lanes", >::Width, vector_read_native, vector_read_aligned_native } 104 | pointer_impl! { "1 lane", width::W1, vector_read1, vector_read1_aligned } 105 | pointer_impl! { "2 lanes", width::W2, vector_read2, vector_read2_aligned } 106 | pointer_impl! { "4 lanes", width::W4, vector_read4, vector_read4_aligned } 107 | pointer_impl! { "8 lanes", width::W8, vector_read8, vector_read8_aligned } 108 | } 109 | 110 | impl PointerExt for T 111 | where 112 | T: Native 113 | + Pointer 114 | + Pointer 115 | + Pointer 116 | + Pointer 117 | + Pointer>, 118 | Token: crate::arch::Token, 119 | { 120 | } 121 | -------------------------------------------------------------------------------- /generic-simd/src/scalar.rs: -------------------------------------------------------------------------------- 1 | //! Extensions for scalars. 2 | 3 | use crate::vector::{width, Native, NativeWidth, Vector}; 4 | 5 | /// A scalar value. 
6 | pub trait Scalar: Copy 7 | where 8 | Token: crate::arch::Token, 9 | Width: width::Width, 10 | { 11 | type Vector: Vector; 12 | 13 | /// Create a vector set to zero. 14 | /// 15 | /// See [`zeroed`](../vector/trait.Vector.html#method.zeroed). 16 | #[inline] 17 | fn zeroed(token: Token) -> Self::Vector { 18 | Self::Vector::zeroed(token) 19 | } 20 | 21 | /// Splat a scalar to a vector. 22 | /// 23 | /// See [`splat`](../vector/trait.Vector.html#tymethod.splat). 24 | #[inline] 25 | fn splat(self, token: Token) -> Self::Vector { 26 | Self::Vector::splat(token, self) 27 | } 28 | } 29 | 30 | macro_rules! scalar_impl { 31 | { 32 | $width:literal, 33 | $width_type:ty, 34 | $zeroed:ident, 35 | $splat:ident 36 | } => { 37 | #[doc = "Create a vector with "] 38 | #[doc = $width] 39 | #[doc = " set to zero.\n\nSee [`zeroed`](../vector/trait.Vector.html#method.zeroed)."] 40 | #[inline] 41 | fn $zeroed(token: Token) -> >::Vector { 42 | >::zeroed(token.into()) 43 | } 44 | 45 | #[doc = "Splat a scalar to "] 46 | #[doc = $width] 47 | #[doc = ".\n\nSee [`splat`](../vector/trait.Vector.html#tymethod.splat)."] 48 | #[inline] 49 | fn $splat(self, token: Token) -> >::Vector { 50 | >::splat(self, token.into()) 51 | } 52 | } 53 | } 54 | 55 | /// A scalar value, supporting all vector widths. 56 | pub trait ScalarExt: 57 | Native 58 | + self::Scalar 59 | + self::Scalar 60 | + self::Scalar 61 | + self::Scalar 62 | + self::Scalar> 63 | where 64 | Token: crate::arch::Token + From + Into, 65 | { 66 | scalar_impl! { "the native number of lanes", >::Width, zeroed_native, splat_native } 67 | scalar_impl! { "1 lane", width::W1, zeroed1, splat1 } 68 | scalar_impl! { "2 lanes", width::W2, zeroed2, splat2 } 69 | scalar_impl! { "4 lanes", width::W4, zeroed4, splat4 } 70 | scalar_impl! 
{ "8 lanes", width::W8, zeroed8, splat8 } 71 | } 72 | 73 | impl ScalarExt for Scalar 74 | where 75 | Token: crate::arch::Token, 76 | Scalar: Native 77 | + self::Scalar 78 | + self::Scalar 79 | + self::Scalar 80 | + self::Scalar 81 | + self::Scalar>, 82 | { 83 | } 84 | -------------------------------------------------------------------------------- /generic-simd/src/shim/mod.rs: -------------------------------------------------------------------------------- 1 | //! Shims for non-native vectors. 2 | 3 | mod token; 4 | mod width; 5 | 6 | pub use token::*; 7 | pub use width::*; 8 | -------------------------------------------------------------------------------- /generic-simd/src/shim/token.rs: -------------------------------------------------------------------------------- 1 | use crate::arch; 2 | use crate::vector::Vector; 3 | use core::marker::PhantomData; 4 | 5 | #[cfg(feature = "complex")] 6 | use crate::vector::Complex; 7 | 8 | /// Shim that converts the associated token. 9 | #[derive(Copy, Clone, Debug)] 10 | #[repr(transparent)] 11 | pub struct ShimToken(Underlying, PhantomData<(Scalar, Token)>); 12 | 13 | unsafe impl Vector for ShimToken 14 | where 15 | Underlying: Vector, 16 | Scalar: Copy, 17 | Token: arch::Token + Into<::Token>, 18 | { 19 | type Scalar = Scalar; 20 | type Token = Token; 21 | type Width = ::Width; 22 | type Underlying = ::Underlying; 23 | 24 | #[inline] 25 | fn zeroed(token: Self::Token) -> Self { 26 | Self(Underlying::zeroed(token.into()), PhantomData) 27 | } 28 | 29 | #[inline] 30 | fn splat(token: Self::Token, from: Self::Scalar) -> Self { 31 | Self(Underlying::splat(token.into(), from), PhantomData) 32 | } 33 | } 34 | 35 | impl AsRef<[Scalar]> for ShimToken 36 | where 37 | Underlying: AsRef<[Scalar]>, 38 | { 39 | #[inline] 40 | fn as_ref(&self) -> &[Scalar] { 41 | self.0.as_ref() 42 | } 43 | } 44 | 45 | impl AsMut<[Scalar]> for ShimToken 46 | where 47 | Underlying: AsMut<[Scalar]>, 48 | { 49 | #[inline] 50 | fn as_mut(&mut self) -> &mut 
[Scalar] { 51 | self.0.as_mut() 52 | } 53 | } 54 | 55 | impl core::ops::Deref for ShimToken 56 | where 57 | Underlying: core::ops::Deref, 58 | { 59 | type Target = Underlying::Target; 60 | 61 | #[inline] 62 | fn deref(&self) -> &Self::Target { 63 | &self.0 64 | } 65 | } 66 | 67 | impl core::ops::DerefMut for ShimToken 68 | where 69 | Underlying: core::ops::DerefMut, 70 | { 71 | #[inline] 72 | fn deref_mut(&mut self) -> &mut ::Target { 73 | &mut self.0 74 | } 75 | } 76 | 77 | macro_rules! implement { 78 | { 79 | @op $trait:ident :: $func:ident 80 | } => { 81 | impl core::ops::$trait for ShimToken 82 | where 83 | Underlying: Copy + core::ops::$trait, 84 | { 85 | type Output = Self; 86 | 87 | #[inline] 88 | fn $func(self, rhs: Self) -> Self { 89 | Self((self.0).$func(rhs.0), PhantomData) 90 | } 91 | } 92 | 93 | impl core::ops::$trait for ShimToken 94 | where 95 | Underlying: Copy + core::ops::$trait, 96 | Scalar: Copy, 97 | { 98 | type Output = Self; 99 | 100 | #[inline] 101 | fn $func(self, rhs: Scalar) -> Self { 102 | Self((self.0).$func(rhs), PhantomData) 103 | } 104 | } 105 | }; 106 | 107 | { 108 | @op_assign $trait:ident :: $func:ident 109 | } => { 110 | impl core::ops::$trait for ShimToken 111 | where 112 | Underlying: Copy + core::ops::$trait, 113 | Scalar: Copy, 114 | { 115 | #[inline] 116 | fn $func(&mut self, rhs: Self) { 117 | (self.0).$func(rhs.0); 118 | } 119 | } 120 | 121 | impl core::ops::$trait for ShimToken 122 | where 123 | Underlying: Copy + core::ops::$trait, 124 | Scalar: Copy, 125 | { 126 | #[inline] 127 | fn $func(&mut self, rhs: Scalar) { 128 | (self.0).$func(rhs); 129 | } 130 | } 131 | }; 132 | } 133 | 134 | implement! { @op Add::add } 135 | implement! { @op Sub::sub } 136 | implement! { @op Mul::mul } 137 | implement! { @op Div::div } 138 | implement! { @op_assign AddAssign::add_assign } 139 | implement! { @op_assign SubAssign::sub_assign } 140 | implement! { @op_assign MulAssign::mul_assign } 141 | implement! 
{ @op_assign DivAssign::div_assign } 142 | 143 | impl core::ops::Neg for ShimToken 144 | where 145 | Underlying: Copy + core::ops::Neg, 146 | { 147 | type Output = Self; 148 | 149 | #[inline] 150 | fn neg(self) -> Self { 151 | Self(-self.0, PhantomData) 152 | } 153 | } 154 | 155 | impl core::iter::Sum> 156 | for Option> 157 | where 158 | ShimToken: core::ops::AddAssign, 159 | Underlying: Copy, 160 | { 161 | #[inline] 162 | fn sum(mut iter: I) -> Self 163 | where 164 | I: Iterator>, 165 | { 166 | if let Some(mut sum) = iter.next() { 167 | for v in iter { 168 | sum += v; 169 | } 170 | Some(sum) 171 | } else { 172 | None 173 | } 174 | } 175 | } 176 | 177 | impl core::iter::Sum> 178 | for as Vector>::Scalar 179 | where 180 | Option>: 181 | core::iter::Sum>, 182 | Underlying: Vector, 183 | Scalar: Copy + core::ops::Add + Default, 184 | Token: arch::Token, 185 | Underlying::Token: From, 186 | { 187 | #[inline] 188 | fn sum(iter: I) -> Self 189 | where 190 | I: Iterator>, 191 | { 192 | let mut value = Self::default(); 193 | if let Some(sums) = iter.sum::>>() { 194 | for sum in sums.as_slice() { 195 | value = value + *sum; 196 | } 197 | } 198 | value 199 | } 200 | } 201 | 202 | impl core::iter::Product> 203 | for Option> 204 | where 205 | ShimToken: core::ops::MulAssign, 206 | Underlying: Copy, 207 | { 208 | #[inline] 209 | fn product(mut iter: I) -> Self 210 | where 211 | I: Iterator>, 212 | { 213 | if let Some(mut sum) = iter.next() { 214 | for v in iter { 215 | sum *= v; 216 | } 217 | Some(sum) 218 | } else { 219 | None 220 | } 221 | } 222 | } 223 | 224 | impl core::iter::Product> 225 | for as Vector>::Scalar 226 | where 227 | Option>: 228 | core::iter::Product>, 229 | Underlying: Vector, 230 | Scalar: Copy + core::ops::Mul + Default, 231 | Token: arch::Token, 232 | Underlying::Token: From, 233 | { 234 | #[inline] 235 | fn product(iter: I) -> Self 236 | where 237 | I: Iterator>, 238 | { 239 | let mut value = Self::default(); 240 | if let Some(products) = 
iter.product::>>() { 241 | for product in products.as_slice() { 242 | value = value * *product; 243 | } 244 | } 245 | value 246 | } 247 | } 248 | 249 | #[cfg(feature = "complex")] 250 | impl Complex for ShimToken, Token> 251 | where 252 | Underlying: Vector> + Complex, 253 | Real: Copy, 254 | Token: arch::Token, 255 | Underlying::Token: From, 256 | { 257 | type RealScalar = Real; 258 | 259 | #[inline] 260 | fn conj(self) -> Self { 261 | Self(self.0.conj(), PhantomData) 262 | } 263 | 264 | #[inline] 265 | fn mul_i(self) -> Self { 266 | Self(self.0.mul_i(), PhantomData) 267 | } 268 | 269 | #[inline] 270 | fn mul_neg_i(self) -> Self { 271 | Self(self.0.mul_neg_i(), PhantomData) 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /generic-simd/src/shim/width.rs: -------------------------------------------------------------------------------- 1 | use crate::vector::{width, Vector}; 2 | use core::marker::PhantomData; 3 | 4 | #[cfg(feature = "complex")] 5 | use crate::vector::Complex; 6 | 7 | /// Determines the doubled width of this vector. 8 | pub trait Double { 9 | type Doubled: width::Width; 10 | } 11 | 12 | impl Double for width::W1 { 13 | type Doubled = width::W2; 14 | } 15 | 16 | impl Double for width::W2 { 17 | type Doubled = width::W4; 18 | } 19 | 20 | impl Double for width::W4 { 21 | type Doubled = width::W8; 22 | } 23 | 24 | /// Shim that doubles the width of a vector. 25 | #[derive(Copy, Clone, Debug)] 26 | #[repr(transparent)] 27 | pub struct Shim2([Underlying; 2], PhantomData); 28 | 29 | /// Shim that quadruples the width of a vector. 30 | pub type Shim4 = Shim2, Scalar>; 31 | 32 | /// Shim that octuples the width of a vector. 
33 | pub type Shim8 = Shim4, Scalar>; 34 | 35 | unsafe impl Vector for Shim2 36 | where 37 | Underlying: Vector, 38 | Underlying::Width: Double, 39 | Scalar: Copy, 40 | { 41 | type Scalar = Scalar; 42 | type Token = ::Token; 43 | type Width = ::Doubled; 44 | type Underlying = [::Underlying; 2]; 45 | 46 | #[inline] 47 | fn zeroed(token: Self::Token) -> Self { 48 | Self([Underlying::zeroed(token); 2], PhantomData) 49 | } 50 | 51 | #[inline] 52 | fn splat(token: Self::Token, from: Self::Scalar) -> Self { 53 | Self([Underlying::splat(token, from); 2], PhantomData) 54 | } 55 | } 56 | 57 | impl AsRef<[Scalar]> for Shim2 58 | where 59 | Underlying: Vector, 60 | Underlying::Width: Double, 61 | Scalar: Copy, 62 | { 63 | #[inline] 64 | fn as_ref(&self) -> &[Scalar] { 65 | self.as_slice() 66 | } 67 | } 68 | 69 | impl AsMut<[Scalar]> for Shim2 70 | where 71 | Underlying: Vector, 72 | Underlying::Width: Double, 73 | Scalar: Copy, 74 | { 75 | #[inline] 76 | fn as_mut(&mut self) -> &mut [Scalar] { 77 | self.as_slice_mut() 78 | } 79 | } 80 | 81 | impl core::ops::Deref for Shim2 82 | where 83 | Underlying: Vector, 84 | Underlying::Width: Double, 85 | Scalar: Copy, 86 | { 87 | type Target = [Scalar]; 88 | 89 | #[inline] 90 | fn deref(&self) -> &Self::Target { 91 | self.as_slice() 92 | } 93 | } 94 | 95 | impl core::ops::DerefMut for Shim2 96 | where 97 | Underlying: Vector, 98 | Underlying::Width: Double, 99 | Scalar: Copy, 100 | { 101 | #[inline] 102 | fn deref_mut(&mut self) -> &mut ::Target { 103 | self.as_slice_mut() 104 | } 105 | } 106 | 107 | macro_rules! 
implement { 108 | { 109 | @op $trait:ident :: $func:ident 110 | } => { 111 | impl core::ops::$trait for Shim2 112 | where 113 | Underlying: Copy + core::ops::$trait, 114 | { 115 | type Output = Self; 116 | 117 | #[inline] 118 | fn $func(self, rhs: Self) -> Self { 119 | Self([self.0[0].$func(rhs.0[0]), self.0[1].$func(rhs.0[1])], PhantomData) 120 | } 121 | } 122 | 123 | impl core::ops::$trait for Shim2 124 | where 125 | Underlying: Copy + core::ops::$trait, 126 | Scalar: Copy, 127 | { 128 | type Output = Self; 129 | 130 | #[inline] 131 | fn $func(self, rhs: Scalar) -> Self { 132 | Self([self.0[0].$func(rhs), self.0[1].$func(rhs)], PhantomData) 133 | } 134 | } 135 | }; 136 | 137 | { 138 | @op_assign $trait:ident :: $func:ident 139 | } => { 140 | impl core::ops::$trait for Shim2 141 | where 142 | Underlying: Copy + core::ops::$trait, 143 | Scalar: Copy, 144 | { 145 | #[inline] 146 | fn $func(&mut self, rhs: Self) { 147 | self.0[0].$func(rhs.0[0]); 148 | self.0[1].$func(rhs.0[1]); 149 | } 150 | } 151 | 152 | impl core::ops::$trait for Shim2 153 | where 154 | Underlying: Copy + core::ops::$trait, 155 | Scalar: Copy, 156 | { 157 | #[inline] 158 | fn $func(&mut self, rhs: Scalar) { 159 | self.0[0].$func(rhs); 160 | self.0[1].$func(rhs); 161 | } 162 | } 163 | }; 164 | } 165 | 166 | implement! { @op Add::add } 167 | implement! { @op Sub::sub } 168 | implement! { @op Mul::mul } 169 | implement! { @op Div::div } 170 | implement! { @op_assign AddAssign::add_assign } 171 | implement! { @op_assign SubAssign::sub_assign } 172 | implement! { @op_assign MulAssign::mul_assign } 173 | implement! 
{ @op_assign DivAssign::div_assign } 174 | 175 | impl core::ops::Neg for Shim2 176 | where 177 | Underlying: Copy + core::ops::Neg, 178 | { 179 | type Output = Self; 180 | 181 | #[inline] 182 | fn neg(self) -> Self { 183 | Self([-self.0[0], -self.0[1]], PhantomData) 184 | } 185 | } 186 | 187 | impl core::iter::Sum> 188 | for Option> 189 | where 190 | Shim2: core::ops::AddAssign, 191 | Underlying: Copy, 192 | { 193 | #[inline] 194 | fn sum(mut iter: I) -> Self 195 | where 196 | I: Iterator>, 197 | { 198 | if let Some(mut sum) = iter.next() { 199 | for v in iter { 200 | sum += v; 201 | } 202 | Some(sum) 203 | } else { 204 | None 205 | } 206 | } 207 | } 208 | 209 | impl core::iter::Sum> 210 | for as Vector>::Scalar 211 | where 212 | Option>: core::iter::Sum>, 213 | Underlying: Vector, 214 | Underlying::Width: Double, 215 | Scalar: Copy + core::ops::Add + Default, 216 | { 217 | #[inline] 218 | fn sum(iter: I) -> Self 219 | where 220 | I: Iterator>, 221 | { 222 | let mut value = Self::default(); 223 | if let Some(sums) = iter.sum::>>() { 224 | for sum in sums.as_slice() { 225 | value = value + *sum; 226 | } 227 | } 228 | value 229 | } 230 | } 231 | 232 | impl core::iter::Product> 233 | for Option> 234 | where 235 | Shim2: core::ops::MulAssign, 236 | Underlying: Copy, 237 | { 238 | #[inline] 239 | fn product(mut iter: I) -> Self 240 | where 241 | I: Iterator>, 242 | { 243 | if let Some(mut sum) = iter.next() { 244 | for v in iter { 245 | sum *= v; 246 | } 247 | Some(sum) 248 | } else { 249 | None 250 | } 251 | } 252 | } 253 | 254 | impl core::iter::Product> 255 | for as Vector>::Scalar 256 | where 257 | Option>: core::iter::Product>, 258 | Underlying: Vector, 259 | Underlying::Width: Double, 260 | Scalar: Copy + core::ops::Mul + Default, 261 | { 262 | #[inline] 263 | fn product(iter: I) -> Self 264 | where 265 | I: Iterator>, 266 | { 267 | let mut value = Self::default(); 268 | if let Some(products) = iter.product::>>() { 269 | for product in products.as_slice() { 270 | 
value = value * *product; 271 | } 272 | } 273 | value 274 | } 275 | } 276 | 277 | #[cfg(feature = "complex")] 278 | impl Complex for Shim2> 279 | where 280 | Underlying: Vector> + Complex, 281 | Underlying::Width: Double, 282 | Real: Copy, 283 | { 284 | type RealScalar = Real; 285 | 286 | #[inline] 287 | fn conj(self) -> Self { 288 | Self([self.0[0].conj(), self.0[1].conj()], PhantomData) 289 | } 290 | 291 | #[inline] 292 | fn mul_i(self) -> Self { 293 | Self([self.0[0].mul_i(), self.0[1].mul_i()], PhantomData) 294 | } 295 | 296 | #[inline] 297 | fn mul_neg_i(self) -> Self { 298 | Self([self.0[0].mul_neg_i(), self.0[1].mul_neg_i()], PhantomData) 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /generic-simd/src/slice.rs: -------------------------------------------------------------------------------- 1 | //! Extensions for slices of vectors. 2 | 3 | use crate::arch::Token; 4 | use crate::{ 5 | scalar::Scalar, 6 | vector::{width, Native, NativeWidth, Vector}, 7 | }; 8 | use core::marker::PhantomData; 9 | 10 | /// A slice of scalars. 11 | pub trait Slice 12 | where 13 | Token: crate::arch::Token, 14 | Width: width::Width, 15 | { 16 | type Vector: Vector; 17 | 18 | /// Read a vector from a slice without checking the length. 19 | /// 20 | /// # Safety 21 | /// See [`read_unchecked`](../vector/trait.Vector.html#method.read_ptr). 22 | unsafe fn read_unchecked(&self, token: Token) -> Self::Vector; 23 | 24 | /// Read a vector from a slice. 25 | /// 26 | /// See [`read`](../vector/trait.Vector.html#method.read). 27 | fn read(&self, token: Token) -> Self::Vector; 28 | 29 | /// Extract a slice of aligned vectors, as if by [`align_to`]. 
30 | /// 31 | /// [`align_to`]: https://doc.rust-lang.org/std/primitive.slice.html#method.align_to 32 | #[allow(clippy::type_complexity)] 33 | fn align( 34 | &self, 35 | #[allow(unused_variables)] token: Token, 36 | ) -> ( 37 | &[::Scalar], 38 | &[Self::Vector], 39 | &[::Scalar], 40 | ); 41 | 42 | /// Extract a slice of aligned mutable vectors, as if by [`align_to_mut`]. 43 | /// 44 | /// [`align_to_mut`]: https://doc.rust-lang.org/std/primitive.slice.html#method.align_to_mut 45 | #[allow(clippy::type_complexity)] 46 | fn align_mut( 47 | &mut self, 48 | #[allow(unused_variables)] token: Token, 49 | ) -> ( 50 | &mut [::Scalar], 51 | &mut [Self::Vector], 52 | &mut [::Scalar], 53 | ); 54 | 55 | /// Create a slice of overlapping vectors from a slice of scalars. 56 | fn overlapping(&self, token: Token) -> Overlapping<'_, Self::Vector>; 57 | 58 | /// Create a mutable slice of overlapping vectors from a slice of scalars. 59 | fn overlapping_mut(&mut self, token: Token) -> OverlappingMut<'_, Self::Vector>; 60 | } 61 | 62 | impl Slice for [T] 63 | where 64 | T: Scalar, 65 | Token: crate::arch::Token, 66 | Width: width::Width, 67 | { 68 | type Vector = T::Vector; 69 | 70 | #[inline] 71 | unsafe fn read_unchecked(&self, token: Token) -> Self::Vector { 72 | Self::Vector::read_unchecked(token, self) 73 | } 74 | 75 | #[inline] 76 | fn read(&self, token: Token) -> Self::Vector { 77 | Self::Vector::read(token, self) 78 | } 79 | 80 | #[allow(clippy::type_complexity)] 81 | #[inline] 82 | fn align( 83 | &self, 84 | #[allow(unused_variables)] token: Token, 85 | ) -> ( 86 | &[::Scalar], 87 | &[Self::Vector], 88 | &[::Scalar], 89 | ) { 90 | unsafe { self.align_to() } 91 | } 92 | 93 | #[allow(clippy::type_complexity)] 94 | #[inline] 95 | fn align_mut( 96 | &mut self, 97 | #[allow(unused_variables)] token: Token, 98 | ) -> ( 99 | &mut [::Scalar], 100 | &mut [Self::Vector], 101 | &mut [::Scalar], 102 | ) { 103 | unsafe { self.align_to_mut() } 104 | } 105 | 106 | #[inline] 107 | fn 
overlapping(&self, token: Token) -> Overlapping<'_, Self::Vector> { 108 | Overlapping::new(token, self) 109 | } 110 | 111 | #[inline] 112 | fn overlapping_mut(&mut self, token: Token) -> OverlappingMut<'_, Self::Vector> { 113 | OverlappingMut::new(token, self) 114 | } 115 | } 116 | 117 | macro_rules! slice_impl { 118 | { 119 | $width:literal, 120 | $width_type:ty, 121 | $read_unchecked:ident, 122 | $read:ident, 123 | $align:ident, 124 | $align_mut:ident, 125 | $overlapping:ident, 126 | $overlapping_mut:ident 127 | } => { 128 | #[doc = "Read a vector with "] 129 | #[doc = $width] 130 | #[doc = " from a slice without checking the length.\n\n# Safety\nSee [`read_unchecked`](../vector/trait.Vector.html#method.read_unchecked)."] 131 | #[inline] 132 | unsafe fn $read_unchecked(&self, token: Token) -> >::Vector { 133 | >::read_unchecked(self, token) 134 | } 135 | 136 | #[doc = "Read a vector with "] 137 | #[doc = $width] 138 | #[doc = " from a slice.\n\nSee [`read`](../vector/trait.Vector.html#method.read)."] 139 | #[inline] 140 | fn $read(&self, token: Token) -> >::Vector { 141 | >::read(self, token) 142 | } 143 | 144 | #[doc = "Align a slice of scalars to vectors with "] 145 | #[doc = $width] 146 | #[doc = ".\n\nSee [`align`](trait.Slice.html#tymethod.align)."] 147 | #[allow(clippy::type_complexity)] 148 | #[inline] 149 | fn $align(&self, token: Token) -> 150 | ( 151 | &[<>::Vector as Vector>::Scalar], 152 | &[>::Vector], 153 | &[<>::Vector as Vector>::Scalar], 154 | ) { 155 | >::align(self, token) 156 | } 157 | 158 | #[doc = "Align a slice of scalars to vectors with "] 159 | #[doc = $width] 160 | #[doc = ".\n\nSee [`align_mut`](trait.Slice.html#tymethod.align_mut)."] 161 | #[allow(clippy::type_complexity)] 162 | #[inline] 163 | fn $align_mut(&mut self, token: Token) -> 164 | ( 165 | &mut [<>::Vector as Vector>::Scalar], 166 | &mut [>::Vector], 167 | &mut [<>::Vector as Vector>::Scalar], 168 | ){ 169 | >::align_mut(self, token) 170 | } 171 | 172 | #[doc = "Create a slice of overlapping 
vectors of "] 173 | #[doc = $width] 174 | #[doc = "from a slice of scalars.\n\nSee [`overlapping`](trait.Slice.html#tymethod.overlapping)."] 175 | #[inline] 176 | fn $overlapping(&self, token: Token) -> Overlapping<'_, >::Vector> { 177 | >::overlapping(self, token) 178 | } 179 | 180 | #[doc = "Create a mutable slice of overlapping vectors of "] 181 | #[doc = $width] 182 | #[doc = "from a slice of scalars.\n\nSee [`overlapping_mut`](trait.Slice.html#tymethod.overlapping_mut)."] 183 | #[inline] 184 | fn $overlapping_mut( 185 | &mut self, 186 | token: Token, 187 | ) -> OverlappingMut<'_, >::Vector> { 188 | >::overlapping_mut(self, token) 189 | } 190 | } 191 | } 192 | 193 | impl Native for [T] 194 | where 195 | T: Native, 196 | { 197 | type Width = T::Width; 198 | } 199 | 200 | /// A slice of scalars, supporting all vector widths. 201 | pub trait SliceExt: 202 | Native 203 | + Slice 204 | + Slice 205 | + Slice 206 | + Slice 207 | + Slice> 208 | where 209 | Token: crate::arch::Token, 210 | { 211 | slice_impl! { "the native number of lanes", >::Width, read_unchecked_native, read_native, align_native, align_native_mut, overlapping_native, overlapping_native_mut } 212 | slice_impl! { "1 lane", width::W1, read_unchecked1, read1, align1, align1_mut, overlapping1, overlapping1_mut } 213 | slice_impl! { "2 lanes", width::W2, read_unchecked2, read2, align2, align2_mut, overlapping2, overlapping2_mut } 214 | slice_impl! { "4 lanes", width::W4, read_unchecked4, read4, align4, align4_mut, overlapping4, overlapping4_mut } 215 | slice_impl! { "8 lanes", width::W8, read_unchecked8, read8, align8, align8_mut, overlapping8, overlapping8_mut } 216 | } 217 | 218 | impl SliceExt for T 219 | where 220 | T: ?Sized 221 | + Native 222 | + Slice 223 | + Slice 224 | + Slice 225 | + Slice 226 | + Slice>, 227 | Token: crate::arch::Token, 228 | { 229 | } 230 | 231 | /// Wrapper for producing a mutable reference from an unaligned pointer. 
232 | pub struct RefMut<'a, V> 233 | where 234 | V: Vector, 235 | { 236 | source: *mut V::Scalar, 237 | temp: V, 238 | lifetime: PhantomData<&'a V::Scalar>, 239 | } 240 | 241 | impl<'a, V> RefMut<'a, V> 242 | where 243 | V: Vector, 244 | { 245 | #[inline] 246 | fn new(token: V::Token, source: *mut V::Scalar) -> Self { 247 | Self { 248 | source, 249 | temp: V::zeroed(token), 250 | lifetime: PhantomData, 251 | } 252 | } 253 | } 254 | 255 | impl<'a, V> core::ops::Deref for RefMut<'a, V> 256 | where 257 | V: Vector, 258 | { 259 | type Target = V; 260 | 261 | #[inline] 262 | fn deref(&self) -> &V { 263 | &self.temp 264 | } 265 | } 266 | 267 | impl<'a, V> core::ops::DerefMut for RefMut<'a, V> 268 | where 269 | V: Vector, 270 | { 271 | #[inline] 272 | fn deref_mut(&mut self) -> &mut V { 273 | &mut self.temp 274 | } 275 | } 276 | 277 | impl<'a, V> core::ops::Drop for RefMut<'a, V> 278 | where 279 | V: Vector, 280 | { 281 | #[inline] 282 | fn drop(&mut self) { 283 | unsafe { 284 | self.temp.write_ptr(self.source); 285 | } 286 | } 287 | } 288 | 289 | /// Wrapper for indexing into overlapping vectors. 290 | pub struct Overlapping<'a, V> 291 | where 292 | V: Vector, 293 | { 294 | slice: &'a [V::Scalar], 295 | phantom: PhantomData, 296 | } 297 | 298 | #[allow(clippy::len_without_is_empty)] 299 | impl<'a, V> Overlapping<'a, V> 300 | where 301 | V: Vector, 302 | { 303 | /// Create a new overlapping vector slice. 304 | #[inline] 305 | pub fn new( 306 | #[allow(unused_variables)] token: impl Into, 307 | slice: &'a [V::Scalar], 308 | ) -> Self { 309 | assert!( 310 | slice.len() >= V::width(), 311 | "slice must be at least as wide as the vector" 312 | ); 313 | Self { 314 | slice, 315 | phantom: PhantomData, 316 | } 317 | } 318 | 319 | /// Returns the number of overlapping vectors. 320 | /// 321 | /// Equal to `slice.len() - V::width() + 1`. 
322 | #[inline] 323 | pub fn len(&self) -> usize { 324 | self.slice.len() - V::width() + 1 325 | } 326 | 327 | /// Returns the vector offset `index` into the slice of scalars. 328 | #[inline] 329 | pub fn get(&self, index: usize) -> Option { 330 | if index < self.len() { 331 | Some(unsafe { self.get_unchecked(index) }) 332 | } else { 333 | None 334 | } 335 | } 336 | 337 | /// Returns the vector offset `index` into the slice of scalars. 338 | /// 339 | /// # Safety 340 | /// Index must be less than `len()`, i.e. the underlying slice must be at least `index 341 | /// + V::width()` long. 342 | #[inline] 343 | pub unsafe fn get_unchecked(&self, index: usize) -> V 344 | where 345 | V: Vector, 346 | { 347 | V::read_ptr(V::Token::new_unchecked(), self.slice.as_ptr().add(index)) 348 | } 349 | } 350 | 351 | /// Wrapper for indexing into overlapping mutable vectors. 352 | pub struct OverlappingMut<'a, V> 353 | where 354 | V: Vector, 355 | { 356 | slice: &'a mut [V::Scalar], 357 | phantom: PhantomData, 358 | } 359 | 360 | #[allow(clippy::len_without_is_empty)] 361 | impl<'a, V> OverlappingMut<'a, V> 362 | where 363 | V: Vector, 364 | { 365 | /// Create a new overlapping vector slice. 366 | #[inline] 367 | pub fn new( 368 | #[allow(unused_variables)] token: impl Into, 369 | slice: &'a mut [V::Scalar], 370 | ) -> Self { 371 | assert!( 372 | slice.len() >= V::width(), 373 | "slice must be at least as wide as the vector" 374 | ); 375 | Self { 376 | slice, 377 | phantom: PhantomData, 378 | } 379 | } 380 | 381 | /// Returns the number of overlapping vectors. 382 | /// 383 | /// Equal to `slice.len() - V::width() + 1`. 384 | #[inline] 385 | pub fn len(&self) -> usize { 386 | self.slice.len() - V::width() + 1 387 | } 388 | 389 | /// Returns the vector offset `index` into the slice of scalars. 
390 | #[inline] 391 | pub fn get(&self, index: usize) -> Option { 392 | if index < self.len() { 393 | Some(unsafe { self.get_unchecked(index) }) 394 | } else { 395 | None 396 | } 397 | } 398 | 399 | /// Returns the vector offset `index` into the slice of scalars. 400 | /// 401 | /// # Safety 402 | /// Index must be less than `len()`, i.e. the underlying slice must be at least `index 403 | /// + V::width()` long. 404 | #[inline] 405 | pub unsafe fn get_unchecked(&self, index: usize) -> V { 406 | V::read_ptr(V::Token::new_unchecked(), self.slice.as_ptr().add(index)) 407 | } 408 | 409 | /// Returns the mutable vector offset `index` into the slice of scalars. 410 | #[inline] 411 | pub fn get_mut(&'a mut self, index: usize) -> Option> { 412 | if index < self.len() { 413 | Some(unsafe { self.get_unchecked_mut(index) }) 414 | } else { 415 | None 416 | } 417 | } 418 | 419 | /// Returns the mutable vector offset `index` into the slice of scalars. 420 | /// 421 | /// # Safety 422 | /// Index must be less than `len()`, i.e. the underlying slice must be at least `index 423 | /// + V::width()` long. 424 | #[inline] 425 | pub unsafe fn get_unchecked_mut(&'a mut self, index: usize) -> RefMut<'a, V> { 426 | RefMut::new( 427 | V::Token::new_unchecked(), 428 | self.slice.as_mut_ptr().add(index), 429 | ) 430 | } 431 | } 432 | -------------------------------------------------------------------------------- /generic-simd/src/vector/mod.rs: -------------------------------------------------------------------------------- 1 | //! Vector type interfaces. 2 | 3 | pub mod width; 4 | 5 | use crate::arch::Token; 6 | use crate::scalar::Scalar; 7 | use core::ops::{ 8 | Add, AddAssign, Deref, DerefMut, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign, 9 | }; 10 | 11 | /// Indicates the widest native vector. 12 | pub trait Native { 13 | type Width: width::Width; 14 | } 15 | 16 | /// Convenience type for the widest native vector size. 
17 | pub type NativeWidth = >::Width; 18 | 19 | /// Convenience type for the widest native vector. 20 | pub type NativeVector = VectorOf, Token>; 21 | 22 | /// Convenience type for the vector with a particular width. 23 | pub type VectorOf = >::Vector; 24 | 25 | /// The fundamental vector type. 26 | /// 27 | /// # Safety 28 | /// This trait may only be implemented for types that have the memory layout of an array of 29 | /// `Scalar` with length `width()`. 30 | pub unsafe trait Vector: Copy { 31 | /// The type of elements in the vector. 32 | type Scalar: Copy; 33 | 34 | /// The token that proves support for this vector on the CPU. 35 | type Token: Token; 36 | 37 | /// The number of elements in the vector. 38 | type Width: width::Width; 39 | 40 | /// The underlying type. 41 | type Underlying: Copy; 42 | 43 | /// Returns the number of lanes. 44 | #[inline] 45 | fn width() -> usize { 46 | ::VALUE 47 | } 48 | 49 | /// Creates a new instance of `Token` from a vector. 50 | #[inline] 51 | fn to_token(self) -> Self::Token { 52 | unsafe { Self::Token::new_unchecked() } 53 | } 54 | 55 | /// Returns a slice containing the vector. 56 | #[inline] 57 | fn as_slice(&self) -> &[Self::Scalar] { 58 | unsafe { core::slice::from_raw_parts(self as *const _ as *const _, Self::width()) } 59 | } 60 | 61 | /// Returns a mutable slice containing the vector. 62 | #[inline] 63 | fn as_slice_mut(&mut self) -> &mut [Self::Scalar] { 64 | unsafe { core::slice::from_raw_parts_mut(self as *mut _ as *mut _, Self::width()) } 65 | } 66 | 67 | /// Converts this vector to its underlying type, asserting that size matches size and alignment matches alignment. 68 | #[inline] 69 | fn to_underlying(self) -> Self::Underlying { 70 | assert_eq!( 71 | ( 72 | core::mem::size_of::(), 73 | core::mem::align_of::(), 74 | ), 75 | (core::mem::size_of::(), core::mem::align_of::(),) 76 | ); 77 | unsafe { core::mem::transmute_copy(&self) } 78 | } 79 | 80 | /// Converts the underlying type to a vector, asserting that size matches size and alignment matches alignment.
81 | #[inline] 82 | fn from_underlying( 83 | #[allow(unused_variables)] token: Self::Token, 84 | underlying: Self::Underlying, 85 | ) -> Self { 86 | assert_eq!( 87 | ( 88 | core::mem::size_of::(), 89 | core::mem::align_of::(), 90 | ), 91 | (core::mem::size_of::(), core::mem::align_of::(),) 92 | ); 93 | unsafe { core::mem::transmute_copy(&underlying) } 94 | } 95 | 96 | /// Read from a pointer using an unaligned read. 97 | /// 98 | /// # Safety 99 | /// * `from` must point to an array of length at least `width()`. 100 | #[inline] 101 | unsafe fn read_ptr( 102 | #[allow(unused_variables)] token: Self::Token, 103 | from: *const Self::Scalar, 104 | ) -> Self { 105 | (from as *const Self).read_unaligned() 106 | } 107 | 108 | /// Read from a vector-aligned pointer. 109 | /// 110 | /// # Safety 111 | /// * `from` must point to an array of length at least `width()`. 112 | /// * `from` must be aligned for the vector type. 113 | #[inline] 114 | unsafe fn read_aligned_ptr( 115 | #[allow(unused_variables)] token: Self::Token, 116 | from: *const Self::Scalar, 117 | ) -> Self { 118 | (from as *const Self).read() 119 | } 120 | 121 | 122 | 123 | /// Read from a slice without checking the length. 124 | /// 125 | /// # Safety 126 | /// * `from` must have length at least `width()`. 127 | #[inline] 128 | unsafe fn read_unchecked(token: Self::Token, from: &[Self::Scalar]) -> Self { 129 | Self::read_ptr(token, from.as_ptr()) 130 | } 131 | 132 | /// Read from a slice. 133 | /// 134 | /// # Panics 135 | /// Panics if the length of `from` is less than `width()`. 136 | #[inline] 137 | fn read(token: Self::Token, from: &[Self::Scalar]) -> Self { 138 | assert!( 139 | from.len() >= Self::width(), 140 | "source not large enough to load vector" 141 | ); 142 | unsafe { Self::read_unchecked(token, from) } 143 | } 144 | 145 | /// Write to a pointer.
146 | /// 147 | /// # Safety 148 | /// * `to` must point to an array of length at least `width()`. 149 | #[inline] 150 | unsafe fn write_ptr(self, to: *mut Self::Scalar) { 151 | (to as *mut Self).write_unaligned(self); 152 | } 153 | 154 | /// Write to a vector-aligned pointer. 155 | /// 156 | /// # Safety 157 | /// * `to` must point to an array of length at least `width()`. 158 | /// * `to` must be aligned for the vector type. 159 | #[inline] 160 | unsafe fn write_aligned_ptr(self, to: *mut Self::Scalar) { 161 | (to as *mut Self).write(self); 162 | } 163 | 164 | /// Write to a slice without checking the length. 165 | /// 166 | /// # Safety 167 | /// * `to` must have length at least `width()`. 168 | #[inline] 169 | unsafe fn write_unchecked(self, to: &mut [Self::Scalar]) { 170 | self.write_ptr(to.as_mut_ptr()); 171 | } 172 | 173 | /// Write to a slice. 174 | /// 175 | /// # Panics 176 | /// Panics if the length of `to` is less than `width()`. 177 | #[inline] 178 | fn write(self, to: &mut [Self::Scalar]) { 179 | assert!( 180 | to.len() >= Self::width(), 181 | "destination not large enough to store vector" 182 | ); 183 | unsafe { self.write_unchecked(to) }; 184 | } 185 | 186 | /// Create a new vector with each lane containing zeroes. 187 | fn zeroed(token: Self::Token) -> Self; 188 | 189 | /// Create a new vector with each lane containing the provided value. 190 | fn splat(token: Self::Token, from: Self::Scalar) -> Self; 191 | } 192 | 193 | /// A supertrait for vectors supporting typical arithmetic operations.
194 | pub trait Ops: 195 | Vector 196 | + AsRef<[::Scalar]> 197 | + AsMut<[::Scalar]> 198 | + Deref::Scalar]> 199 | + DerefMut 200 | + Add 201 | + Add<::Scalar, Output = Self> 202 | + AddAssign 203 | + AddAssign<::Scalar> 204 | + Sub 205 | + Sub<::Scalar, Output = Self> 206 | + SubAssign 207 | + SubAssign<::Scalar> 208 | + Mul 209 | + Mul<::Scalar, Output = Self> 210 | + MulAssign 211 | + MulAssign<::Scalar> 212 | + Div 213 | + Div<::Scalar, Output = Self> 214 | + DivAssign 215 | + DivAssign<::Scalar> 216 | { 217 | } 218 | impl Ops for V where 219 | V: Vector 220 | + AsRef<[::Scalar]> 221 | + AsMut<[::Scalar]> 222 | + Deref::Scalar]> 223 | + DerefMut 224 | + Add 225 | + Add<::Scalar, Output = V> 226 | + AddAssign 227 | + AddAssign<::Scalar> 228 | + Sub 229 | + Sub<::Scalar, Output = V> 230 | + SubAssign 231 | + SubAssign<::Scalar> 232 | + Mul 233 | + Mul<::Scalar, Output = V> 234 | + MulAssign 235 | + MulAssign<::Scalar> 236 | + Div 237 | + Div<::Scalar, Output = V> 238 | + DivAssign 239 | + DivAssign<::Scalar> 240 | { 241 | } 242 | 243 | /// A supertrait for `Ops` vectors that also implement `Neg`; blanket-implemented for every such type. 244 | pub trait Signed: Ops + Neg {} 245 | impl Signed for V where V: Ops + Neg {} 246 | 247 | /// Complex valued vectors; every implementor must also be `Signed`. 248 | pub trait Complex: Signed { 249 | /// The real scalar type. 250 | type RealScalar: Copy; 251 | 252 | /// Returns the conjugate of this vector. 253 | fn conj(self) -> Self; 254 | 255 | /// Returns this vector multiplied by i. 256 | fn mul_i(self) -> Self; 257 | 258 | /// Returns this vector multiplied by -i. 259 | fn mul_neg_i(self) -> Self; 260 | } 261 | -------------------------------------------------------------------------------- /generic-simd/src/vector/width.rs: -------------------------------------------------------------------------------- 1 | //! Types indicating widths of vectors. 2 | 3 | /// Indicates the width of a vector through the associated constant `VALUE`. 4 | pub trait Width { 5 | const VALUE: usize; 6 | } 7 | 8 | /// Indicates a vector contains 1 lane.
9 | pub struct W1; 10 | 11 | /// Marker type indicating a vector contains 2 lanes (`Width::VALUE` is 2). 12 | pub struct W2; 13 | 14 | /// Marker type indicating a vector contains 4 lanes (`Width::VALUE` is 4). 15 | pub struct W4; 16 | 17 | /// Marker type indicating a vector contains 8 lanes (`Width::VALUE` is 8). 18 | pub struct W8; 19 | 20 | impl Width for W1 { 21 | const VALUE: usize = 1; 22 | } 23 | 24 | impl Width for W2 { 25 | const VALUE: usize = 2; 26 | } 27 | 28 | impl Width for W4 { 29 | const VALUE: usize = 4; 30 | } 31 | 32 | impl Width for W8 { 33 | const VALUE: usize = 8; 34 | } 35 | --------------------------------------------------------------------------------