├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── generic-simd-macros
    ├── Cargo.toml
    ├── LICENSE-APACHE
    ├── LICENSE-MIT
    ├── README.md
    └── src
    │   └── lib.rs
├── generic-simd-test
    ├── Cargo.toml
    ├── src
    │   └── lib.rs
    └── webdriver.json
└── generic-simd
    ├── Cargo.toml
    ├── LICENSE-APACHE
    ├── LICENSE-MIT
    ├── README.md
    └── src
        ├── alignment.rs
        ├── arch
            ├── arm
            │   ├── complex.rs
            │   └── mod.rs
            ├── generic.rs
            ├── mod.rs
            ├── wasm
            │   ├── complex.rs
            │   └── mod.rs
            └── x86
            │   ├── complex.rs
            │   └── mod.rs
        ├── implementation.rs
        ├── lib.rs
        ├── pointer.rs
        ├── scalar.rs
        ├── shim
            ├── mod.rs
            ├── token.rs
            └── width.rs
        ├── slice.rs
        └── vector
            ├── mod.rs
            └── width.rs


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: Build  # Workflow name; the README build badge URL refers to it.
  2 | 
  3 | on: [push]  # Triggered by push events only.
  4 | 
  5 | jobs:
  6 |   test:  # Native test matrix: toolchain version x runner OS x feature list.
  7 |     strategy:
  8 |       matrix:
  9 |         version:
 10 |           - '1.42.0'  # Oldest toolchain exercised by this workflow.
 11 |           - 'stable'
 12 |           - 'beta'
 13 |           - 'nightly'
 14 |         os:
 15 |           - 'macos-latest'
 16 |           - 'ubuntu-latest'
 17 |           - 'windows-latest'
 18 |         features:  # Each entry is a list; the test step expands it into --features flags.
 19 |           - []
 20 |           - ['alloc']
 21 |           - ['complex']
 22 |           - ['std']
 23 |           - ['std', 'alloc']
 24 |           - ['std', 'complex']
 25 | 
 26 |     runs-on: '${{ matrix.os }}'
 27 | 
 28 |     steps:
 29 |     - uses: 'actions/checkout@v1'
 30 |     - name: 'Install toolchain'
 31 |       uses: 'actions-rs/toolchain@v1'
 32 |       with:
 33 |         toolchain: '${{ matrix.version }}'
 34 |         override: true
 35 |         profile: 'minimal'
 36 |     - name: 'Run tests'
 37 |       shell: 'bash'  # bash on every OS so the jq pipeline below behaves the same everywhere.
 38 |       run: |
 39 |         FEATURES_FLAG=$(echo '${{ toJson(matrix.features) }}' | jq -r 'map("--features \(.)") | join(" ")')
 40 |         cargo test --verbose --no-default-features $FEATURES_FLAG --manifest-path generic-simd/Cargo.toml
 41 |         cargo test --verbose --no-default-features $FEATURES_FLAG --manifest-path generic-simd-test/Cargo.toml
 42 | 
 43 |   wasm:  # Browser-based wasm tests, with and without the simd128 target feature.
 44 |     strategy:
 45 |       matrix:
 46 |         rustflags:
 47 |         - '-Ctarget-feature=-simd128'
 48 |         - '-Ctarget-feature=+simd128'
 49 |         features:  # Passed verbatim after `--` on the wasm-pack command line.
 50 |         - '--no-default-features'
 51 |         - '--no-default-features --features complex'
 52 |         - '--no-default-features --features nightly'
 53 |         - '--no-default-features --features nightly --features complex'
 54 | 
 55 |     runs-on: 'ubuntu-latest'
 56 | 
 57 |     steps:
 58 |     - uses: 'actions/checkout@v1'
 59 |     - name: 'Install wasm-pack'
 60 |       run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
 61 |     - name: 'Install nightly rust'
 62 |       uses: 'actions-rs/toolchain@v1'
 63 |       with:
 64 |         toolchain: 'nightly'
 65 |         override: true
 66 |         profile: 'minimal'
 67 |     - name: 'Run tests'
 68 |       env:
 69 |         RUSTFLAGS: '${{ matrix.rustflags }}'  # Selects the simd128 on/off variant for this run.
 70 |       run: wasm-pack test --chrome --headless generic-simd-test -- ${{ matrix.features }}
 71 | 
 72 | 
 73 |   cross:  # Cross-compiled test runs for the ARM targets, driven by the `cross` tool.
 74 |     strategy:
 75 |       matrix:
 76 |         target:
 77 |         - 'aarch64-unknown-linux-gnu'
 78 |         - 'armv7-unknown-linux-gnueabihf'
 79 |         - 'arm-unknown-linux-gnueabihf'
 80 |         features:  # Spliced verbatim into both cross invocations below.
 81 |         - '--no-default-features'
 82 |         - '--no-default-features --features complex'
 83 |         - '--no-default-features --features nightly'
 84 |         - '--no-default-features --features nightly --features complex'
 85 | 
 86 |     runs-on: 'ubuntu-latest'
 87 | 
 88 |     steps:
 89 |     - uses: 'actions/checkout@v1'
 90 |     - name: 'Install cross'
 91 |       run: cargo install cross
 92 |     - name: 'Run tests'
 93 |       run: |
 94 |         cross +nightly test --target ${{ matrix.target }} --verbose ${{ matrix.features }} --manifest-path generic-simd/Cargo.toml
 95 |         cross +nightly test --target ${{ matrix.target }} --verbose ${{ matrix.features }} --manifest-path generic-simd-test/Cargo.toml
 96 | 
 97 | 
 98 |   lint:  # Static checks on stable: clippy, rustfmt, and documentation link validation.
 99 |     runs-on: ubuntu-latest
100 |     steps:
101 |     - uses: actions/checkout@v1
102 |     - name: Install toolchain
103 |       uses: actions-rs/toolchain@v1
104 |       with:
105 |         toolchain: stable
106 |         override: true
107 |         profile: minimal
108 |         components: clippy, rustfmt  # The minimal profile omits these, so request them explicitly.
109 |     - name: Clippy lint  # This job has no strategy matrix, so the command takes no matrix expression.
110 |       run: cargo clippy --all-targets --manifest-path generic-simd/Cargo.toml -- -D warnings
111 |     - name: Check formatting
112 |       run: cargo fmt -- --check
113 |     - name: Deadlinks
114 |       run: |
115 |         cargo install cargo-deadlinks
116 |         cargo doc
117 |         cargo deadlinks -v --check-http
118 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 | *.swp
4 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | All notable changes to this project will be documented in this file.
 3 | 
 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 6 | 
 7 | ## [Unreleased]
 8 | 
 9 | ## [0.1.0] - 2020-09-07
10 | ### Added
11 | - Initial release
12 | 
13 | [Unreleased]: https://github.com/calebzulawski/generic-simd/compare/0.1.0...HEAD
14 | [0.1.0]: https://github.com/calebzulawski/generic-simd/releases/tag/0.1.0
15 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | 
3 | members = [
4 |     "generic-simd",
5 |     "generic-simd-macros",
6 |     "generic-simd-test",
7 | ]
8 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | Copyright 2020 Caleb Zulawski
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | generic-simd
 2 | ============
 3 | [![Build Status](https://github.com/calebzulawski/generic-simd/workflows/Build/badge.svg?branch=master)](https://github.com/calebzulawski/generic-simd/actions)
 4 | ![Rustc Version 1.42+](https://img.shields.io/badge/rustc-1.42+-lightgray.svg)
 5 | [![License](https://img.shields.io/crates/l/generic-simd)](https://crates.io/crates/generic-simd)
 6 | [![Crates.io](https://img.shields.io/crates/v/generic-simd)](https://crates.io/crates/generic-simd)
 7 | [![Rust Documentation](https://img.shields.io/badge/api-rustdoc-blue.svg)](https://docs.rs/generic-simd)
 8 | 
 9 | NOTE: This project has been archived. Please use `std::simd` instead.
10 | 
11 | generic-simd provides safe and idiomatic zero-cost abstractions for writing explicit cross-platform SIMD operations.
12 | 
13 | ## License
14 | generic-simd is distributed under the terms of both the MIT license and the Apache License (Version 2.0).
15 | 
16 | See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) for details.
17 | 


--------------------------------------------------------------------------------
/generic-simd-macros/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "generic-simd-macros"
 3 | version = "0.1.0"
 4 | authors = ["Caleb Zulawski <caleb.zulawski@gmail.com>"]
 5 | license = "MIT OR Apache-2.0"
 6 | description = "Implementation crate for generic-simd"
 7 | repository = "https://github.com/calebzulawski/generic-simd"
 8 | categories = []
 9 | readme = "README.md"
10 | include = [
11 |     "/Cargo.toml",
12 |     "/LICENSE-APACHE",
13 |     "/LICENSE-MIT",
14 |     "/README.md",
15 |     "/src/**",
16 |     "/tests/**",
17 | ]
18 | edition = "2018"
19 | 
20 | [features]
21 | default = []
22 | nightly = []
23 | 
24 | [lib]
25 | proc-macro = true
26 | 
27 | [dependencies]
28 | syn = { version = "1", features = ["full"] }
29 | quote = "1"
30 | 


--------------------------------------------------------------------------------
/generic-simd-macros/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | ../LICENSE-APACHE


--------------------------------------------------------------------------------
/generic-simd-macros/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | ../LICENSE-MIT


--------------------------------------------------------------------------------
/generic-simd-macros/README.md:
--------------------------------------------------------------------------------
1 | Implementation crate for [`generic-simd`](https://docs.rs/generic-simd).
2 | 


--------------------------------------------------------------------------------
/generic-simd-macros/src/lib.rs:
--------------------------------------------------------------------------------
 1 | extern crate proc_macro;
 2 | use proc_macro::TokenStream;
 3 | use quote::quote;
 4 | use syn::{parse_macro_input, Ident, ItemFn};
 5 | 
 6 | #[proc_macro_attribute] // Attribute macro: wraps the annotated fn in `multiversion` and binds a SIMD token inside it.
 7 | pub fn dispatch(args: TokenStream, input: TokenStream) -> TokenStream {
 8 |     let ItemFn {
 9 |         attrs,
10 |         vis,
11 |         sig,
12 |         block,
13 |     } = parse_macro_input!(input as ItemFn); // The annotated item must parse as a function.
14 |     let feature = parse_macro_input!(args as Ident); // Attribute argument: the identifier the token is bound to.
15 | 
16 |     let build_fn = |wasm| { // Builds the generated fn; `wasm` controls whether a wasm32+simd128 clone is requested.
17 |         let nightly = cfg!(feature = "nightly"); // This macro crate's own `nightly` feature, fixed when the macro crate is built.
18 |         let clone_wasm = if nightly && wasm {
19 |             Some(quote! { #[clone(target = "wasm32+simd128")] })
20 |         } else {
21 |             None
22 |         };
23 |         let clone_arm = if nightly { // Requested whenever `nightly` is on, independent of `wasm`.
24 |             Some(quote! { #[clone(target = "aarch64+neon")] })
25 |         } else {
26 |             None
27 |         };
28 |         quote! {
29 |             #[generic_simd::multiversion::multiversion]
30 |             #[clone(target = "[x86|x86_64]+avx")]
31 |             #[clone(target = "[x86|x86_64]+sse4.1")]
32 |             #clone_wasm // Interpolates to nothing when the Option is None.
33 |             #clone_arm // Interpolates to nothing when the Option is None.
34 |             #[crate_path(path = "generic_simd::multiversion")]
35 |             #(#attrs)* // The caller's original attributes are re-emitted after the multiversion ones.
36 |             #vis
37 |             #sig
38 |             {
39 |                 #[target_cfg(target = "[x86|x86_64]+sse4.1")] // Each binding below is gated per target; if two survive, the later `let` shadows the earlier.
40 |                 let #feature = unsafe { <generic_simd::arch::x86::Sse as generic_simd::arch::Token>::new_unchecked() };
41 | 
42 |                 #[target_cfg(target = "[x86|x86_64]+avx")]
43 |                 let #feature = unsafe { <generic_simd::arch::x86::Avx as generic_simd::arch::Token>::new_unchecked() };
44 | 
45 |                 #[target_cfg(target = "wasm32+simd128")]
46 |                 let #feature = unsafe { <generic_simd::arch::wasm::Simd128 as generic_simd::arch::Token>::new_unchecked() };
47 | 
48 |                 #[target_cfg(target = "[arm|aarch64]+neon")]
49 |                 let #feature = unsafe { <generic_simd::arch::arm::Neon as generic_simd::arch::Token>::new_unchecked() };
50 | 
51 |                 #[target_cfg(not(any( // Fallback: none of the four SIMD targets above applies.
52 |                     target = "[x86|x86_64]+sse4.1",
53 |                     target = "[x86|x86_64]+avx",
54 |                     target = "[arm|aarch64]+neon",
55 |                     target = "wasm32+simd128",
56 |                 )))]
57 |                 let #feature = <generic_simd::arch::generic::Generic as generic_simd::arch::Token>::new().unwrap(); // NOTE(review): presumably `Generic::new()` never fails — confirm in arch/generic.rs.
58 | 
59 |                 #block // The caller's original body runs with `#feature` in scope.
60 |             }
61 |         }
62 |     };
63 |     let normal = build_fn(false); // Variant without the wasm32+simd128 clone.
64 |     let with_wasm = build_fn(true); // Variant that may carry the wasm32+simd128 clone (nightly only).
65 |     let output = quote! { // Both variants are emitted; the complementary cfg gates keep exactly one.
66 |         #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
67 |         #with_wasm
68 | 
69 |         #[cfg(not(all(target_arch = "wasm32", target_feature = "simd128"),))]
70 |         #normal
71 |     };
72 |     output.into()
73 | }
74 | 


--------------------------------------------------------------------------------
/generic-simd-test/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "generic-simd-test"
 3 | version = "0.1.0"
 4 | authors = ["Caleb Zulawski <caleb.zulawski@gmail.com>"]
 5 | edition = "2018"
 6 | publish = false
 7 | 
 8 | [features]
 9 | default = ["std", "complex"]
10 | std = ["generic-simd/std"]
11 | complex = ["generic-simd/complex"]
12 | alloc = ["generic-simd/alloc"]
13 | nightly = ["generic-simd/nightly"]
14 | 
15 | [dependencies]
16 | generic-simd = { path = "../generic-simd", default-features = false }
17 | num-complex = { version = "0.3", default-features = false, features = ["rand"] }
18 | num-traits = "0.2"
19 | rand = "0.7"
20 | rand_pcg = "0.2"
21 | paste = "1"
22 | wasm-bindgen-test = "0.3"
23 | 
24 | [dev-dependencies]
25 | 


--------------------------------------------------------------------------------
/generic-simd-test/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #![cfg_attr(
  2 |     all(feature = "nightly", target_arch = "wasm32"),
  3 |     feature(wasm_simd, wasm_target_feature)
  4 | )]
  5 | #![cfg_attr(
  6 |     all(feature = "nightly", target_arch = "aarch64"),
  7 |     feature(stdsimd, aarch64_target_feature)
  8 | )]
  9 | #![cfg_attr(
 10 |     all(feature = "nightly", target_arch = "arm"),
 11 |     feature(stdsimd, arm_target_feature)
 12 | )]
 13 | 
 14 | use generic_simd::{dispatch, scalar::ScalarExt, vector::Signed};
 15 | use num_traits::Num;
 16 | use rand::distributions::Standard;
 17 | use rand::prelude::*;
 18 | use rand::SeedableRng;
 19 | 
 20 | wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
 21 | 
 22 | #[cfg(feature = "complex")]
 23 | use num_complex::{Complex, ComplexDistribution};
 24 | 
 25 | /// Fills `vector` with seeded pseudo-random lanes, applies `vfunc` to the whole
 26 | /// vector, and asserts each output lane equals `sfunc` applied to that input lane.
 27 | #[inline]
 28 | fn unary_op_impl<D, V, VFunc, SFunc>(distribution: D, mut vector: V, vfunc: VFunc, sfunc: SFunc)
 29 | where
 30 |     D: rand::distributions::Distribution<V::Scalar> + Copy,
 31 |     V: Signed,
 32 |     V::Scalar: Num + core::ops::Neg<Output = V::Scalar> + core::fmt::Debug + Copy,
 33 |     VFunc: Fn(V) -> V,
 34 |     SFunc: Fn(V::Scalar) -> V::Scalar,
 35 | {
 36 |     // Constant seed, so every run draws the same inputs.
 37 |     let mut prng = rand_pcg::Pcg32::seed_from_u64(999);
 38 |     for lane in vector.as_slice_mut() {
 39 |         *lane = prng.sample(distribution);
 40 |     }
 41 |     let actual = vfunc(vector);
 42 |     (0..V::width()).for_each(|idx| assert_eq!(actual[idx], sfunc(vector[idx])));
 43 | }
 44 | 
 45 | /// Fills `a` and then `b` with seeded pseudo-random lanes, applies `vfunc` to the
 46 | /// pair, and asserts each output lane equals `sfunc` of the matching input lanes.
 47 | #[inline]
 48 | fn binary_op_impl<D, V, VFunc, SFunc>(
 49 |     distribution: D,
 50 |     (mut a, mut b): (V, V),
 51 |     vfunc: VFunc,
 52 |     sfunc: SFunc,
 53 | ) where
 54 |     D: rand::distributions::Distribution<V::Scalar> + Copy,
 55 |     V: Signed,
 56 |     V::Scalar: Num + core::ops::Neg<Output = V::Scalar> + core::fmt::Debug + Copy,
 57 |     VFunc: Fn(V, V) -> V,
 58 |     SFunc: Fn(V::Scalar, V::Scalar) -> V::Scalar,
 59 | {
 60 |     let mut prng = rand_pcg::Pcg32::seed_from_u64(999);
 61 |     // `a` is filled completely before `b`: samples go to a's lanes first, then b's.
 62 |     for lane in a.as_slice_mut() {
 63 |         *lane = prng.sample(distribution);
 64 |     }
 65 |     for lane in b.as_slice_mut() {
 66 |         *lane = prng.sample(distribution);
 67 |     }
 68 |     let actual = vfunc(a, b);
 69 |     (0..V::width()).for_each(|idx| assert_eq!(actual[idx], sfunc(a[idx], b[idx])));
 70 | }
 71 | 
 72 | /// Draws one scalar `b`, fills `a` with seeded pseudo-random lanes, and asserts
 73 | /// that `vfunc(a, b)` matches `sfunc(a[i], b)` in every lane.
 74 | #[inline]
 75 | fn binary_scalar_op_impl<D, V, VFunc, SFunc>(distribution: D, mut a: V, vfunc: VFunc, sfunc: SFunc)
 76 | where
 77 |     D: rand::distributions::Distribution<V::Scalar> + Copy,
 78 |     V: Signed,
 79 |     V::Scalar: Num + core::ops::Neg<Output = V::Scalar> + core::fmt::Debug + Copy,
 80 |     VFunc: Fn(V, V::Scalar) -> V,
 81 |     SFunc: Fn(V::Scalar, V::Scalar) -> V::Scalar,
 82 | {
 83 |     let mut prng = rand_pcg::Pcg32::seed_from_u64(999);
 84 |     // The scalar operand is sampled first, before any lane of `a`.
 85 |     let b = prng.sample(distribution);
 86 |     for lane in a.as_slice_mut() {
 87 |         *lane = prng.sample(distribution);
 88 |     }
 89 |     let actual = vfunc(a, b);
 90 |     (0..V::width()).for_each(|idx| assert_eq!(actual[idx], sfunc(a[idx], b)));
 91 | }
 92 | 
 93 | /// Asserts that in-place `vfunc` on a copy of `a` matches lane-wise in-place `sfunc` on `a` itself.
 94 | #[inline]
 95 | fn assign_op_impl<D, V, VFunc, SFunc>(
 96 |     distribution: D,
 97 |     (mut a, mut b): (V, V),
 98 |     vfunc: VFunc,
 99 |     sfunc: SFunc,
100 | ) where
101 |     D: rand::distributions::Distribution<V::Scalar> + Copy,
102 |     V: Signed,
103 |     V::Scalar: Num + core::ops::Neg<Output = V::Scalar> + core::fmt::Debug + Copy,
104 |     VFunc: Fn(&mut V, V),
105 |     SFunc: Fn(&mut V::Scalar, V::Scalar),
106 | {
107 |     let mut prng = rand_pcg::Pcg32::seed_from_u64(999);
108 |     for lane in a.as_slice_mut() {
109 |         *lane = prng.sample(distribution);
110 |     }
111 |     for lane in b.as_slice_mut() {
112 |         *lane = prng.sample(distribution);
113 |     }
114 |     let mut actual: V = a;
115 |     vfunc(&mut actual, b);
116 |     for idx in 0..V::width() {
117 |         sfunc(&mut a[idx], b[idx]);
118 |         assert_eq!(actual[idx], a[idx]);
119 |     }
120 | }
121 | 
122 | /// Asserts that in-place `vfunc(&mut copy_of_a, b)` matches applying `sfunc(&mut a[i], b)` to each lane.
123 | #[inline]
124 | fn assign_scalar_op_impl<D, V, VFunc, SFunc>(distribution: D, mut a: V, vfunc: VFunc, sfunc: SFunc)
125 | where
126 |     D: rand::distributions::Distribution<V::Scalar> + Copy,
127 |     V: Signed,
128 |     V::Scalar: Num + core::ops::Neg<Output = V::Scalar> + core::fmt::Debug + Copy,
129 |     VFunc: Fn(&mut V, V::Scalar),
130 |     SFunc: Fn(&mut V::Scalar, V::Scalar),
131 | {
132 |     let mut prng = rand_pcg::Pcg32::seed_from_u64(999);
133 |     let b = prng.sample(distribution);
134 |     for lane in a.as_slice_mut() {
135 |         *lane = prng.sample(distribution);
136 |     }
137 |     let mut actual: V = a;
138 |     vfunc(&mut actual, b);
139 |     for idx in 0..V::width() {
140 |         sfunc(&mut a[idx], b);
141 |         assert_eq!(actual[idx], a[idx]);
142 |     }
143 | }
144 | 
145 | macro_rules! ops_test { // Generates operator tests for one scalar type; arms starting with `@` are internal helpers.
146 |     {
147 |         $token:ident, $type:ty // entry arm: token identifier and scalar type under test
148 |     } => {
149 |         pub mod width_native { // one module per vector width; each hands a different `zeroed*` constructor name to `@wrapper`
150 |             use super::*;
151 |             ops_test! { @wrapper $token, $type, zeroed_native }
152 |         }
153 |         pub mod width_1 {
154 |             use super::*;
155 |             ops_test! { @wrapper $token, $type, zeroed1 }
156 |         }
157 |         pub mod width_2 {
158 |             use super::*;
159 |             ops_test! { @wrapper $token, $type, zeroed2 }
160 |         }
161 |         pub mod width_4 {
162 |             use super::*;
163 |             ops_test! { @wrapper $token, $type, zeroed4 }
164 |         }
165 |         pub mod width_8 {
166 |             use super::*;
167 |             ops_test! { @wrapper $token, $type, zeroed8 }
168 |         }
169 |     };
170 |     {
171 |         @wrapper $token:ident, $type:ty, $init:ident // @wrapper: instantiate every operator test for one constructor
172 |     } => { // columns below: scalar type, constructor, test name, helper function, token, operator path
173 |         ops_test! { @impl $type, $init, add,               binary_op_impl,        $token, core::ops::Add::add }
174 |         ops_test! { @impl $type, $init, sub,               binary_op_impl,        $token, core::ops::Sub::sub }
175 |         ops_test! { @impl $type, $init, mul,               binary_op_impl,        $token, core::ops::Mul::mul }
176 |         ops_test! { @impl $type, $init, div,               binary_op_impl,        $token, core::ops::Div::div }
177 |         ops_test! { @impl $type, $init, add_scalar,        binary_scalar_op_impl, $token, core::ops::Add::add }
178 |         ops_test! { @impl $type, $init, sub_scalar,        binary_scalar_op_impl, $token, core::ops::Sub::sub }
179 |         ops_test! { @impl $type, $init, mul_scalar,        binary_scalar_op_impl, $token, core::ops::Mul::mul }
180 |         ops_test! { @impl $type, $init, div_scalar,        binary_scalar_op_impl, $token, core::ops::Div::div }
181 |         ops_test! { @impl $type, $init, add_assign,        assign_op_impl,        $token, core::ops::AddAssign::add_assign }
182 |         ops_test! { @impl $type, $init, sub_assign,        assign_op_impl,        $token, core::ops::SubAssign::sub_assign }
183 |         ops_test! { @impl $type, $init, mul_assign,        assign_op_impl,        $token, core::ops::MulAssign::mul_assign }
184 |         ops_test! { @impl $type, $init, div_assign,        assign_op_impl,        $token, core::ops::DivAssign::div_assign }
185 |         ops_test! { @impl $type, $init, add_assign_scalar, assign_scalar_op_impl, $token, core::ops::AddAssign::add_assign }
186 |         ops_test! { @impl $type, $init, sub_assign_scalar, assign_scalar_op_impl, $token, core::ops::SubAssign::sub_assign }
187 |         ops_test! { @impl $type, $init, mul_assign_scalar, assign_scalar_op_impl, $token, core::ops::MulAssign::mul_assign }
188 |         ops_test! { @impl $type, $init, div_assign_scalar, assign_scalar_op_impl, $token, core::ops::DivAssign::div_assign }
189 |         ops_test! { @impl $type, $init, neg,               unary_op_impl,         $token, core::ops::Neg::neg }
190 |     };
191 |     { @distribution f32 } => { Standard }; // @distribution: expression used to sample random scalars of the given type
192 |     { @distribution f64 } => { Standard };
193 |     { @distribution Complex<f32> } => { ComplexDistribution::new(Standard, Standard) }; // real and imaginary parts are both drawn from `Standard`
194 |     { @distribution Complex<f64> } => { ComplexDistribution::new(Standard, Standard) };
195 |     {
196 |         @init unary_op_impl, $type:ty, $token:ident, $init:ident // @init: build the initial operand(s) expected by the named helper
197 |     } => {
198 |         <$type>::$init($token) // helpers with one vector operand receive a single value
199 |     };
200 |     {
201 |         @init binary_op_impl, $type:ty, $token:ident, $init:ident
202 |     } => {
203 |         (<$type>::$init($token), <$type>::$init($token)) // helpers with two vector operands receive a pair
204 |     };
205 |     {
206 |         @init binary_scalar_op_impl, $type:ty, $token:ident, $init:ident
207 |     } => {
208 |         <$type>::$init($token)
209 |     };
210 |     {
211 |         @init assign_op_impl, $type:ty, $token:ident, $init:ident
212 |     } => {
213 |         (<$type>::$init($token), <$type>::$init($token))
214 |     };
215 |     {
216 |         @init assign_scalar_op_impl, $type:ty, $token:ident, $init:ident
217 |     } => {
218 |         <$type>::$init($token)
219 |     };
220 |     {
221 |         @impl $type:ty, $init:ident, $name:ident, $test:ident, $token:ident, $func:path // @impl: one dispatched body plus a test entry point per instruction set
222 |     } => {
223 |         paste::paste! {
224 |             #[dispatch($token)] // presumably generates the `<name>_dispatch_*_version` functions called below; confirm against the `dispatch` macro
225 |             pub fn [<$name _dispatch>]() {
226 |                 $test(ops_test!(@distribution $type), ops_test!(@init $test, $type, $token, $init), $func, $func); // the same operator path serves as both the vector and the scalar function
227 |             }
228 | 
229 |             #[test]
230 |             #[wasm_bindgen_test::wasm_bindgen_test]
231 |             pub fn [<$name _generic>]() {
232 |                 [<$name _dispatch_default_version>]() // the default version is called without `unsafe` and without a feature check
233 |             }
234 | 
235 |             #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
236 |             #[test]
237 |             pub fn [<$name _sse>]() {
238 |                 use generic_simd::arch::Token as _;
239 |                 if generic_simd::arch::x86::Sse::new().is_some() { // the body is skipped, and the test passes, when no token can be created
240 |                     unsafe { [<$name _dispatch_sse41_version>]() }
241 |                 }
242 |             }
243 | 
244 |             #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
245 |             #[test]
246 |             pub fn [<$name _avx>]() {
247 |                 use generic_simd::arch::Token as _;
248 |                 if generic_simd::arch::x86::Avx::new().is_some() {
249 |                     unsafe { [<$name _dispatch_avx_version>]() }
250 |                 }
251 |             }
252 | 
253 |             #[cfg(all(feature = "nightly", target_arch = "aarch64"))]
254 |             #[test]
255 |             pub fn [<$name _neon>]() {
256 |                 use generic_simd::arch::Token as _;
257 |                 if generic_simd::arch::arm::Neon::new().is_some() {
258 |                     unsafe { [<$name _dispatch_neon_version>]() }
259 |                 }
260 |             }
261 | 
262 |             #[cfg(all(feature = "nightly", target_arch = "wasm32", target_feature = "simd128"))]
263 |             #[wasm_bindgen_test::wasm_bindgen_test]
264 |             pub fn [<$name _simd128>]() {
265 |                 use generic_simd::arch::Token as _;
266 |                 assert!(generic_simd::arch::wasm::Simd128::new().is_some()); // unlike the other targets, a missing token fails the test here
267 |                 unsafe { [<$name _dispatch_simd128_version>]() }
268 |             }
269 |         }
270 |     };
271 | }
272 | 
273 | pub mod r#f32 { // operator tests for `f32`; `token` is the identifier forwarded to `#[dispatch]`
274 |     use super::*;
275 |     ops_test! { token, f32 }
276 | }
277 | 
278 | pub mod r#f64 { // operator tests for `f64`
279 |     use super::*;
280 |     ops_test! { token, f64 }
281 | }
282 | 
283 | #[cfg(feature = "complex")] // the complex suites are only built with the "complex" feature
284 | pub mod complex_f32 { // operator tests for `Complex<f32>`
285 |     use super::*;
286 |     ops_test! { token, Complex<f32> }
287 | }
288 | 
289 | #[cfg(feature = "complex")]
290 | pub mod complex_f64 { // operator tests for `Complex<f64>`
291 |     use super::*;
292 |     ops_test! { token, Complex<f64> }
293 | }
294 | 


--------------------------------------------------------------------------------
/generic-simd-test/webdriver.json:
--------------------------------------------------------------------------------
1 | {
2 |     "goog:chromeOptions": {
3 |         "args": [
4 |             "--enable-features=WebAssemblySimd"
5 |         ]
6 |     }
7 | }
8 | 


--------------------------------------------------------------------------------
/generic-simd/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "generic-simd"
 3 | version = "0.1.0"
 4 | authors = ["Caleb Zulawski <caleb.zulawski@gmail.com>"]
 5 | license = "MIT OR Apache-2.0"
 6 | description = "safe and idiomatic zero-cost abstractions for writing explicit cross-platform SIMD operations"
 7 | repository = "https://github.com/calebzulawski/generic-simd"
 8 | categories = ["no-std", "api-bindings", "hardware-support"]
 9 | readme = "README.md"
10 | include = [
11 |     "/Cargo.toml",
12 |     "/LICENSE-APACHE",
13 |     "/LICENSE-MIT",
14 |     "/README.md",
15 |     "/src/**",
16 |     "/tests/**",
17 | ]
18 | edition = "2018"
19 | 
20 | [features]
21 | default = ["std", "complex"]
22 | std = ["multiversion/std"]
23 | complex = ["num-complex"]
24 | alloc = []
25 | nightly = ["generic-simd-macros/nightly"]
26 | 
27 | [dependencies]
28 | num-complex = { version = "0.3", default-features = false, optional = true }
29 | generic-simd-macros = { version = "0.1", default-features = false, path = "../generic-simd-macros" }
30 | multiversion = { version = "0.6.1", default-features = false }
31 | 
32 | [package.metadata.docs.rs]
33 | features = ["nightly", "complex", "alloc"]
34 | no-default-features = true
35 | default-target = "x86_64-unknown-linux-gnu"
36 | targets = [
37 |     "x86_64-unknown-linux-gnu",
38 |     "i686-unknown-linux-gnu",
39 |     "aarch64-unknown-linux-gnu",
40 |     "armv7-unknown-linux-gnueabihf",
41 |     "wasm32-unknown-unknown",
42 | ]
43 | rustdoc-args = ["-Ctarget-feature=+simd128"]
44 | 


--------------------------------------------------------------------------------
/generic-simd/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | ../LICENSE-APACHE


--------------------------------------------------------------------------------
/generic-simd/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | ../LICENSE-MIT


--------------------------------------------------------------------------------
/generic-simd/README.md:
--------------------------------------------------------------------------------
1 | ../README.md


--------------------------------------------------------------------------------
/generic-simd/src/alignment.rs:
--------------------------------------------------------------------------------
  1 | //! Alignment helpers.
  2 | 
  3 | #[cfg(all(feature = "alloc", not(feature = "std")))]
  4 | extern crate alloc;
  5 | 
  6 | #[cfg(all(feature = "alloc", not(feature = "std")))]
  7 | use alloc::{
  8 |     alloc::{alloc, Layout},
  9 |     boxed::Box,
 10 | };
 11 | 
 12 | #[cfg(feature = "std")]
 13 | use std::alloc::{alloc, Layout};
 14 | 
 15 | use crate::{
 16 |     arch, scalar,
 17 |     vector::{width, VectorOf},
 18 | };
 19 | 
 20 | #[repr(C)]
 21 | #[derive(Copy, Clone)]
 22 | struct Vectors<Token: arch::Token, Scalar: scalar::ScalarExt<Token>>( // one vector of every supported width for `Scalar` under `Token`
 23 |     VectorOf<Scalar, width::W1, Token>,
 24 |     VectorOf<Scalar, width::W2, Token>,
 25 |     VectorOf<Scalar, width::W4, Token>,
 26 |     VectorOf<Scalar, width::W8, Token>,
 27 | ); // embedded in `AllVectors`, which `MaxAligned` uses as its alignment type
 28 | 
 29 | macro_rules! max_alignment { // defines `AllVectors` and `allocate_max_aligned_slice` for a list of token types
 30 |     { $first:path, $($rest:path,)* } => { // one or more comma-terminated paths, each used as a token type
 31 | 
 32 |         #[doc(hidden)]
 33 |         #[repr(C)]
 34 |         #[derive(Copy, Clone)]
 35 |         pub struct AllVectors<Scalar: scalar::ScalarExt<$first> $(+ scalar::ScalarExt<$rest>)*>( // holds a `Vectors` for every token, so it is at least as aligned as each of them
 36 |             Vectors<$first, Scalar>,
 37 |             $(
 38 |             Vectors<$rest, Scalar>,
 39 |             )*
 40 |         );
 41 | 
 42 |         /// Allocate a boxed slice of scalars with maximum possible vector alignment for a
 43 |         /// particular scalar on the current architecture.
 44 |         ///
 45 |         /// # Panics
 46 |         /// Panics if `count` is 0 or memory allocation fails.
 47 |         #[cfg(any(feature = "std", feature = "alloc"))]
 48 |         pub fn allocate_max_aligned_slice<Scalar: Default + scalar::ScalarExt<$first> $(+ scalar::ScalarExt<$rest>)*>(count: usize) -> Box<[Scalar]> {
 49 |             allocate_aligned_slice::<AllVectors<Scalar>, Scalar>(count) // `AllVectors<Scalar>` only supplies the alignment; the elements are `Scalar`s
 50 |         }
 51 |     }
 52 | }
 53 | 
 54 | crate::call_macro_with_tokens! { max_alignment }
 55 | 
 56 | /// Aligns a value to another type's alignment.
 57 | #[repr(C)]
 58 | pub struct Aligned<AlignTo, T> {
 59 |     alignment: [AlignTo; 0], // zero-length array: contributes `AlignTo`'s alignment but no bytes
 60 |     value: T, // the wrapped value, exposed through `Deref`/`DerefMut`
 61 | }
 62 | 
 63 | impl<AlignTo, T> Aligned<AlignTo, T> {
 64 |     /// Wraps `value` in an `Aligned` whose alignment marker has type `[AlignTo; 0]`.
 65 |     pub fn new(value: T) -> Self {
 66 |         // The empty array literal is the only value the zero-length marker can take.
 67 |         let alignment: [AlignTo; 0] = [];
 68 |         Self { alignment, value }
 69 |     }
 70 | }
 71 | 
 72 | impl<AlignTo, T> core::ops::Deref for Aligned<AlignTo, T> {
 73 |     type Target = T;
 74 | 
 75 |     /// Borrows the wrapped value.
 76 |     fn deref(&self) -> &T {
 77 |         let Self { value, .. } = self;
 78 |         value
 79 |     }
 80 | }
 81 | 
 82 | impl<AlignTo, T> core::ops::DerefMut for Aligned<AlignTo, T> {
 83 |     /// Mutably borrows the wrapped value.
 84 |     fn deref_mut(&mut self) -> &mut T {
 85 |         let Self { value, .. } = self;
 86 |         value
 87 |     }
 88 | }
 85 | 
 86 | impl<AlignTo: Copy, T: Copy> Copy for Aligned<AlignTo, T> {}
 87 | // Cloning rebuilds the wrapper around a clone of the inner value; `AlignTo` needs no bound.
 88 | impl<AlignTo, T: Clone> Clone for Aligned<AlignTo, T> {
 89 |     fn clone(&self) -> Self {
 90 |         Aligned { alignment: [], value: T::clone(&self.value) }
 91 |     }
 92 | }
 93 | // The default wrapper holds `T`'s default value.
 94 | impl<AlignTo, T: Default> Default for Aligned<AlignTo, T> {
 95 |     fn default() -> Self {
 96 |         Aligned { alignment: [], value: Default::default() }
 97 |     }
 98 | }
 99 | 
100 | impl<AlignTo, T: core::fmt::Debug> core::fmt::Debug for Aligned<AlignTo, T> {
101 |     /// Formats as the tuple `Aligned(<value>)`; the alignment marker is not printed.
102 |     #[inline]
103 |     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
104 |         let mut tuple = f.debug_tuple("Aligned");
105 |         tuple.field(&self.value);
106 |         tuple.finish()
107 |     }
108 | }
106 | 
107 | impl<AlignTo, T: core::cmp::PartialEq> core::cmp::PartialEq for Aligned<AlignTo, T> {
108 |     #[inline]
109 |     fn eq(&self, rhs: &Self) -> bool {
110 |         self.value == rhs.value
111 |     }
112 | }
113 | // Each comparison impl in this group looks only at the wrapped value; `AlignTo` takes no part.
114 | impl<AlignTo, T: core::cmp::Eq> core::cmp::Eq for Aligned<AlignTo, T> {}
115 | 
116 | impl<AlignTo, T: core::cmp::PartialOrd> core::cmp::PartialOrd for Aligned<AlignTo, T> {
117 |     #[inline]
118 |     fn partial_cmp(&self, rhs: &Self) -> Option<core::cmp::Ordering> {
119 |         core::cmp::PartialOrd::partial_cmp(&self.value, &rhs.value)
120 |     }
121 | }
122 | 
123 | impl<AlignTo, T: core::cmp::Ord> core::cmp::Ord for Aligned<AlignTo, T> {
124 |     #[inline]
125 |     fn cmp(&self, rhs: &Self) -> core::cmp::Ordering {
126 |         core::cmp::Ord::cmp(&self.value, &rhs.value)
127 |     }
128 | }
129 | 
130 | impl<AlignTo, T: core::hash::Hash> core::hash::Hash for Aligned<AlignTo, T> {
131 |     #[inline]
132 |     fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
133 |         core::hash::Hash::hash(&self.value, state) // only the wrapped value is fed to the hasher
134 |     }
135 | }
136 | 
137 | /// Allocate a boxed slice of `count` `T`s aligned to the `AlignTo` type.
138 | ///
139 | /// Every element is initialized with `T::default()`. The allocation is aligned to the larger of
140 | /// `align_of::<AlignTo>()` and `align_of::<T>()`.
141 | ///
142 | /// # Panics
143 | /// Panics if `count` is 0, if `T` is zero-sized, if the total size in bytes overflows `usize`
144 | /// or does not form a valid `Layout`, or if memory allocation fails.
145 | #[cfg(any(feature = "std", feature = "alloc"))]
146 | pub fn allocate_aligned_slice<AlignTo, T: Default>(count: usize) -> Box<[T]> {
147 |     assert!(count > 0, "size must be nonzero");
148 |     // A wrapped multiplication would under-allocate and let the writes below run past the buffer.
149 |     let size = count
150 |         .checked_mul(core::mem::size_of::<T>())
151 |         .expect("allocation size overflows usize");
152 |     // The allocator must not be asked for zero bytes, which would happen for a zero-sized `T`.
153 |     assert!(size > 0, "size must be nonzero");
154 |     let align = core::cmp::max(core::mem::align_of::<AlignTo>(), core::mem::align_of::<T>());
155 |     let layout = Layout::from_size_align(size, align).unwrap();
156 |     // NOTE(review): the returned `Box<[T]>` frees its memory using the layout of `[T]`, whose
157 |     // alignment may be smaller than `align`; confirm that this mismatch is acceptable for the
158 |     // allocators in use.
159 |     unsafe {
160 |         let ptr = alloc(layout) as *mut T;
161 |         assert!(!ptr.is_null());
162 |         for i in 0..count {
163 |             ptr.add(i).write(T::default());
164 |         }
165 |         Box::from_raw(core::ptr::slice_from_raw_parts_mut(ptr, count))
166 |     }
167 | }
158 | 
159 | /// Aligns a type to the maximum possible vector alignment for a particular scalar on the current
160 | /// architecture.
161 | pub type MaxAligned<Scalar, T> = Aligned<AllVectors<Scalar>, T>;
162 | 
163 | #[cfg(test)]
164 | mod test {
165 |     use super::*;
166 | 
167 |     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
168 |     #[test]
169 |     fn check_x86() {
170 |         // On x86 the maximum vector alignment for `f32` is expected to be 32 bytes.
171 |         let alignment = core::mem::align_of::<MaxAligned<f32, [f32; 8]>>();
172 |         assert_eq!(alignment, 32);
173 |     }
174 | }
175 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/arm/complex.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     arch::{arm::Neon, Token},
  3 |     scalar::Scalar,
  4 |     shim::{Shim2, Shim4, Shim8},
  5 |     vector::{width, Native, Vector},
  6 | };
  7 | use num_complex::Complex;
  8 | 
  9 | #[cfg(target_arch = "aarch64")]
 10 | use core::arch::aarch64::*;
 11 | #[cfg(target_arch = "arm")]
 12 | use core::arch::arm::*;
 13 | 
 14 | impl Native<Neon> for Complex<f32> {
 15 |     type Width = width::W2; // same width as `cf32x2`, the widest `Complex<f32>` struct in this file
 16 | }
 17 | 
 18 | impl Native<Neon> for Complex<f64> {
 19 |     type Width = width::W1; // same width as `cf64x1`
 20 | }
 21 | 
 22 | /// A NEON vector of one `Complex<f32>`.
 23 | ///
 24 | /// Requires feature `"complex"`.
 25 | #[derive(Clone, Copy, Debug)]
 26 | #[repr(transparent)]
 27 | #[allow(non_camel_case_types)]
 28 | pub struct cf32x1(float32x2_t);
 29 | 
 30 | /// A NEON vector of two `Complex<f32>`s.
 31 | ///
 32 | /// Requires feature `"complex"`.
 33 | #[derive(Clone, Copy, Debug)]
 34 | #[repr(transparent)]
 35 | #[allow(non_camel_case_types)]
 36 | pub struct cf32x2(float32x4_t);
 37 | 
 38 | /// A NEON vector of one `Complex<f64>`.
 39 | ///
 40 | /// Requires feature `"complex"` and an `aarch64` target.
 41 | #[cfg(target_arch = "aarch64")]
 42 | #[derive(Clone, Copy, Debug)]
 43 | #[repr(transparent)]
 44 | #[allow(non_camel_case_types)]
 45 | pub struct cf64x1(float64x2_t);
 46 | 
 47 | impl Scalar<Neon, width::W1> for Complex<f32> {
 48 |     type Vector = cf32x1;
 49 | }
 50 | 
 51 | impl Scalar<Neon, width::W2> for Complex<f32> {
 52 |     type Vector = cf32x2;
 53 | }
 54 | 
 55 | impl Scalar<Neon, width::W4> for Complex<f32> {
 56 |     type Vector = Shim2<cf32x2, Complex<f32>>; // wider than any struct here; presumably two `cf32x2`s combined
 57 | }
 58 | 
 59 | impl Scalar<Neon, width::W8> for Complex<f32> {
 60 |     type Vector = Shim4<cf32x2, Complex<f32>>; // presumably four `cf32x2`s combined
 61 | }
 62 | // NOTE(review): the f32/f64 W1 impls in arm/mod.rs wrap the generic vector in `ShimToken<_, _, Neon>`; the impl below does not. Confirm the token type is accepted.
 63 | #[cfg(target_arch = "arm")]
 64 | impl Scalar<Neon, width::W1> for Complex<f64> {
 65 |     type Vector = crate::arch::generic::cf64x1; // `cf64x1` from this file is aarch64-only, so 32-bit arm uses the generic type
 66 | }
 67 | 
 68 | #[cfg(target_arch = "aarch64")]
 69 | impl Scalar<Neon, width::W1> for Complex<f64> {
 70 |     type Vector = cf64x1;
 71 | }
 72 | 
 73 | impl Scalar<Neon, width::W2> for Complex<f64> {
 74 |     type Vector = Shim2<<Self as Scalar<Neon, width::W1>>::Vector, Complex<f64>>; // built from whichever W1 vector the target selected above
 75 | }
 76 | 
 77 | impl Scalar<Neon, width::W4> for Complex<f64> {
 78 |     type Vector = Shim4<<Self as Scalar<Neon, width::W1>>::Vector, Complex<f64>>;
 79 | }
 80 | 
 81 | impl Scalar<Neon, width::W8> for Complex<f64> {
 82 |     type Vector = Shim8<<Self as Scalar<Neon, width::W1>>::Vector, Complex<f64>>;
 83 | }
 84 | 
 85 | arithmetic_ops! {
 86 |     feature: Neon::new_unchecked(),
 87 |     for cf32x1:
 88 |         add -> (vadd_f32),
 89 |         sub -> (vsub_f32),
 90 |         mul -> (),
 91 |         div -> ()
 92 | }
 93 | 
 94 | arithmetic_ops! {
 95 |     feature: Neon::new_unchecked(),
 96 |     for cf32x2:
 97 |         add -> (vaddq_f32),
 98 |         sub -> (vsubq_f32),
 99 |         mul -> (),
100 |         div -> ()
101 | }
102 | 
103 | #[cfg(target_arch = "aarch64")]
104 | arithmetic_ops! {
105 |     feature: Neon::new_unchecked(),
106 |     for cf64x1:
107 |         add -> (vaddq_f64),
108 |         sub -> (vsubq_f64),
109 |         mul -> (),
110 |         div -> ()
111 | }
112 | 
113 | impl core::ops::Neg for cf32x1 {
114 |     type Output = Self;
115 |     /// Negates every lane.
116 |     #[inline]
117 |     fn neg(mut self) -> Self::Output {
118 |         self.as_slice_mut()
119 |             .iter_mut()
120 |             .for_each(|lane| *lane = -*lane);
121 |         self
122 |     }
123 | }
124 | 
125 | impl core::ops::Neg for cf32x2 {
126 |     type Output = Self;
127 |     /// Negates every lane.
128 |     #[inline]
129 |     fn neg(mut self) -> Self::Output {
130 |         self.as_slice_mut()
131 |             .iter_mut()
132 |             .for_each(|lane| *lane = -*lane);
133 |         self
134 |     }
135 | }
136 | 
137 | #[cfg(target_arch = "aarch64")]
138 | impl core::ops::Neg for cf64x1 {
139 |     type Output = Self;
140 |     /// Negates every lane.
141 |     #[inline]
142 |     fn neg(mut self) -> Self::Output {
143 |         self.as_slice_mut()
144 |             .iter_mut()
145 |             .for_each(|lane| *lane = -*lane);
146 |         self
147 |     }
148 | }
149 | 
150 | as_slice! { cf32x1 }
151 | as_slice! { cf32x2 }
152 | #[cfg(target_arch = "aarch64")]
153 | as_slice! { cf64x1 }
154 | 
155 | unsafe impl Vector for cf32x1 {
156 |     type Scalar = Complex<f32>;
157 |     type Token = Neon;
158 |     type Width = width::W1;
159 |     type Underlying = float32x2_t;
160 | 
161 |     /// Returns a vector whose single lane is zero.
162 |     #[inline]
163 |     fn zeroed(_: Self::Token) -> Self {
164 |         // TODO use vdup
165 |         let raw: float32x2_t = unsafe { core::mem::zeroed() };
166 |         Self(raw)
167 |     }
168 | 
169 |     /// Returns a vector whose single lane holds `from`.
170 |     #[inline]
171 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
172 |         // TODO use vdup
173 |         let mut lanes = Self::zeroed(token);
174 |         lanes[0] = from;
175 |         lanes
176 |     }
177 | }
178 | 
179 | unsafe impl Vector for cf32x2 {
180 |     type Scalar = Complex<f32>;
181 |     type Token = Neon;
182 |     type Width = width::W2;
183 |     type Underlying = float32x4_t;
184 | 
185 |     /// Returns a vector with both lanes zero.
186 |     #[inline]
187 |     fn zeroed(_: Self::Token) -> Self {
188 |         // TODO use vdup
189 |         // All-zero bits are a valid value for a vector of floats.
190 |         let raw: float32x4_t = unsafe { core::mem::zeroed() };
191 |         Self(raw)
192 |     }
193 | 
194 |     /// Returns a vector with `from` copied into both lanes.
195 |     #[inline]
196 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
197 |         // TODO use vdup
198 |         let mut lanes = Self::zeroed(token);
199 |         (0..2usize).for_each(|index| lanes[index] = from);
200 |         lanes
201 |     }
202 | }
203 | 
204 | #[cfg(target_arch = "aarch64")]
205 | unsafe impl Vector for cf64x1 {
206 |     type Scalar = Complex<f64>;
207 |     type Token = Neon;
208 |     type Width = width::W1;
209 |     type Underlying = float64x2_t;
210 | 
211 |     /// Returns a vector whose single lane is zero.
212 |     #[inline]
213 |     fn zeroed(_: Self::Token) -> Self {
214 |         // TODO use vdup
215 |         let raw: float64x2_t = unsafe { core::mem::zeroed() };
216 |         Self(raw)
217 |     }
218 | 
219 |     /// Returns a vector whose single lane holds `from`.
220 |     #[inline]
221 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
222 |         // TODO use vdup
223 |         let mut lanes = Self::zeroed(token);
224 |         lanes[0] = from;
225 |         lanes
226 |     }
227 | }
228 | 
229 | impl crate::vector::Complex for cf32x1 {
230 |     type RealScalar = f32;
231 |     /// Replaces every lane with its complex conjugate.
232 |     #[inline]
233 |     fn conj(mut self) -> Self {
234 |         self.as_slice_mut()
235 |             .iter_mut()
236 |             .for_each(|z| *z = z.conj());
237 |         self
238 |     }
239 |     /// Multiplies every lane by `i`: `(re, im)` becomes `(-im, re)`.
240 |     #[inline]
241 |     fn mul_i(mut self) -> Self {
242 |         self.as_slice_mut()
243 |             .iter_mut()
244 |             .for_each(|z| *z = Complex { re: -z.im, im: z.re });
245 |         self
246 |     }
247 |     /// Multiplies every lane by `-i`: `(re, im)` becomes `(im, -re)`.
248 |     #[inline]
249 |     fn mul_neg_i(mut self) -> Self {
250 |         self.as_slice_mut()
251 |             .iter_mut()
252 |             .for_each(|z| *z = Complex { re: z.im, im: -z.re });
253 |         self
254 |     }
255 | }
256 | 
257 | impl crate::vector::Complex for cf32x2 {
258 |     type RealScalar = f32;
259 |     /// Replaces every lane with its complex conjugate.
260 |     #[inline]
261 |     fn conj(mut self) -> Self {
262 |         self.as_slice_mut()
263 |             .iter_mut()
264 |             .for_each(|z| *z = z.conj());
265 |         self
266 |     }
267 |     /// Multiplies every lane by `i`: `(re, im)` becomes `(-im, re)`.
268 |     #[inline]
269 |     fn mul_i(mut self) -> Self {
270 |         self.as_slice_mut()
271 |             .iter_mut()
272 |             .for_each(|z| *z = Complex { re: -z.im, im: z.re });
273 |         self
274 |     }
275 |     /// Multiplies every lane by `-i`: `(re, im)` becomes `(im, -re)`.
276 |     #[inline]
277 |     fn mul_neg_i(mut self) -> Self {
278 |         self.as_slice_mut()
279 |             .iter_mut()
280 |             .for_each(|z| *z = Complex { re: z.im, im: -z.re });
281 |         self
282 |     }
283 | }
284 | 
285 | #[cfg(target_arch = "aarch64")]
286 | impl crate::vector::Complex for cf64x1 {
287 |     type RealScalar = f64; // the lanes are `Complex<f64>`, so their real and imaginary parts are `f64`
288 |     /// Replaces every lane with its complex conjugate.
289 |     #[inline]
290 |     fn conj(mut self) -> Self {
291 |         for v in self.as_slice_mut() {
292 |             *v = v.conj();
293 |         }
294 |         self
295 |     }
296 |     /// Multiplies every lane by `i`: `(re, im)` becomes `(-im, re)`.
297 |     #[inline]
298 |     fn mul_i(mut self) -> Self {
299 |         for v in self.as_slice_mut() {
300 |             *v = Complex::new(-v.im, v.re);
301 |         }
302 |         self
303 |     }
304 |     /// Multiplies every lane by `-i`: `(re, im)` becomes `(im, -re)`.
305 |     #[inline]
306 |     fn mul_neg_i(mut self) -> Self {
307 |         for v in self.as_slice_mut() {
308 |             *v = Complex::new(v.im, -v.re);
309 |         }
310 |         self
311 |     }
312 | }
313 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/arm/mod.rs:
--------------------------------------------------------------------------------
  1 | //! arm/aarch64 vector types.
  2 | 
  3 | #[cfg(feature = "complex")]
  4 | mod complex;
  5 | #[cfg(feature = "complex")]
  6 | pub use complex::*;
  7 | 
  8 | use crate::{
  9 |     arch::{generic, Token},
 10 |     scalar::Scalar,
 11 |     shim::{Shim2, Shim4, ShimToken},
 12 |     vector::{width, Native, Vector},
 13 | };
 14 | 
 15 | #[cfg(target_arch = "aarch64")]
 16 | use core::arch::aarch64::*;
 17 | #[cfg(target_arch = "arm")]
 18 | use core::arch::arm::*;
 19 | 
 20 | /// NEON instruction set token.
 21 | #[derive(Copy, Clone, Debug)]
 22 | pub struct Neon(()); // the private unit field prevents construction outside this module
 23 | 
 24 | impl_token! { Neon => "neon" } // presumably implements `Token` for `Neon`, keyed on the "neon" target feature; confirm in `impl_token!`
 25 | 
 26 | impl Native<Neon> for f32 {
 27 |     type Width = width::W4; // same width as `f32x4`
 28 | }
 29 | 
 30 | impl Native<Neon> for f64 {
 31 |     type Width = width::W2; // same width as `f64x2`, which is only defined on aarch64
 32 | }
 33 | 
 34 | /// A NEON vector of 2 `f32`s.
 35 | #[derive(Clone, Copy, Debug)]
 36 | #[repr(transparent)]
 37 | #[allow(non_camel_case_types)]
 38 | pub struct f32x2(float32x2_t);
 39 | 
 40 | /// A NEON vector of 4 `f32`s.
 41 | #[derive(Clone, Copy, Debug)]
 42 | #[repr(transparent)]
 43 | #[allow(non_camel_case_types)]
 44 | pub struct f32x4(float32x4_t);
 45 | 
 46 | /// A NEON vector of 2 `f64`s.
 47 | #[cfg(target_arch = "aarch64")]
 48 | #[derive(Clone, Copy, Debug)]
 49 | #[repr(transparent)]
 50 | #[allow(non_camel_case_types)]
 51 | pub struct f64x2(float64x2_t);
 52 | 
 53 | impl Scalar<Neon, width::W1> for f32 {
 54 |     type Vector = ShimToken<generic::f32x1, Self, Neon>; // no one-lane NEON struct here: the generic vector is wrapped with the `Neon` token
 55 | }
 56 | 
 57 | impl Scalar<Neon, width::W2> for f32 {
 58 |     type Vector = f32x2;
 59 | }
 60 | 
 61 | impl Scalar<Neon, width::W4> for f32 {
 62 |     type Vector = f32x4;
 63 | }
 64 | 
 65 | impl Scalar<Neon, width::W8> for f32 {
 66 |     type Vector = Shim2<f32x4, Self>; // wider than any struct here; presumably two `f32x4`s combined
 67 | }
 68 | 
 69 | impl Scalar<Neon, width::W1> for f64 {
 70 |     type Vector = ShimToken<generic::f64x1, Self, Neon>;
 71 | }
 72 | 
 73 | #[cfg(target_arch = "arm")]
 74 | impl Scalar<Neon, width::W2> for f64 {
 75 |     type Vector = Shim2<ShimToken<generic::f64x1, Self, Neon>, Self>; // `f64x2` is aarch64-only, so 32-bit arm builds W2 from the wrapped generic vector
 76 | }
 77 | 
 78 | #[cfg(target_arch = "aarch64")]
 79 | impl Scalar<Neon, width::W2> for f64 {
 80 |     type Vector = f64x2;
 81 | }
 82 | 
 83 | impl Scalar<Neon, width::W4> for f64 {
 84 |     type Vector = Shim2<<Self as Scalar<Neon, width::W2>>::Vector, Self>; // built from whichever W2 vector the target selected above
 85 | }
 86 | 
 87 | impl Scalar<Neon, width::W8> for f64 {
 88 |     type Vector = Shim4<<Self as Scalar<Neon, width::W2>>::Vector, Self>;
 89 | }
 90 | 
 91 | arithmetic_ops! {
 92 |     feature: Neon::new_unchecked(),
 93 |     for f32x2:
 94 |         add -> (vadd_f32),
 95 |         sub -> (vsub_f32),
 96 |         mul -> (vmul_f32),
 97 |         div -> ()
 98 | }
 99 | 
100 | arithmetic_ops! {
101 |     feature: Neon::new_unchecked(),
102 |     for f32x4:
103 |         add -> (vaddq_f32),
104 |         sub -> (vsubq_f32),
105 |         mul -> (vmulq_f32),
106 |         div -> ()
107 | }
108 | 
109 | #[cfg(target_arch = "aarch64")]
110 | arithmetic_ops! {
111 |     feature: Neon::new_unchecked(),
112 |     for f64x2:
113 |         add -> (vaddq_f64),
114 |         sub -> (vsubq_f64),
115 |         mul -> (vmulq_f64),
116 |         div -> ()
117 | }
118 | 
119 | impl core::ops::Neg for f32x2 {
120 |     type Output = Self;
121 |     /// Negates every lane.
122 |     #[inline]
123 |     fn neg(mut self) -> Self::Output {
124 |         self.as_slice_mut()
125 |             .iter_mut()
126 |             .for_each(|lane| *lane = -*lane);
127 |         self
128 |     }
129 | }
130 | 
131 | impl core::ops::Neg for f32x4 {
132 |     type Output = Self;
133 |     /// Negates every lane.
134 |     #[inline]
135 |     fn neg(mut self) -> Self::Output {
136 |         self.as_slice_mut()
137 |             .iter_mut()
138 |             .for_each(|lane| *lane = -*lane);
139 |         self
140 |     }
141 | }
142 | 
143 | #[cfg(target_arch = "aarch64")]
144 | impl core::ops::Neg for f64x2 {
145 |     type Output = Self;
146 |     /// Negates every lane.
147 |     #[inline]
148 |     fn neg(mut self) -> Self::Output {
149 |         self.as_slice_mut()
150 |             .iter_mut()
151 |             .for_each(|lane| *lane = -*lane);
152 |         self
153 |     }
154 | }
155 | 
156 | as_slice! { f32x2 }
157 | as_slice! { f32x4 }
158 | 
159 | #[cfg(target_arch = "aarch64")]
160 | as_slice! { f64x2 }
161 | 
162 | unsafe impl Vector for f32x2 {
163 |     type Scalar = f32;
164 |     type Token = Neon;
165 |     type Width = width::W2;
166 |     type Underlying = float32x2_t;
167 | 
168 |     /// Returns a vector with both lanes zero.
169 |     #[inline]
170 |     fn zeroed(_: Self::Token) -> Self {
171 |         // TODO use vdup
172 |         // All-zero bits are a valid value for a vector of floats.
173 |         let raw: float32x2_t = unsafe { core::mem::zeroed() };
174 |         Self(raw)
175 |     }
176 | 
177 |     /// Returns a vector with `from` copied into both lanes.
178 |     #[inline]
179 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
180 |         // TODO use vdup
181 |         let mut lanes = Self::zeroed(token);
182 |         (0..2usize).for_each(|index| lanes[index] = from);
183 |         lanes
184 |     }
185 | }
186 | 
187 | unsafe impl Vector for f32x4 {
188 |     type Scalar = f32;
189 |     type Token = Neon;
190 |     type Width = width::W4;
191 |     type Underlying = float32x4_t;
192 | 
193 |     /// Returns a vector with all four lanes zero.
194 |     #[inline]
195 |     fn zeroed(_: Self::Token) -> Self {
196 |         // TODO use vdup
197 |         // All-zero bits are a valid value for a vector of floats.
198 |         let raw: float32x4_t = unsafe { core::mem::zeroed() };
199 |         Self(raw)
200 |     }
201 | 
202 |     /// Returns a vector with `from` copied into all four lanes.
203 |     #[inline]
204 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
205 |         // TODO use vdup
206 |         let mut lanes = Self::zeroed(token);
207 |         for index in 0..4usize {
208 |             lanes[index] = from;
209 |         }
210 |         lanes
211 |     }
212 | }
213 | 
214 | #[cfg(target_arch = "aarch64")]
215 | unsafe impl Vector for f64x2 {
216 |     type Scalar = f64;
217 |     type Token = Neon;
218 |     type Width = width::W2;
219 |     type Underlying = float64x2_t;
220 | 
221 |     /// Returns a vector with both lanes zero.
222 |     #[inline]
223 |     fn zeroed(_: Self::Token) -> Self {
224 |         // TODO use vdup
225 |         // All-zero bits are a valid value for a vector of floats.
226 |         let raw: float64x2_t = unsafe { core::mem::zeroed() };
227 |         Self(raw)
228 |     }
229 | 
230 |     /// Returns a vector with `from` copied into both lanes.
231 |     #[inline]
232 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
233 |         // TODO use vdup
234 |         let mut lanes = Self::zeroed(token);
235 |         (0..2usize).for_each(|index| lanes[index] = from);
236 |         lanes
237 |     }
238 | }
239 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/generic.rs:
--------------------------------------------------------------------------------
  1 | //! Generic vector types for any platform.
  2 | 
  3 | use crate::{
  4 |     arch::Token,
  5 |     scalar::Scalar,
  6 |     shim::{Shim2, Shim4, Shim8},
  7 |     vector::{width, Native, Vector},
  8 | };
  9 | 
 10 | #[cfg(feature = "complex")]
 11 | use num_complex::Complex;
 12 | 
 13 | /// Generic instruction set token.
 14 | #[derive(Copy, Clone, Debug)]
 15 | pub struct Generic;
 16 | 
 17 | unsafe impl Token for Generic {
 18 |     #[inline]
 19 |     fn new() -> Option<Self> {
 20 |         Option::Some(Generic) // unconditional: no feature check is performed for the generic token
 21 |     }
 22 | 
 23 |     #[inline]
 24 |     unsafe fn new_unchecked() -> Self {
 25 |         Generic
 26 |     }
 27 | }
 28 | 
 29 | /// A one-lane generic vector wrapping a single `f32`.
 30 | #[allow(non_camel_case_types)]
 31 | #[repr(transparent)]
 32 | #[derive(Copy, Clone, Debug)]
 33 | pub struct f32x1(f32);
 34 | 
 35 | /// A one-lane generic vector wrapping a single `f64`.
 36 | #[allow(non_camel_case_types)]
 37 | #[repr(transparent)]
 38 | #[derive(Copy, Clone, Debug)]
 39 | pub struct f64x1(f64);
 40 | 
 41 | /// A one-lane generic vector wrapping a single `Complex<f32>`.
 42 | ///
 43 | /// Only available with the `"complex"` feature.
 44 | #[cfg(feature = "complex")]
 45 | #[allow(non_camel_case_types)]
 46 | #[repr(transparent)]
 47 | #[derive(Copy, Clone, Debug)]
 48 | pub struct cf32x1(Complex<f32>);
 49 | 
 50 | /// A one-lane generic vector wrapping a single `Complex<f64>`.
 51 | ///
 52 | /// Only available with the `"complex"` feature.
 53 | #[cfg(feature = "complex")]
 54 | #[allow(non_camel_case_types)]
 55 | #[repr(transparent)]
 56 | #[derive(Copy, Clone, Debug)]
 57 | pub struct cf64x1(Complex<f64>);
 58 | 
 59 | // First stage: maps each scalar type onto its `Generic` vector type for every supported width.
 60 | macro_rules! implement {
 61 |     { $single:ty, $elem:ty } => {
 62 |         // Width 1 is the one-lane vector itself; wider widths use the `ShimN` wrappers around it.
 63 |         impl Scalar<Generic, width::W1> for $elem {
 64 |             type Vector = $single;
 65 |         }
 66 | 
 67 |         impl Scalar<Generic, width::W2> for $elem {
 68 |             type Vector = Shim2<$single, $elem>;
 69 |         }
 70 | 
 71 |         impl Scalar<Generic, width::W4> for $elem {
 72 |             type Vector = Shim4<$single, $elem>;
 73 |         }
 74 | 
 75 |         impl Scalar<Generic, width::W8> for $elem {
 76 |             type Vector = Shim8<$single, $elem>;
 77 |         }
 78 |         // With the generic token, the native width is a single lane.
 79 |         impl Native<Generic> for $elem {
 80 |             type Width = width::W1;
 81 |         }
 82 |     };
 83 | }
 84 | 
 85 | implement! { f32x1, f32 } // real scalars
 86 | implement! { f64x1, f64 }
 87 | 
 88 | #[cfg(feature = "complex")]
 89 | implement! { cf32x1, Complex<f32> } // complex scalars, behind the "complex" feature
 90 | #[cfg(feature = "complex")]
 91 | implement! { cf64x1, Complex<f64> }
 92 | 
 93 | // Second stage: gives each one-lane type its operators, slice access and `Vector` impl.
 94 | macro_rules! implement {
 95 |     { $single:ty, $elem:ty } => {
 96 |         // Operators go through `arithmetic_ops!`; the per-operation function lists are empty here.
 97 |         arithmetic_ops! {
 98 |             feature: Generic::new_unchecked(),
 99 |             for $single:
100 |                 add -> (),
101 |                 sub -> (),
102 |                 mul -> (),
103 |                 div -> ()
104 |         }
105 | 
106 |         impl core::ops::Neg for $single {
107 |             type Output = Self;
108 | 
109 |             #[inline]
110 |             fn neg(self) -> Self {
111 |                 let Self(inner) = self;
112 |                 Self(-inner)
113 |             }
114 |         }
115 | 
116 |         as_slice! { $single }
117 | 
118 |         unsafe impl Vector for $single {
119 |             type Scalar = $elem;
120 |             type Token = Generic;
121 |             type Width = width::W1;
122 |             type Underlying = $elem;
123 | 
124 |             // The scalar type's `Default` value fills the single lane.
125 |             #[inline]
126 |             fn zeroed(_: Self::Token) -> Self {
127 |                 Self(<$elem as Default>::default())
128 |             }
129 | 
130 |             // The single lane is `from` itself.
131 |             #[inline]
132 |             fn splat(_: Self::Token, from: Self::Scalar) -> Self {
133 |                 Self(from)
134 |             }
135 |         }
136 |     };
137 | }
138 | 
139 | implement! { f32x1, f32 } // real scalars
140 | implement! { f64x1, f64 }
141 | 
142 | #[cfg(feature = "complex")]
143 | implement! { cf32x1, Complex<f32> } // complex scalars, behind the "complex" feature
144 | #[cfg(feature = "complex")]
145 | implement! { cf64x1, Complex<f64> }
146 | 
147 | #[cfg(feature = "complex")]
148 | macro_rules! implement_complex {
149 |     { $single:ty, $real:ty } => {
150 |         // Complex-only operations for a one-lane vector wrapping a `Complex<$real>`.
151 |         impl crate::vector::Complex for $single {
152 |             type RealScalar = $real;
153 | 
154 |             // Conjugate: the imaginary part changes sign.
155 |             #[inline]
156 |             fn conj(self) -> Self {
157 |                 Self(Complex { re: self.0.re, im: -self.0.im })
158 |             }
159 |             // (re + i im) * i = -im + i re
160 |             #[inline]
161 |             fn mul_i(self) -> Self {
162 |                 Self(Complex { re: -self.0.im, im: self.0.re })
163 |             }
164 |             // (re + i im) * -i = im - i re
165 |             #[inline]
166 |             fn mul_neg_i(self) -> Self {
167 |                 Self(Complex { re: self.0.im, im: -self.0.re })
168 |             }
169 |         }
170 |     };
171 | }
172 | 
173 | #[cfg(feature = "complex")]
174 | implement_complex! { cf32x1, f32 } // single-precision complex
175 | #[cfg(feature = "complex")]
176 | implement_complex! { cf64x1, f64 } // double-precision complex
177 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Architecture-specific types.
  2 | 
  3 | /// A marker value indicating that a particular set of CPU features is supported.
  4 | ///
  5 | /// # Safety
  6 | /// Implementing `Token` for a type asserts that values of the type can only be constructed when
  7 | /// the associated CPU features are supported.
  8 | pub unsafe trait Token: Copy + From<Self> + Into<Self> {
  9 |     /// Detects the required CPU features, returning `None` when they are not supported.
 10 |     fn new() -> Option<Self>;
 11 | 
 12 |     /// Creates the token without checking whether the CPU features are supported.
 13 |     ///
 14 |     /// # Safety
 15 |     /// The caller must guarantee that the required CPU features are supported; calling this
 16 |     /// function otherwise causes undefined behavior.
 17 |     unsafe fn new_unchecked() -> Self;
 18 | }
 19 | 
 20 | #[allow(unused_macros)]
 21 | macro_rules! impl_token {
 22 |     { $token:ident => $($feature:tt),+ } => {
 23 |         unsafe impl $crate::arch::Token for $token {
 24 |             #[inline]
 25 |             fn new() -> Option<Self> {
 26 |                 // Hand out the token only when every listed feature is reported as detected.
 27 |                 if !multiversion::are_cpu_features_detected!($($feature),*) {
 28 |                     return None;
 29 |                 }
 30 |                 Some(Self(()))
 31 |             }
 32 | 
 33 |             #[inline]
 34 |             unsafe fn new_unchecked() -> Self {
 35 |                 Self(())
 36 |             }
 37 |         }
 38 |         // Any feature token can be converted into the generic token.
 39 |         impl core::convert::From<$token> for $crate::arch::generic::Generic {
 40 |             #[inline]
 41 |             fn from(_: $token) -> Self {
 42 |                 Self
 43 |             }
 44 |         }
 45 |     }
 46 | }
 47 | 
 48 | pub mod generic; // compiled for every target
 49 | 
 50 | #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 51 | pub mod x86;
 52 | 
 53 | #[cfg(all(target_arch = "aarch64", feature = "nightly"))]
 54 | pub mod arm;
 55 | 
 56 | #[cfg(all(
 57 |     feature = "nightly",
 58 |     target_arch = "wasm32",
 59 |     target_feature = "simd128",
 60 | ))]
 61 | pub mod wasm;
 62 | 
 63 | /// Calls the given macro with every token type supported on the compilation target.
 64 | ///
 65 | /// The macro receives the [`Token`] types as a comma-delimited list in priority order; the list
 66 | /// always ends with a trailing comma.
 67 | ///
 68 | /// The following example declares a `SupportedScalar` trait that has [`ScalarExt`] as a
 69 | /// supertrait for each token:
 70 | /// ```
 71 | /// use generic_simd::{call_macro_with_tokens, scalar::ScalarExt};
 72 | ///
 73 | /// macro_rules! supported_scalars {
 74 | ///     { $($token:ty,)+ } => {
 75 | ///         trait SupportedScalar: Copy $(+ ScalarExt<$token>)* {}
 76 | ///     }
 77 | /// }
 78 | ///
 79 | /// call_macro_with_tokens!{ supported_scalars }
 80 | /// ```
 81 | ///
 82 | /// [`Token`]: arch/trait.Token.html
 83 | /// [`ScalarExt`]: scalar/trait.ScalarExt.html
 84 | #[macro_export]
 85 | macro_rules! call_macro_with_tokens {
 86 |     ($callback:ident) => { $crate::call_macro_with_tokens_impl! { $callback } };
 87 | }
 88 | 
 89 | // Fallback variant, selected when none of the architecture-specific variants is compiled:
 90 | // only the generic token is passed to the callback.
 91 | #[cfg(not(any(
 92 |     target_arch = "x86_64",
 93 |     target_arch = "x86",
 94 |     all(feature = "nightly", target_arch = "aarch64"),
 95 |     all(
 96 |         feature = "nightly",
 97 |         target_arch = "wasm32",
 98 |         target_feature = "simd128",
 99 |     ),
100 | )))]
101 | #[macro_export]
102 | #[doc(hidden)]
103 | macro_rules! call_macro_with_tokens_impl {
104 |     ($callback:ident) => {
105 |         $callback! { $crate::arch::generic::Generic, }
106 |     };
107 | }
108 | 
109 | #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
110 | #[macro_export]
111 | #[doc(hidden)]
112 | macro_rules! call_macro_with_tokens_impl {
113 |     ($callback:ident) => {
114 |         $callback! {
115 |             $crate::arch::x86::Avx, // listed first: highest priority
116 |             $crate::arch::x86::Sse,
117 |             $crate::arch::generic::Generic, // fallback, listed last
118 |         }
119 |     };
120 | }
121 | 
122 | #[cfg(all(target_arch = "aarch64", feature = "nightly"))]
123 | #[macro_export]
124 | #[doc(hidden)]
125 | macro_rules! call_macro_with_tokens_impl {
126 |     ($callback:ident) => {
127 |         $callback! {
128 |             $crate::arch::arm::Neon, // listed first: highest priority
129 |             $crate::arch::generic::Generic, // fallback, listed last
130 |         }
131 |     };
132 | }
133 | 
134 | #[cfg(all(
135 |     feature = "nightly",
136 |     target_arch = "wasm32",
137 |     target_feature = "simd128",
138 | ))]
139 | #[macro_export]
140 | #[doc(hidden)]
141 | macro_rules! call_macro_with_tokens_impl {
142 |     ($callback:ident) => {
143 |         $callback! {
144 |             $crate::arch::wasm::Simd128, // listed first: highest priority
145 |             $crate::arch::generic::Generic, // fallback, listed last
146 |         }
147 |     };
148 | }
149 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/wasm/complex.rs:
--------------------------------------------------------------------------------
  1 | use core::arch::wasm32::*;
  2 | 
  3 | use crate::{
  4 |     arch::{generic, wasm::*, Token},
  5 |     scalar::Scalar,
  6 |     shim::{Shim2, Shim4, Shim8, ShimToken},
  7 |     vector::{width, Native, Vector},
  8 | };
  9 | use num_complex::Complex;
 10 | 
 11 | impl Native<Simd128> for Complex<f32> {
 12 |     type Width = crate::vector::width::W2; // matches `cf32x2`
 13 | }
 14 | 
 15 | impl Native<Simd128> for Complex<f64> {
 16 |     type Width = crate::vector::width::W1; // matches `cf64x1`
 17 | }
 18 | 
 19 | /// Two `Complex<f32>` values packed into one simd128 (`v128`) vector.
 20 | ///
 21 | /// Only available with the `"complex"` feature.
 22 | #[allow(non_camel_case_types)]
 23 | #[repr(transparent)]
 24 | #[derive(Copy, Clone, Debug)]
 25 | pub struct cf32x2(v128);
 26 | 
 27 | /// One `Complex<f64>` value stored in one simd128 (`v128`) vector.
 28 | ///
 29 | /// Only available with the `"complex"` feature.
 30 | #[allow(non_camel_case_types)]
 31 | #[repr(transparent)]
 32 | #[derive(Copy, Clone, Debug)]
 33 | pub struct cf64x1(v128);
 34 | 
 35 | impl Scalar<Simd128, width::W1> for Complex<f32> {
 36 |     type Vector = ShimToken<generic::cf32x1, Complex<f32>, Simd128>; // generic one-lane vector
 37 | }
 38 | 
 39 | impl Scalar<Simd128, width::W2> for Complex<f32> {
 40 |     type Vector = cf32x2; // native width
 41 | }
 42 | 
 43 | impl Scalar<Simd128, width::W4> for Complex<f32> {
 44 |     type Vector = Shim2<cf32x2, Self>;
 45 | }
 46 | 
 47 | impl Scalar<Simd128, width::W8> for Complex<f32> {
 48 |     type Vector = Shim4<cf32x2, Self>;
 49 | }
 50 | 
 51 | impl Scalar<Simd128, width::W1> for Complex<f64> {
 52 |     type Vector = cf64x1; // native width
 53 | }
 54 | 
 55 | impl Scalar<Simd128, width::W2> for Complex<f64> {
 56 |     type Vector = Shim2<cf64x1, Self>;
 57 | }
 58 | 
 59 | impl Scalar<Simd128, width::W4> for Complex<f64> {
 60 |     type Vector = Shim4<cf64x1, Self>;
 61 | }
 62 | 
 63 | impl Scalar<Simd128, width::W8> for Complex<f64> {
 64 |     type Vector = Shim8<cf64x1, Self>;
 65 | }
 66 | 
 67 | as_slice! { cf32x2 }
 68 | as_slice! { cf64x1 }
 69 | 
 70 | unsafe impl Vector for cf32x2 {
 71 |     type Scalar = Complex<f32>;
 72 |     type Token = Simd128;
 73 |     type Width = width::W2;
 74 |     type Underlying = v128;
 75 |     // All four `f32` lanes are zero, so both complex values are `0 + 0i`.
 76 |     #[inline]
 77 |     fn zeroed(_: Self::Token) -> Self {
 78 |         unsafe { Self(f32x4_splat(0.0)) }
 79 |     }
 80 |     // Lanes alternate real and imaginary parts: `[re, im, re, im]`.
 81 |     #[inline]
 82 |     fn splat(_: Self::Token, value: Self::Scalar) -> Self {
 83 |         unsafe { Self(f32x4_const(value.re, value.im, value.re, value.im)) }
 84 |     }
 85 | }
 86 | 
 87 | unsafe impl Vector for cf64x1 {
 88 |     type Scalar = Complex<f64>;
 89 |     type Token = Simd128;
 90 |     type Width = width::W1;
 91 |     type Underlying = v128;
 92 |     // Both `f64` lanes are zero, so the complex value is `0 + 0i`.
 93 |     #[inline]
 94 |     fn zeroed(_: Self::Token) -> Self {
 95 |         unsafe { Self(f64x2_splat(0.0)) }
 96 |     }
 97 |     // Lane order is `[re, im]`.
 98 |     #[inline]
 99 |     fn splat(_: Self::Token, value: Self::Scalar) -> Self {
100 |         unsafe { Self(f64x2_const(value.re, value.im)) }
101 |     }
102 | }
103 | 
104 | arithmetic_ops! {
105 |     feature: Simd128::new_unchecked(),
106 |     for cf32x2:
107 |         add -> (f32x4_add), // complex addition is lane-wise
108 |         sub -> (f32x4_sub), // complex subtraction is lane-wise
109 |         mul -> (cf32x2_mul), // complex product helper defined later in this file
110 |         div -> (cf32x2_div) // complex quotient helper defined later in this file
111 | }
112 | 
113 | arithmetic_ops! {
114 |     feature: Simd128::new_unchecked(),
115 |     for cf64x1:
116 |         add -> (f64x2_add), // complex addition is lane-wise
117 |         sub -> (f64x2_sub), // complex subtraction is lane-wise
118 |         mul -> (cf64x1_mul), // complex product helper defined later in this file
119 |         div -> (cf64x1_div) // complex quotient helper defined later in this file
120 | }
121 | 
122 | #[inline]
123 | #[target_feature(enable = "simd128")]
124 | unsafe fn f32x4_ldup(x: v128) -> v128 {
125 |     v32x4_shuffle::<0, 0, 2, 2>(x, x) // [x0, x0, x2, x2]: even lanes duplicated
126 | }
127 | 
128 | #[inline]
129 | #[target_feature(enable = "simd128")]
130 | unsafe fn f32x4_hdup(x: v128) -> v128 {
131 |     v32x4_shuffle::<1, 1, 3, 3>(x, x) // [x1, x1, x3, x3]: odd lanes duplicated
132 | }
133 | 
134 | #[inline]
135 | #[target_feature(enable = "simd128")]
136 | unsafe fn f64x2_ldup(x: v128) -> v128 {
137 |     v64x2_shuffle::<0, 0>(x, x) // [x0, x0]: low lane duplicated
138 | }
139 | 
140 | #[inline]
141 | #[target_feature(enable = "simd128")]
142 | unsafe fn f64x2_hdup(x: v128) -> v128 {
143 |     v64x2_shuffle::<1, 1>(x, x) // [x1, x1]: high lane duplicated
144 | }
145 | 
146 | #[inline]
147 | #[target_feature(enable = "simd128")]
148 | unsafe fn f32x4_addsub(a: v128, b: v128) -> v128 {
149 |     let (difference, sum) = (f32x4_sub(a, b), f32x4_add(a, b));
150 |     // Even lanes take `a - b`, odd lanes take `a + b`.
151 |     v32x4_shuffle::<0, 5, 2, 7>(difference, sum)
152 | }
153 | 
154 | #[inline]
155 | #[target_feature(enable = "simd128")]
156 | unsafe fn f64x2_addsub(a: v128, b: v128) -> v128 {
157 |     let (difference, sum) = (f64x2_sub(a, b), f64x2_add(a, b));
158 |     // The low lane takes `a - b`, the high lane takes `a + b`.
159 |     v64x2_shuffle::<0, 3>(difference, sum)
160 | }
161 | 
162 | #[inline]
163 | #[target_feature(enable = "simd128")]
164 | unsafe fn cf32x2_mul(a: v128, b: v128) -> v128 {
165 |     // Per complex value: [a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re].
166 |     let direct = f32x4_mul(f32x4_ldup(a), b);
167 |     let crossed = f32x4_mul(f32x4_hdup(a), v32x4_shuffle::<1, 0, 3, 2>(b, b));
168 |     f32x4_addsub(direct, crossed)
169 | }
170 | 
171 | #[inline]
172 | #[target_feature(enable = "simd128")]
173 | unsafe fn cf64x1_mul(a: v128, b: v128) -> v128 {
174 |     // [a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re]
175 |     let direct = f64x2_mul(f64x2_ldup(a), b);
176 |     let crossed = f64x2_mul(f64x2_hdup(a), v64x2_shuffle::<1, 0>(b, b));
177 |     f64x2_addsub(direct, crossed)
178 | }
179 | 
180 | #[inline]
181 | #[target_feature(enable = "simd128")]
182 | unsafe fn cf32x2_div(a: v128, b: v128) -> v128 {
183 |     // a / b = [(a.re * b.re + a.im * b.im) + i (a.im * b.re - a.re * b.im)] / |b|^2
184 |     let (den_re, den_im) = (f32x4_ldup(b), f32x4_hdup(b));
185 |     let swapped = v32x4_shuffle::<1, 0, 3, 2>(a, a);
186 |     let magnitude = f32x4_add(f32x4_mul(den_re, den_re), f32x4_mul(den_im, den_im));
187 |     let direct = f32x4_mul(a, den_re);
188 |     // Negated so that `addsub` adds on even lanes and subtracts on odd lanes.
189 |     let crossed = f32x4_neg(f32x4_mul(swapped, den_im));
190 |     f32x4_div(f32x4_addsub(direct, crossed), magnitude)
191 | }
192 | 
193 | #[inline]
194 | #[target_feature(enable = "simd128")]
195 | unsafe fn cf64x1_div(a: v128, b: v128) -> v128 {
196 |     // a / b = [(a.re * b.re + a.im * b.im) + i (a.im * b.re - a.re * b.im)] / |b|^2
197 |     let (den_re, den_im) = (f64x2_ldup(b), f64x2_hdup(b));
198 |     let swapped = v64x2_shuffle::<1, 0>(a, a);
199 |     let magnitude = f64x2_add(f64x2_mul(den_re, den_re), f64x2_mul(den_im, den_im));
200 |     let direct = f64x2_mul(a, den_re);
201 |     // Negated so that `addsub` adds on the low lane and subtracts on the high lane.
202 |     let crossed = f64x2_neg(f64x2_mul(swapped, den_im));
203 |     f64x2_div(f64x2_addsub(direct, crossed), magnitude)
204 | }
205 | 
206 | impl core::ops::Neg for cf32x2 {
207 |     type Output = Self;
208 |     // Negates all four `f32` lanes, i.e. both parts of each complex value.
209 |     #[inline]
210 |     fn neg(self) -> Self::Output {
211 |         unsafe { Self(f32x4_neg(self.0)) }
212 |     }
213 | }
214 | 
215 | impl core::ops::Neg for cf64x1 {
216 |     type Output = Self;
217 |     // Negates both `f64` lanes, i.e. both parts of the complex value.
218 |     #[inline]
219 |     fn neg(self) -> Self::Output {
220 |         unsafe { Self(f64x2_neg(self.0)) }
221 |     }
222 | }
223 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/wasm/mod.rs:
--------------------------------------------------------------------------------
  1 | //! WebAssembly vector types.
  2 | 
  3 | #[cfg(feature = "complex")]
  4 | mod complex;
  5 | #[cfg(feature = "complex")]
  6 | pub use complex::*;
  7 | 
  8 | use crate::{
  9 |     arch::{generic, Token},
 10 |     scalar::Scalar,
 11 |     shim::{Shim2, Shim4, ShimToken},
 12 |     vector::{width, Native, Vector},
 13 | };
 14 | use core::arch::wasm32::*;
 15 | 
 16 | /// Token indicating that the `simd128` instruction set is supported.
 17 | #[derive(Clone, Copy, Debug)]
 18 | pub struct Simd128(());
 19 | 
 20 | impl_token! { Simd128 => "simd128" } // `Token` impl gated on the "simd128" feature
 21 | 
 22 | impl Native<Simd128> for f32 {
 23 |     type Width = crate::vector::width::W4; // matches `f32x4`
 24 | }
 25 | 
 26 | impl Native<Simd128> for f64 {
 27 |     type Width = crate::vector::width::W2; // matches `f64x2`
 28 | }
 29 | 
 30 | /// Four `f32` lanes stored in one simd128 (`v128`) vector.
 31 | #[allow(non_camel_case_types)]
 32 | #[repr(transparent)]
 33 | #[derive(Copy, Clone, Debug)]
 34 | pub struct f32x4(v128);
 35 | 
 36 | /// Two `f64` lanes stored in one simd128 (`v128`) vector.
 37 | #[allow(non_camel_case_types)]
 38 | #[repr(transparent)]
 39 | #[derive(Copy, Clone, Debug)]
 40 | pub struct f64x2(v128);
 41 | 
 42 | impl Scalar<Simd128, width::W1> for f32 {
 43 |     type Vector = ShimToken<generic::f32x1, f32, Simd128>; // generic one-lane vector
 44 | }
 45 | 
 46 | impl Scalar<Simd128, width::W2> for f32 {
 47 |     type Vector = ShimToken<Shim2<generic::f32x1, f32>, f32, Simd128>;
 48 | }
 49 | 
 50 | impl Scalar<Simd128, width::W4> for f32 {
 51 |     type Vector = f32x4; // native width
 52 | }
 53 | 
 54 | impl Scalar<Simd128, width::W8> for f32 {
 55 |     type Vector = Shim2<f32x4, Self>;
 56 | }
 57 | 
 58 | impl Scalar<Simd128, width::W1> for f64 {
 59 |     type Vector = ShimToken<generic::f64x1, f64, Simd128>; // generic one-lane vector
 60 | }
 61 | 
 62 | impl Scalar<Simd128, width::W2> for f64 {
 63 |     type Vector = f64x2; // native width
 64 | }
 65 | 
 66 | impl Scalar<Simd128, width::W4> for f64 {
 67 |     type Vector = Shim2<f64x2, Self>;
 68 | }
 69 | 
 70 | impl Scalar<Simd128, width::W8> for f64 {
 71 |     type Vector = Shim4<f64x2, Self>;
 72 | }
 73 | 
 74 | as_slice! { f32x4 }
 75 | as_slice! { f64x2 }
 76 | 
 77 | unsafe impl Vector for f32x4 {
 78 |     type Scalar = f32;
 79 |     type Token = Simd128;
 80 |     type Width = width::W4;
 81 |     type Underlying = v128;
 82 |     // A zeroed vector is a splat of `0.0`.
 83 |     #[inline]
 84 |     fn zeroed(token: Self::Token) -> Self {
 85 |         <Self as Vector>::splat(token, 0.)
 86 |     }
 87 |     // Copies `value` into all four lanes.
 88 |     #[inline]
 89 |     fn splat(_: Self::Token, value: Self::Scalar) -> Self {
 90 |         unsafe { Self(f32x4_splat(value)) }
 91 |     }
 92 | }
 93 | 
 94 | unsafe impl Vector for f64x2 {
 95 |     type Scalar = f64;
 96 |     type Token = Simd128;
 97 |     type Width = width::W2;
 98 |     type Underlying = v128;
 99 |     // A zeroed vector is a splat of `0.0`.
100 |     #[inline]
101 |     fn zeroed(token: Self::Token) -> Self {
102 |         <Self as Vector>::splat(token, 0.)
103 |     }
104 |     // Copies `value` into both lanes.
105 |     #[inline]
106 |     fn splat(_: Self::Token, value: Self::Scalar) -> Self {
107 |         unsafe { Self(f64x2_splat(value)) }
108 |     }
109 | }
110 | 
111 | arithmetic_ops! {
112 |     feature: Simd128::new_unchecked(),
113 |     for f32x4:
114 |         add -> (f32x4_add), // each operator names the matching `f32x4_*` intrinsic
115 |         sub -> (f32x4_sub),
116 |         mul -> (f32x4_mul),
117 |         div -> (f32x4_div)
118 | }
119 | 
120 | arithmetic_ops! {
121 |     feature: Simd128::new_unchecked(),
122 |     for f64x2:
123 |         add -> (f64x2_add), // each operator names the matching `f64x2_*` intrinsic
124 |         sub -> (f64x2_sub),
125 |         mul -> (f64x2_mul),
126 |         div -> (f64x2_div)
127 | }
128 | 
129 | impl core::ops::Neg for f32x4 {
130 |     type Output = Self;
131 |     // Negates all four `f32` lanes.
132 |     #[inline]
133 |     fn neg(self) -> Self::Output {
134 |         unsafe { Self(f32x4_neg(self.0)) }
135 |     }
136 | }
137 | 
138 | impl core::ops::Neg for f64x2 {
139 |     type Output = Self;
140 |     // Negates both `f64` lanes.
141 |     #[inline]
142 |     fn neg(self) -> Self::Output {
143 |         unsafe { Self(f64x2_neg(self.0)) }
144 |     }
145 | }
146 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/x86/complex.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(target_arch = "x86")]
  2 | use core::arch::x86::*;
  3 | #[cfg(target_arch = "x86_64")]
  4 | use core::arch::x86_64::*;
  5 | 
  6 | use crate::{
  7 |     arch::{generic, x86::*, Token},
  8 |     scalar::Scalar,
  9 |     shim::{Shim2, Shim4, Shim8, ShimToken},
 10 |     vector::{width, Native, Vector},
 11 | };
 12 | use num_complex::Complex;
 13 | 
 14 | impl Native<Sse> for Complex<f32> {
 15 |     type Width = crate::vector::width::W2; // matches `cf32x2`
 16 | }
 17 | 
 18 | impl Native<Sse> for Complex<f64> {
 19 |     type Width = crate::vector::width::W1; // matches `cf64x1`
 20 | }
 21 | 
 22 | impl Native<Avx> for Complex<f32> {
 23 |     type Width = crate::vector::width::W4; // matches `cf32x4`
 24 | }
 25 | 
 26 | impl Native<Avx> for Complex<f64> {
 27 |     type Width = crate::vector::width::W2; // matches `cf64x2`
 28 | }
 29 | 
 30 | /// Two `Complex<f32>` values packed into one SSE `__m128` register.
 31 | ///
 32 | /// Only available with the `"complex"` feature.
 33 | #[allow(non_camel_case_types)]
 34 | #[repr(transparent)]
 35 | #[derive(Copy, Clone, Debug)]
 36 | pub struct cf32x2(__m128);
 37 | 
 38 | /// One `Complex<f64>` value stored in one SSE `__m128d` register.
 39 | ///
 40 | /// Only available with the `"complex"` feature.
 41 | #[allow(non_camel_case_types)]
 42 | #[repr(transparent)]
 43 | #[derive(Copy, Clone, Debug)]
 44 | pub struct cf64x1(__m128d);
 45 | 
 46 | /// Four `Complex<f32>` values packed into one AVX `__m256` register.
 47 | ///
 48 | /// Only available with the `"complex"` feature.
 49 | #[allow(non_camel_case_types)]
 50 | #[repr(transparent)]
 51 | #[derive(Copy, Clone, Debug)]
 52 | pub struct cf32x4(__m256);
 53 | 
 54 | /// Two `Complex<f64>` values packed into one AVX `__m256d` register.
 55 | ///
 56 | /// Only available with the `"complex"` feature.
 57 | #[allow(non_camel_case_types)]
 58 | #[repr(transparent)]
 59 | #[derive(Copy, Clone, Debug)]
 60 | pub struct cf64x2(__m256d);
 61 | 
 62 | impl Scalar<Sse, width::W1> for Complex<f32> {
 63 |     type Vector = ShimToken<generic::cf32x1, Complex<f32>, Sse>; // generic one-lane vector
 64 | }
 65 | 
 66 | impl Scalar<Sse, width::W2> for Complex<f32> {
 67 |     type Vector = cf32x2; // native width
 68 | }
 69 | 
 70 | impl Scalar<Sse, width::W4> for Complex<f32> {
 71 |     type Vector = Shim2<cf32x2, Self>;
 72 | }
 73 | 
 74 | impl Scalar<Sse, width::W8> for Complex<f32> {
 75 |     type Vector = Shim4<cf32x2, Self>;
 76 | }
 77 | 
 78 | impl Scalar<Sse, width::W1> for Complex<f64> {
 79 |     type Vector = cf64x1; // native width
 80 | }
 81 | 
 82 | impl Scalar<Sse, width::W2> for Complex<f64> {
 83 |     type Vector = Shim2<cf64x1, Self>;
 84 | }
 85 | 
 86 | impl Scalar<Sse, width::W4> for Complex<f64> {
 87 |     type Vector = Shim4<cf64x1, Self>;
 88 | }
 89 | 
 90 | impl Scalar<Sse, width::W8> for Complex<f64> {
 91 |     type Vector = Shim8<cf64x1, Complex<f64>>;
 92 | }
 93 | 
 94 | impl Scalar<Avx, width::W1> for Complex<f32> {
 95 |     type Vector = ShimToken<generic::cf32x1, Complex<f32>, Avx>; // generic one-lane vector
 96 | }
 97 | 
 98 | impl Scalar<Avx, width::W2> for Complex<f32> {
 99 |     type Vector = ShimToken<cf32x2, Complex<f32>, Avx>; // SSE vector under the AVX token
100 | }
101 | 
102 | impl Scalar<Avx, width::W4> for Complex<f32> {
103 |     type Vector = cf32x4; // native width
104 | }
105 | 
106 | impl Scalar<Avx, width::W8> for Complex<f32> {
107 |     type Vector = Shim2<cf32x4, Self>;
108 | }
109 | 
110 | impl Scalar<Avx, width::W1> for Complex<f64> {
111 |     type Vector = ShimToken<cf64x1, Complex<f64>, Avx>; // SSE vector under the AVX token
112 | }
113 | 
114 | impl Scalar<Avx, width::W2> for Complex<f64> {
115 |     type Vector = cf64x2; // native width
116 | }
117 | 
118 | impl Scalar<Avx, width::W4> for Complex<f64> {
119 |     type Vector = Shim2<cf64x2, Self>;
120 | }
121 | 
122 | impl Scalar<Avx, width::W8> for Complex<f64> {
123 |     type Vector = Shim4<cf64x2, Self>;
124 | }
125 | 
126 | arithmetic_ops! {
127 |     feature: Sse::new_unchecked(),
128 |     for cf32x2:
129 |         add -> (_mm_add_ps), // complex addition is lane-wise
130 |         sub -> (_mm_sub_ps), // complex subtraction is lane-wise
131 |         mul -> (mul_cf32x2), // complex product helper defined later in this file
132 |         div -> (div_cf32x2) // complex quotient helper defined later in this file
133 | }
134 | 
135 | arithmetic_ops! {
136 |     feature: Sse::new_unchecked(),
137 |     for cf64x1:
138 |         add -> (_mm_add_pd), // complex addition is lane-wise
139 |         sub -> (_mm_sub_pd), // complex subtraction is lane-wise
140 |         mul -> (mul_cf64x1), // complex product helper defined later in this file
141 |         div -> (div_cf64x1) // complex quotient helper defined later in this file
142 | }
143 | 
144 | arithmetic_ops! {
145 |     feature: Avx::new_unchecked(),
146 |     for cf32x4:
147 |         add -> (_mm256_add_ps), // complex addition is lane-wise
148 |         sub -> (_mm256_sub_ps), // complex subtraction is lane-wise
149 |         mul -> (mul_cf32x4), // complex product helper defined later in this file
150 |         div -> (div_cf32x4) // complex quotient helper defined later in this file
151 | }
152 | 
153 | arithmetic_ops! {
154 |     feature: Avx::new_unchecked(),
155 |     for cf64x2:
156 |         add -> (_mm256_add_pd), // complex addition is lane-wise
157 |         sub -> (_mm256_sub_pd), // complex subtraction is lane-wise
158 |         mul -> (mul_cf64x2), // complex product helper defined later in this file
159 |         div -> (div_cf64x2) // complex quotient helper defined later in this file
160 | }
161 | 
162 | #[inline]
163 | #[target_feature(enable = "sse3")]
164 | unsafe fn mul_cf32x2(a: __m128, b: __m128) -> __m128 {
165 |     // Per complex value: [a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re].
166 |     let direct = _mm_mul_ps(_mm_moveldup_ps(a), b);
167 |     let crossed = _mm_mul_ps(_mm_movehdup_ps(a), _mm_shuffle_ps(b, b, 0xb1));
168 |     _mm_addsub_ps(direct, crossed)
169 | }
170 | 
171 | #[inline]
172 | #[target_feature(enable = "sse3")]
173 | unsafe fn mul_cf64x1(a: __m128d, b: __m128d) -> __m128d {
174 |     // [a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re]
175 |     let direct = _mm_mul_pd(_mm_shuffle_pd(a, a, 0x00), b);
176 |     let crossed = _mm_mul_pd(_mm_shuffle_pd(a, a, 0x03), _mm_shuffle_pd(b, b, 0x01));
177 |     _mm_addsub_pd(direct, crossed)
178 | }
179 | 
180 | // a / b = [(a.re * b.re + a.im * b.im) + i (a.im * b.re - a.re * b.im)] / (b.re * b.re + b.im * b.im)
181 | #[inline]
182 | #[target_feature(enable = "sse3")]
183 | unsafe fn div_cf32x2(a: __m128, b: __m128) -> __m128 {
184 |     let (den_re, den_im) = (_mm_moveldup_ps(b), _mm_movehdup_ps(b));
185 |     let swapped = _mm_shuffle_ps(a, a, 0xb1);
186 |     // |b|^2, repeated in both lanes of each complex value.
187 |     let magnitude = _mm_add_ps(_mm_mul_ps(den_re, den_re), _mm_mul_ps(den_im, den_im));
188 |     let direct = _mm_mul_ps(a, den_re);
189 |     // Flip the sign bit so that `addsub` adds on even lanes and subtracts on odd lanes.
190 |     let sign_bit = _mm_set1_ps(-0.);
191 |     let crossed = _mm_xor_ps(_mm_mul_ps(swapped, den_im), sign_bit);
192 |     // Divide the combined numerator by |b|^2.
193 |     let numerator = _mm_addsub_ps(direct, crossed);
194 |     _mm_div_ps(numerator, magnitude)
195 | }
196 | 
197 | #[inline]
198 | #[target_feature(enable = "sse3")]
199 | unsafe fn div_cf64x1(a: __m128d, b: __m128d) -> __m128d {
200 |     // a / b = [(a.re * b.re + a.im * b.im) + i (a.im * b.re - a.re * b.im)] / |b|^2
201 |     let (den_re, den_im) = (_mm_shuffle_pd(b, b, 0x00), _mm_shuffle_pd(b, b, 0x03));
202 |     let swapped = _mm_shuffle_pd(a, a, 0x01);
203 |     let magnitude = _mm_add_pd(_mm_mul_pd(den_re, den_re), _mm_mul_pd(den_im, den_im));
204 |     let direct = _mm_mul_pd(a, den_re);
205 |     // Flip the sign bit so that `addsub` adds on the low lane and subtracts on the high lane.
206 |     let sign_bit = _mm_set1_pd(-0.);
207 |     let crossed = _mm_xor_pd(_mm_mul_pd(swapped, den_im), sign_bit);
208 |     // Divide the combined numerator by |b|^2.
209 |     let numerator = _mm_addsub_pd(direct, crossed);
210 |     _mm_div_pd(numerator, magnitude)
211 | }
212 | 
213 | #[inline]
214 | #[target_feature(enable = "avx")]
215 | unsafe fn mul_cf32x4(a: __m256, b: __m256) -> __m256 {
216 |     // Per complex value: [a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re].
217 |     let direct = _mm256_mul_ps(_mm256_moveldup_ps(a), b);
218 |     let crossed = _mm256_mul_ps(_mm256_movehdup_ps(a), _mm256_shuffle_ps(b, b, 0xb1));
219 |     _mm256_addsub_ps(direct, crossed)
220 | }
221 | 
222 | #[inline]
223 | #[target_feature(enable = "avx")]
224 | unsafe fn mul_cf64x2(a: __m256d, b: __m256d) -> __m256d {
225 |     // Per complex value: [a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re].
226 |     let direct = _mm256_mul_pd(_mm256_unpacklo_pd(a, a), b);
227 |     let crossed = _mm256_mul_pd(_mm256_unpackhi_pd(a, a), _mm256_shuffle_pd(b, b, 0x5));
228 |     _mm256_addsub_pd(direct, crossed)
229 | }
230 | 
231 | // a / b = [(a.re * b.re + a.im * b.im) + i (a.im * b.re - a.re * b.im)] / (b.re * b.re + b.im * b.im)
232 | #[inline]
233 | #[target_feature(enable = "avx")]
234 | unsafe fn div_cf32x4(a: __m256, b: __m256) -> __m256 {
235 |     let (den_re, den_im) = (_mm256_moveldup_ps(b), _mm256_movehdup_ps(b));
236 |     let swapped = _mm256_shuffle_ps(a, a, 0xb1);
237 |     // |b|^2, repeated in both lanes of each complex value.
238 |     let magnitude = _mm256_add_ps(_mm256_mul_ps(den_re, den_re), _mm256_mul_ps(den_im, den_im));
239 |     let direct = _mm256_mul_ps(a, den_re);
240 |     // Flip the sign bit so that `addsub` adds on even lanes and subtracts on odd lanes.
241 |     let sign_bit = _mm256_set1_ps(-0.);
242 |     let crossed = _mm256_xor_ps(_mm256_mul_ps(swapped, den_im), sign_bit);
243 |     // Divide the combined numerator by |b|^2.
244 |     let numerator = _mm256_addsub_ps(direct, crossed);
245 |     _mm256_div_ps(numerator, magnitude)
246 | }
247 | 
248 | #[inline]
249 | #[target_feature(enable = "avx")]
250 | unsafe fn div_cf64x2(a: __m256d, b: __m256d) -> __m256d {
251 |     // a / b = [(a.re * b.re + a.im * b.im) + i (a.im * b.re - a.re * b.im)] / |b|^2
252 |     let (den_re, den_im) = (_mm256_unpacklo_pd(b, b), _mm256_unpackhi_pd(b, b));
253 |     let swapped = _mm256_shuffle_pd(a, a, 0x5);
254 |     let magnitude = _mm256_add_pd(_mm256_mul_pd(den_re, den_re), _mm256_mul_pd(den_im, den_im));
255 |     let direct = _mm256_mul_pd(a, den_re);
256 |     // Flip the sign bit so that `addsub` adds on even lanes and subtracts on odd lanes.
257 |     let sign_bit = _mm256_set1_pd(-0.);
258 |     let crossed = _mm256_xor_pd(_mm256_mul_pd(swapped, den_im), sign_bit);
259 |     // Divide the combined numerator by |b|^2.
260 |     let numerator = _mm256_addsub_pd(direct, crossed);
261 |     _mm256_div_pd(numerator, magnitude)
262 | }
263 | 
264 | impl core::ops::Neg for cf32x2 {
265 |     type Output = Self;
266 |     // XOR with `-0.0` flips the sign bit of every lane.
267 |     #[inline]
268 |     fn neg(self) -> Self::Output {
269 |         unsafe { Self(_mm_xor_ps(self.0, _mm_set1_ps(-0.))) }
270 |     }
271 | }
272 | 
273 | impl core::ops::Neg for cf64x1 {
274 |     type Output = Self;
275 |     // XOR with `-0.0` flips the sign bit of every lane.
276 |     #[inline]
277 |     fn neg(self) -> Self::Output {
278 |         unsafe { Self(_mm_xor_pd(self.0, _mm_set1_pd(-0.))) }
279 |     }
280 | }
281 | 
282 | impl core::ops::Neg for cf32x4 {
283 |     type Output = Self;
284 |     // XOR with `-0.0` flips the sign bit of every lane.
285 |     #[inline]
286 |     fn neg(self) -> Self::Output {
287 |         unsafe { Self(_mm256_xor_ps(self.0, _mm256_set1_ps(-0.))) }
288 |     }
289 | }
290 | 
291 | impl core::ops::Neg for cf64x2 {
292 |     type Output = Self;
293 |     // XOR with `-0.0` flips the sign bit of every lane.
294 |     #[inline]
295 |     fn neg(self) -> Self::Output {
296 |         unsafe { Self(_mm256_xor_pd(self.0, _mm256_set1_pd(-0.))) }
297 |     }
298 | }
299 | 
300 | as_slice! { cf32x2 }
301 | as_slice! { cf32x4 }
302 | as_slice! { cf64x1 }
303 | as_slice! { cf64x2 }
304 | 
305 | unsafe impl Vector for cf32x2 {
306 |     type Scalar = Complex<f32>;
307 |     type Token = Sse;
308 |     type Width = width::W2;
309 |     type Underlying = __m128;
310 | 
311 |     // Every lane is zero.
312 |     #[inline]
313 |     fn zeroed(_: Self::Token) -> Self {
314 |         unsafe { Self(_mm_setzero_ps()) }
315 |     }
316 | 
317 |     // `_mm_set_ps` takes the highest lane first, so the lanes hold `[re, im, re, im]`.
318 |     #[inline]
319 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
320 |         let Complex { re, im } = from;
321 |         unsafe { Self(_mm_set_ps(im, re, im, re)) }
322 |     }
323 | }
324 | 
325 | unsafe impl Vector for cf64x1 {
326 |     type Scalar = Complex<f64>;
327 |     type Token = Sse;
328 |     type Width = width::W1;
329 |     type Underlying = __m128d;
330 | 
331 |     // Both lanes are zero.
332 |     #[inline]
333 |     fn zeroed(_: Self::Token) -> Self {
334 |         unsafe { Self(_mm_setzero_pd()) }
335 |     }
336 | 
337 |     // `_mm_set_pd` takes the high lane first, so the lanes hold `[re, im]`.
338 |     #[inline]
339 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
340 |         let Complex { re, im } = from;
341 |         unsafe { Self(_mm_set_pd(im, re)) }
342 |     }
343 | }
344 | 
345 | unsafe impl Vector for cf32x4 {
346 |     // Four complex values, i.e. eight `f32` lanes, in one `__m256`.
347 |     type Scalar = Complex<f32>;
348 |     type Token = Avx;
349 |     type Width = width::W4;
350 |     type Underlying = __m256;
351 | 
352 |     // Every lane is zero.
353 |     #[inline]
354 |     fn zeroed(_: Self::Token) -> Self {
355 |         unsafe { Self(_mm256_setzero_ps()) }
356 |     }
357 | 
358 |     // `_mm256_setr_ps` takes the lowest lane first, so the lanes hold
359 |     // `[re, im, re, im, re, im, re, im]`.
360 |     #[inline]
361 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
362 |         let Complex { re, im } = from;
363 |         // The same complex value is repeated four times.
364 |         let lanes = unsafe { _mm256_setr_ps(re, im, re, im, re, im, re, im) };
365 |         Self(lanes)
366 |     }
367 | }
368 | 
369 | unsafe impl Vector for cf64x2 {
370 |     type Scalar = Complex<f64>;
371 |     type Token = Avx;
372 |     type Width = width::W2;
373 |     type Underlying = __m256d;
374 | 
375 |     // Every lane is zero.
376 |     #[inline]
377 |     fn zeroed(_: Self::Token) -> Self {
378 |         unsafe { Self(_mm256_setzero_pd()) }
379 |     }
380 | 
381 |     // `_mm256_setr_pd` takes the lowest lane first, so the lanes hold `[re, im, re, im]`.
382 |     #[inline]
383 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
384 |         let Complex { re, im } = from;
385 |         unsafe { Self(_mm256_setr_pd(re, im, re, im)) }
386 |     }
387 | }
388 | 
389 | impl crate::vector::Complex for cf32x2 {
390 |     type RealScalar = f32;
391 |     // Conjugate: flip the sign bit of each imaginary (odd) lane.
392 |     #[inline]
393 |     fn conj(self) -> Self {
394 |         Self(unsafe { _mm_xor_ps(self.0, _mm_set_ps(-0., 0., -0., 0.)) })
395 |     }
396 |     // (re + i im) * i = -im + i re: swap each pair, then flip the sign bit of the real lanes.
397 |     // A sign-bit XOR (rather than `0 - x`) negates signed zeros exactly, as `arch::generic` does.
398 |     #[inline]
399 |     fn mul_i(self) -> Self {
400 |         let swapped = unsafe { _mm_shuffle_ps(self.0, self.0, 0xb1) };
401 |         Self(unsafe { _mm_xor_ps(swapped, _mm_set_ps(0., -0., 0., -0.)) })
402 |     }
403 |     // (re + i im) * -i = im - i re: swap each pair, then flip the sign bit of the imaginary lanes.
404 |     #[inline]
405 |     fn mul_neg_i(self) -> Self {
406 |         let swapped = unsafe { _mm_shuffle_ps(self.0, self.0, 0xb1) };
407 |         Self(unsafe { _mm_xor_ps(swapped, _mm_set_ps(-0., 0., -0., 0.)) })
408 |     }
409 | }
410 | 
411 | impl crate::vector::Complex for cf64x1 {
412 |     type RealScalar = f64;
413 |     /// Conjugates the value by XOR-ing `-0.0` into the high (imaginary) element.
414 |     #[inline]
415 |     fn conj(self) -> Self {
416 |         let sign_mask = unsafe { _mm_set_pd(-0., 0.) };
417 |         Self(unsafe { _mm_xor_pd(self.0, sign_mask) })
418 |     }
419 |     /// Multiplies by `i`: swap `re`/`im`, then `addsub` from zero (`0 - x` low, `0 + x` high).
420 |     #[inline]
421 |     fn mul_i(self) -> Self {
422 |         let swapped = unsafe { _mm_shuffle_pd(self.0, self.0, 0x1) };
423 |         Self(unsafe { _mm_addsub_pd(_mm_setzero_pd(), swapped) })
424 |     }
425 |     /// Multiplies by `-i`: `addsub` from zero (`0 - x` low, `0 + x` high), then swap `re`/`im`.
426 |     #[inline]
427 |     fn mul_neg_i(self) -> Self {
428 |         let negated = unsafe { _mm_addsub_pd(_mm_setzero_pd(), self.0) };
429 |         Self(unsafe { _mm_shuffle_pd(negated, negated, 0x1) })
430 |     }
431 | }
432 | 
433 | impl crate::vector::Complex for cf32x4 {
434 |     type RealScalar = f32;
435 |     /// Conjugates each value by XOR-ing `-0.0` into the odd (imaginary) elements.
436 |     #[inline]
437 |     fn conj(self) -> Self {
438 |         let sign_mask = unsafe { _mm256_set_ps(-0., 0., -0., 0., -0., 0., -0., 0.) };
439 |         Self(unsafe { _mm256_xor_ps(self.0, sign_mask) })
440 |     }
441 |
442 |     /// Multiplies by `i`: swap `re`/`im`, then `addsub` from zero (`0 - x` even, `0 + x` odd).
443 |     #[inline]
444 |     fn mul_i(self) -> Self {
445 |         let swapped = unsafe { _mm256_shuffle_ps(self.0, self.0, 0xb1) };
446 |         Self(unsafe { _mm256_addsub_ps(_mm256_setzero_ps(), swapped) })
447 |     }
448 |
449 |     /// Multiplies by `-i`: `addsub` from zero (`0 - x` even, `0 + x` odd), then swap `re`/`im`.
450 |     #[inline]
451 |     fn mul_neg_i(self) -> Self {
452 |         let negated = unsafe { _mm256_addsub_ps(_mm256_setzero_ps(), self.0) };
453 |         Self(unsafe { _mm256_shuffle_ps(negated, negated, 0xb1) })
454 |     }
455 | }
456 | 
457 | impl crate::vector::Complex for cf64x2 {
458 |     type RealScalar = f64;
459 |     /// Conjugates each value by XOR-ing `-0.0` into the odd (imaginary) elements.
460 |     #[inline]
461 |     fn conj(self) -> Self {
462 |         let sign_mask = unsafe { _mm256_set_pd(-0., 0., -0., 0.) };
463 |         Self(unsafe { _mm256_xor_pd(self.0, sign_mask) })
464 |     }
465 |
466 |     /// Multiplies by `i`: swap `re`/`im`, then `addsub` from zero (`0 - x` even, `0 + x` odd).
467 |     #[inline]
468 |     fn mul_i(self) -> Self {
469 |         let swapped = unsafe { _mm256_shuffle_pd(self.0, self.0, 0x5) };
470 |         Self(unsafe { _mm256_addsub_pd(_mm256_setzero_pd(), swapped) })
471 |     }
472 |
473 |     /// Multiplies by `-i`: `addsub` from zero (`0 - x` even, `0 + x` odd), then swap `re`/`im`.
474 |     #[inline]
475 |     fn mul_neg_i(self) -> Self {
476 |         let negated = unsafe { _mm256_addsub_pd(_mm256_setzero_pd(), self.0) };
477 |         Self(unsafe { _mm256_shuffle_pd(negated, negated, 0x5) })
478 |     }
479 | }
480 | 


--------------------------------------------------------------------------------
/generic-simd/src/arch/x86/mod.rs:
--------------------------------------------------------------------------------
  1 | //! x86/x86-64 vector types.
  2 | 
  3 | #[cfg(feature = "complex")]
  4 | mod complex;
  5 | #[cfg(feature = "complex")]
  6 | pub use complex::*;
  7 | 
  8 | use crate::{
  9 |     arch::{generic, Token},
 10 |     scalar::Scalar,
 11 |     shim::{Shim2, Shim4, ShimToken},
 12 |     vector::{width, Native, Vector},
 13 | };
 14 | 
 15 | #[cfg(target_arch = "x86")]
 16 | use core::arch::x86::*;
 17 | #[cfg(target_arch = "x86_64")]
 18 | use core::arch::x86_64::*;
 19 | 
 20 | /// SSE4.1 instruction set token; the private `()` field prevents construction outside this module.
 21 | #[derive(Copy, Clone, Debug)]
 22 | pub struct Sse(());
 23 |
 24 | /// AVX instruction set token; the private `()` field prevents construction outside this module.
 25 | #[derive(Copy, Clone, Debug)]
 26 | pub struct Avx(());
 27 | // `impl_token!` is defined elsewhere; presumably it ties each token to the named CPU feature -- confirm.
 28 | impl_token! { Sse => "sse4.1" }
 29 | impl_token! { Avx => "avx" }
 30 | 
 31 | impl core::convert::From<Avx> for Sse {
 32 |     #[inline] // exchanges an existing `Avx` token for an `Sse` token via `new_unchecked`
 33 |     fn from(_: Avx) -> Self {
 34 |         unsafe { Self::new_unchecked() }
 35 |     }
 36 | }
 37 | 
 38 | impl Native<Sse> for f32 {
 39 |     type Width = width::W4; // native `f32` width under `Sse`; same width as `f32x4` below
 40 | }
 41 |
 42 | impl Native<Sse> for f64 {
 43 |     type Width = width::W2; // native `f64` width under `Sse`; same width as `f64x2` below
 44 | }
 45 |
 46 | impl Native<Avx> for f32 {
 47 |     type Width = width::W8; // native `f32` width under `Avx`; same width as `f32x8` below
 48 | }
 49 |
 50 | impl Native<Avx> for f64 {
 51 |     type Width = width::W4; // native `f64` width under `Avx`; same width as `f64x4` below
 52 | }
 53 | 
 54 | /// An SSE vector of four `f32`s, stored as a `__m128`.
 55 | #[derive(Clone, Copy, Debug)]
 56 | #[repr(transparent)]
 57 | #[allow(non_camel_case_types)]
 58 | pub struct f32x4(__m128);
 59 |
 60 | /// An SSE vector of two `f64`s, stored as a `__m128d`.
 61 | #[derive(Clone, Copy, Debug)]
 62 | #[repr(transparent)]
 63 | #[allow(non_camel_case_types)]
 64 | pub struct f64x2(__m128d);
 65 |
 66 | /// An AVX vector of eight `f32`s, stored as a `__m256`.
 67 | #[derive(Clone, Copy, Debug)]
 68 | #[repr(transparent)]
 69 | #[allow(non_camel_case_types)]
 70 | pub struct f32x8(__m256);
 71 |
 72 | /// An AVX vector of four `f64`s, stored as a `__m256d`.
 73 | #[derive(Clone, Copy, Debug)]
 74 | #[repr(transparent)]
 75 | #[allow(non_camel_case_types)]
 76 | pub struct f64x4(__m256d);
 77 | 
 78 | impl Scalar<Sse, width::W1> for f32 {
 79 |     type Vector = ShimToken<generic::f32x1, Self, Sse>; // generic 1-lane vector wrapped to carry the `Sse` token
 80 | }
 81 |
 82 | impl Scalar<Sse, width::W2> for f32 {
 83 |     type Vector = ShimToken<Shim2<generic::f32x1, Self>, Self, Sse>; // presumably two generic lanes -- `Shim2` is defined outside this chunk
 84 | }
 85 |
 86 | impl Scalar<Sse, width::W4> for f32 {
 87 |     type Vector = f32x4; // native SSE width for `f32`
 88 | }
 89 |
 90 | impl Scalar<Sse, width::W8> for f32 {
 91 |     type Vector = Shim2<f32x4, Self>; // `Self` is `f32`, spelled like the sibling impls; presumably two `f32x4` halves
 92 | }
 93 | 
 94 | impl Scalar<Sse, width::W1> for f64 {
 95 |     type Vector = ShimToken<generic::f64x1, Self, Sse>; // generic 1-lane vector wrapped to carry the `Sse` token
 96 | }
 97 |
 98 | impl Scalar<Sse, width::W2> for f64 {
 99 |     type Vector = f64x2; // native SSE width for `f64`
100 | }
101 |
102 | impl Scalar<Sse, width::W4> for f64 {
103 |     type Vector = Shim2<f64x2, f64>; // presumably two `f64x2` halves -- `Shim2` is defined outside this chunk
104 | }
105 |
106 | impl Scalar<Sse, width::W8> for f64 {
107 |     type Vector = Shim4<f64x2, f64>; // presumably four `f64x2` parts -- `Shim4` is defined outside this chunk
108 | }
109 | 
110 | impl Scalar<Avx, width::W1> for f32 {
111 |     type Vector = ShimToken<generic::f32x1, Self, Avx>; // generic 1-lane vector wrapped to carry the `Avx` token
112 | }
113 |
114 | impl Scalar<Avx, width::W2> for f32 {
115 |     type Vector = ShimToken<Shim2<generic::f32x1, Self>, Self, Avx>; // presumably two generic lanes -- `Shim2` is defined outside this chunk
116 | }
117 |
118 | impl Scalar<Avx, width::W4> for f32 {
119 |     type Vector = ShimToken<f32x4, Self, Avx>; // the SSE `f32x4` wrapped to carry the `Avx` token
120 | }
121 |
122 | impl Scalar<Avx, width::W8> for f32 {
123 |     type Vector = f32x8; // native AVX width for `f32`
124 | }
125 | 
126 | impl Scalar<Avx, width::W1> for f64 {
127 |     type Vector = ShimToken<generic::f64x1, Self, Avx>; // generic 1-lane vector wrapped to carry the `Avx` token
128 | }
129 |
130 | impl Scalar<Avx, width::W2> for f64 {
131 |     type Vector = ShimToken<f64x2, Self, Avx>; // the SSE `f64x2` wrapped to carry the `Avx` token
132 | }
133 |
134 | impl Scalar<Avx, width::W4> for f64 {
135 |     type Vector = f64x4; // native AVX width for `f64`
136 | }
137 |
138 | impl Scalar<Avx, width::W8> for f64 {
139 |     type Vector = Shim2<f64x4, f64>; // presumably two `f64x4` halves -- `Shim2` is defined outside this chunk
140 | }
141 | 
142 | arithmetic_ops! { // operators for `f32x4`, each forwarding to the listed SSE intrinsic
143 |     feature: Sse::new_unchecked(), // token expression the macro uses to splat scalar right-hand sides
144 |     for f32x4:
145 |         add -> (_mm_add_ps),
146 |         sub -> (_mm_sub_ps),
147 |         mul -> (_mm_mul_ps),
148 |         div -> (_mm_div_ps)
149 | }
150 |
151 | arithmetic_ops! { // operators for `f64x2`, each forwarding to the listed SSE intrinsic
152 |     feature: Sse::new_unchecked(), // token expression the macro uses to splat scalar right-hand sides
153 |     for f64x2:
154 |         add -> (_mm_add_pd),
155 |         sub -> (_mm_sub_pd),
156 |         mul -> (_mm_mul_pd),
157 |         div -> (_mm_div_pd)
158 | }
159 |
160 | arithmetic_ops! { // operators for `f32x8`, each forwarding to the listed AVX intrinsic
161 |     feature: Avx::new_unchecked(), // token expression the macro uses to splat scalar right-hand sides
162 |     for f32x8:
163 |         add -> (_mm256_add_ps),
164 |         sub -> (_mm256_sub_ps),
165 |         mul -> (_mm256_mul_ps),
166 |         div -> (_mm256_div_ps)
167 | }
168 |
169 | arithmetic_ops! { // operators for `f64x4`, each forwarding to the listed AVX intrinsic
170 |     feature: Avx::new_unchecked(), // token expression the macro uses to splat scalar right-hand sides
171 |     for f64x4:
172 |         add -> (_mm256_add_pd),
173 |         sub -> (_mm256_sub_pd),
174 |         mul -> (_mm256_mul_pd),
175 |         div -> (_mm256_div_pd)
176 | }
177 | 
178 | impl core::ops::Neg for f32x4 {
179 |     type Output = Self;
180 |     #[inline]
181 |     fn neg(self) -> Self::Output { // XOR with `-0.0` toggles only the sign bit of each lane
182 |         let sign_bit = unsafe { _mm_set1_ps(-0.) };
183 |         Self(unsafe { _mm_xor_ps(self.0, sign_bit) })
184 |     }
185 | }
186 | 
187 | impl core::ops::Neg for f64x2 {
188 |     type Output = Self;
189 |     #[inline]
190 |     fn neg(self) -> Self::Output { // XOR with `-0.0` toggles only the sign bit of each lane
191 |         let sign_bit = unsafe { _mm_set1_pd(-0.) };
192 |         Self(unsafe { _mm_xor_pd(self.0, sign_bit) })
193 |     }
194 | }
195 | 
196 | impl core::ops::Neg for f32x8 {
197 |     type Output = Self;
198 |     #[inline]
199 |     fn neg(self) -> Self::Output { // XOR with `-0.0` toggles only the sign bit of each lane
200 |         let sign_bit = unsafe { _mm256_set1_ps(-0.) };
201 |         Self(unsafe { _mm256_xor_ps(self.0, sign_bit) })
202 |     }
203 | }
204 | 
205 | impl core::ops::Neg for f64x4 {
206 |     type Output = Self;
207 |     #[inline]
208 |     fn neg(self) -> Self::Output { // XOR with `-0.0` toggles only the sign bit of each lane
209 |         let sign_bit = unsafe { _mm256_set1_pd(-0.) };
210 |         Self(unsafe { _mm256_xor_pd(self.0, sign_bit) })
211 |     }
212 | }
213 | 
214 | as_slice! { f32x4 } // `as_slice!` adds `AsRef`/`AsMut`/`Deref`/`DerefMut` to a slice of the vector's scalars
215 | as_slice! { f32x8 }
216 | as_slice! { f64x2 }
217 | as_slice! { f64x4 }
218 | 
219 | unsafe impl Vector for f32x4 {
220 |     type Scalar = f32;
221 |     type Token = Sse;
222 |     type Width = width::W4;
223 |     type Underlying = __m128;
224 |     /// Returns a vector whose lanes are all zero.
225 |     #[inline]
226 |     fn zeroed(_: Self::Token) -> Self {
227 |         let raw = unsafe { _mm_setzero_ps() };
228 |         Self(raw)
229 |     }
230 |
231 |     /// Returns a vector with every lane set to `from`.
232 |     #[inline]
233 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
234 |         let raw = unsafe { _mm_set1_ps(from) };
235 |         Self(raw)
236 |     }
237 | }
238 | 
239 | unsafe impl Vector for f64x2 {
240 |     type Scalar = f64;
241 |     type Token = Sse;
242 |     type Width = width::W2;
243 |     type Underlying = __m128d;
244 |     /// Returns a vector whose lanes are all zero.
245 |     #[inline]
246 |     fn zeroed(_: Self::Token) -> Self {
247 |         let raw = unsafe { _mm_setzero_pd() };
248 |         Self(raw)
249 |     }
250 |
251 |     /// Returns a vector with every lane set to `from`.
252 |     #[inline]
253 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
254 |         let raw = unsafe { _mm_set1_pd(from) };
255 |         Self(raw)
256 |     }
257 | }
258 | 
259 | unsafe impl Vector for f32x8 {
260 |     type Scalar = f32;
261 |     type Token = Avx;
262 |     type Width = width::W8;
263 |     type Underlying = __m256;
264 |     /// Returns a vector whose lanes are all zero.
265 |     #[inline]
266 |     fn zeroed(_: Self::Token) -> Self {
267 |         let raw = unsafe { _mm256_setzero_ps() };
268 |         Self(raw)
269 |     }
270 |
271 |     /// Returns a vector with every lane set to `from`.
272 |     #[inline]
273 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
274 |         let raw = unsafe { _mm256_set1_ps(from) };
275 |         Self(raw)
276 |     }
277 | }
278 | 
279 | unsafe impl Vector for f64x4 {
280 |     type Scalar = f64;
281 |     type Token = Avx;
282 |     type Width = width::W4;
283 |     type Underlying = __m256d;
284 |     /// Returns a vector whose lanes are all zero.
285 |     #[inline]
286 |     fn zeroed(_: Self::Token) -> Self {
287 |         let raw = unsafe { _mm256_setzero_pd() };
288 |         Self(raw)
289 |     }
290 |
291 |     /// Returns a vector with every lane set to `from`.
292 |     #[inline]
293 |     fn splat(_: Self::Token, from: Self::Scalar) -> Self {
294 |         let raw = unsafe { _mm256_set1_pd(from) };
295 |         Self(raw)
296 |     }
297 | }
298 | 


--------------------------------------------------------------------------------
/generic-simd/src/implementation.rs:
--------------------------------------------------------------------------------
  1 | macro_rules! arithmetic_ops { // Implements `Sum`, `Product`, `Add`/`Sub`/`Mul`/`Div` and their `*Assign` forms for one vector type.
  2 |     { // Binary operator without an intrinsic (`()`): apply the scalar operator lane by lane.
  3 |         @new $type:ty, $feature:expr, $trait:ident, $func:ident, ()
  4 |     } => {
  5 |         impl core::ops::$trait<$type> for $type {
  6 |             type Output = Self;
  7 |             #[allow(unused_unsafe)]
  8 |             #[inline]
  9 |             fn $func(mut self, rhs: Self) -> Self {
 10 |                 for (a, b) in self.iter_mut().zip(rhs.iter()) {
 11 |                     *a = core::ops::$trait::$func(*a, b);
 12 |                 }
 13 |                 self
 14 |             }
 15 |         }
 16 |
 17 |         impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type {
 18 |             type Output = Self;
 19 |             #[inline]
 20 |             fn $func(mut self, rhs: <$type as $crate::vector::Vector>::Scalar) -> Self {
 21 |                 for a in self.iter_mut() {
 22 |                     *a = core::ops::$trait::$func(*a, rhs);
 23 |                 }
 24 |                 self
 25 |             }
 26 |         }
 27 |     };
 28 |     { // Compound assignment without an intrinsic (`()`): apply the scalar `*Assign` operator lane by lane.
 29 |         @assign $type:ty, $feature:expr, $trait:ident, $func:ident, ()
 30 |     } => {
 31 |         impl core::ops::$trait<$type> for $type {
 32 |             #[allow(unused_unsafe)]
 33 |             #[inline]
 34 |             fn $func(&mut self, rhs: Self) {
 35 |                 for (a, b) in self.iter_mut().zip(rhs.iter()) {
 36 |                     core::ops::$trait::$func(a, b);
 37 |                 }
 38 |             }
 39 |         }
 40 |
 41 |         impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type {
 42 |             #[inline]
 43 |             fn $func(&mut self, rhs: <$type as $crate::vector::Vector>::Scalar) {
 44 |                 for a in self.iter_mut() {
 45 |                     core::ops::$trait::$func(a, rhs);
 46 |                 }
 47 |             }
 48 |         }
 49 |     };
 50 |     { // Binary operator with an intrinsic: `$op` is called on the two wrapped registers.
 51 |         @new $type:ty, $feature:expr, $trait:ident, $func:ident, ($op:path)
 52 |     } => {
 53 |         impl core::ops::$trait<$type> for $type {
 54 |             type Output = Self;
 55 |             #[allow(unused_unsafe)]
 56 |             #[inline]
 57 |             fn $func(self, rhs: Self) -> Self {
 58 |                 Self(unsafe { $op(self.0, rhs.0) })
 59 |             }
 60 |         }
 61 |
 62 |         impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type {
 63 |             type Output = Self;
 64 |             #[inline]
 65 |             fn $func(self, rhs: <$type as $crate::vector::Vector>::Scalar) -> Self {
 66 |                 self.$func(<$type>::splat(unsafe { $feature }, rhs)) // splat the scalar, then reuse the vector operator
 67 |             }
 68 |         }
 69 |     };
 70 |     { // Compound assignment with an intrinsic: `$op` result is stored back into `self`.
 71 |         @assign $type:ty, $feature:expr, $trait:ident, $func:ident, ($op:path)
 72 |     } => {
 73 |         impl core::ops::$trait<$type> for $type {
 74 |             #[allow(unused_unsafe)]
 75 |             #[inline]
 76 |             fn $func(&mut self, rhs: Self) {
 77 |                 self.0 = unsafe { $op(self.0, rhs.0) };
 78 |             }
 79 |         }
 80 |
 81 |         impl core::ops::$trait<<$type as $crate::vector::Vector>::Scalar> for $type {
 82 |             #[inline]
 83 |             fn $func(&mut self, rhs: <$type as $crate::vector::Vector>::Scalar) {
 84 |                 self.$func(<$type>::splat(unsafe { $feature }, rhs)) // splat the scalar, then reuse the vector operator
 85 |             }
 86 |         }
 87 |     };
 88 |     { // Entry point: `$feature` is the token expression; each `*_expr` is `(intrinsic)` or `()` for the lane-wise fallback.
 89 |         feature: $feature:expr,
 90 |         for $type:ty:
 91 |             add -> $add_expr:tt,
 92 |             sub -> $sub_expr:tt,
 93 |             mul -> $mul_expr:tt,
 94 |             div -> $div_expr:tt
 95 |     } => {
 96 |         impl core::iter::Sum<$type> for Option<$type> { // lane-wise sum of the vectors; `None` for an empty iterator
 97 |             #[inline]
 98 |             fn sum<I>(mut iter: I) -> Self
 99 |             where
100 |                 I: Iterator<Item = $type>,
101 |             {
102 |                 if let Some(mut sum) = iter.next() {
103 |                     while let Some(v) = iter.next() {
104 |                         sum += v;
105 |                     }
106 |                     Some(sum)
107 |                 } else {
108 |                     None
109 |                 }
110 |             }
111 |         }
112 |
113 |         impl core::iter::Sum<$type> for <$type as $crate::vector::Vector>::Scalar { // sums the vectors, then the lanes of the result
114 |             #[inline]
115 |             fn sum<I>(iter: I) -> Self
116 |             where
117 |                 I: Iterator<Item = $type>,
118 |             {
119 |                 if let Some(sums) = iter.sum::<Option<$type>>() {
120 |                     sums.iter().sum()
121 |                 } else {
122 |                     Default::default()
123 |                 }
124 |             }
125 |         }
126 |
127 |         impl core::iter::Product<$type> for Option<$type> { // lane-wise product of the vectors; `None` for an empty iterator
128 |             #[inline]
129 |             fn product<I>(mut iter: I) -> Self
130 |             where
131 |                 I: Iterator<Item = $type>,
132 |             {
133 |                 if let Some(mut acc) = iter.next() {
134 |                     while let Some(v) = iter.next() {
135 |                         acc *= v;
136 |                     }
137 |                     Some(acc)
138 |                 } else {
139 |                     None
140 |                 }
141 |             }
142 |         }
143 |
144 |         impl core::iter::Product<$type> for <$type as $crate::vector::Vector>::Scalar { // multiplies the vectors, then the lanes of the result
145 |             #[inline]
146 |             fn product<I>(iter: I) -> Self
147 |             where
148 |                 I: Iterator<Item = $type>,
149 |             {
150 |                 if let Some(products) = iter.product::<Option<$type>>() { // must fold with `product`, not `sum`
151 |                     products.iter().product()
152 |                 } else {
153 |                     core::iter::empty::<&Self>().product() // empty product is the scalar's multiplicative identity, not `Default`
154 |                 }
155 |             }
156 |         }
157 |
158 |         arithmetic_ops!{@new $type, $feature, Add, add, $add_expr}
159 |         arithmetic_ops!{@new $type, $feature, Sub, sub, $sub_expr}
160 |         arithmetic_ops!{@new $type, $feature, Mul, mul, $mul_expr}
161 |         arithmetic_ops!{@new $type, $feature, Div, div, $div_expr}
162 |         arithmetic_ops!{@assign $type, $feature, AddAssign, add_assign, $add_expr}
163 |         arithmetic_ops!{@assign $type, $feature, SubAssign, sub_assign, $sub_expr}
164 |         arithmetic_ops!{@assign $type, $feature, MulAssign, mul_assign, $mul_expr}
165 |         arithmetic_ops!{@assign $type, $feature, DivAssign, div_assign, $div_expr}
166 |     };
167 | }
168 | 
169 | macro_rules! as_slice { // Implements `AsRef`, `AsMut`, `Deref` and `DerefMut` to a scalar slice for a vector type.
170 |     {
171 |         $type:ty
172 |     } => {
173 |         impl AsRef<[<$type as crate::vector::Vector>::Scalar]> for $type {
174 |             #[inline]
175 |             fn as_ref(&self) -> &[<$type as crate::vector::Vector>::Scalar] {
176 |                 // Fully qualified so the expansion never depends on `Vector` being imported at the call site.
177 |                 <Self as crate::vector::Vector>::as_slice(self)
178 |             }
179 |         }
180 |
181 |         impl AsMut<[<$type as crate::vector::Vector>::Scalar]> for $type {
182 |             #[inline]
183 |             fn as_mut(&mut self) -> &mut [<$type as crate::vector::Vector>::Scalar] {
184 |                 // Fully qualified for the same reason as `as_ref`.
185 |                 <Self as crate::vector::Vector>::as_slice_mut(self)
186 |             }
187 |         }
188 |
189 |         impl core::ops::Deref for $type {
190 |             type Target = [<Self as crate::vector::Vector>::Scalar];
191 |             #[inline]
192 |             fn deref(&self) -> &Self::Target {
193 |                 <Self as crate::vector::Vector>::as_slice(self)
194 |             }
195 |         }
196 |
197 |         impl core::ops::DerefMut for $type {
198 |             #[inline]
199 |             fn deref_mut(&mut self) -> &mut <Self as core::ops::Deref>::Target {
200 |                 <Self as crate::vector::Vector>::as_slice_mut(self)
201 |             }
202 |         }
203 |     }
204 | }
205 | 


--------------------------------------------------------------------------------
/generic-simd/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #![cfg_attr(not(feature = "std"), no_std)]
  2 | #![cfg_attr(
  3 |     all(feature = "nightly", target_arch = "wasm32"),
  4 |     feature(wasm_simd, wasm_target_feature)
  5 | )]
  6 | #![cfg_attr(
  7 |     all(feature = "nightly", target_arch = "aarch64"),
  8 |     feature(stdsimd, aarch64_target_feature)
  9 | )]
 10 | //! `generic-simd` provides safe and idiomatic zero-cost abstractions for writing explicit
 11 | //! cross-platform SIMD operations.
 12 | //!
 13 | //! # Supported architectures
 14 | //! All architectures are supported via scalar fallbacks, but the following instruction sets are
 15 | //! also supported:
 16 | //! * SSE4.1 (x86/x86-64)
 17 | //! * AVX (x86/x86-64)
 18 | //! * NEON (aarch64, with `nightly` cargo feature)
 19 | //! * SIMD128 (wasm32, with `nightly` cargo feature and `simd128` target feature)
 20 | //!
 21 | //! The various architecture-specific types are available in the [`arch`](arch/index.html) module.
 22 | //!
 23 | //! # Abstractions
 24 | //! Vector abstractions are provided via the traits in the [`vector`](vector/index.html) module.
 25 | //! Generics that use these traits are able to utilize any of the supported instruction sets.
 26 | //!
 27 | //! The following example performs a vector-accelerated sum of an input slice:
 28 | //! ```
 29 | //! use generic_simd::{
 30 | //!     arch::Token,
 31 | //!     dispatch,
 32 | //!     scalar::ScalarExt,
 33 | //!     slice::SliceExt,
 34 | //!     vector::NativeVector,
 35 | //! };
 36 | //!
 37 | //! // This function provides a generic implementation for any instruction set.
 38 | //! // Here we use the "native" vector type, i.e. the widest vector directly supported by the
 39 | //! // architecture.
 40 | //! #[inline]
 41 | //! fn sum_impl<T>(token: T, input: &[f32]) -> f32
 42 | //! where
 43 | //!     T: Token,
 44 | //!     f32: ScalarExt<T> + core::iter::Sum<NativeVector<f32, T>>,
 45 | //! {
 46 | //!     // Use aligned loads in this example, which may be better on some architectures.
 47 | //!     let (start, vectors, end) = input.align_native(token);
 48 | //!
 49 | //!     // Sum across the vector lanes, plus the unaligned portions
 50 | //!     vectors.iter().copied().sum::<f32>() + start.iter().chain(end).sum::<f32>()
 51 | //! }
 52 | //!
 53 | //! // This function selects the best instruction set at runtime.
 54 | //! // The "dispatch" macro compiles this function for each supported architecture.
 55 | //! #[dispatch(token)]
 56 | //! fn sum(input: &[f32]) -> f32 {
 57 | //!     sum_impl(token, input)
 58 | //! }
 59 | //!
 60 | //! assert_eq!(sum(&[1f32; 10]), 10.);
 61 | //! ```
 62 | //!
 63 | //! # Vector shims
 64 | //! Various instruction sets provide vectors with different widths, so shims are provided to
 65 | //! create vectors of particular widths regardless of architecture.  These are available in the
 66 | //! [`shim`](shim/index.html) module.
 67 | //!
 68 | //! For example, the following function performs an [Array of Structures of Arrays](https://en.wikipedia.org/wiki/AoS_and_SoA)
 69 | //! operation using arrays of 4 `f64`s regardless of instruction set:
 70 | //! ```
 71 | //! use generic_simd::{
 72 | //!     arch::Token,
 73 | //!     dispatch,
 74 | //!     scalar::Scalar,
 75 | //!     slice::Slice,
 76 | //!     vector::{Signed, Vector, width},
 77 | //! };
 78 | //!
 79 | //! // Equivalent to an array of 4 2-dimensional coordinates,
 80 | //! // but with a vectorizable memory layout.
 81 | //! struct Coordinates {
 82 | //!     x: [f64; 4],
 83 | //!     y: [f64; 4],
 84 | //! }
 85 | //!
 86 | //! // A generic mean implementation for any instruction set.
 87 | //! fn mean_impl<T>(token: T, input: &[Coordinates]) -> (f64, f64)
 88 | //! where
 89 | //!     T: Token,
 90 | //!     f64: Scalar<T, width::W4>,
 91 | //!     <f64 as Scalar<T, width::W4>>::Vector: Signed,
 92 | //! {
 93 | //!     let mut xsum = f64::zeroed(token);
 94 | //!     let mut ysum = f64::zeroed(token);
 95 | //!
 96 | //!     for Coordinates { x, y } in input {
 97 | //!         // read the arrays into vectors
 98 | //!         xsum += x.read(token);
 99 | //!         ysum += y.read(token);
100 | //!     }
101 | //!
102 | //!     // sum across the vector lanes
103 | //!     (
104 | //!         xsum.iter().sum::<f64>() / (input.len() * 4) as f64,
105 | //!         ysum.iter().sum::<f64>() / (input.len() * 4) as f64,
106 | //!     )
107 | //! }
108 | //!
109 | //! // Selects the best instruction set at runtime.
110 | //! #[dispatch(token)]
111 | //! fn mean(input: &[Coordinates]) -> (f64, f64) {
112 | //!     mean_impl(token, input)
113 | //! }
114 | //! ```
115 | 
116 | // Re-export for use from macros.
117 | #[doc(hidden)]
118 | pub use multiversion;
119 | 
120 | /// Multiversions a function over all supported instruction sets.
121 | ///
122 | /// Tagging a function with `#[dispatch(token)]` creates a version of the function for each
123 | /// supported instruction set and provides its token as `token`.
124 | /// The best supported function variant is selected at runtime.
125 | ///
126 | /// # Implementation
127 | /// This attribute is a wrapper for [`multiversion`] and supports all of its
128 | /// conditional compilation and static dispatch features.
129 | ///
130 | /// # Example
131 | /// ```
132 | /// use generic_simd::slice::SliceExt;
133 | ///
134 | /// #[generic_simd::dispatch(token)]
135 | /// pub fn add_one(x: &mut [f32]) {
136 | ///     let (start, vecs, end) = x.align_native_mut(token);
137 | ///     for s in start.iter_mut().chain(end.iter_mut()) {
138 | ///         *s += 1.;
139 | ///     }
140 | ///
141 | ///     for v in vecs {
142 | ///         *v += 1.;
143 | ///     }
144 | /// }
145 | ///
146 | /// #[generic_simd::dispatch(_token)]
147 | /// pub fn add_two(x: &mut [f32]) {
148 | ///     // Static dispatching provided by `multiversion`.
149 | ///     // This does not perform runtime feature selection and allows inlining.
150 | ///     dispatch!(add_one(x));
151 | ///     dispatch!(add_one(x));
152 | /// }
153 | /// ```
154 | ///
155 | /// [Abstractions]: index.html#abstractions
156 | /// [Vector shims]: index.html#vector-shims
157 | /// [`multiversion`]: ../multiversion/attr.multiversion.html
158 | pub use generic_simd_macros::dispatch;
159 | 
160 | #[macro_use]
161 | mod implementation;
162 | 
163 | pub mod alignment;
164 | pub mod arch;
165 | pub mod pointer;
166 | pub mod scalar;
167 | pub mod shim;
168 | pub mod slice;
169 | pub mod vector;
170 | 


--------------------------------------------------------------------------------
/generic-simd/src/pointer.rs:
--------------------------------------------------------------------------------
  1 | //! Extensions for pointers to vectors.
  2 | 
  3 | use crate::{
  4 |     scalar::Scalar,
  5 |     vector::{width, Native, NativeWidth, Vector},
  6 | };
  7 | 
  8 | /// A pointer through which a vector with `Width` lanes can be read under `Token`.
  9 | pub trait Pointer<Token, Width>: Copy
 10 | where
 11 |     Token: crate::arch::Token,
 12 |     Width: width::Width,
 13 | {
 14 |     type Vector: Vector<Token = Token, Width = Width>; // vector type returned by the read methods below
 15 |
 16 |     /// Read a vector from a pointer.
 17 |     ///
 18 |     /// # Safety
 19 |     /// See [`read_ptr`](../vector/trait.Vector.html#method.read_ptr).
 20 |     unsafe fn vector_read(self, token: Token) -> Self::Vector;
 21 |
 22 |     /// Read a vector from a vector-aligned pointer.
 23 |     ///
 24 |     /// # Safety
 25 |     /// See [`read_aligned_ptr`](../vector/trait.Vector.html#method.read_aligned_ptr).
 26 |     unsafe fn vector_read_aligned(self, token: Token) -> Self::Vector;
 27 | }
 28 | 
 29 | impl<T, Token, Width> Pointer<Token, Width> for *const T
 30 | where
 31 |     T: Scalar<Token, Width>,
 32 |     Token: crate::arch::Token,
 33 |     Width: width::Width,
 34 | {
 35 |     type Vector = <T as Scalar<Token, Width>>::Vector;
 36 |     /// Unaligned read; forwards to `Vector::read_ptr`.
 37 |     #[inline]
 38 |     unsafe fn vector_read(self, token: Token) -> Self::Vector {
 39 |         <Self::Vector as Vector>::read_ptr(token, self)
 40 |     }
 41 |     /// Aligned read; forwards to `Vector::read_aligned_ptr`.
 42 |     #[inline]
 43 |     unsafe fn vector_read_aligned(self, token: Token) -> Self::Vector {
 44 |         <Self::Vector as Vector>::read_aligned_ptr(token, self)
 45 |     }
 46 | }
 47 | 
 48 | impl<T, Token, Width> Pointer<Token, Width> for *mut T
 49 | where
 50 |     T: Scalar<Token, Width>,
 51 |     Token: crate::arch::Token,
 52 |     Width: width::Width,
 53 | {
 54 |     type Vector = <T as Scalar<Token, Width>>::Vector;
 55 |     /// Unaligned read; forwards to `Vector::read_ptr`.
 56 |     #[inline]
 57 |     unsafe fn vector_read(self, token: Token) -> Self::Vector {
 58 |         <Self::Vector as Vector>::read_ptr(token, self)
 59 |     }
 60 |     /// Aligned read; forwards to `Vector::read_aligned_ptr`.
 61 |     #[inline]
 62 |     unsafe fn vector_read_aligned(self, token: Token) -> Self::Vector {
 63 |         <Self::Vector as Vector>::read_aligned_ptr(token, self)
 64 |     }
 65 | }
 66 | 
 67 | macro_rules! pointer_impl {
 68 |     { // Emits one unaligned and one aligned reader for a lane count; `$lanes_doc` is spliced into their rustdoc.
 69 |         $lanes_doc:literal,
 70 |         $lanes:ty,
 71 |         $unaligned_fn:ident,
 72 |         $aligned_fn:ident
 73 |     } => {
 74 |         #[doc = "Read a vector with "]
 75 |         #[doc = $lanes_doc]
 76 |         #[doc = " from a pointer.\n\n# Safety\nSee [`read_ptr`](../vector/trait.Vector.html#method.read_ptr)."]
 77 |         #[inline]
 78 |         unsafe fn $unaligned_fn(self, token: Token) -> <Self as Pointer<Token, $lanes>>::Vector {
 79 |             <Self as Pointer<Token, $lanes>>::vector_read(self, token)
 80 |         }
 81 |
 82 |         #[doc = "Read a vector with "]
 83 |         #[doc = $lanes_doc]
 84 |         #[doc = " from a vector-aligned pointer.\n\n# Safety\nSee [`read_aligned_ptr`](../vector/trait.Vector.html#method.read_aligned_ptr)."]
 85 |         #[inline]
 86 |         unsafe fn $aligned_fn(self, token: Token) -> <Self as Pointer<Token, $lanes>>::Vector {
 87 |             <Self as Pointer<Token, $lanes>>::vector_read_aligned(self, token)
 88 |         }
 89 |     }
 90 | }
 91 | 
 92 | /// A pointer to a vector, supporting all vector widths (1, 2, 4, 8 and the native width).
 93 | pub trait PointerExt<Token>:
 94 |     Native<Token> // NOTE(review): requires the pointer type itself to implement `Native`; no such impl is visible here -- confirm
 95 |     + Pointer<Token, width::W1>
 96 |     + Pointer<Token, width::W2>
 97 |     + Pointer<Token, width::W4>
 98 |     + Pointer<Token, width::W8>
 99 |     + Pointer<Token, NativeWidth<Self, Token>>
100 | where
101 |     Token: crate::arch::Token,
102 | {
103 |     pointer_impl! { "the native number of lanes", <Self as Native<Token>>::Width, vector_read_native, vector_read_aligned_native }
104 |     pointer_impl! { "1 lane",  width::W1, vector_read1, vector_read1_aligned }
105 |     pointer_impl! { "2 lanes", width::W2, vector_read2, vector_read2_aligned }
106 |     pointer_impl! { "4 lanes", width::W4, vector_read4, vector_read4_aligned }
107 |     pointer_impl! { "8 lanes", width::W8, vector_read8, vector_read8_aligned }
108 | }
109 | 
110 | impl<Ptr, Token> PointerExt<Token> for Ptr
111 | where
112 |     Token: crate::arch::Token,
113 |     Ptr: Native<Token>
114 |         + Pointer<Token, width::W1>
115 |         + Pointer<Token, width::W2>
116 |         + Pointer<Token, width::W4>
117 |         + Pointer<Token, width::W8>
118 |         + Pointer<Token, NativeWidth<Self, Token>>,
119 | { // blanket impl: every method comes from the trait's provided definitions
120 | }
121 | 


--------------------------------------------------------------------------------
/generic-simd/src/scalar.rs:
--------------------------------------------------------------------------------
 1 | //! Extensions for scalars.
 2 | 
 3 | use crate::vector::{width, Native, NativeWidth, Vector};
 4 | 
 5 | /// A scalar value with an associated vector type for a given token and lane count.
 6 | pub trait Scalar<Token, Width>: Copy
 7 | where
 8 |     Width: width::Width,
 9 |     Token: crate::arch::Token,
10 | {
11 |     type Vector: Vector<Scalar = Self, Token = Token, Width = Width>; // vector type built from this scalar
12 |
13 |     /// Create a vector set to zero.
14 |     ///
15 |     /// See [`zeroed`](../vector/trait.Vector.html#method.zeroed).
16 |     #[inline]
17 |     fn zeroed(token: Token) -> Self::Vector {
18 |         <Self::Vector as Vector>::zeroed(token)
19 |     }
20 |
21 |     /// Splat a scalar to a vector.
22 |     ///
23 |     /// See [`splat`](../vector/trait.Vector.html#tymethod.splat).
24 |     #[inline]
25 |     fn splat(self, token: Token) -> Self::Vector {
26 |         <Self::Vector as Vector>::splat(token, self)
27 |     }
28 | }
29 | 
30 | macro_rules! scalar_impl {
31 |     { // Emits a `zeroed`/`splat` method pair for one lane count; `$width` is spliced into their rustdoc.
32 |         $width:literal,
33 |         $width_type:ty,
34 |         $zeroed:ident,
35 |         $splat:ident
36 |     } => {
37 |         #[doc = "Create a vector with "]
38 |         #[doc = $width]
39 |         #[doc = " set to zero.\n\nSee [`zeroed`](../vector/trait.Vector.html#method.zeroed)."]
40 |         #[inline]
41 |         fn $zeroed(token: Token) -> <Self as Scalar<Token, $width_type>>::Vector {
42 |             <Self as Scalar<Token, $width_type>>::zeroed(token) // `token` already has the expected type; no conversion needed
43 |         }
44 |
45 |         #[doc = "Splat a scalar to "]
46 |         #[doc = $width]
47 |         #[doc = ".\n\nSee [`splat`](../vector/trait.Vector.html#tymethod.splat)."]
48 |         #[inline]
49 |         fn $splat(self, token: Token) -> <Self as Scalar<Token, $width_type>>::Vector {
50 |             <Self as Scalar<Token, $width_type>>::splat(self, token) // `token` already has the expected type; no conversion needed
51 |         }
52 |     }
53 | }
54 | 
55 | /// Width-suffixed `zeroed`/`splat` helpers for scalars that implement `Scalar` at 1, 2, 4, 8 and native width.
56 | pub trait ScalarExt<Token>:
57 |     Native<Token>
58 |     + self::Scalar<Token, width::W1>
59 |     + self::Scalar<Token, width::W2>
60 |     + self::Scalar<Token, width::W4>
61 |     + self::Scalar<Token, width::W8>
62 |     + self::Scalar<Token, NativeWidth<Self, Token>>
63 | where
64 |     Token: crate::arch::Token + From<Token> + Into<Token>,
65 | {
66 |     scalar_impl! { "the native number of lanes", <Self as Native<Token>>::Width, zeroed_native, splat_native }
67 |     scalar_impl! { "1 lane",  width::W1, zeroed1, splat1 }
68 |     scalar_impl! { "2 lanes", width::W2, zeroed2, splat2 }
69 |     scalar_impl! { "4 lanes", width::W4, zeroed4, splat4 }
70 |     scalar_impl! { "8 lanes", width::W8, zeroed8, splat8 }
71 | }
72 | 
73 | impl<Token, Scalar> ScalarExt<Token> for Scalar
74 | where
75 |     Token: crate::arch::Token,
76 |     Scalar: Native<Token>
77 |         + self::Scalar<Token, width::W1>
78 |         + self::Scalar<Token, width::W2>
79 |         + self::Scalar<Token, width::W4>
80 |         + self::Scalar<Token, width::W8>
81 |         + self::Scalar<Token, NativeWidth<Self, Token>>,
82 | { // blanket impl with no items: every `ScalarExt` method has a default body
83 | }
84 | 


--------------------------------------------------------------------------------
/generic-simd/src/shim/mod.rs:
--------------------------------------------------------------------------------
1 | //! Shims for non-native vectors.
2 | 
3 | mod token;
4 | mod width;
5 | 
6 | pub use token::*;
7 | pub use width::*;
8 | 


--------------------------------------------------------------------------------
/generic-simd/src/shim/token.rs:
--------------------------------------------------------------------------------
  1 | use crate::arch;
  2 | use crate::vector::Vector;
  3 | use core::marker::PhantomData;
  4 | 
  5 | #[cfg(feature = "complex")]
  6 | use crate::vector::Complex;
  7 | 
  8 | /// Shim that wraps an `Underlying` vector so it can be used with a different token type.
  9 | #[derive(Copy, Clone, Debug)]
 10 | #[repr(transparent)]
 11 | pub struct ShimToken<Underlying, Scalar, Token>(Underlying, PhantomData<(Scalar, Token)>);
 12 | 
 13 | unsafe impl<Underlying, Scalar, Token> Vector for ShimToken<Underlying, Scalar, Token>
 14 | where
 15 |     Underlying: Vector<Scalar = Scalar>,
 16 |     Scalar: Copy,
 17 |     Token: arch::Token + Into<<Underlying as Vector>::Token>,
 18 | {
 19 |     type Scalar = Scalar;
 20 |     type Token = Token;
 21 |     type Width = <Underlying as Vector>::Width;
 22 |     type Underlying = <Underlying as Vector>::Underlying;
 23 |     /// Converts `token` into the wrapped vector's token type and wraps its zeroed vector.
 24 |     #[inline]
 25 |     fn zeroed(token: Self::Token) -> Self {
 26 |         Self(<Underlying as Vector>::zeroed(token.into()), PhantomData)
 27 |     }
 28 |     /// Converts `token` into the wrapped vector's token type and wraps the splat of `from`.
 29 |     #[inline]
 30 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
 31 |         Self(<Underlying as Vector>::splat(token.into(), from), PhantomData)
 32 |     }
 33 | }
 34 | 
 35 | impl<Underlying, Scalar, Token> AsRef<[Scalar]> for ShimToken<Underlying, Scalar, Token>
 36 | where
 37 |     Underlying: AsRef<[Scalar]>,
 38 | {
 39 |     #[inline]
 40 |     fn as_ref(&self) -> &[Scalar] {
 41 |         <Underlying as AsRef<[Scalar]>>::as_ref(&self.0) // delegate to the wrapped vector
 42 |     }
 43 | }
 44 | 
 45 | impl<Underlying, Scalar, Token> AsMut<[Scalar]> for ShimToken<Underlying, Scalar, Token>
 46 | where
 47 |     Underlying: AsMut<[Scalar]>,
 48 | {
 49 |     #[inline]
 50 |     fn as_mut(&mut self) -> &mut [Scalar] {
 51 |         <Underlying as AsMut<[Scalar]>>::as_mut(&mut self.0) // delegate to the wrapped vector
 52 |     }
 53 | }
 54 | 
 55 | impl<Underlying, Scalar, Token> core::ops::Deref for ShimToken<Underlying, Scalar, Token>
 56 | where
 57 |     Underlying: core::ops::Deref,
 58 | {
 59 |     type Target = Underlying::Target;
 60 |     // Expose whatever the wrapped vector dereferences to.
 61 |     #[inline]
 62 |     fn deref(&self) -> &Self::Target {
 63 |         <Underlying as core::ops::Deref>::deref(&self.0)
 64 |     }
 65 | }
 66 | 
 67 | impl<Underlying, Scalar, Token> core::ops::DerefMut for ShimToken<Underlying, Scalar, Token>
 68 | where
 69 |     Underlying: core::ops::DerefMut,
 70 | {
 71 |     #[inline]
 72 |     fn deref_mut(&mut self) -> &mut <Self as core::ops::Deref>::Target {
 73 |         <Underlying as core::ops::DerefMut>::deref_mut(&mut self.0) // mutable counterpart of `deref`
 74 |     }
 75 | }
 76 | 
 77 | macro_rules! implement { // operator impls for `ShimToken` that forward to the wrapped vector
 78 |     {
 79 |         @op $trait:ident :: $func:ident // binary operator trait and its method
 80 |     } => {
 81 |         impl<Underlying, Scalar, Token> core::ops::$trait<Self> for ShimToken<Underlying, Scalar, Token>
 82 |         where
 83 |             Underlying: Copy + core::ops::$trait<Underlying, Output=Underlying>,
 84 |         {
 85 |             type Output = Self;
 86 | 
 87 |             #[inline]
 88 |             fn $func(self, rhs: Self) -> Self {
 89 |                 Self((self.0).$func(rhs.0), PhantomData)
 90 |             }
 91 |         }
 92 |         // Same operator with a scalar right-hand side.
 93 |         impl<Underlying, Scalar, Token> core::ops::$trait<Scalar> for ShimToken<Underlying, Scalar, Token>
 94 |         where
 95 |             Underlying: Copy + core::ops::$trait<Scalar, Output=Underlying>,
 96 |             Scalar: Copy,
 97 |         {
 98 |             type Output = Self;
 99 | 
100 |             #[inline]
101 |             fn $func(self, rhs: Scalar) -> Self {
102 |                 Self((self.0).$func(rhs), PhantomData)
103 |             }
104 |         }
105 |     };
106 |     // `@op_assign`: compound-assignment counterparts that update the wrapped vector in place.
107 |     {
108 |         @op_assign $trait:ident :: $func:ident // assignment operator trait and its method
109 |     } => {
110 |         impl<Underlying, Scalar, Token> core::ops::$trait<Self> for ShimToken<Underlying, Scalar, Token>
111 |         where
112 |             Underlying: Copy + core::ops::$trait<Underlying>,
113 |             Scalar: Copy,
114 |         {
115 |             #[inline]
116 |             fn $func(&mut self, rhs: Self) {
117 |                 (self.0).$func(rhs.0);
118 |             }
119 |         }
120 |         // Same compound assignment with a scalar right-hand side.
121 |         impl<Underlying, Scalar, Token> core::ops::$trait<Scalar> for ShimToken<Underlying, Scalar, Token>
122 |         where
123 |             Underlying: Copy + core::ops::$trait<Scalar>,
124 |             Scalar: Copy,
125 |         {
126 |             #[inline]
127 |             fn $func(&mut self, rhs: Scalar) {
128 |                 (self.0).$func(rhs);
129 |             }
130 |         }
131 |     };
132 | }
133 | 
134 | implement! { @op Add::add } // by-value operators: vector and scalar right-hand sides
135 | implement! { @op Sub::sub }
136 | implement! { @op Mul::mul }
137 | implement! { @op Div::div }
138 | implement! { @op_assign AddAssign::add_assign } // in-place operators: vector and scalar right-hand sides
139 | implement! { @op_assign SubAssign::sub_assign }
140 | implement! { @op_assign MulAssign::mul_assign }
141 | implement! { @op_assign DivAssign::div_assign }
142 | 
143 | impl<Underlying, Scalar, Token> core::ops::Neg for ShimToken<Underlying, Scalar, Token>
144 | where
145 |     Underlying: Copy + core::ops::Neg<Output = Underlying>,
146 | {
147 |     type Output = Self;
148 |     // Negate the wrapped vector and re-wrap the result.
149 |     #[inline]
150 |     fn neg(self) -> Self {
151 |         Self(core::ops::Neg::neg(self.0), PhantomData)
152 |     }
153 | }
154 | 
155 | impl<Underlying, Scalar, Token> core::iter::Sum<ShimToken<Underlying, Scalar, Token>>
156 |     for Option<ShimToken<Underlying, Scalar, Token>>
157 | where
158 |     ShimToken<Underlying, Scalar, Token>: core::ops::AddAssign,
159 |     Underlying: Copy,
160 | {
161 |     #[inline]
162 |     fn sum<I>(mut iter: I) -> Self
163 |     where
164 |         I: Iterator<Item = ShimToken<Underlying, Scalar, Token>>,
165 |     {
166 |         // Seed the accumulator with the first vector; an empty iterator has
167 |         // nothing to start from and yields `None`.
168 |         let mut total = iter.next()?;
169 |         // Accumulate the remaining vectors with `+=`.
170 |         for item in iter {
171 |             total += item;
172 |         }
173 |         Some(total)
174 |     }
175 | }
176 | 
177 | impl<Underlying, Scalar, Token> core::iter::Sum<ShimToken<Underlying, Scalar, Token>>
178 |     for <ShimToken<Underlying, Scalar, Token> as Vector>::Scalar
179 | where
180 |     Option<ShimToken<Underlying, Scalar, Token>>:
181 |         core::iter::Sum<ShimToken<Underlying, Scalar, Token>>,
182 |     Underlying: Vector<Scalar = Scalar>,
183 |     Scalar: Copy + core::ops::Add<Self, Output = Self> + Default,
184 |     Token: arch::Token,
185 |     Underlying::Token: From<Token>,
186 | {
187 |     #[inline]
188 |     fn sum<I>(iter: I) -> Self
189 |     where
190 |         I: Iterator<Item = ShimToken<Underlying, Scalar, Token>>,
191 |     {
192 |         // Collapse the iterator into one vector, then add its lanes onto the
193 |         // default value; an empty iterator leaves just the default.
194 |         let init = Self::default();
195 |         match iter.sum::<Option<ShimToken<Underlying, Scalar, Token>>>() {
196 |             Some(total) => total.as_slice().iter().fold(init, |acc, lane| acc + *lane),
197 |             None => init,
198 |         }
199 |     }
200 | }
201 | 
202 | impl<Underlying, Scalar, Token> core::iter::Product<ShimToken<Underlying, Scalar, Token>>
203 |     for Option<ShimToken<Underlying, Scalar, Token>>
204 | where
205 |     ShimToken<Underlying, Scalar, Token>: core::ops::MulAssign,
206 |     Underlying: Copy,
207 | {
208 |     #[inline]
209 |     fn product<I>(mut iter: I) -> Self
210 |     where
211 |         I: Iterator<Item = ShimToken<Underlying, Scalar, Token>>,
212 |     {
213 |         // Start from the first vector; an empty iterator has nothing to
214 |         // multiply and yields `None`.
215 |         let mut acc = iter.next()?;
216 |         // Multiply in the remaining vectors with `*=`.
217 |         for item in iter {
218 |             acc *= item;
219 |         }
220 |         Some(acc)
221 |     }
222 | }
223 | 
224 | impl<Underlying, Scalar, Token> core::iter::Product<ShimToken<Underlying, Scalar, Token>>
225 |     for <ShimToken<Underlying, Scalar, Token> as Vector>::Scalar
226 | where
227 |     Option<ShimToken<Underlying, Scalar, Token>>:
228 |         core::iter::Product<ShimToken<Underlying, Scalar, Token>>,
229 |     Underlying: Vector<Scalar = Scalar>,
230 |     Scalar: Copy + core::ops::Mul<Self, Output = Self> + Default,
231 |     Token: arch::Token,
232 |     Underlying::Token: From<Token>,
233 | {
234 |     #[inline]
235 |     fn product<I>(iter: I) -> Self
236 |     where
237 |         I: Iterator<Item = ShimToken<Underlying, Scalar, Token>>,
238 |     {
239 |         // Seed with the first lane; seeding with `Default` (zero for numeric scalars)
240 |         // would force every product to zero. Empty input still yields `Default`.
241 |         let reduced = iter.product::<Option<ShimToken<Underlying, Scalar, Token>>>();
242 |         match reduced.as_ref().and_then(|v| v.as_slice().split_first()) {
243 |             Some((first, rest)) => rest.iter().fold(*first, |acc, lane| acc * *lane),
244 |             None => Self::default(),
245 |         }
246 |     }
247 | }
248 | 
249 | #[cfg(feature = "complex")]
250 | impl<Underlying, Real, Token> Complex for ShimToken<Underlying, num_complex::Complex<Real>, Token>
251 | where
252 |     Underlying: Vector<Scalar = num_complex::Complex<Real>> + Complex<RealScalar = Real>,
253 |     Real: Copy,
254 |     Token: arch::Token,
255 |     Underlying::Token: From<Token>,
256 | {
257 |     type RealScalar = Real;
258 |     // Each operation is applied to the wrapped vector and the result re-wrapped.
259 |     #[inline]
260 |     fn conj(self) -> Self {
261 |         Self(Complex::conj(self.0), PhantomData)
262 |     }
263 | 
264 |     #[inline]
265 |     fn mul_i(self) -> Self {
266 |         Self(Complex::mul_i(self.0), PhantomData)
267 |     }
268 | 
269 |     #[inline]
270 |     fn mul_neg_i(self) -> Self {
271 |         Self(Complex::mul_neg_i(self.0), PhantomData)
272 |     }
273 | }
274 | 


--------------------------------------------------------------------------------
/generic-simd/src/shim/width.rs:
--------------------------------------------------------------------------------
  1 | use crate::vector::{width, Vector};
  2 | use core::marker::PhantomData;
  3 | 
  4 | #[cfg(feature = "complex")]
  5 | use crate::vector::Complex;
  6 | 
  7 | /// Maps a vector width to the width with twice as many lanes.
  8 | pub trait Double {
  9 |     type Doubled: width::Width;
 10 | }
 11 | // Implemented below for `W1`, `W2` and `W4`; no impl for `W8` appears in this file.
 12 | impl Double for width::W1 {
 13 |     type Doubled = width::W2;
 14 | }
 15 | 
 16 | impl Double for width::W2 {
 17 |     type Doubled = width::W4;
 18 | }
 19 | 
 20 | impl Double for width::W4 {
 21 |     type Doubled = width::W8;
 22 | }
 23 | 
 24 | /// Shim that doubles the width of a vector by storing two `Underlying` vectors.
 25 | #[derive(Copy, Clone, Debug)]
 26 | #[repr(transparent)]
 27 | pub struct Shim2<Underlying, Scalar>([Underlying; 2], PhantomData<Scalar>);
 28 | 
 29 | /// Shim that quadruples the width of a vector (a `Shim2` of `Shim2`s).
 30 | pub type Shim4<Underlying, Scalar> = Shim2<Shim2<Underlying, Scalar>, Scalar>;
 31 | 
 32 | /// Shim that octuples the width of a vector (three nested `Shim2` layers).
 33 | pub type Shim8<Underlying, Scalar> = Shim4<Shim2<Underlying, Scalar>, Scalar>;
 34 | 
 35 | unsafe impl<Underlying, Scalar> Vector for Shim2<Underlying, Scalar>
 36 | where
 37 |     Underlying: Vector<Scalar = Scalar>,
 38 |     Underlying::Width: Double,
 39 |     Scalar: Copy,
 40 | {
 41 |     type Scalar = Scalar;
 42 |     type Token = <Underlying as Vector>::Token;
 43 |     type Width = <Underlying::Width as Double>::Doubled;
 44 |     type Underlying = [<Underlying as Vector>::Underlying; 2];
 45 |     /// Builds one zeroed `Underlying` vector and uses it for both halves.
 46 |     #[inline]
 47 |     fn zeroed(token: Self::Token) -> Self {
 48 |         Self([<Underlying as Vector>::zeroed(token); 2], PhantomData)
 49 |     }
 50 |     /// Builds one splatted `Underlying` vector and uses it for both halves.
 51 |     #[inline]
 52 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self {
 53 |         Self([<Underlying as Vector>::splat(token, from); 2], PhantomData)
 54 |     }
 55 | }
 56 | 
 57 | impl<Underlying, Scalar> AsRef<[Scalar]> for Shim2<Underlying, Scalar>
 58 | where
 59 |     Underlying: Vector<Scalar = Scalar>,
 60 |     Underlying::Width: Double,
 61 |     Scalar: Copy,
 62 | {
 63 |     #[inline]
 64 |     fn as_ref(&self) -> &[Scalar] {
 65 |         self.as_slice() // presumably `Vector::as_slice`; the bounds mirror the `Vector` impl for `Shim2`
 66 |     }
 67 | }
 68 | 
 69 | impl<Underlying, Scalar> AsMut<[Scalar]> for Shim2<Underlying, Scalar>
 70 | where
 71 |     Underlying: Vector<Scalar = Scalar>,
 72 |     Underlying::Width: Double,
 73 |     Scalar: Copy,
 74 | {
 75 |     #[inline]
 76 |     fn as_mut(&mut self) -> &mut [Scalar] {
 77 |         self.as_slice_mut() // presumably `Vector::as_slice_mut`; the bounds mirror the `Vector` impl
 78 |     }
 79 | }
 80 | 
 81 | impl<Underlying, Scalar> core::ops::Deref for Shim2<Underlying, Scalar>
 82 | where
 83 |     Underlying: Vector<Scalar = Scalar>,
 84 |     Underlying::Width: Double,
 85 |     Scalar: Copy,
 86 | {
 87 |     type Target = [Scalar];
 88 |     // Dereferences to the same scalar slice that `as_ref` returns.
 89 |     #[inline]
 90 |     fn deref(&self) -> &Self::Target {
 91 |         self.as_slice()
 92 |     }
 93 | }
 94 | 
 95 | impl<Underlying, Scalar> core::ops::DerefMut for Shim2<Underlying, Scalar>
 96 | where
 97 |     Underlying: Vector<Scalar = Scalar>,
 98 |     Underlying::Width: Double,
 99 |     Scalar: Copy,
100 | {
101 |     #[inline]
102 |     fn deref_mut(&mut self) -> &mut <Self as core::ops::Deref>::Target {
103 |         self.as_slice_mut() // same mutable scalar slice that `as_mut` returns
104 |     }
105 | }
106 | 
107 | macro_rules! implement { // operator impls for `Shim2` that apply the operator to each half
108 |     {
109 |         @op $trait:ident :: $func:ident // binary operator trait and its method
110 |     } => {
111 |         impl<Underlying, Scalar> core::ops::$trait<Self> for Shim2<Underlying, Scalar>
112 |         where
113 |             Underlying: Copy + core::ops::$trait<Underlying, Output=Underlying>,
114 |         {
115 |             type Output = Self;
116 | 
117 |             #[inline]
118 |             fn $func(self, rhs: Self) -> Self {
119 |                 Self([self.0[0].$func(rhs.0[0]), self.0[1].$func(rhs.0[1])], PhantomData)
120 |             }
121 |         }
122 |         // Same operator with a scalar right-hand side, applied to both halves.
123 |         impl<Underlying, Scalar> core::ops::$trait<Scalar> for Shim2<Underlying, Scalar>
124 |         where
125 |             Underlying: Copy + core::ops::$trait<Scalar, Output=Underlying>,
126 |             Scalar: Copy,
127 |         {
128 |             type Output = Self;
129 | 
130 |             #[inline]
131 |             fn $func(self, rhs: Scalar) -> Self {
132 |                 Self([self.0[0].$func(rhs), self.0[1].$func(rhs)], PhantomData)
133 |             }
134 |         }
135 |     };
136 |     // `@op_assign`: compound-assignment counterparts that update both halves in place.
137 |     {
138 |         @op_assign $trait:ident :: $func:ident // assignment operator trait and its method
139 |     } => {
140 |         impl<Underlying, Scalar> core::ops::$trait<Self> for Shim2<Underlying, Scalar>
141 |         where
142 |             Underlying: Copy + core::ops::$trait<Underlying>,
143 |             Scalar: Copy,
144 |         {
145 |             #[inline]
146 |             fn $func(&mut self, rhs: Self) {
147 |                 self.0[0].$func(rhs.0[0]);
148 |                 self.0[1].$func(rhs.0[1]);
149 |             }
150 |         }
151 |         // Same compound assignment with a scalar right-hand side.
152 |         impl<Underlying, Scalar> core::ops::$trait<Scalar> for Shim2<Underlying, Scalar>
153 |         where
154 |             Underlying: Copy + core::ops::$trait<Scalar>,
155 |             Scalar: Copy,
156 |         {
157 |             #[inline]
158 |             fn $func(&mut self, rhs: Scalar) {
159 |                 self.0[0].$func(rhs);
160 |                 self.0[1].$func(rhs);
161 |             }
162 |         }
163 |     };
164 | }
165 | 
166 | implement! { @op Add::add } // by-value operators: vector and scalar right-hand sides
167 | implement! { @op Sub::sub }
168 | implement! { @op Mul::mul }
169 | implement! { @op Div::div }
170 | implement! { @op_assign AddAssign::add_assign } // in-place operators: vector and scalar right-hand sides
171 | implement! { @op_assign SubAssign::sub_assign }
172 | implement! { @op_assign MulAssign::mul_assign }
173 | implement! { @op_assign DivAssign::div_assign }
174 | 
175 | impl<Underlying, Scalar> core::ops::Neg for Shim2<Underlying, Scalar>
176 | where
177 |     Underlying: Copy + core::ops::Neg<Output = Underlying>,
178 | {
179 |     type Output = Self;
180 |     // Negate each half independently and rebuild the pair.
181 |     #[inline]
182 |     fn neg(self) -> Self {
183 |         Self([core::ops::Neg::neg(self.0[0]), core::ops::Neg::neg(self.0[1])], PhantomData)
184 |     }
185 | }
186 | 
187 | impl<Underlying, Scalar> core::iter::Sum<Shim2<Underlying, Scalar>>
188 |     for Option<Shim2<Underlying, Scalar>>
189 | where
190 |     Shim2<Underlying, Scalar>: core::ops::AddAssign,
191 |     Underlying: Copy,
192 | {
193 |     #[inline]
194 |     fn sum<I>(mut iter: I) -> Self
195 |     where
196 |         I: Iterator<Item = Shim2<Underlying, Scalar>>,
197 |     {
198 |         // Seed the accumulator with the first vector; an empty iterator has
199 |         // nothing to start from and yields `None`.
200 |         let mut total = iter.next()?;
201 |         // Accumulate the remaining vectors with `+=`.
202 |         for item in iter {
203 |             total += item;
204 |         }
205 |         Some(total)
206 |     }
207 | }
208 | 
209 | impl<Underlying, Scalar> core::iter::Sum<Shim2<Underlying, Scalar>>
210 |     for <Shim2<Underlying, Scalar> as Vector>::Scalar
211 | where
212 |     Option<Shim2<Underlying, Scalar>>: core::iter::Sum<Shim2<Underlying, Scalar>>,
213 |     Underlying: Vector<Scalar = Scalar>,
214 |     Underlying::Width: Double,
215 |     Scalar: Copy + core::ops::Add<Self, Output = Self> + Default,
216 | {
217 |     #[inline]
218 |     fn sum<I>(iter: I) -> Self
219 |     where
220 |         I: Iterator<Item = Shim2<Underlying, Scalar>>,
221 |     {
222 |         // Collapse the iterator into one vector, then add its lanes onto the
223 |         // default value; an empty iterator leaves just the default.
224 |         let init = Self::default();
225 |         match iter.sum::<Option<Shim2<Underlying, Scalar>>>() {
226 |             Some(total) => total.as_slice().iter().fold(init, |acc, lane| acc + *lane),
227 |             None => init,
228 |         }
229 |     }
230 | }
231 | 
232 | impl<Underlying, Scalar> core::iter::Product<Shim2<Underlying, Scalar>>
233 |     for Option<Shim2<Underlying, Scalar>>
234 | where
235 |     Shim2<Underlying, Scalar>: core::ops::MulAssign,
236 |     Underlying: Copy,
237 | {
238 |     #[inline]
239 |     fn product<I>(mut iter: I) -> Self
240 |     where
241 |         I: Iterator<Item = Shim2<Underlying, Scalar>>,
242 |     {
243 |         // Start from the first vector; an empty iterator has nothing to
244 |         // multiply and yields `None`.
245 |         let mut acc = iter.next()?;
246 |         // Multiply in the remaining vectors with `*=`.
247 |         for item in iter {
248 |             acc *= item;
249 |         }
250 |         Some(acc)
251 |     }
252 | }
253 | 
254 | impl<Underlying, Scalar> core::iter::Product<Shim2<Underlying, Scalar>>
255 |     for <Shim2<Underlying, Scalar> as Vector>::Scalar
256 | where
257 |     Option<Shim2<Underlying, Scalar>>: core::iter::Product<Shim2<Underlying, Scalar>>,
258 |     Underlying: Vector<Scalar = Scalar>,
259 |     Underlying::Width: Double,
260 |     Scalar: Copy + core::ops::Mul<Self, Output = Self> + Default,
261 | {
262 |     #[inline]
263 |     fn product<I>(iter: I) -> Self
264 |     where
265 |         I: Iterator<Item = Shim2<Underlying, Scalar>>,
266 |     {
267 |         // Seed with the first lane; seeding with `Default` (zero for numeric scalars)
268 |         // would force every product to zero. Empty input still yields `Default`.
269 |         let reduced = iter.product::<Option<Shim2<Underlying, Scalar>>>();
270 |         match reduced.as_ref().and_then(|v| v.as_slice().split_first()) {
271 |             Some((first, rest)) => rest.iter().fold(*first, |acc, lane| acc * *lane),
272 |             None => Self::default(),
273 |         }
274 |     }
275 | }
276 | 
277 | #[cfg(feature = "complex")]
278 | impl<Underlying, Real> Complex for Shim2<Underlying, num_complex::Complex<Real>>
279 | where
280 |     Underlying: Vector<Scalar = num_complex::Complex<Real>> + Complex<RealScalar = Real>,
281 |     Underlying::Width: Double,
282 |     Real: Copy,
283 | {
284 |     type RealScalar = Real;
285 |     // Each operation is applied to both halves and the pair rebuilt.
286 |     #[inline]
287 |     fn conj(self) -> Self {
288 |         Self([Complex::conj(self.0[0]), Complex::conj(self.0[1])], PhantomData)
289 |     }
290 | 
291 |     #[inline]
292 |     fn mul_i(self) -> Self {
293 |         Self([Complex::mul_i(self.0[0]), Complex::mul_i(self.0[1])], PhantomData)
294 |     }
295 | 
296 |     #[inline]
297 |     fn mul_neg_i(self) -> Self {
298 |         Self([Complex::mul_neg_i(self.0[0]), Complex::mul_neg_i(self.0[1])], PhantomData)
299 |     }
300 | }
301 | 


--------------------------------------------------------------------------------
/generic-simd/src/slice.rs:
--------------------------------------------------------------------------------
  1 | //! Extensions for slices of vectors.
  2 | 
  3 | use crate::arch::Token;
  4 | use crate::{
  5 |     scalar::Scalar,
  6 |     vector::{width, Native, NativeWidth, Vector},
  7 | };
  8 | use core::marker::PhantomData;
  9 | 
 10 | /// A slice of scalars that can be read as, or viewed as, vectors of a given token and width.
 11 | pub trait Slice<Token, Width>
 12 | where
 13 |     Token: crate::arch::Token,
 14 |     Width: width::Width,
 15 | {
 16 |     type Vector: Vector<Token = Token, Width = Width>;
 17 | 
 18 |     /// Read a vector from a slice without checking the length.
 19 |     ///
 20 |     /// # Safety
 21 |     /// See [`read_unchecked`](../vector/trait.Vector.html#method.read_unchecked).
 22 |     unsafe fn read_unchecked(&self, token: Token) -> Self::Vector;
 23 | 
 24 |     /// Read a vector from a slice.
 25 |     ///
 26 |     /// See [`read`](../vector/trait.Vector.html#method.read).
 27 |     fn read(&self, token: Token) -> Self::Vector;
 28 | 
 29 |     /// Extract a slice of aligned vectors, as if by [`align_to`].
 30 |     ///
 31 |     /// [`align_to`]: https://doc.rust-lang.org/std/primitive.slice.html#method.align_to
 32 |     #[allow(clippy::type_complexity)]
 33 |     fn align(
 34 |         &self,
 35 |         #[allow(unused_variables)] token: Token,
 36 |     ) -> (
 37 |         &[<Self::Vector as Vector>::Scalar],
 38 |         &[Self::Vector],
 39 |         &[<Self::Vector as Vector>::Scalar],
 40 |     );
 41 | 
 42 |     /// Extract a slice of aligned mutable vectors, as if by [`align_to_mut`].
 43 |     ///
 44 |     /// [`align_to_mut`]: https://doc.rust-lang.org/std/primitive.slice.html#method.align_to_mut
 45 |     #[allow(clippy::type_complexity)]
 46 |     fn align_mut(
 47 |         &mut self,
 48 |         #[allow(unused_variables)] token: Token,
 49 |     ) -> (
 50 |         &mut [<Self::Vector as Vector>::Scalar],
 51 |         &mut [Self::Vector],
 52 |         &mut [<Self::Vector as Vector>::Scalar],
 53 |     );
 54 | 
 55 |     /// Create a slice of overlapping vectors from a slice of scalars.
 56 |     fn overlapping(&self, token: Token) -> Overlapping<'_, Self::Vector>;
 57 | 
 58 |     /// Create a mutable slice of overlapping vectors from a slice of scalars.
 59 |     fn overlapping_mut(&mut self, token: Token) -> OverlappingMut<'_, Self::Vector>;
 60 | }
 61 | 
 62 | impl<T, Token, Width> Slice<Token, Width> for [T]
 63 | where
 64 |     T: Scalar<Token, Width>,
 65 |     Token: crate::arch::Token,
 66 |     Width: width::Width,
 67 | {
 68 |     type Vector = T::Vector;
 69 | 
 70 |     #[inline]
 71 |     unsafe fn read_unchecked(&self, token: Token) -> Self::Vector {
 72 |         <T::Vector as Vector>::read_unchecked(token, self)
 73 |     }
 74 | 
 75 |     #[inline]
 76 |     fn read(&self, token: Token) -> Self::Vector {
 77 |         <T::Vector as Vector>::read(token, self)
 78 |     }
 79 |     // The alignment helpers never use `token`; they only re-slice `self` via `align_to`.
 80 |     #[allow(clippy::type_complexity)]
 81 |     #[inline]
 82 |     fn align(
 83 |         &self,
 84 |         #[allow(unused_variables)] token: Token,
 85 |     ) -> (
 86 |         &[<Self::Vector as Vector>::Scalar],
 87 |         &[Self::Vector],
 88 |         &[<Self::Vector as Vector>::Scalar],
 89 |     ) {
 90 |         unsafe { self.align_to::<T::Vector>() }
 91 |     }
 92 |     // NOTE(review): soundness presumably rests on the `unsafe trait Vector` contract — confirm.
 93 |     #[allow(clippy::type_complexity)]
 94 |     #[inline]
 95 |     fn align_mut(
 96 |         &mut self,
 97 |         #[allow(unused_variables)] token: Token,
 98 |     ) -> (
 99 |         &mut [<Self::Vector as Vector>::Scalar],
100 |         &mut [Self::Vector],
101 |         &mut [<Self::Vector as Vector>::Scalar],
102 |     ) {
103 |         unsafe { self.align_to_mut::<T::Vector>() }
104 |     }
105 | 
106 |     #[inline]
107 |     fn overlapping(&self, token: Token) -> Overlapping<'_, Self::Vector> {
108 |         Overlapping::<'_, T::Vector>::new(token, self)
109 |     }
110 | 
111 |     #[inline]
112 |     fn overlapping_mut(&mut self, token: Token) -> OverlappingMut<'_, Self::Vector> {
113 |         OverlappingMut::<'_, T::Vector>::new(token, self)
114 |     }
115 | }
116 | 
117 | macro_rules! slice_impl { // generates width-specific wrappers that forward to `Slice<Token, $width_type>`
118 |     {
119 |         $width:literal, // lane-count phrase spliced into the generated docs
120 |         $width_type:ty, // width type selecting the `Slice` impl to forward to
121 |         $read_unchecked:ident, // name of the generated unchecked read
122 |         $read:ident, // name of the generated checked read
123 |         $align:ident, // name of the generated shared alignment method
124 |         $align_mut:ident, // name of the generated mutable alignment method
125 |         $overlapping:ident, // name of the generated shared overlapping view
126 |         $overlapping_mut:ident // name of the generated mutable overlapping view
127 |     } => {
128 |         #[doc = "Read a vector with "]
129 |         #[doc = $width]
130 |         #[doc = " from a slice without checking the length.\n\nSee [`read_unchecked`](../vector/trait.Vector.html#method.read_ptr)."]
131 |         #[inline]
132 |         unsafe fn $read_unchecked(&self, token: Token) -> <Self as Slice<Token, $width_type>>::Vector {
133 |             <Self as Slice<Token, $width_type>>::read_unchecked(self, token)
134 |         }
135 | 
136 |         #[doc = "Read a vector with "]
137 |         #[doc = $width]
138 |         #[doc = " from a slice.\n\nSee [`read`](../vector/trait.Vector.html#method.read)."]
139 |         #[inline]
140 |         fn $read(&self, token: Token) -> <Self as Slice<Token, $width_type>>::Vector {
141 |             <Self as Slice<Token, $width_type>>::read(self, token)
142 |         }
143 | 
144 |         #[doc = "Align a slice of scalars to vectors with "]
145 |         #[doc = $width]
146 |         #[doc = ".\n\nSee [`align`](trait.Slice.html#tymethod.align)."]
147 |         #[allow(clippy::type_complexity)]
148 |         #[inline]
149 |         fn $align(&self, token: Token) ->
150 |         (
151 |             &[<<Self as Slice<Token, $width_type>>::Vector as Vector>::Scalar],
152 |             &[<Self as Slice<Token, $width_type>>::Vector],
153 |             &[<<Self as Slice<Token, $width_type>>::Vector as Vector>::Scalar],
154 |         ) {
155 |             <Self as Slice<Token, $width_type>>::align(self, token)
156 |         }
157 | 
158 |         #[doc = "Align a slice of scalars to vectors with "]
159 |         #[doc = $width]
160 |         #[doc = ".\n\nSee [`align_mut`](trait.Slice.html#tymethod.align_mut)."]
161 |         #[allow(clippy::type_complexity)]
162 |         #[inline]
163 |         fn $align_mut(&mut self, token: Token) ->
164 |         (
165 |             &mut [<<Self as Slice<Token, $width_type>>::Vector as Vector>::Scalar],
166 |             &mut [<Self as Slice<Token, $width_type>>::Vector],
167 |             &mut [<<Self as Slice<Token, $width_type>>::Vector as Vector>::Scalar],
168 |         ){
169 |             <Self as Slice<Token, $width_type>>::align_mut(self, token)
170 |         }
171 | 
172 |         #[doc = "Create a slice of overlapping vectors of "]
173 |         #[doc = $width]
174 |         #[doc = "from a slice of scalars.\n\nSee [`overlapping`](trait.Slice.html#tymethod.overlapping)."]
175 |         #[inline]
176 |         fn $overlapping(&self, token: Token) -> Overlapping<'_, <Self as Slice<Token, $width_type>>::Vector> {
177 |             <Self as Slice<Token, $width_type>>::overlapping(self, token)
178 |         }
179 | 
180 |         #[doc = "Create a mutable slice of overlapping vectors of "]
181 |         #[doc = $width]
182 |         #[doc = "from a slice of scalars.\n\nSee [`overlapping_mut`](trait.Slice.html#tymethod.overlapping_mut)."]
183 |         #[inline]
184 |         fn $overlapping_mut(
185 |             &mut self,
186 |             token: Token,
187 |         ) -> OverlappingMut<'_, <Self as Slice<Token, $width_type>>::Vector> {
188 |             <Self as Slice<Token, $width_type>>::overlapping_mut(self, token)
189 |         }
190 |     }
191 | }
192 | 
193 | impl<T, Token> Native<Token> for [T]
194 | where
195 |     T: Native<Token>,
196 | {
197 |     type Width = T::Width; // a slice reuses its element type's native width
198 | }
199 | 
200 | /// Width-suffixed read, align and overlapping helpers for slices that implement `Slice` at 1, 2, 4, 8 and native width.
201 | pub trait SliceExt<Token>:
202 |     Native<Token>
203 |     + Slice<Token, width::W1>
204 |     + Slice<Token, width::W2>
205 |     + Slice<Token, width::W4>
206 |     + Slice<Token, width::W8>
207 |     + Slice<Token, NativeWidth<Self, Token>>
208 | where
209 |     Token: crate::arch::Token,
210 | {
211 |     slice_impl! { "the native number of lanes", <Self as Native<Token>>::Width, read_unchecked_native, read_native, align_native, align_native_mut, overlapping_native, overlapping_native_mut }
212 |     slice_impl! { "1 lane",   width::W1, read_unchecked1, read1, align1, align1_mut, overlapping1, overlapping1_mut }
213 |     slice_impl! { "2 lanes",  width::W2, read_unchecked2, read2, align2, align2_mut, overlapping2, overlapping2_mut }
214 |     slice_impl! { "4 lanes",  width::W4, read_unchecked4, read4, align4, align4_mut, overlapping4, overlapping4_mut }
215 |     slice_impl! { "8 lanes",  width::W8, read_unchecked8, read8, align8, align8_mut, overlapping8, overlapping8_mut }
216 | }
217 | 
218 | impl<T, Token> SliceExt<Token> for T
219 | where
220 |     T: ?Sized
221 |         + Native<Token>
222 |         + Slice<Token, width::W1>
223 |         + Slice<Token, width::W2>
224 |         + Slice<Token, width::W4>
225 |         + Slice<Token, width::W8>
226 |         + Slice<Token, NativeWidth<Self, Token>>,
227 |     Token: crate::arch::Token,
228 | { // blanket impl with no items: every `SliceExt` method has a default body
229 | }
230 | 
231 | /// Stand-in for a mutable reference: derefs to a temporary vector that is written to `source` on drop.
232 | pub struct RefMut<'a, V>
233 | where
234 |     V: Vector,
235 | {
236 |     source: *mut V::Scalar, // destination the temporary is written to on drop
237 |     temp: V, // the vector handed out by `Deref`/`DerefMut`
238 |     lifetime: PhantomData<&'a V::Scalar>, // carries the lifetime `'a` without storing a reference
239 | }
240 | 
241 | impl<'a, V> RefMut<'a, V>
242 | where
243 |     V: Vector,
244 | {
245 |     /// Wraps `source`, starting from the vector currently stored there.
246 |     #[inline]
247 |     fn new(token: V::Token, source: *mut V::Scalar) -> Self {
248 |         // Load the current contents (not zeros) so reads and `+=`-style updates see them.
249 |         // SAFETY (assumed): `source` is valid for a whole vector, as `drop` writes one there.
250 |         let temp = unsafe { V::read_ptr(token, source) };
251 |         Self { source, temp, lifetime: PhantomData }
252 |     }
253 | }
254 | 
255 | impl<'a, V> core::ops::Deref for RefMut<'a, V>
256 | where
257 |     V: Vector,
258 | {
259 |     type Target = V;
260 |     // Borrows the temporary vector held by the wrapper, not the memory behind `source`.
261 |     #[inline]
262 |     fn deref(&self) -> &V {
263 |         &self.temp
264 |     }
265 | }
266 | 
267 | impl<'a, V> core::ops::DerefMut for RefMut<'a, V>
268 | where
269 |     V: Vector,
270 | {
271 |     #[inline]
272 |     fn deref_mut(&mut self) -> &mut V {
273 |         &mut self.temp // edits land in the temporary and reach `source` only on drop
274 |     }
275 | }
276 | 
277 | impl<'a, V> core::ops::Drop for RefMut<'a, V>
278 | where
279 |     V: Vector,
280 | {
281 |     #[inline]
282 |     fn drop(&mut self) { // writes the temporary vector to `source`
283 |         unsafe { // NOTE(review): assumes `source` is still valid for a whole vector — confirm at call sites
284 |             self.temp.write_ptr(self.source);
285 |         }
286 |     }
287 | }
288 | 
289 | /// Read-only view of a scalar slice that yields a vector starting at any in-range scalar index.
290 | pub struct Overlapping<'a, V>
291 | where
292 |     V: Vector,
293 | {
294 |     slice: &'a [V::Scalar], // the scalars the vectors are read from
295 |     phantom: PhantomData<V>, // records the vector type without storing one
296 | }
297 | 
298 | #[allow(clippy::len_without_is_empty)]
299 | impl<'a, V> Overlapping<'a, V>
300 | where
301 |     V: Vector,
302 | {
303 |     /// Wraps `slice` so vectors can be read starting at any scalar offset.
304 |     #[inline]
305 |     pub fn new(
306 |         #[allow(unused_variables)] token: impl Into<V::Token>,
307 |         slice: &'a [V::Scalar],
308 |     ) -> Self {
309 |         // `token` is neither stored nor inspected here. The slice must hold
310 |         // at least one whole vector, otherwise construction panics.
311 |         let wide_enough = slice.len() >= V::width();
312 |         assert!(wide_enough, "slice must be at least as wide as the vector");
313 |         Self {
314 |             slice,
315 |             phantom: PhantomData,
316 |         }
317 |     }
318 | 
319 |     /// Number of distinct starting offsets at which a whole vector fits.
320 |     ///
321 |     /// This is `slice.len() - V::width() + 1`; `new` guarantees it is at least 1.
322 |     #[inline]
323 |     pub fn len(&self) -> usize {
324 |         self.slice.len() - V::width() + 1
325 |     }
326 | 
327 |     /// Reads the vector starting at scalar `index`, or `None` if it would not fit.
328 |     #[inline]
329 |     pub fn get(&self, index: usize) -> Option<V> {
330 |         if index >= self.len() {
331 |             return None;
332 |         }
333 |         // SAFETY: `index < self.len()` was checked just above.
334 |         Some(unsafe { self.get_unchecked(index) })
335 |     }
336 | 
337 |     /// Reads the vector starting at scalar `index` without a bounds check.
338 |     ///
339 |     /// # Safety
340 |     /// `index` must be less than `len()`, i.e. the underlying slice must hold at least
341 |     /// `index + V::width()` scalars.
342 |     #[inline]
343 |     pub unsafe fn get_unchecked(&self, index: usize) -> V
344 |     where
345 |         V: Vector,
346 |     {
347 |         V::read_ptr(V::Token::new_unchecked(), self.slice.as_ptr().add(index))
348 |     }
349 | }
350 | 
351 | /// Mutable view of a scalar slice as vectors starting at every scalar offset.
352 | pub struct OverlappingMut<'a, V>
353 | where
354 |     V: Vector,
355 | {
356 |     slice: &'a mut [V::Scalar], // at least `V::width()` long; asserted in `new`
357 |     phantom: PhantomData<V>, // records the vector type; holds no data
358 | }
359 | 
360 | #[allow(clippy::len_without_is_empty)]
361 | impl<'a, V: Vector> OverlappingMut<'a, V> {
362 |     /// Wraps `slice` so that a vector can be read or written at any scalar offset.
363 |     ///
364 |     /// The token argument is never read; only its type matters.
365 |     ///
366 |     /// # Panics
367 |     /// Panics if `slice` holds fewer than `V::width()` scalars.
368 |     #[inline]
369 |     pub fn new(
370 |         #[allow(unused_variables)] token: impl Into<V::Token>,
371 |         slice: &'a mut [V::Scalar],
372 |     ) -> Self {
373 |         let phantom = PhantomData;
374 |         assert!(
375 |             slice.len() >= V::width(),
376 |             "slice must be at least as wide as the vector"
377 |         );
378 |         Self { slice, phantom }
379 |     }
380 | 
381 |     /// Number of distinct starting offsets at which a whole vector fits.
382 |     ///
383 |     /// Equal to `slice.len() - V::width() + 1`.
384 |     #[inline]
385 |     pub fn len(&self) -> usize {
386 |         1 + (self.slice.len() - V::width())
387 |     }
388 | 
389 |     /// Reads the vector beginning at scalar offset `index`, or returns `None`
390 |     /// when `index` is not below `len()`.
391 |     #[inline]
392 |     pub fn get(&self, index: usize) -> Option<V> {
393 |         if index >= self.len() {
394 |             return None;
395 |         }
396 |         Some(unsafe { self.get_unchecked(index) })
397 |     }
398 | 
399 |     /// Reads the vector beginning at scalar offset `index` without a bounds check.
400 |     ///
401 |     /// # Safety
402 |     /// `index` must be less than `len()`, so that `index + V::width()` scalars exist.
403 |     #[inline]
404 |     pub unsafe fn get_unchecked(&self, index: usize) -> V {
405 |         let token = V::Token::new_unchecked();
406 |         V::read_ptr(token, self.slice.as_ptr().add(index))
407 |     }
408 | 
409 |     /// Returns a write-back proxy for the vector at scalar offset `index`, or `None` if out of range.
410 |     // NOTE(review): `&'a mut self` borrows the wrapper for all of `'a` — confirm that is intended.
411 |     #[inline]
412 |     pub fn get_mut(&'a mut self, index: usize) -> Option<RefMut<'a, V>> {
413 |         if index >= self.len() {
414 |             return None;
415 |         }
416 |         Some(unsafe { self.get_unchecked_mut(index) })
417 |     }
418 | 
419 |     /// Returns a write-back proxy for the vector at scalar offset `index`
420 |     /// without a bounds check.
421 |     ///
422 |     /// # Safety
423 |     /// `index` must be less than `len()`, i.e. the underlying slice must hold at
424 |     /// least `index + V::width()` scalars.
425 |     #[inline]
426 |     pub unsafe fn get_unchecked_mut(&'a mut self, index: usize) -> RefMut<'a, V> {
427 |         let token = V::Token::new_unchecked();
428 |         let target = self.slice.as_mut_ptr().add(index);
429 |         RefMut::new(token, target)
430 |     }
431 | }
432 | 


--------------------------------------------------------------------------------
/generic-simd/src/vector/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Vector type interfaces.
  2 | 
  3 | pub mod width;
  4 | 
  5 | use crate::arch::Token;
  6 | use crate::scalar::Scalar;
  7 | use core::ops::{
  8 |     Add, AddAssign, Deref, DerefMut, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign,
  9 | };
 10 | 
 11 | /// Implemented by scalar types to name the width of their widest native vector for `Token`.
 12 | pub trait Native<Token> {
 13 |     type Width: width::Width; // width marker read through `NativeWidth`
 14 | }
 15 | 
 16 | /// Width marker of the widest native vector for `Scalar` under `Token`.
 17 | pub type NativeWidth<Scalar, Token> = <Scalar as Native<Token>>::Width;
 18 | 
 19 | /// Vector type of `Scalar` at its widest native width under `Token`.
 20 | pub type NativeVector<Scalar, Token> = VectorOf<Scalar, NativeWidth<Scalar, Token>, Token>;
 21 | 
 22 | /// Vector type of `Scalar` with the given `Width` under `Token`.
 23 | pub type VectorOf<Scalar, Width, Token> = <Scalar as self::Scalar<Token, Width>>::Vector;
 24 | 
 25 | /// The fundamental vector type.
 26 | ///
 27 | /// # Safety
 28 | /// This trait may only be implemented for types that have the memory layout of an array of
 29 | /// `Scalar` with length `width()`.
 30 | pub unsafe trait Vector: Copy {
 31 |     /// The type of elements in the vector.
 32 |     type Scalar: Copy;
 33 | 
 34 |     /// The token that proves support for this vector on the CPU.
 35 |     type Token: Token;
 36 | 
 37 |     /// The number of elements in the vector.
 38 |     type Width: width::Width;
 39 | 
 40 |     /// The underlying type that `to_underlying` and `from_underlying` convert to and from.
 41 |     type Underlying: Copy;
 42 | 
 43 |     /// Returns the number of lanes.
 44 |     #[inline]
 45 |     fn width() -> usize {
 46 |         <Self::Width as width::Width>::VALUE
 47 |     }
 48 | 
 49 |     /// Creates a new instance of `Token` from a vector.
 50 |     #[inline]
 51 |     fn to_token(self) -> Self::Token {
 52 |         unsafe { Self::Token::new_unchecked() }
 53 |     }
 54 | 
 55 |     /// Returns a slice containing the vector.
 56 |     #[inline]
 57 |     fn as_slice(&self) -> &[Self::Scalar] {
 58 |         unsafe { core::slice::from_raw_parts(self as *const _ as *const _, Self::width()) }
 59 |     }
 60 | 
 61 |     /// Returns a mutable slice containing the vector.
 62 |     #[inline]
 63 |     fn as_slice_mut(&mut self) -> &mut [Self::Scalar] {
 64 |         unsafe { core::slice::from_raw_parts_mut(self as *mut _ as *mut _, Self::width()) }
 65 |     }
 66 | 
 67 |     /// Converts this vector to its underlying type.
 68 |     ///
 69 |     /// # Panics
 70 |     /// Panics if `Self` and `Self::Underlying` differ in size or alignment.
 71 |     #[inline]
 72 |     fn to_underlying(self) -> Self::Underlying {
 73 |         use core::mem::{align_of, size_of};
 74 |         let underlying = (size_of::<Self::Underlying>(), align_of::<Self::Underlying>());
 75 |         // Sizes are compared with sizes and alignments with alignments.
 76 |         assert_eq!(underlying, (size_of::<Self>(), align_of::<Self>()));
 77 |         unsafe { core::mem::transmute_copy(&self) }
 78 |     }
 79 | 
 80 |     /// Converts the underlying type to a vector.
 81 |     ///
 82 |     /// # Panics
 83 |     /// Panics if `Self` and `Self::Underlying` differ in size or alignment.
 84 |     #[inline]
 85 |     fn from_underlying(
 86 |         #[allow(unused_variables)] token: Self::Token,
 87 |         underlying: Self::Underlying,
 88 |     ) -> Self {
 89 |         use core::mem::{align_of, size_of};
 90 |         let source = (size_of::<Self::Underlying>(), align_of::<Self::Underlying>());
 91 |         // Sizes are compared with sizes and alignments with alignments.
 92 |         assert_eq!(source, (size_of::<Self>(), align_of::<Self>()));
 93 |         unsafe { core::mem::transmute_copy(&underlying) }
 94 |     }
 95 | 
 96 |     /// Read from a pointer.
 97 |     ///
 98 |     /// # Safety
 99 |     /// * `from` must point to an array of length at least `width()`.
100 |     #[inline]
101 |     unsafe fn read_ptr(
102 |         #[allow(unused_variables)] token: Self::Token,
103 |         from: *const Self::Scalar,
104 |     ) -> Self {
105 |         (from as *const Self).read_unaligned()
106 |     }
107 | 
108 |     /// Read from a vector-aligned pointer.
109 |     ///
110 |     /// # Safety
111 |     /// * `from` must point to an array of length at least `width()`.
112 |     /// * `from` must be aligned for the vector type.
113 |     #[inline]
114 |     unsafe fn read_aligned_ptr(
115 |         #[allow(unused_variables)] token: Self::Token,
116 |         from: *const Self::Scalar,
117 |     ) -> Self {
118 |         (from as *const Self).read()
119 |     }
120 | 
121 |     /// Read from a slice without checking the length.
122 |     ///
123 |     /// Forwards to `read_ptr` with the slice's data pointer.
124 |     ///
125 |     /// # Safety
126 |     /// * `from` must have length at least `width()`.
127 |     #[inline]
128 |     unsafe fn read_unchecked(token: Self::Token, from: &[Self::Scalar]) -> Self {
129 |         Self::read_ptr(token, from.as_ptr())
130 |     }
131 | 
132 |     /// Read from a slice.
133 |     ///
134 |     /// # Panics
135 |     /// Panics if the length of `from` is less than `width()`.
136 |     #[inline]
137 |     fn read(token: Self::Token, from: &[Self::Scalar]) -> Self {
138 |         assert!(
139 |             from.len() >= Self::width(),
140 |             "source not large enough to load vector"
141 |         );
142 |         unsafe { Self::read_unchecked(token, from) }
143 |     }
144 | 
145 |     /// Write to a pointer.
146 |     ///
147 |     /// # Safety
148 |     /// * `to` must point to an array of length at least `width()`.
149 |     #[inline]
150 |     unsafe fn write_ptr(self, to: *mut Self::Scalar) {
151 |         (to as *mut Self).write_unaligned(self);
152 |     }
153 | 
154 |     /// Write to a vector-aligned pointer.
155 |     ///
156 |     /// # Safety
157 |     /// * `to` must point to an array of length at least `width()`.
158 |     /// * `to` must be aligned for the vector type.
159 |     #[inline]
160 |     unsafe fn write_aligned_ptr(self, to: *mut Self::Scalar) {
161 |         (to as *mut Self).write(self);
162 |     }
163 | 
164 |     /// Write to a slice without checking the length.
165 |     ///
166 |     /// # Safety
167 |     /// * `to` must have length at least `width()`.
168 |     #[inline]
169 |     unsafe fn write_unchecked(self, to: &mut [Self::Scalar]) {
170 |         self.write_ptr(to.as_mut_ptr());
171 |     }
172 | 
173 |     /// Write to a slice.
174 |     ///
175 |     /// # Panics
176 |     /// Panics if the length of `to` is less than `width()`.
177 |     #[inline]
178 |     fn write(self, to: &mut [Self::Scalar]) {
179 |         assert!(
180 |             to.len() >= Self::width(),
181 |             "destination not large enough to store vector"
182 |         );
183 |         unsafe { self.write_unchecked(to) };
184 |     }
185 | 
186 |     /// Create a new vector with each lane containing zeroes.
187 |     fn zeroed(token: Self::Token) -> Self;
188 | 
189 |     /// Create a new vector with each lane containing the provided value.
190 |     fn splat(token: Self::Token, from: Self::Scalar) -> Self;
191 | }
192 | 
193 | /// A supertrait for vectors that view as scalar slices and support `+`, `-`, `*`, `/` (plain and assigning) with both vectors and scalars.
194 | pub trait Ops:
195 |     Vector
196 |     + AsRef<[<Self as Vector>::Scalar]>
197 |     + AsMut<[<Self as Vector>::Scalar]>
198 |     + Deref<Target = [<Self as Vector>::Scalar]>
199 |     + DerefMut
200 |     + Add<Self, Output = Self>
201 |     + Add<<Self as Vector>::Scalar, Output = Self>
202 |     + AddAssign<Self>
203 |     + AddAssign<<Self as Vector>::Scalar>
204 |     + Sub<Self, Output = Self>
205 |     + Sub<<Self as Vector>::Scalar, Output = Self>
206 |     + SubAssign<Self>
207 |     + SubAssign<<Self as Vector>::Scalar>
208 |     + Mul<Self, Output = Self>
209 |     + Mul<<Self as Vector>::Scalar, Output = Self>
210 |     + MulAssign<Self>
211 |     + MulAssign<<Self as Vector>::Scalar>
212 |     + Div<Self, Output = Self>
213 |     + Div<<Self as Vector>::Scalar, Output = Self>
214 |     + DivAssign<Self>
215 |     + DivAssign<<Self as Vector>::Scalar>
216 | {
217 | }
218 | impl<V> Ops for V where // blanket impl: every type meeting the bounds below is `Ops`
219 |     V: Vector
220 |         + AsRef<[<V as Vector>::Scalar]>
221 |         + AsMut<[<V as Vector>::Scalar]>
222 |         + Deref<Target = [<V as Vector>::Scalar]>
223 |         + DerefMut
224 |         + Add<V, Output = V>
225 |         + Add<<V as Vector>::Scalar, Output = V>
226 |         + AddAssign<V>
227 |         + AddAssign<<V as Vector>::Scalar>
228 |         + Sub<V, Output = V>
229 |         + Sub<<V as Vector>::Scalar, Output = V>
230 |         + SubAssign<V>
231 |         + SubAssign<<V as Vector>::Scalar>
232 |         + Mul<V, Output = V>
233 |         + Mul<<V as Vector>::Scalar, Output = V>
234 |         + MulAssign<V>
235 |         + MulAssign<<V as Vector>::Scalar>
236 |         + Div<V, Output = V>
237 |         + Div<<V as Vector>::Scalar, Output = V>
238 |         + DivAssign<V>
239 |         + DivAssign<<V as Vector>::Scalar>
240 | {
241 | }
242 | 
243 | /// A supertrait for vectors that support the `Ops` arithmetic plus unary negation.
244 | pub trait Signed: Ops + Neg<Output = Self> {}
245 | impl<V> Signed for V where V: Ops + Neg<Output = V> {} // blanket impl for all such types
246 | 
247 | /// Complex-valued vectors; extends `Signed` with complex-specific operations.
248 | pub trait Complex: Signed {
249 |     /// The real scalar type.
250 |     type RealScalar: Copy;
251 | 
252 |     /// Returns the complex conjugate of the vector.
253 |     fn conj(self) -> Self;
254 | 
255 |     /// Returns the vector multiplied by `i`.
256 |     fn mul_i(self) -> Self;
257 | 
258 |     /// Returns the vector multiplied by `-i`.
259 |     fn mul_neg_i(self) -> Self;
260 | }
261 | 


--------------------------------------------------------------------------------
/generic-simd/src/vector/width.rs:
--------------------------------------------------------------------------------
 1 | //! Types indicating widths of vectors.
 2 | 
 3 | /// Type-level marker for the number of lanes in a vector.
 4 | pub trait Width {
 5 |     const VALUE: usize; // lane count; returned by `Vector::width()`
 6 | }
 7 | 
 8 | /// Marker type for vectors with 1 lane.
 9 | pub struct W1;
10 | 
11 | /// Marker type for vectors with 2 lanes.
12 | pub struct W2;
13 | 
14 | /// Marker type for vectors with 4 lanes.
15 | pub struct W4;
16 | 
17 | /// Marker type for vectors with 8 lanes.
18 | pub struct W8;
19 | 
20 | impl Width for W1 {
21 |     const VALUE: usize = 1;
22 | }
23 | 
24 | impl Width for W2 {
25 |     const VALUE: usize = 2;
26 | }
27 | 
28 | impl Width for W4 {
29 |     const VALUE: usize = 4;
30 | }
31 | 
32 | impl Width for W8 {
33 |     const VALUE: usize = 8;
34 | }
35 | 


--------------------------------------------------------------------------------