├── .github └── workflows │ ├── ci.yml │ └── rustdoc.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Cargo.toml ├── LICENCE.md ├── benches ├── bitwise.rs ├── range.rs ├── simd.rs └── stdlib.rs ├── proptest-regressions └── lib.txt └── src ├── array.rs ├── array_ops.rs ├── default_zero.rs ├── lib.rs ├── simd_ops.rs └── vec.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | pull_request: 5 | schedule: 6 | - cron: "0 0 1,15 * *" 7 | 8 | jobs: 9 | test: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-latest, windows-latest, macOS-latest] 15 | rust: [stable, beta, nightly] 16 | 17 | steps: 18 | - name: Mac CPU info 19 | if: matrix.os == 'macOS-latest' 20 | run: sysctl -a | grep machdep 21 | - name: Linux CPU info 22 | if: matrix.os == 'ubuntu-latest' 23 | run: cat /proc/cpuinfo 24 | - uses: hecrj/setup-rust-action@v1 25 | with: 26 | rust-version: ${{ matrix.rust }} 27 | - uses: actions/checkout@v1 28 | - name: Run tests 29 | run: cargo test --all-features 30 | 31 | clippy: 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: hecrj/setup-rust-action@v1 35 | with: 36 | rust-version: stable 37 | components: clippy 38 | - uses: actions/checkout@v1 39 | - name: Clippy 40 | run: cargo clippy -- -D warnings 41 | -------------------------------------------------------------------------------- /.github/workflows/rustdoc.yml: -------------------------------------------------------------------------------- 1 | name: Rustdoc 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | rustdoc: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: hecrj/setup-rust-action@v1 12 | - uses: actions/checkout@master 13 | - name: Build docs 14 | run: | 15 | cargo doc --all-features 16 | cat > target/doc/index.html < 18 | 19 | 20 | 21 | EOF 22 | - name: Deploy to Github Pages 23 | uses: crazy-max/ghaction-github-pages@master 24 | 
if: success() 25 | with: 26 | target_branch: gh-pages 27 | build_dir: target/doc 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | GITHUB_PAT: ${{ secrets.ACCESS_TOKEN }} 31 | # uses: JamesIves/github-pages-deploy-action@master 32 | # if: success() 33 | # env: 34 | # ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} 35 | # BASE_BRANCH: master 36 | # BRANCH: gh-pages 37 | # FOLDER: target/doc 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at admin@immutable.rs. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "simdify" 3 | version = "0.1.0" 4 | authors = ["Bodil Stokke "] 5 | edition = "2018" 6 | license = "MPL-2.0+" 7 | description = "SIMD optimised algorithms and data types" 8 | repository = "https://github.com/bodil/simdify" 9 | documentation = "https://docs.rs/simdify" 10 | categories = ["data-structures"] 11 | 12 | [dev-dependencies] 13 | proptest = "0.9.4" 14 | rand = { version = "0.7.2", features = ["small_rng"] } 15 | 16 | [dependencies] 17 | typenum = "1.11.2" 18 | bitmaps = "2.0.0" 19 | generic-array = "0.13.2" 20 | -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 
| Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | ### 1. Definitions 5 | 6 | **1.1. “Contributor”** 7 | means each individual or legal entity that creates, contributes to 8 | the creation of, or owns Covered Software. 9 | 10 | **1.2. “Contributor Version”** 11 | means the combination of the Contributions of others (if any) used 12 | by a Contributor and that particular Contributor's Contribution. 13 | 14 | **1.3. “Contribution”** 15 | means Covered Software of a particular Contributor. 16 | 17 | **1.4. “Covered Software”** 18 | means Source Code Form to which the initial Contributor has attached 19 | the notice in Exhibit A, the Executable Form of such Source Code 20 | Form, and Modifications of such Source Code Form, in each case 21 | including portions thereof. 22 | 23 | **1.5. “Incompatible With Secondary Licenses”** 24 | means 25 | 26 | * **(a)** that the initial Contributor has attached the notice described 27 | in Exhibit B to the Covered Software; or 28 | * **(b)** that the Covered Software was made available under the terms of 29 | version 1.1 or earlier of the License, but not also under the 30 | terms of a Secondary License. 31 | 32 | **1.6. “Executable Form”** 33 | means any form of the work other than Source Code Form. 34 | 35 | **1.7. “Larger Work”** 36 | means a work that combines Covered Software with other material, in 37 | a separate file or files, that is not Covered Software. 38 | 39 | **1.8. “License”** 40 | means this document. 41 | 42 | **1.9. “Licensable”** 43 | means having the right to grant, to the maximum extent possible, 44 | whether at the time of the initial grant or subsequently, any and 45 | all of the rights conveyed by this License. 46 | 47 | **1.10. 
“Modifications”** 48 | means any of the following: 49 | 50 | * **(a)** any file in Source Code Form that results from an addition to, 51 | deletion from, or modification of the contents of Covered 52 | Software; or 53 | * **(b)** any new file in Source Code Form that contains any Covered 54 | Software. 55 | 56 | **1.11. “Patent Claims” of a Contributor** 57 | means any patent claim(s), including without limitation, method, 58 | process, and apparatus claims, in any patent Licensable by such 59 | Contributor that would be infringed, but for the grant of the 60 | License, by the making, using, selling, offering for sale, having 61 | made, import, or transfer of either its Contributions or its 62 | Contributor Version. 63 | 64 | **1.12. “Secondary License”** 65 | means either the GNU General Public License, Version 2.0, the GNU 66 | Lesser General Public License, Version 2.1, the GNU Affero General 67 | Public License, Version 3.0, or any later versions of those 68 | licenses. 69 | 70 | **1.13. “Source Code Form”** 71 | means the form of the work preferred for making modifications. 72 | 73 | **1.14. “You” (or “Your”)** 74 | means an individual or a legal entity exercising rights under this 75 | License. For legal entities, “You” includes any entity that 76 | controls, is controlled by, or is under common control with You. For 77 | purposes of this definition, “control” means **(a)** the power, direct 78 | or indirect, to cause the direction or management of such entity, 79 | whether by contract or otherwise, or **(b)** ownership of more than 80 | fifty percent (50%) of the outstanding shares or beneficial 81 | ownership of such entity. 82 | 83 | 84 | ### 2. License Grants and Conditions 85 | 86 | #### 2.1. 
Grants 87 | 88 | Each Contributor hereby grants You a world-wide, royalty-free, 89 | non-exclusive license: 90 | 91 | * **(a)** under intellectual property rights (other than patent or trademark) 92 | Licensable by such Contributor to use, reproduce, make available, 93 | modify, display, perform, distribute, and otherwise exploit its 94 | Contributions, either on an unmodified basis, with Modifications, or 95 | as part of a Larger Work; and 96 | * **(b)** under Patent Claims of such Contributor to make, use, sell, offer 97 | for sale, have made, import, and otherwise transfer either its 98 | Contributions or its Contributor Version. 99 | 100 | #### 2.2. Effective Date 101 | 102 | The licenses granted in Section 2.1 with respect to any Contribution 103 | become effective for each Contribution on the date the Contributor first 104 | distributes such Contribution. 105 | 106 | #### 2.3. Limitations on Grant Scope 107 | 108 | The licenses granted in this Section 2 are the only rights granted under 109 | this License. No additional rights or licenses will be implied from the 110 | distribution or licensing of Covered Software under this License. 111 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 112 | Contributor: 113 | 114 | * **(a)** for any code that a Contributor has removed from Covered Software; 115 | or 116 | * **(b)** for infringements caused by: **(i)** Your and any other third party's 117 | modifications of Covered Software, or **(ii)** the combination of its 118 | Contributions with other software (except as part of its Contributor 119 | Version); or 120 | * **(c)** under Patent Claims infringed by Covered Software in the absence of 121 | its Contributions. 122 | 123 | This License does not grant any rights in the trademarks, service marks, 124 | or logos of any Contributor (except as may be necessary to comply with 125 | the notice requirements in Section 3.4). 126 | 127 | #### 2.4. 
Subsequent Licenses 128 | 129 | No Contributor makes additional grants as a result of Your choice to 130 | distribute the Covered Software under a subsequent version of this 131 | License (see Section 10.2) or under the terms of a Secondary License (if 132 | permitted under the terms of Section 3.3). 133 | 134 | #### 2.5. Representation 135 | 136 | Each Contributor represents that the Contributor believes its 137 | Contributions are its original creation(s) or it has sufficient rights 138 | to grant the rights to its Contributions conveyed by this License. 139 | 140 | #### 2.6. Fair Use 141 | 142 | This License is not intended to limit any rights You have under 143 | applicable copyright doctrines of fair use, fair dealing, or other 144 | equivalents. 145 | 146 | #### 2.7. Conditions 147 | 148 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 149 | in Section 2.1. 150 | 151 | 152 | ### 3. Responsibilities 153 | 154 | #### 3.1. Distribution of Source Form 155 | 156 | All distribution of Covered Software in Source Code Form, including any 157 | Modifications that You create or to which You contribute, must be under 158 | the terms of this License. You must inform recipients that the Source 159 | Code Form of the Covered Software is governed by the terms of this 160 | License, and how they can obtain a copy of this License. You may not 161 | attempt to alter or restrict the recipients' rights in the Source Code 162 | Form. 163 | 164 | #### 3.2. 
Distribution of Executable Form 165 | 166 | If You distribute Covered Software in Executable Form then: 167 | 168 | * **(a)** such Covered Software must also be made available in Source Code 169 | Form, as described in Section 3.1, and You must inform recipients of 170 | the Executable Form how they can obtain a copy of such Source Code 171 | Form by reasonable means in a timely manner, at a charge no more 172 | than the cost of distribution to the recipient; and 173 | 174 | * **(b)** You may distribute such Executable Form under the terms of this 175 | License, or sublicense it under different terms, provided that the 176 | license for the Executable Form does not attempt to limit or alter 177 | the recipients' rights in the Source Code Form under this License. 178 | 179 | #### 3.3. Distribution of a Larger Work 180 | 181 | You may create and distribute a Larger Work under terms of Your choice, 182 | provided that You also comply with the requirements of this License for 183 | the Covered Software. If the Larger Work is a combination of Covered 184 | Software with a work governed by one or more Secondary Licenses, and the 185 | Covered Software is not Incompatible With Secondary Licenses, this 186 | License permits You to additionally distribute such Covered Software 187 | under the terms of such Secondary License(s), so that the recipient of 188 | the Larger Work may, at their option, further distribute the Covered 189 | Software under the terms of either this License or such Secondary 190 | License(s). 191 | 192 | #### 3.4. Notices 193 | 194 | You may not remove or alter the substance of any license notices 195 | (including copyright notices, patent notices, disclaimers of warranty, 196 | or limitations of liability) contained within the Source Code Form of 197 | the Covered Software, except that You may alter any license notices to 198 | the extent required to remedy known factual inaccuracies. 199 | 200 | #### 3.5. 
Application of Additional Terms 201 | 202 | You may choose to offer, and to charge a fee for, warranty, support, 203 | indemnity or liability obligations to one or more recipients of Covered 204 | Software. However, You may do so only on Your own behalf, and not on 205 | behalf of any Contributor. You must make it absolutely clear that any 206 | such warranty, support, indemnity, or liability obligation is offered by 207 | You alone, and You hereby agree to indemnify every Contributor for any 208 | liability incurred by such Contributor as a result of warranty, support, 209 | indemnity or liability terms You offer. You may include additional 210 | disclaimers of warranty and limitations of liability specific to any 211 | jurisdiction. 212 | 213 | 214 | ### 4. Inability to Comply Due to Statute or Regulation 215 | 216 | If it is impossible for You to comply with any of the terms of this 217 | License with respect to some or all of the Covered Software due to 218 | statute, judicial order, or regulation then You must: **(a)** comply with 219 | the terms of this License to the maximum extent possible; and **(b)** 220 | describe the limitations and the code they affect. Such description must 221 | be placed in a text file included with all distributions of the Covered 222 | Software under this License. Except to the extent prohibited by statute 223 | or regulation, such description must be sufficiently detailed for a 224 | recipient of ordinary skill to be able to understand it. 225 | 226 | 227 | ### 5. Termination 228 | 229 | **5.1.** The rights granted under this License will terminate automatically 230 | if You fail to comply with any of its terms. 
However, if You become 231 | compliant, then the rights granted under this License from a particular 232 | Contributor are reinstated **(a)** provisionally, unless and until such 233 | Contributor explicitly and finally terminates Your grants, and **(b)** on an 234 | ongoing basis, if such Contributor fails to notify You of the 235 | non-compliance by some reasonable means prior to 60 days after You have 236 | come back into compliance. Moreover, Your grants from a particular 237 | Contributor are reinstated on an ongoing basis if such Contributor 238 | notifies You of the non-compliance by some reasonable means, this is the 239 | first time You have received notice of non-compliance with this License 240 | from such Contributor, and You become compliant prior to 30 days after 241 | Your receipt of the notice. 242 | 243 | **5.2.** If You initiate litigation against any entity by asserting a patent 244 | infringement claim (excluding declaratory judgment actions, 245 | counter-claims, and cross-claims) alleging that a Contributor Version 246 | directly or indirectly infringes any patent, then the rights granted to 247 | You by any and all Contributors for the Covered Software under Section 248 | 2.1 of this License shall terminate. 249 | 250 | **5.3.** In the event of termination under Sections 5.1 or 5.2 above, all 251 | end user license agreements (excluding distributors and resellers) which 252 | have been validly granted by You or Your distributors under this License 253 | prior to termination shall survive termination. 254 | 255 | 256 | ### 6. Disclaimer of Warranty 257 | 258 | > Covered Software is provided under this License on an “as is” 259 | > basis, without warranty of any kind, either expressed, implied, or 260 | > statutory, including, without limitation, warranties that the 261 | > Covered Software is free of defects, merchantable, fit for a 262 | > particular purpose or non-infringing. 
The entire risk as to the 263 | > quality and performance of the Covered Software is with You. 264 | > Should any Covered Software prove defective in any respect, You 265 | > (not any Contributor) assume the cost of any necessary servicing, 266 | > repair, or correction. This disclaimer of warranty constitutes an 267 | > essential part of this License. No use of any Covered Software is 268 | > authorized under this License except under this disclaimer. 269 | 270 | ### 7. Limitation of Liability 271 | 272 | > Under no circumstances and under no legal theory, whether tort 273 | > (including negligence), contract, or otherwise, shall any 274 | > Contributor, or anyone who distributes Covered Software as 275 | > permitted above, be liable to You for any direct, indirect, 276 | > special, incidental, or consequential damages of any character 277 | > including, without limitation, damages for lost profits, loss of 278 | > goodwill, work stoppage, computer failure or malfunction, or any 279 | > and all other commercial damages or losses, even if such party 280 | > shall have been informed of the possibility of such damages. This 281 | > limitation of liability shall not apply to liability for death or 282 | > personal injury resulting from such party's negligence to the 283 | > extent applicable law prohibits such limitation. Some 284 | > jurisdictions do not allow the exclusion or limitation of 285 | > incidental or consequential damages, so this exclusion and 286 | > limitation may not apply to You. 287 | 288 | 289 | ### 8. Litigation 290 | 291 | Any litigation relating to this License may be brought only in the 292 | courts of a jurisdiction where the defendant maintains its principal 293 | place of business and such litigation shall be governed by laws of that 294 | jurisdiction, without reference to its conflict-of-law provisions. 295 | Nothing in this Section shall prevent a party's ability to bring 296 | cross-claims or counter-claims. 297 | 298 | 299 | ### 9. 
Miscellaneous 300 | 301 | This License represents the complete agreement concerning the subject 302 | matter hereof. If any provision of this License is held to be 303 | unenforceable, such provision shall be reformed only to the extent 304 | necessary to make it enforceable. Any law or regulation which provides 305 | that the language of a contract shall be construed against the drafter 306 | shall not be used to construe this License against a Contributor. 307 | 308 | 309 | ### 10. Versions of the License 310 | 311 | #### 10.1. New Versions 312 | 313 | Mozilla Foundation is the license steward. Except as provided in Section 314 | 10.3, no one other than the license steward has the right to modify or 315 | publish new versions of this License. Each version will be given a 316 | distinguishing version number. 317 | 318 | #### 10.2. Effect of New Versions 319 | 320 | You may distribute the Covered Software under the terms of the version 321 | of the License under which You originally received the Covered Software, 322 | or under the terms of any subsequent version published by the license 323 | steward. 324 | 325 | #### 10.3. Modified Versions 326 | 327 | If you create software not governed by this License, and you want to 328 | create a new license for such software, you may create and use a 329 | modified version of this License if you rename the license and remove 330 | any references to the name of the license steward (except to note that 331 | such modified license differs from this License). 332 | 333 | #### 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses 334 | 335 | If You choose to distribute Source Code Form that is Incompatible With 336 | Secondary Licenses under the terms of this version of the License, the 337 | notice described in Exhibit B of this License must be attached. 
338 | 339 | ## Exhibit A - Source Code Form License Notice 340 | 341 | This Source Code Form is subject to the terms of the Mozilla Public 342 | License, v. 2.0. If a copy of the MPL was not distributed with this 343 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 344 | 345 | If it is not possible or desirable to put the notice in a particular 346 | file, then You may include the notice in a location (such as a LICENSE 347 | file in a relevant directory) where a recipient would be likely to look 348 | for such a notice. 349 | 350 | You may add additional accurate notices of copyright ownership. 351 | 352 | ## Exhibit B - “Incompatible With Secondary Licenses” Notice 353 | 354 | This Source Code Form is "Incompatible With Secondary Licenses", as 355 | defined by the Mozilla Public License, v. 2.0. 356 | -------------------------------------------------------------------------------- /benches/bitwise.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use test::Bencher; 6 | 7 | use rand::{rngs::SmallRng, Rng, SeedableRng}; 8 | 9 | mod range; 10 | use range::GenRange; 11 | 12 | use simdify::DefaultZero; 13 | 14 | fn search(slice: &[A], key: A) -> Result { 15 | if slice.is_empty() { 16 | return Err(0); 17 | } 18 | let mut i = 0; 19 | let mut b = (slice.len() - 1).next_power_of_two(); 20 | while b > 1 { 21 | b >>= 1; 22 | let j = i | b; 23 | if slice.len() <= j { 24 | continue; 25 | } 26 | if slice[j] <= key { 27 | i = j; 28 | } else { 29 | b >>= 1; 30 | while b > 0 { 31 | if slice[i | b] <= key { 32 | i |= b; 33 | } 34 | b >>= 1; 35 | } 36 | break; 37 | } 38 | } 39 | if slice[i] == key { 40 | Ok(i) 41 | } else { 42 | Err(i) 43 | } 44 | } 45 | 46 | fn bitwise_search(size: usize, b: &mut Bencher) 47 | where 48 | Int: Ord + Copy + DefaultZero + GenRange, 49 | { 50 | let mut gen = SmallRng::from_entropy(); 51 | let keys = Int::gen_range(size); 52 | let index = 
gen.gen_range(0, keys.len()); 53 | let key = keys[index]; 54 | b.iter(|| { 55 | // assert_eq!(Ok(index), keys.binary_search(&key)); 56 | assert_eq!(Ok(index), search(&keys, key)); 57 | }) 58 | } 59 | 60 | #[bench] 61 | fn bitwise_search_i8_16(b: &mut Bencher) { 62 | bitwise_search::(16, b) 63 | } 64 | #[bench] 65 | fn bitwise_search_i8_256(b: &mut Bencher) { 66 | bitwise_search::(256, b) 67 | } 68 | 69 | #[bench] 70 | fn bitwise_search_i32_10(b: &mut Bencher) { 71 | bitwise_search::(10, b) 72 | } 73 | #[bench] 74 | fn bitwise_search_i32_1000(b: &mut Bencher) { 75 | bitwise_search::(1000, b) 76 | } 77 | #[bench] 78 | fn bitwise_search_i32_100_000(b: &mut Bencher) { 79 | bitwise_search::(100_000, b) 80 | } 81 | #[bench] 82 | fn bitwise_search_i32_10_000_000(b: &mut Bencher) { 83 | bitwise_search::(10_000_000, b) 84 | } 85 | #[bench] 86 | fn bitwise_search_i32_1_000_000_000(b: &mut Bencher) { 87 | bitwise_search::(1_000_000_000, b) 88 | } 89 | 90 | #[bench] 91 | fn bitwise_search_i64_10(b: &mut Bencher) { 92 | bitwise_search::(10, b) 93 | } 94 | #[bench] 95 | fn bitwise_search_i64_1000(b: &mut Bencher) { 96 | bitwise_search::(1000, b) 97 | } 98 | #[bench] 99 | fn bitwise_search_i64_100_000(b: &mut Bencher) { 100 | bitwise_search::(100_000, b) 101 | } 102 | #[bench] 103 | fn bitwise_search_i64_10_000_000(b: &mut Bencher) { 104 | bitwise_search::(10_000_000, b) 105 | } 106 | #[bench] 107 | fn bitwise_search_i64_1_000_000_000(b: &mut Bencher) { 108 | bitwise_search::(1_000_000_000, b) 109 | } 110 | -------------------------------------------------------------------------------- /benches/range.rs: -------------------------------------------------------------------------------- 1 | use simdify::SimdVec; 2 | 3 | pub trait GenRange: Sized { 4 | fn gen_range(size: usize) -> SimdVec; 5 | } 6 | 7 | impl GenRange for i8 { 8 | fn gen_range(size: usize) -> SimdVec { 9 | let mut i = i8::min_value(); 10 | let mut vec = SimdVec::new(); 11 | for _ in 0..size { 12 | vec.push(i); 13 | i 
+= 1; 14 | } 15 | vec 16 | } 17 | } 18 | 19 | impl GenRange for i32 { 20 | fn gen_range(size: usize) -> SimdVec { 21 | let mut i = i32::min_value(); 22 | let mut vec = SimdVec::new(); 23 | for _ in 0..size { 24 | vec.push(i); 25 | i += 1; 26 | } 27 | vec 28 | } 29 | } 30 | 31 | impl GenRange for i64 { 32 | fn gen_range(size: usize) -> SimdVec { 33 | let mut i = i64::min_value(); 34 | let mut vec = SimdVec::new(); 35 | for _ in 0..size { 36 | vec.push(i); 37 | i += 1; 38 | } 39 | vec 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /benches/simd.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use test::Bencher; 6 | 7 | use std::arch::x86_64::{__m128i, __m256i}; 8 | 9 | use rand::{rngs::SmallRng, Rng, SeedableRng}; 10 | 11 | mod range; 12 | use range::GenRange; 13 | 14 | use simdify::{DefaultZero, SimdArrayOps, SimdOps, SimdVec}; 15 | 16 | fn simdify_k_ary_search(size: usize, b: &mut Bencher) 17 | where 18 | Int: Ord + Copy + DefaultZero + GenRange + SimdOps<__m256i> + SimdOps<__m128i>, 19 | SimdVec: SimdArrayOps, 20 | { 21 | let mut gen = SmallRng::from_entropy(); 22 | let keys = Int::gen_range(size); 23 | let index = gen.gen_range(0, keys.len()); 24 | let key = keys[index]; 25 | b.iter(|| { 26 | assert_eq!(Ok(index), keys.search(key)); 27 | }) 28 | } 29 | 30 | #[bench] 31 | fn simdify_k_ary_search_i8_16(b: &mut Bencher) { 32 | simdify_k_ary_search::(16, b) 33 | } 34 | #[bench] 35 | fn simdify_k_ary_search_i8_256(b: &mut Bencher) { 36 | simdify_k_ary_search::(256, b) 37 | } 38 | 39 | #[bench] 40 | fn simdify_k_ary_search_i32_10(b: &mut Bencher) { 41 | simdify_k_ary_search::(10, b) 42 | } 43 | #[bench] 44 | fn simdify_k_ary_search_i32_1000(b: &mut Bencher) { 45 | simdify_k_ary_search::(1000, b) 46 | } 47 | #[bench] 48 | fn simdify_k_ary_search_i32_100_000(b: &mut Bencher) { 49 | simdify_k_ary_search::(100_000, b) 50 | } 51 | 
#[bench] 52 | fn simdify_k_ary_search_i32_10_000_000(b: &mut Bencher) { 53 | simdify_k_ary_search::(10_000_000, b) 54 | } 55 | #[bench] 56 | fn simdify_k_ary_search_i32_1_000_000_000(b: &mut Bencher) { 57 | simdify_k_ary_search::(1_000_000_000, b) 58 | } 59 | 60 | #[bench] 61 | fn simdify_k_ary_search_i64_10(b: &mut Bencher) { 62 | simdify_k_ary_search::(10, b) 63 | } 64 | #[bench] 65 | fn simdify_k_ary_search_i64_1000(b: &mut Bencher) { 66 | simdify_k_ary_search::(1000, b) 67 | } 68 | #[bench] 69 | fn simdify_k_ary_search_i64_100_000(b: &mut Bencher) { 70 | simdify_k_ary_search::(100_000, b) 71 | } 72 | #[bench] 73 | fn simdify_k_ary_search_i64_10_000_000(b: &mut Bencher) { 74 | simdify_k_ary_search::(10_000_000, b) 75 | } 76 | #[bench] 77 | fn simdify_k_ary_search_i64_1_000_000_000(b: &mut Bencher) { 78 | simdify_k_ary_search::(1_000_000_000, b) 79 | } 80 | -------------------------------------------------------------------------------- /benches/stdlib.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use test::Bencher; 6 | 7 | use rand::{rngs::SmallRng, Rng, SeedableRng}; 8 | 9 | mod range; 10 | use range::GenRange; 11 | 12 | use simdify::DefaultZero; 13 | 14 | fn std_binary_search(size: usize, b: &mut Bencher) 15 | where 16 | Int: Ord + Copy + DefaultZero + GenRange, 17 | { 18 | let mut gen = SmallRng::from_entropy(); 19 | let keys = Int::gen_range(size); 20 | let index = gen.gen_range(0, keys.len()); 21 | let key = keys[index]; 22 | b.iter(|| { 23 | assert_eq!(Ok(index), keys.binary_search(&key)); 24 | }) 25 | } 26 | 27 | #[bench] 28 | fn std_binary_search_i8_16(b: &mut Bencher) { 29 | std_binary_search::(16, b) 30 | } 31 | #[bench] 32 | fn std_binary_search_i8_256(b: &mut Bencher) { 33 | std_binary_search::(256, b) 34 | } 35 | 36 | #[bench] 37 | fn std_binary_search_i32_10(b: &mut Bencher) { 38 | std_binary_search::(10, b) 39 | } 40 | #[bench] 41 | fn 
std_binary_search_i32_1000(b: &mut Bencher) { 42 | std_binary_search::(1000, b) 43 | } 44 | #[bench] 45 | fn std_binary_search_i32_100_000(b: &mut Bencher) { 46 | std_binary_search::(100_000, b) 47 | } 48 | #[bench] 49 | fn std_binary_search_i32_10_000_000(b: &mut Bencher) { 50 | std_binary_search::(10_000_000, b) 51 | } 52 | #[bench] 53 | fn std_binary_search_i32_1_000_000_000(b: &mut Bencher) { 54 | std_binary_search::(1_000_000_000, b) 55 | } 56 | 57 | #[bench] 58 | fn std_binary_search_i64_10(b: &mut Bencher) { 59 | std_binary_search::(10, b) 60 | } 61 | #[bench] 62 | fn std_binary_search_i64_1000(b: &mut Bencher) { 63 | std_binary_search::(1000, b) 64 | } 65 | #[bench] 66 | fn std_binary_search_i64_100_000(b: &mut Bencher) { 67 | std_binary_search::(100_000, b) 68 | } 69 | #[bench] 70 | fn std_binary_search_i64_10_000_000(b: &mut Bencher) { 71 | std_binary_search::(10_000_000, b) 72 | } 73 | #[bench] 74 | fn std_binary_search_i64_1_000_000_000(b: &mut Bencher) { 75 | std_binary_search::(1_000_000_000, b) 76 | } 77 | -------------------------------------------------------------------------------- /proptest-regressions/lib.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 
7 | cc 76f469261bd4929339f6cd95acb95a63653a29ef57604f7e95f308ab612362e5 # shrinks to items = [-128, -127, -126, -125, -124, -123, -122, -121, -120, -119, -118, -117, -116, -115, -114, -113, -112, -111, -110, -109, -108, -107, -106, -105, -104, -103, -102, -101, -100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89, -88, -87, -86, -85, -84, -83, -82, -81, -80, -79, -78, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, -40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127], index = 194961794203727843 8 | cc db067dbb69e12672bf030c0ec1b7d9bb87495b77821e1e5fac3318539ec1345b # shrinks to items = [0], key = 1 9 | cc 31b5397ce50d0c699e4fa221fbd4f63a8be3dc016e1ccec26ea299cf6e4c7977 # shrinks to items = [0], key = 1 10 | cc b631ee023cf2b94779ca24f111e33d1ba81473e6208b04409aed861f2acf29c0 # shrinks to items = [-1], key = 0 11 | cc e725ce7d8cb2787372bb91013ab56cd6e985dd9063cf6e70d3114e94d8735655 # shrinks to items = [-127, -125, -124, -122, -121, -120, -119, -118, -117, -116, -115, -113, -112, -111, -110, -109, -108, -106, -105, -103, -102, -101, -100, -99, -98, -97, -96, -95, -93, -92, -91, -90, -88, -87, -86, -85, -84, -82, -81, -80, -77, -76, -75, -74, -73, -72, -71, -70, -69, -68, 
-67, -66, -65, -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, -40, -39, -38, -37, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -22, -21, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, 0, 1, 2, 3, 4, 5, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 77, 78, 79, 80, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 102, 103, 105, 106, 107, 109, 110, 111, 112, 113, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127], key = 121 12 | cc 035e9c5a7fb9ac82621d2b21fbb48cacdc15ae9122c9ae782c2678ffab7296dc # shrinks to items = [-84, -62, -61, -60, -59, -58, -54, -49, -48, -47, -46, -45, -43, -42, -41, -40, -39, -38, -37, -34, -31, -30, -29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], index = 4116758193655838584 13 | -------------------------------------------------------------------------------- /src/array.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64 as arch; 2 | use std::cmp::Ordering; 3 | use std::fmt::{Debug, Error, Formatter}; 4 | use std::hash::{Hash, Hasher}; 5 | use std::marker::PhantomData; 6 | use std::ops::{Deref, DerefMut}; 7 | 8 | use generic_array::{ArrayLength, GenericArray}; 9 | 10 | use crate::{DefaultZero, SimdArrayOps}; 11 | 12 | /// A fixed capacity stack allocated SIMD aligned vector. 
13 | /// 14 | /// The capacity `N` denotes the number of 32-byte chunks allocated, which means 15 | /// the maximum capacity of the vector will be `N * (32 / size_of::())` 16 | /// elements. You can use `SimdArray::max_size()` to get the capacity. 17 | pub struct SimdArray 18 | where 19 | N: ArrayLength, 20 | { 21 | phantom: PhantomData<(A, N)>, 22 | size: usize, 23 | data: GenericArray, 24 | } 25 | 26 | impl SimdArray 27 | where 28 | N: ArrayLength, 29 | A: Copy + DefaultZero, 30 | { 31 | /// Construct an empty vector. 32 | pub fn new() -> Self { 33 | SimdArray { 34 | size: 0, 35 | phantom: PhantomData, 36 | data: GenericArray::default_zero(), 37 | } 38 | } 39 | 40 | /// Push a value to the end of the vector. 41 | /// 42 | /// Returns `false` if the vector was at capacity. 43 | pub fn push(&mut self, value: A) -> bool { 44 | if self.at_capacity() { 45 | false 46 | } else { 47 | let index = self.size; 48 | self.size += 1; 49 | self[index] = value; 50 | true 51 | } 52 | } 53 | 54 | /// Pop a value off the end of the vector. 55 | /// 56 | /// Returns `None` if the vector was empty. 57 | pub fn pop(&mut self) -> Option { 58 | if self.is_empty() { 59 | None 60 | } else { 61 | let result = self[self.size - 1]; 62 | self.size -= 1; 63 | Some(result) 64 | } 65 | } 66 | } 67 | 68 | impl Default for SimdArray 69 | where 70 | N: ArrayLength, 71 | A: Copy + DefaultZero, 72 | { 73 | fn default() -> Self { 74 | Self::new() 75 | } 76 | } 77 | 78 | impl SimdArray 79 | where 80 | N: ArrayLength, 81 | { 82 | /// Get the maximum capacity of the array. 83 | pub fn max_size() -> usize { 84 | std::mem::size_of::>() / std::mem::size_of::() 85 | } 86 | 87 | fn at_capacity(&self) -> bool { 88 | self.size == Self::max_size() 89 | } 90 | } 91 | 92 | impl SimdArrayOps for SimdArray 93 | where 94 | A: Ord, 95 | N: ArrayLength, 96 | { 97 | /// Get the current number of elements in the array. 
98 | fn len(&self) -> usize { 99 | self.size 100 | } 101 | 102 | /// Test if the array is currently empty. 103 | fn is_empty(&self) -> bool { 104 | self.len() == 0 105 | } 106 | 107 | fn data_m256(&self) -> &[arch::__m256i] { 108 | &self.data 109 | } 110 | } 111 | 112 | impl Deref for SimdArray 113 | where 114 | N: ArrayLength, 115 | { 116 | type Target = [A]; 117 | 118 | fn deref(&self) -> &Self::Target { 119 | unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const _, self.size) } 120 | } 121 | } 122 | 123 | impl DerefMut for SimdArray 124 | where 125 | N: ArrayLength, 126 | { 127 | fn deref_mut(&mut self) -> &mut Self::Target { 128 | unsafe { std::slice::from_raw_parts_mut(self.data.as_mut_ptr() as *mut _, self.size) } 129 | } 130 | } 131 | 132 | impl Extend for SimdArray 133 | where 134 | A: Copy + DefaultZero, 135 | N: ArrayLength, 136 | { 137 | fn extend(&mut self, iter: I) 138 | where 139 | I: IntoIterator, 140 | { 141 | for item in iter { 142 | if !self.push(item) { 143 | panic!( 144 | "SimdArray::extend: exceeded array capacity of {}", 145 | Self::max_size() 146 | ) 147 | } 148 | } 149 | } 150 | } 151 | 152 | impl<'a, A, N> From<&'a [A]> for SimdArray 153 | where 154 | N: ArrayLength, 155 | A: Copy + DefaultZero, 156 | { 157 | fn from(slice: &'a [A]) -> Self { 158 | if slice.len() > Self::max_size() { 159 | panic!( 160 | "SimdArray::from: slice has length {} but array capacity is {}", 161 | slice.len(), 162 | Self::max_size() 163 | ) 164 | } 165 | let mut out = Self::new(); 166 | let source = &slice[..std::cmp::min(Self::max_size(), slice.len())]; 167 | out.size = source.len(); 168 | out.deref_mut().copy_from_slice(source); 169 | out 170 | } 171 | } 172 | 173 | impl Debug for SimdArray 174 | where 175 | A: Debug, 176 | N: ArrayLength, 177 | { 178 | fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { 179 | self.deref().fmt(f) 180 | } 181 | } 182 | 183 | impl Hash for SimdArray 184 | where 185 | A: Hash, 186 | N: ArrayLength, 187 | { 188 | fn 
hash(&self, hasher: &mut H) 189 | where 190 | H: Hasher, 191 | { 192 | self.deref().hash(hasher) 193 | } 194 | } 195 | 196 | impl PartialEq for SimdArray 197 | where 198 | A: PartialEq, 199 | N: ArrayLength, 200 | { 201 | fn eq(&self, other: &Self) -> bool { 202 | self.deref().eq(other.deref()) 203 | } 204 | } 205 | 206 | impl Eq for SimdArray 207 | where 208 | A: Eq, 209 | N: ArrayLength, 210 | { 211 | } 212 | 213 | impl PartialOrd for SimdArray 214 | where 215 | A: PartialOrd, 216 | N: ArrayLength, 217 | { 218 | fn partial_cmp(&self, other: &Self) -> Option { 219 | self.deref().partial_cmp(other.deref()) 220 | } 221 | } 222 | 223 | impl Ord for SimdArray 224 | where 225 | A: Ord, 226 | N: ArrayLength, 227 | { 228 | fn cmp(&self, other: &Self) -> Ordering { 229 | self.deref().cmp(other.deref()) 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /src/array_ops.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64::{self as arch, __m128i, __m256i}; 2 | use std::ops::Deref; 3 | 4 | use bitmaps::Bitmap; 5 | 6 | use crate::simd_ops::{SimdOps, SimdRegister}; 7 | 8 | /// SIMD optimised array operations. 
9 | pub trait SimdArrayOps: Deref 10 | where 11 | A: Ord, 12 | { 13 | fn len(&self) -> usize; 14 | fn is_empty(&self) -> bool; 15 | fn data_m256(&self) -> &[arch::__m256i]; 16 | 17 | fn search(&self, key: A) -> Result 18 | where 19 | A: Copy + SimdOps<__m256i> + SimdOps<__m128i>, 20 | { 21 | if is_x86_feature_detected!("avx2") { 22 | unsafe { self.k_ary_search::<__m256i>(key) } 23 | } else if is_x86_feature_detected!("sse2") { 24 | unsafe { self.k_ary_search::<__m128i>(key) } 25 | } else { 26 | self.deref().binary_search(&key) 27 | } 28 | } 29 | 30 | #[inline] 31 | unsafe fn load(&self, index: usize) -> R 32 | where 33 | R: SimdRegister, 34 | A: SimdOps, 35 | { 36 | debug_assert_eq!( 37 | 0, 38 | index & (A::ALIGNMENT - 1), 39 | "load index must be divisible by {}", 40 | A::ALIGNMENT 41 | ); 42 | debug_assert!(index < self.len(), "index out of range"); 43 | R::load(&R::from_m256i(self.data_m256())[index / A::ALIGNMENT]) 44 | } 45 | 46 | /// Fast k-ary search for a key in an array. 47 | /// 48 | /// The algorithm is described in ['k-Ary Search 49 | /// on Modern Processors,' 50 | /// 2009](https://event.cwi.nl/damon2009/DaMoN09-KarySearch.pdf). 
51 | unsafe fn k_ary_search(&self, key: A) -> Result 52 | where 53 | R: SimdRegister, 54 | A: Copy + SimdOps, 55 | { 56 | if self.is_empty() { 57 | return Err(0); 58 | } 59 | let keys = A::set(key); 60 | let mut middle = self.len() / (2 * A::ALIGNMENT); 61 | let mut pos = middle * A::ALIGNMENT; 62 | let mut low = 0; 63 | let mut high = self.len(); 64 | loop { 65 | let data = self.load(pos); 66 | let mut eq = A::cmp_eq(data, keys); 67 | let mut cmp = A::cmp_gt(data, keys); 68 | let all_greater = if pos + A::ALIGNMENT > self.len() { 69 | // At last chunk, mask away out-of-bounds bits 70 | let mask = Bitmap::mask((self.len() - pos) * A::BITS_PER_CMP); 71 | eq &= mask; 72 | cmp &= mask; 73 | mask 74 | } else { 75 | !Bitmap::new() 76 | }; 77 | if !eq.is_empty() { 78 | // We found a match 79 | return Ok(pos + eq.first_index().unwrap() / A::BITS_PER_CMP); 80 | } 81 | if cmp.is_empty() { 82 | // Everything was smaller, move up 83 | low = pos + A::ALIGNMENT; 84 | middle = std::cmp::max(middle / 2, 1); 85 | pos += middle * A::ALIGNMENT; 86 | if pos >= high { 87 | // Nowhere to move up, we found the insertion point 88 | return Err(high); 89 | } 90 | continue; 91 | } 92 | if cmp == all_greater { 93 | // Everything was greater, move down 94 | if pos <= low { 95 | // Nowhere to move down, we found the insertion point 96 | return Err(low); 97 | } 98 | high = pos; 99 | middle = std::cmp::max(middle / 2, 1); 100 | pos -= middle * A::ALIGNMENT; 101 | continue; 102 | } 103 | // We found a transition point 104 | let index = (cmp.first_index().unwrap() / A::BITS_PER_CMP) + pos; 105 | return Err(index); 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/default_zero.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64::{__m128i, __m256i}; 2 | 3 | use generic_array::{ArrayLength, GenericArray}; 4 | 5 | /// A marker trait for types whose default value is equal to 
zeroed memory. 6 | pub trait DefaultZero { 7 | fn default_zero() -> Self 8 | where 9 | Self: Sized, 10 | { 11 | unsafe { std::mem::MaybeUninit::zeroed().assume_init() } 12 | } 13 | } 14 | 15 | impl DefaultZero for i8 {} 16 | impl DefaultZero for i16 {} 17 | impl DefaultZero for i32 {} 18 | impl DefaultZero for i64 {} 19 | impl DefaultZero for i128 {} 20 | impl DefaultZero for isize {} 21 | impl DefaultZero for u8 {} 22 | impl DefaultZero for u16 {} 23 | impl DefaultZero for u32 {} 24 | impl DefaultZero for u64 {} 25 | impl DefaultZero for u128 {} 26 | impl DefaultZero for usize {} 27 | impl DefaultZero for __m128i {} 28 | impl DefaultZero for __m256i {} 29 | 30 | impl DefaultZero for *const A {} 31 | impl DefaultZero for *mut A {} 32 | 33 | impl DefaultZero for GenericArray 34 | where 35 | A: DefaultZero, 36 | N: ArrayLength, 37 | { 38 | } 39 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | mod default_zero; 2 | pub use crate::default_zero::DefaultZero; 3 | 4 | mod simd_ops; 5 | pub use crate::simd_ops::{SimdOps, SimdRegister}; 6 | 7 | mod array_ops; 8 | pub use crate::array_ops::SimdArrayOps; 9 | 10 | mod array; 11 | pub use crate::array::SimdArray; 12 | 13 | mod vec; 14 | pub use crate::vec::SimdVec; 15 | 16 | #[cfg(test)] 17 | // FIXME: Clippy-in-rls is unhappy about something in the proptest! macro, 18 | // remove the below when it stops being silly. 
19 | #[allow(clippy::unnecessary_operation)] 20 | mod test { 21 | use super::*; 22 | use proptest::collection::{btree_set, SizeRange}; 23 | use proptest::strategy::{BoxedStrategy, Strategy, ValueTree}; 24 | use proptest::{num, proptest}; 25 | use std::arch::x86_64::{__m128i, __m256i}; 26 | use std::fmt::{Debug, Display}; 27 | use typenum::U32; 28 | 29 | fn sorted_vec( 30 | element: T, 31 | size: impl Into, 32 | ) -> BoxedStrategy::Value>> 33 | where 34 | T: Strategy + 'static, 35 | ::Value: Ord, 36 | { 37 | btree_set(element, size) 38 | .prop_map(|h| { 39 | let mut v: Vec<_> = h.into_iter().collect(); 40 | v.sort(); 41 | v 42 | }) 43 | .boxed() 44 | } 45 | 46 | #[test] 47 | fn big_k_ary_search_1() { 48 | let data: &[i8] = &[ 49 | -127, -125, -124, -122, -121, -120, -119, -118, -117, -116, -115, -113, -112, -111, 50 | -110, -109, -108, -106, -105, -103, -102, -101, -100, -99, -98, -97, -96, -95, -93, 51 | -92, -91, -90, -88, -87, -86, -85, -84, -82, -81, -80, -77, -76, -75, -74, -73, -72, 52 | -71, -70, -69, -68, -67, -66, -65, -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, 53 | -54, -53, -52, -51, -50, -49, -48, -47, -46, -45, -44, -43, -42, -41, -40, -39, -38, 54 | -37, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, -22, -21, -19, -18, 55 | -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, 0, 1, 2, 3, 4, 56 | 5, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 57 | 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 54, 58 | 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 77, 78, 79, 80, 59 | 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 102, 103, 105, 106, 60 | 107, 109, 110, 111, 112, 113, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 61 | 127, 62 | ]; 63 | let vector: SimdArray = data.into(); 64 | assert_eq!(vector[data.len() - 6], 121); 65 | assert_eq!(Ok(data.len() - 6), vector.search(121)); 66 | } 67 | 
68 | #[test] 69 | fn big_k_ary_search_2() { 70 | let data: &[i8] = &[ 71 | -128, -127, -126, -125, -124, -122, -121, -120, -119, -118, -117, -116, -115, -114, 72 | -113, -112, -111, -110, -109, -107, -106, -104, -103, -102, -101, -100, -99, -98, -96, 73 | -95, -94, -93, -91, -90, -89, -88, -87, -86, -85, -84, -83, -82, -81, -80, -79, -78, 74 | -77, -76, -75, -74, -73, -72, -70, -69, -68, -67, -66, -65, -64, -63, -61, -60, -59, 75 | -58, -57, -56, -55, -54, -53, -52, -51, -50, -49, -48, -47, -46, -44, -43, -42, -41, 76 | -40, -39, -38, -37, -36, -35, -34, -33, -32, -31, -30, -29, -28, -27, -26, -25, -24, 77 | -23, -21, -20, -19, -18, -17, -16, -15, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, 78 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 79 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 80 | 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 81 | 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 91, 82 | 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 83 | 111, 113, 114, 115, 118, 120, 121, 122, 123, 124, 125, 126, 127, 84 | ]; 85 | let vector: SimdArray = data.into(); 86 | assert_eq!(vector[16], -111); 87 | assert_eq!(Ok(16), vector.search(-111)); 88 | } 89 | 90 | #[test] 91 | fn two_chunk_k_ary_search() { 92 | let data: &[i8] = &[ 93 | -114, -111, -101, -81, -61, -56, -40, -35, -32, -31, -30, -29, -28, -27, -26, -22, -21, 94 | -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -9, -8, -7, -6, -5, -4, -3, -2, -1, 95 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 32, 35, 96 | 43, 44, 56, 57, 97 | ]; 98 | let vector: SimdArray = data.into(); 99 | assert_eq!(vector[32], -4); 100 | assert_eq!(Ok(32), vector.search(-4)); 101 | } 102 | 103 | #[test] 104 | fn one_item_k_ary_search() { 105 | let data: &[i8] = &[-1]; 106 | let vector: SimdArray 
= data.into(); 107 | assert_eq!(Err(1), vector.search(0)); 108 | } 109 | 110 | fn simdify_k_ary_search_present_128(items: Vec, index: usize) 111 | where 112 | A: Ord + Copy + DefaultZero + Debug + SimdOps<__m128i>, 113 | SimdArray: SimdArrayOps, 114 | { 115 | if !is_x86_feature_detected!("sse2") { 116 | return; 117 | } 118 | 119 | let index = index % items.len(); 120 | let item = items[index]; 121 | let vector: SimdVec = items.as_slice().into(); 122 | assert_eq!(Ok(index), unsafe { vector.k_ary_search::<__m128i>(item) }); 123 | } 124 | 125 | fn simdify_k_ary_search_any_128(items: Vec, key: A) 126 | where 127 | A: Ord + Copy + DefaultZero + Debug + Display + SimdOps<__m128i>, 128 | SimdArray: SimdArrayOps, 129 | { 130 | if !is_x86_feature_detected!("sse2") { 131 | return; 132 | } 133 | 134 | let vector: SimdVec = items.as_slice().into(); 135 | match unsafe { vector.k_ary_search::<__m128i>(key) } { 136 | Ok(index) => assert_eq!(items[index], key), 137 | Err(index) => { 138 | if index < items.len() { 139 | assert!( 140 | items[index] > key, 141 | "insert index value {} should be higher than search key {}", 142 | items[index], 143 | key 144 | ); 145 | } 146 | if index > 0 { 147 | assert!( 148 | items[index - 1] < key, 149 | "pre-insert index value {} should be lower than search key {}", 150 | items[index - 1], 151 | key 152 | ); 153 | } 154 | } 155 | } 156 | } 157 | 158 | fn simdify_k_ary_search_present_256(items: Vec, index: usize) 159 | where 160 | A: Ord + Copy + DefaultZero + Debug + SimdOps<__m256i>, 161 | SimdArray: SimdArrayOps, 162 | { 163 | if !is_x86_feature_detected!("avx2") { 164 | return; 165 | } 166 | 167 | let index = index % items.len(); 168 | let item = items[index]; 169 | let vector: SimdVec = items.as_slice().into(); 170 | assert_eq!(Ok(index), unsafe { vector.k_ary_search::<__m256i>(item) }); 171 | } 172 | 173 | fn simdify_k_ary_search_any_256(items: Vec, key: A) 174 | where 175 | A: Ord + Copy + DefaultZero + Debug + Display + SimdOps<__m256i>, 176 
| SimdArray: SimdArrayOps, 177 | { 178 | if !is_x86_feature_detected!("avx2") { 179 | return; 180 | } 181 | 182 | let vector: SimdVec = items.as_slice().into(); 183 | match unsafe { vector.k_ary_search::<__m256i>(key) } { 184 | Ok(index) => assert_eq!(items[index], key), 185 | Err(index) => { 186 | if index < items.len() { 187 | assert!( 188 | items[index] > key, 189 | "insert index value {} should be higher than search key {}", 190 | items[index], 191 | key 192 | ); 193 | } 194 | if index > 0 { 195 | assert!( 196 | items[index - 1] < key, 197 | "pre-insert index value {} should be lower than search key {}", 198 | items[index - 1], 199 | key 200 | ); 201 | } 202 | } 203 | } 204 | } 205 | 206 | proptest! { 207 | #[test] 208 | fn plain_binary_search_present(items in sorted_vec(num::i8::ANY, 1..1024), index in num::usize::ANY) { 209 | let index = index % items.len(); 210 | let item = items[index]; 211 | assert_eq!(Ok(index), items.binary_search(&item)) 212 | } 213 | 214 | #[test] 215 | fn plain_binary_search_any(items in sorted_vec(num::i8::ANY, 1..1024), key in num::i8::ANY) { 216 | match items.binary_search(&key) { 217 | Ok(index) => assert_eq!(items[index], key), 218 | Err(index) => { 219 | if index < items.len() { 220 | assert!(items[index] > key, "insert index value {} should be higher than search key {}", items[index], key); 221 | } 222 | if index > 0 { 223 | assert!(items[index-1] < key, "pre-insert index value {} should be lower than search key {}", items[index-1], key); 224 | } 225 | } 226 | } 227 | } 228 | 229 | #[test] 230 | fn simdify_k_ary_search_present_i8_128(items in sorted_vec(num::i8::ANY, 1..1024), index in num::usize::ANY) { 231 | simdify_k_ary_search_present_128(items, index) 232 | } 233 | 234 | #[test] 235 | fn simdify_k_ary_search_any_i8_128(items in sorted_vec(num::i8::ANY, 1..1024), key in num::i8::ANY) { 236 | simdify_k_ary_search_any_128(items,key) 237 | } 238 | 239 | #[test] 240 | fn simdify_k_ary_search_present_i16_128(items in 
sorted_vec(num::i16::ANY, 1..512), index in num::usize::ANY) { 241 | simdify_k_ary_search_present_128(items, index) 242 | } 243 | 244 | #[test] 245 | fn simdify_k_ary_search_any_i16_128(items in sorted_vec(num::i16::ANY, 1..512), key in num::i16::ANY) { 246 | simdify_k_ary_search_any_128(items,key) 247 | } 248 | 249 | #[test] 250 | fn simdify_k_ary_search_present_i32_128(items in sorted_vec(num::i32::ANY, 1..256), index in num::usize::ANY) { 251 | simdify_k_ary_search_present_128(items, index) 252 | } 253 | 254 | #[test] 255 | fn simdify_k_ary_search_any_i32_128(items in sorted_vec(num::i32::ANY, 1..256), key in num::i32::ANY) { 256 | simdify_k_ary_search_any_128(items,key) 257 | } 258 | 259 | #[test] 260 | fn simdify_k_ary_search_present_i64_128(items in sorted_vec(num::i64::ANY, 1..128), index in num::usize::ANY) { 261 | simdify_k_ary_search_present_128(items, index) 262 | } 263 | 264 | #[test] 265 | fn simdify_k_ary_search_any_i64_128(items in sorted_vec(num::i64::ANY, 1..128), key in num::i64::ANY) { 266 | simdify_k_ary_search_any_128(items,key) 267 | } 268 | 269 | #[test] 270 | fn simdify_k_ary_search_present_i8_256(items in sorted_vec(num::i8::ANY, 1..1024), index in num::usize::ANY) { 271 | simdify_k_ary_search_present_256(items, index) 272 | } 273 | 274 | #[test] 275 | fn simdify_k_ary_search_any_i8_256(items in sorted_vec(num::i8::ANY, 1..1024), key in num::i8::ANY) { 276 | simdify_k_ary_search_any_256(items,key) 277 | } 278 | 279 | #[test] 280 | fn simdify_k_ary_search_present_i16_256(items in sorted_vec(num::i16::ANY, 1..512), index in num::usize::ANY) { 281 | simdify_k_ary_search_present_256(items, index) 282 | } 283 | 284 | #[test] 285 | fn simdify_k_ary_search_any_i16_256(items in sorted_vec(num::i16::ANY, 1..512), key in num::i16::ANY) { 286 | simdify_k_ary_search_any_256(items,key) 287 | } 288 | 289 | #[test] 290 | fn simdify_k_ary_search_present_i32_256(items in sorted_vec(num::i32::ANY, 1..256), index in num::usize::ANY) { 291 | 
simdify_k_ary_search_present_256(items, index) 292 | } 293 | 294 | #[test] 295 | fn simdify_k_ary_search_any_i32_256(items in sorted_vec(num::i32::ANY, 1..256), key in num::i32::ANY) { 296 | simdify_k_ary_search_any_256(items,key) 297 | } 298 | 299 | #[test] 300 | fn simdify_k_ary_search_present_i64_256(items in sorted_vec(num::i64::ANY, 1..128), index in num::usize::ANY) { 301 | simdify_k_ary_search_present_256(items, index) 302 | } 303 | 304 | #[test] 305 | fn simdify_k_ary_search_any_i64_256(items in sorted_vec(num::i64::ANY, 1..128), key in num::i64::ANY) { 306 | simdify_k_ary_search_any_256(items,key) 307 | } 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /src/simd_ops.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64::{self as arch, __m128i, __m256i}; 2 | use std::mem::size_of; 3 | 4 | use bitmaps::{Bitmap, Bits}; 5 | use typenum::{U16, U32}; 6 | 7 | /// Marker trait for SIMD registers. 8 | pub trait SimdRegister: Copy + Sized { 9 | type MovemaskSize: Bits; 10 | 11 | #[inline] 12 | unsafe fn from_m256i(slice256: &[__m256i]) -> &[Self] { 13 | let len = slice256.len() * (size_of::<__m256i>() / size_of::()); 14 | std::slice::from_raw_parts(slice256.as_ptr() as *const Self, len) 15 | } 16 | unsafe fn load(&self) -> Self; 17 | } 18 | 19 | impl SimdRegister for __m128i { 20 | type MovemaskSize = U16; 21 | 22 | #[inline] 23 | #[target_feature(enable = "sse2")] 24 | unsafe fn load(&self) -> Self { 25 | arch::_mm_load_si128(self) 26 | } 27 | } 28 | 29 | impl SimdRegister for __m256i { 30 | type MovemaskSize = U32; 31 | 32 | #[inline] 33 | unsafe fn from_m256i(slice: &[__m256i]) -> &[Self] { 34 | slice 35 | } 36 | 37 | #[inline] 38 | #[target_feature(enable = "avx2")] 39 | unsafe fn load(&self) -> Self { 40 | arch::_mm256_load_si256(self) 41 | } 42 | } 43 | 44 | /// Operations on datatypes stored in SIMD registers. 
45 | pub trait SimdOps: Sized { 46 | const ALIGNMENT: usize = size_of::() / size_of::(); 47 | const BITS_PER_CMP: usize = size_of::(); 48 | 49 | unsafe fn set(value: Self) -> R; 50 | unsafe fn cmp_eq(left: R, right: R) -> Bitmap; 51 | unsafe fn cmp_gt(left: R, right: R) -> Bitmap; 52 | } 53 | 54 | impl SimdOps<__m128i> for i8 { 55 | #[inline] 56 | #[target_feature(enable = "sse2")] 57 | unsafe fn set(value: i8) -> __m128i { 58 | arch::_mm_set1_epi8(value) 59 | } 60 | 61 | #[inline] 62 | #[target_feature(enable = "sse2")] 63 | unsafe fn cmp_eq(left: __m128i, right: __m128i) -> Bitmap { 64 | Bitmap::from_value(arch::_mm_movemask_epi8(arch::_mm_cmpeq_epi8(left, right)) as u16) 65 | } 66 | 67 | #[inline] 68 | #[target_feature(enable = "sse2")] 69 | unsafe fn cmp_gt(left: __m128i, right: __m128i) -> Bitmap { 70 | Bitmap::from_value(arch::_mm_movemask_epi8(arch::_mm_cmpgt_epi8(left, right)) as u16) 71 | } 72 | } 73 | 74 | impl SimdOps<__m256i> for i8 { 75 | #[inline] 76 | #[target_feature(enable = "avx2")] 77 | unsafe fn set(value: i8) -> __m256i { 78 | arch::_mm256_set1_epi8(value) 79 | } 80 | 81 | #[inline] 82 | #[target_feature(enable = "avx2")] 83 | unsafe fn cmp_eq(left: __m256i, right: __m256i) -> Bitmap { 84 | Bitmap::from_value(arch::_mm256_movemask_epi8(arch::_mm256_cmpeq_epi8(left, right)) as u32) 85 | } 86 | 87 | #[inline] 88 | #[target_feature(enable = "avx2")] 89 | unsafe fn cmp_gt(left: __m256i, right: __m256i) -> Bitmap { 90 | Bitmap::from_value(arch::_mm256_movemask_epi8(arch::_mm256_cmpgt_epi8(left, right)) as u32) 91 | } 92 | } 93 | 94 | impl SimdOps<__m128i> for i16 { 95 | #[inline] 96 | #[target_feature(enable = "sse2")] 97 | unsafe fn set(value: i16) -> __m128i { 98 | arch::_mm_set1_epi16(value) 99 | } 100 | 101 | #[inline] 102 | #[target_feature(enable = "sse2")] 103 | unsafe fn cmp_eq(left: __m128i, right: __m128i) -> Bitmap { 104 | Bitmap::from_value(arch::_mm_movemask_epi8(arch::_mm_cmpeq_epi16(left, right)) as u16) 105 | } 106 | 107 | #[inline] 
108 | #[target_feature(enable = "sse2")] 109 | unsafe fn cmp_gt(left: __m128i, right: __m128i) -> Bitmap { 110 | Bitmap::from_value(arch::_mm_movemask_epi8(arch::_mm_cmpgt_epi16(left, right)) as u16) 111 | } 112 | } 113 | 114 | impl SimdOps<__m256i> for i16 { 115 | #[inline] 116 | #[target_feature(enable = "avx2")] 117 | unsafe fn set(value: i16) -> __m256i { 118 | arch::_mm256_set1_epi16(value) 119 | } 120 | 121 | #[inline] 122 | #[target_feature(enable = "avx2")] 123 | unsafe fn cmp_eq(left: __m256i, right: __m256i) -> Bitmap { 124 | Bitmap::from_value(arch::_mm256_movemask_epi8(arch::_mm256_cmpeq_epi16(left, right)) as u32) 125 | } 126 | 127 | #[inline] 128 | #[target_feature(enable = "avx2")] 129 | unsafe fn cmp_gt(left: __m256i, right: __m256i) -> Bitmap { 130 | Bitmap::from_value(arch::_mm256_movemask_epi8(arch::_mm256_cmpgt_epi16(left, right)) as u32) 131 | } 132 | } 133 | 134 | impl SimdOps<__m128i> for i32 { 135 | #[inline] 136 | #[target_feature(enable = "sse2")] 137 | unsafe fn set(value: i32) -> __m128i { 138 | arch::_mm_set1_epi32(value) 139 | } 140 | 141 | #[inline] 142 | #[target_feature(enable = "sse2")] 143 | unsafe fn cmp_eq(left: __m128i, right: __m128i) -> Bitmap { 144 | Bitmap::from_value(arch::_mm_movemask_epi8(arch::_mm_cmpeq_epi32(left, right)) as u16) 145 | } 146 | 147 | #[inline] 148 | #[target_feature(enable = "sse2")] 149 | unsafe fn cmp_gt(left: __m128i, right: __m128i) -> Bitmap { 150 | Bitmap::from_value(arch::_mm_movemask_epi8(arch::_mm_cmpgt_epi32(left, right)) as u16) 151 | } 152 | } 153 | 154 | impl SimdOps<__m256i> for i32 { 155 | #[inline] 156 | #[target_feature(enable = "avx2")] 157 | unsafe fn set(value: i32) -> __m256i { 158 | arch::_mm256_set1_epi32(value) 159 | } 160 | 161 | #[inline] 162 | #[target_feature(enable = "avx2")] 163 | unsafe fn cmp_eq(left: __m256i, right: __m256i) -> Bitmap { 164 | Bitmap::from_value(arch::_mm256_movemask_epi8(arch::_mm256_cmpeq_epi32(left, right)) as u32) 165 | } 166 | 167 | #[inline] 168 | 
#[target_feature(enable = "avx2")] 169 | unsafe fn cmp_gt(left: __m256i, right: __m256i) -> Bitmap { 170 | Bitmap::from_value(arch::_mm256_movemask_epi8(arch::_mm256_cmpgt_epi32(left, right)) as u32) 171 | } 172 | } 173 |

/// Lane-wise equality of the two 64-bit lanes, using SSE2 instructions only.
///
/// `_mm_cmpeq_epi64` requires SSE4.1, but the `__m128i` code path is compiled
/// with `target_feature(enable = "sse2")` and is selected by
/// `SimdArrayOps::search` after detecting nothing more than `"sse2"`, so it
/// must not execute anything newer than SSE2.
///
/// Returns all-ones in every 64-bit lane where `left == right`, zero elsewhere.
///
/// # Safety
///
/// The CPU must support SSE2.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn cmpeq_epi64_sse2(left: __m128i, right: __m128i) -> __m128i {
    let eq32 = arch::_mm_cmpeq_epi32(left, right);
    // A 64-bit lane matches only if both of its 32-bit halves match:
    // 0b10_11_00_01 swaps the halves inside each lane, then AND combines them.
    arch::_mm_and_si128(eq32, arch::_mm_shuffle_epi32(eq32, 0b10_11_00_01))
}

/// Lane-wise signed `left > right` on the two 64-bit lanes, using SSE2 only.
///
/// `_mm_cmpgt_epi64` requires SSE4.2; see [`cmpeq_epi64_sse2`] for why that is
/// not acceptable on this code path.
///
/// Returns all-ones in every 64-bit lane where `left > right`, zero elsewhere.
///
/// # Safety
///
/// The CPU must support SSE2.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn cmpgt_epi64_sse2(left: __m128i, right: __m128i) -> __m128i {
    let gt32 = arch::_mm_cmpgt_epi32(left, right);
    let eq32 = arch::_mm_cmpeq_epi32(left, right);
    // When the high halves are equal, the high half of `right - left` is
    // all-ones exactly when the subtraction borrowed, i.e. when the low half
    // of `left` is (unsigned) greater than the low half of `right`.
    let borrow = arch::_mm_sub_epi64(right, left);
    // High dword of each lane now holds the verdict: the signed comparison of
    // the high halves if they differ, otherwise the borrow mask.
    let verdict = arch::_mm_or_si128(gt32, arch::_mm_and_si128(eq32, borrow));
    // 0b11_11_01_01 copies each lane's high dword over its low dword so the
    // whole 64-bit lane is a uniform mask.
    arch::_mm_shuffle_epi32(verdict, 0b11_11_01_01)
}

// NOTE(review): the `Bitmap<U16>` / `Bitmap<U32>` type arguments and the
// `size_of::<isize>()` turbofish below are reconstructed (they were missing).
// They follow from `SimdRegister::MovemaskSize` (`U16` for `__m128i`, `U32`
// for `__m256i`) and the `as u16` / `as u32` casts — confirm against the trait.

/// `i64` elements in a 128-bit register: two lanes, eight movemask bits each.
impl SimdOps<__m128i> for i64 {
    /// Broadcast `value` into both 64-bit lanes.
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn set(value: i64) -> __m128i {
        arch::_mm_set1_epi64x(value)
    }

    /// Byte mask of the lanes where `left == right`.
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn cmp_eq(left: __m128i, right: __m128i) -> Bitmap<U16> {
        Bitmap::from_value(arch::_mm_movemask_epi8(cmpeq_epi64_sse2(left, right)) as u16)
    }

    /// Byte mask of the lanes where `left > right` (signed).
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn cmp_gt(left: __m128i, right: __m128i) -> Bitmap<U16> {
        Bitmap::from_value(arch::_mm_movemask_epi8(cmpgt_epi64_sse2(left, right)) as u16)
    }
}

/// `i64` elements in a 256-bit register: four lanes, eight movemask bits each.
impl SimdOps<__m256i> for i64 {
    /// Broadcast `value` into all four 64-bit lanes.
    #[inline]
    #[target_feature(enable = "avx2")]
    unsafe fn set(value: i64) -> __m256i {
        arch::_mm256_set1_epi64x(value)
    }

    /// Byte mask of the lanes where `left == right`.
    #[inline]
    #[target_feature(enable = "avx2")]
    unsafe fn cmp_eq(left: __m256i, right: __m256i) -> Bitmap<U32> {
        let lanes = arch::_mm256_cmpeq_epi64(left, right);
        Bitmap::from_value(arch::_mm256_movemask_epi8(lanes) as u32)
    }

    /// Byte mask of the lanes where `left > right` (signed).
    #[inline]
    #[target_feature(enable = "avx2")]
    unsafe fn cmp_gt(left: __m256i, right: __m256i) -> Bitmap<U32> {
        let lanes = arch::_mm256_cmpgt_epi64(left, right);
        Bitmap::from_value(arch::_mm256_movemask_epi8(lanes) as u32)
    }
}

/// `isize` elements in a 128-bit register.
///
/// Dispatches on the width of `isize`: 8 bytes is handled like `i64`, 4 bytes
/// like `i32`; any other width panics.
impl SimdOps<__m128i> for isize {
    /// Broadcast `value` into every lane.
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn set(value: isize) -> __m128i {
        match std::mem::size_of::<isize>() {
            8 => arch::_mm_set1_epi64x(value as i64),
            4 => arch::_mm_set1_epi32(value as i32),
            other => panic!("did not expect isize to be {} bytes long", other),
        }
    }

    /// Byte mask of the lanes where `left == right`.
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn cmp_eq(left: __m128i, right: __m128i) -> Bitmap<U16> {
        let lanes = match std::mem::size_of::<isize>() {
            // SSE2-only replacement for `_mm_cmpeq_epi64` (SSE4.1).
            8 => cmpeq_epi64_sse2(left, right),
            4 => arch::_mm_cmpeq_epi32(left, right),
            other => panic!("did not expect isize to be {} bytes long", other),
        };
        Bitmap::from_value(arch::_mm_movemask_epi8(lanes) as u16)
    }

    /// Byte mask of the lanes where `left > right` (signed).
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn cmp_gt(left: __m128i, right: __m128i) -> Bitmap<U16> {
        let lanes = match std::mem::size_of::<isize>() {
            // SSE2-only replacement for `_mm_cmpgt_epi64` (SSE4.2).
            8 => cmpgt_epi64_sse2(left, right),
            4 => arch::_mm_cmpgt_epi32(left, right),
            other => panic!("did not expect isize to be {} bytes long", other),
        };
        Bitmap::from_value(arch::_mm_movemask_epi8(lanes) as u16)
    }
}

261 | impl SimdOps<__m256i> for isize { 262 | #[inline] 263 | #[target_feature(enable = "avx2")] 264 | unsafe fn set(value: isize) -> __m256i { 265 | if std::mem::size_of::() == 8 { 266 | arch::_mm256_set1_epi64x(value as i64) 267 | } else if std::mem::size_of::() == 4 { 268 | arch::_mm256_set1_epi32(value as i32) 269 | } else { 270 | panic!( 271 | "did not expect isize to be {} bytes long", 272 | std::mem::size_of::() 273 | ) 274 | } 275 | } 276 | 277 | #[inline] 278 | #[target_feature(enable = "avx2")] 279 | unsafe fn cmp_eq(left: __m256i, right: __m256i) -> Bitmap { 280 | if std::mem::size_of::() == 8 { 281 | Bitmap::from_value( 282 | arch::_mm256_movemask_epi8(arch::_mm256_cmpeq_epi64(left, right)) as u32, 283 | ) 284 | } else if std::mem::size_of::() == 4 { 285 | Bitmap::from_value( 286 | arch::_mm256_movemask_epi8(arch::_mm256_cmpeq_epi32(left, right)) as u32, 287 | ) 288 | } else { 289 | panic!( 290 | "did not
expect isize to be {} bytes long", 291 | std::mem::size_of::() 292 | ) 293 | } 294 | } 295 | 296 | #[inline] 297 | #[target_feature(enable = "avx2")] 298 | unsafe fn cmp_gt(left: __m256i, right: __m256i) -> Bitmap { 299 | if std::mem::size_of::() == 8 { 300 | Bitmap::from_value( 301 | arch::_mm256_movemask_epi8(arch::_mm256_cmpgt_epi64(left, right)) as u32, 302 | ) 303 | } else if std::mem::size_of::() == 4 { 304 | Bitmap::from_value( 305 | arch::_mm256_movemask_epi8(arch::_mm256_cmpgt_epi32(left, right)) as u32, 306 | ) 307 | } else { 308 | panic!( 309 | "did not expect isize to be {} bytes long", 310 | std::mem::size_of::() 311 | ) 312 | } 313 | } 314 | } 315 | -------------------------------------------------------------------------------- /src/vec.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64 as arch; 2 | use std::cmp::Ordering; 3 | use std::fmt::{Debug, Error, Formatter}; 4 | use std::hash::{Hash, Hasher}; 5 | use std::marker::PhantomData; 6 | use std::ops::{Deref, DerefMut}; 7 | 8 | use crate::{DefaultZero, SimdArrayOps}; 9 | 10 | /// A heap allocated SIMD aligned vector. 11 | pub struct SimdVec { 12 | phantom: PhantomData, 13 | size: usize, 14 | vec: Vec, 15 | } 16 | 17 | impl SimdVec 18 | where 19 | A: Copy + DefaultZero, 20 | { 21 | /// Construct an empty vector. 22 | pub fn new() -> Self { 23 | SimdVec { 24 | phantom: PhantomData, 25 | size: 0, 26 | vec: Vec::new(), 27 | } 28 | } 29 | 30 | /// Construct an empty vector with a given capacity. 
31 | pub fn with_capacity(capacity: usize) -> Self { 32 | SimdVec { 33 | phantom: PhantomData, 34 | size: 0, 35 | vec: Vec::with_capacity((capacity + (Self::block_size() - 1)) / Self::block_size()), 36 | } 37 | } 38 | 39 | fn max_size(&self) -> usize { 40 | self.vec.len() * Self::block_size() 41 | } 42 | 43 | fn block_size() -> usize { 44 | std::mem::size_of::() / std::mem::size_of::() 45 | } 46 | 47 | fn at_capacity(&self) -> bool { 48 | self.size == self.max_size() 49 | } 50 | 51 | fn add_block(&mut self) { 52 | self.vec.push(arch::__m256i::default_zero()) 53 | } 54 | 55 | fn trim_excess(&mut self) { 56 | while self.len() + Self::block_size() <= self.max_size() { 57 | self.vec.pop(); 58 | } 59 | } 60 | 61 | /// Push a value to the end of the vector. 62 | pub fn push(&mut self, value: A) { 63 | if self.at_capacity() { 64 | self.add_block() 65 | }; 66 | let index = self.size; 67 | self.size += 1; 68 | self[index] = value; 69 | } 70 | 71 | /// Pop a value off the end of the vector. 72 | /// 73 | /// Returns `None` if the vector was empty. 74 | pub fn pop(&mut self) -> Option { 75 | if self.is_empty() { 76 | return None; 77 | } 78 | let result = self[self.size - 1]; 79 | self.size -= 1; 80 | self.trim_excess(); 81 | Some(result) 82 | } 83 | } 84 | 85 | impl SimdArrayOps for SimdVec 86 | where 87 | A: Ord, 88 | { 89 | /// Get the current number of elements in the vector. 90 | fn len(&self) -> usize { 91 | self.size 92 | } 93 | 94 | /// Test if the vector is currently empty. 
95 | fn is_empty(&self) -> bool { 96 | self.len() == 0 97 | } 98 | 99 | fn data_m256(&self) -> &[arch::__m256i] { 100 | self.vec.as_slice() 101 | } 102 | } 103 | 104 | impl Default for SimdVec 105 | where 106 | A: Copy + DefaultZero, 107 | { 108 | fn default() -> Self { 109 | Self::new() 110 | } 111 | } 112 | 113 | impl Deref for SimdVec { 114 | type Target = [A]; 115 | 116 | fn deref(&self) -> &Self::Target { 117 | unsafe { std::slice::from_raw_parts(self.vec.as_ptr() as *const _, self.size) } 118 | } 119 | } 120 | 121 | impl DerefMut for SimdVec { 122 | fn deref_mut(&mut self) -> &mut Self::Target { 123 | unsafe { std::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut _, self.size) } 124 | } 125 | } 126 | 127 | impl Extend for SimdVec 128 | where 129 | A: Copy + DefaultZero, 130 | { 131 | fn extend(&mut self, iter: I) 132 | where 133 | I: IntoIterator, 134 | { 135 | for item in iter { 136 | self.push(item); 137 | } 138 | } 139 | } 140 | 141 | impl<'a, A> From<&'a [A]> for SimdVec 142 | where 143 | A: Copy + DefaultZero, 144 | { 145 | fn from(slice: &'a [A]) -> Self { 146 | let mut out = Self::with_capacity(slice.len()); 147 | out.extend(slice.iter().copied()); 148 | out 149 | } 150 | } 151 | 152 | impl Debug for SimdVec 153 | where 154 | A: Debug, 155 | { 156 | fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { 157 | self.deref().fmt(f) 158 | } 159 | } 160 | 161 | impl Hash for SimdVec 162 | where 163 | A: Hash, 164 | { 165 | fn hash(&self, hasher: &mut H) 166 | where 167 | H: Hasher, 168 | { 169 | self.deref().hash(hasher) 170 | } 171 | } 172 | 173 | impl PartialEq for SimdVec 174 | where 175 | A: PartialEq, 176 | { 177 | fn eq(&self, other: &Self) -> bool { 178 | self.deref().eq(other.deref()) 179 | } 180 | } 181 | 182 | impl Eq for SimdVec where A: Eq {} 183 | 184 | impl PartialOrd for SimdVec 185 | where 186 | A: PartialOrd, 187 | { 188 | fn partial_cmp(&self, other: &Self) -> Option { 189 | self.deref().partial_cmp(other.deref()) 190 | } 191 | } 
192 | 193 | impl Ord for SimdVec 194 | where 195 | A: Ord, 196 | { 197 | fn cmp(&self, other: &Self) -> Ordering { 198 | self.deref().cmp(other.deref()) 199 | } 200 | } 201 | --------------------------------------------------------------------------------