├── .github ├── LICENSE └── README.md ├── swar └── swar.cppm /.github/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2025 Kris Jusiak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/README.md: -------------------------------------------------------------------------------- 1 | ../swar -------------------------------------------------------------------------------- /swar: -------------------------------------------------------------------------------- 1 | // 26 | [Overview](#Overview) / [Examples](#Examples) / [API](#API) / [FAQ](#FAQ) / [Resources](#Resources) 27 | 28 | ## `SWAR`: [SIMD](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) Within A Register library 29 | 30 | [![MIT Licence](http://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/license/mit) 31 | [![Version](https://img.shields.io/github/v/release/qlibs/swar)](https://github.com/qlibs/swar/releases) 32 | [![Build](https://img.shields.io/badge/build-green.svg)](https://godbolt.org/z/xob1nGYoP) 33 | [![Try it online](https://img.shields.io/badge/try%20it-online-blue.svg)](https://godbolt.org/z/55K55hqWb) 34 | 35 | > https://en.wikipedia.org/wiki/SWAR 36 | 37 | ### Use cases 38 | 39 | - Performance (branchless) 40 | - Portable (uses 'normal' registers) 41 | 42 | ### Features 43 | 44 | - Single header (https://raw.githubusercontent.com/qlibs/swar/main/swar) / C++20 module (https://raw.githubusercontent.com/qlibs/swar/main/swar.cppm) 45 | - Minimal [API](#api) 46 | - Verifies itself upon include (can be disabled with `-DNTEST` - see [FAQ](#faq)) 47 | 48 | ### Requirements 49 | 50 | - C++20 ([clang++13+, g++12](https://en.cppreference.com/w/cpp/compiler_support)) 51 | 52 | ### Overview 53 | 54 | > `API` (https://godbolt.org/z/b4v9aTEYs) 55 | 56 | ```cpp 57 | constexpr u8 data[]{1, 2, 3, 5, 5, 6, 7, 8}; 58 | constexpr swar lhs{data}; // copy_from 59 | constexpr swar rhs{5}; // broadcast (native: u64) 60 | 61 | static_assert(8u == lhs.size()); 62 | static_assert(sizeof(u64) == sizeof(lhs)); 63 | 64 | constexpr auto match = lhs == rhs; 65 | 66 | 
static_assert(any_of(match)); 67 | static_assert(some_of(match)); 68 | static_assert(not all_of(match)); 69 | static_assert(not none_of(match)); 70 | 71 | static_assert(3u == find_first_set(match)); 72 | static_assert(4u == find_last_set(match)); 73 | static_assert(2u == popcount(match)); 74 | static_assert(match[3u] and match[4u]); 75 | 76 | static_assert(sizeof(u32) == sizeof(swar)); 77 | static_assert(sizeof(u64) == sizeof(swar)); 78 | static_assert(sizeof(u32) == sizeof(swar)); 79 | static_assert(sizeof(u64) == sizeof(swar)); 80 | static_assert(sizeof(u64) == sizeof(swar)); 81 | static_assert(sizeof(u128) == sizeof(swar)); 82 | 83 | // and more (see API)... 84 | ``` 85 | 86 | > Performance (https://godbolt.org/z/ManGb8aso) 87 | 88 | ```cpp 89 | auto eq(swar lhs, swar rhs) { 90 | return lhs == rhs; 91 | } 92 | ``` 93 | 94 | ```cpp 95 | eq: // $CXX -O3 -mno-sse -mno-sse2 -mno-sse3 -mno-avx 96 | movabs rdx, -9187201950435737472 97 | xor rdi, rsi 98 | movabs rax, 72340172838076672 99 | or rdi, rdx 100 | sub rax, rdi 101 | and rax, rdx 102 | ret 103 | ``` 104 | 105 | ```cpp 106 | auto contains(swar lhs, u8 value) { 107 | const auto rhs = swar{value}; 108 | const auto match = lhs == rhs; 109 | return any_of(match); 110 | } 111 | ``` 112 | 113 | ```cpp 114 | contains: // $CXX -O3 -mno-sse -mno-sse2 -mno-sse3 -mno-avx 115 | movabs rax, 72340172838076673 116 | movzx esi, sil 117 | movabs rdx, -9187201950435737472 118 | imul rsi, rax 119 | sub rax, 1 120 | xor rdi, rsi 121 | or rdi, rdx 122 | sub rax, rdi 123 | test rax, rdx 124 | setne al 125 | ret 126 | ``` 127 | 128 | ```cpp 129 | auto find(swar lhs, u8 value) { 130 | const auto rhs = swar{value}; 131 | const auto match = lhs == rhs; 132 | return any_of(match) * find_first_set(match); 133 | } 134 | ``` 135 | 136 | ```cpp 137 | find: // $CXX -O3 -mno-sse -mno-sse2 -mno-sse3 -mno-avx 138 | movabs rax, 72340172838076673 139 | movzx esi, sil 140 | movabs rdx, 72340172838076672 141 | imul rsi, rax 142 | movabs rax, 
-9187201950435737472 143 | xor rdi, rsi 144 | or rdi, rax 145 | sub rdx, rdi 146 | and rdx, rax 147 | xor eax, eax 148 | rep bsf rax, rdx 149 | test rdx, rdx 150 | mov edx, 0 151 | cmove rax, rdx 152 | ret 153 | ``` 154 | 155 | ### Examples 156 | 157 | > swar vs simd (https://godbolt.org/z/YsG8evqr8) 158 | 159 | ```cpp 160 | template auto eq(T lhs, T rhs) { return lhs == rhs; } 161 | ``` 162 | 163 | ```cpp 164 | eq(swar, swar): // $CXX -O3 -mno-sse -mno-sse2 -mno-sse3 -mno-avx 165 | movabs rdx, -9187201950435737472 166 | xor rdi, rsi 167 | movabs rax, 72340172838076672 168 | or rdi, rdx 169 | sub rax, rdi 170 | and rax, rdx 171 | ret 172 | 173 | eq(simd, simd): // $CXX -O3 -mavx512f 174 | vpcmpeqb xmm0, xmm0, xmm1 175 | ret 176 | ``` 177 | 178 | ```cpp 179 | template auto contains(T lhs, auto value) { 180 | const auto rhs = T{value}; 181 | const auto match = lhs == rhs; 182 | return any_of(match); 183 | } 184 | ``` 185 | 186 | ```cpp 187 | contains(swar, swar): // $CXX -O3 -mno-sse -mno-sse2 -mno-sse3 -mno-avx 188 | movabs rax, 72340172838076673 189 | movzx esi, sil 190 | movabs rdx, -9187201950435737472 191 | imul rsi, rax 192 | sub rax, 1 193 | xor rdi, rsi 194 | or rdi, rdx 195 | sub rax, rdi 196 | test rax, rdx 197 | setne al 198 | ret 199 | 200 | contains(simd, simd): // $CXX -O3 -mavx512f 201 | vmovd xmm1, edi 202 | vpbroadcastb xmm1, xmm1 203 | vpcmpeqb xmm0, xmm1, xmm0 204 | vptest xmm0, xmm0 205 | setne al 206 | ret 207 | ``` 208 | 209 | 210 | ```cpp 211 | template auto find(T lhs, auto value) { 212 | const auto rhs = T{value}; 213 | const auto match = lhs == rhs; 214 | return any_of(match) * find_first_set(match); 215 | } 216 | ``` 217 | 218 | ```cpp 219 | find(swar, swar): // $CXX -O3 -mno-sse -mno-sse2 -mno-sse3 -mno-avx 220 | movabs rax, 72340172838076673 221 | movzx esi, sil 222 | movabs rdx, 72340172838076672 223 | imul rsi, rax 224 | movabs rax, -9187201950435737472 225 | xor rdi, rsi 226 | or rdi, rax 227 | sub rdx, rdi 228 | and rdx, rax 229 | 
xor eax, eax 230 | rep bsf rax, rdx 231 | test rdx, rdx 232 | mov edx, 0 233 | cmove rax, rdx 234 | ret 235 | 236 | find(simd, simd): // $CXX -O3 -mavx512f 237 | vmovd xmm1, edi 238 | vpbroadcastb xmm1, xmm1 239 | vpcmpeqb xmm0, xmm1, xmm0 240 | vpmovmskb eax, xmm0 241 | or eax, 65536 242 | rep bsf ecx, eax 243 | xor eax, eax 244 | vptest xmm0, xmm0 245 | cmovne eax, ecx 246 | ret 247 | ``` 248 | 249 | ### API 250 | 251 | ```cpp 252 | namespace swar::inline v1_0_0 { 253 | template> 254 | requires ((sizeof(T) * Width) <= sizeof(TAbi)) 255 | struct swar { 256 | using value_type = T; 257 | using abi_type = TAbi; 258 | 259 | constexpr swar() noexcept = default; 260 | constexpr swar(const swar&) noexcept = default; 261 | constexpr swar(swar&&) noexcept = default; 262 | constexpr explicit swar(const auto value) noexcept; 263 | constexpr explicit swar(const auto* mem) noexcept; 264 | constexpr explicit swar(const auto& gen) noexcept; 265 | [[nodiscard]] constexpr explicit operator abi_type() const noexcept; 266 | [[nodiscard]] constexpr auto operator[](size_t) const noexcept -> T; 267 | [[nodiscard]] static constexpr auto size() noexcept -> size_t; 268 | [[nodiscard]] friend constexpr auto operator==(const swar&, const swar&) noexcept; 269 | }; 270 | 271 | template> 272 | requires ((sizeof(T) * Width) <= sizeof(TAbi)) 273 | struct swar_mask { 274 | using value_type = bool; /// predefined 275 | using abi_type = TAbi; 276 | 277 | constexpr swar_mask() noexcept = default; 278 | constexpr swar_mask(const swar_mask&) noexcept = default; 279 | constexpr swar_mask(swar_mask&&) noexcept = default; 280 | constexpr explicit swar_mask(const abi_type value) noexcept; 281 | 282 | [[nodiscard]] constexpr auto operator[](const size_t index) const noexcept -> bool; 283 | [[nodiscard]] static constexpr auto size() noexcept -> size_t { return Width; } 284 | }; 285 | 286 | template 287 | [[nodiscard]] constexpr auto all_of(const swar_mask& s) noexcept -> bool; 288 | 289 | template 290 | 
[[nodiscard]] constexpr auto any_of(const swar_mask& s) noexcept -> bool; 291 | 292 | template 293 | [[nodiscard]] constexpr auto some_of(const swar_mask& s) noexcept -> bool; 294 | 295 | template 296 | [[nodiscard]] constexpr auto none_of(const swar_mask& s) noexcept -> bool; 297 | 298 | template 299 | [[nodiscard]] constexpr auto find_first_set(const swar_mask& s) noexcept; 300 | 301 | template 302 | [[nodiscard]] constexpr auto find_last_set(const swar_mask& s) noexcept; 303 | 304 | template 305 | [[nodiscard]] constexpr auto popcount(const swar_mask& s) noexcept; 306 | 307 | template inline constexpr bool is_swar_v = /* unspecified */; 308 | template inline constexpr bool is_swar_mask_v = /* unspecified */; 309 | } // namespace swar 310 | ``` 311 | 312 | ### FAQ 313 | 314 | > - How to disable running tests at compile-time? 315 | > 316 | > When `-DNTEST` is defined, `static_assert` tests won't be executed upon include. 317 | > Note: Use with caution, as disabling tests means that there are no guarantees upon include that the given compiler/env combination works as expected. 318 | 319 | ### Resources 320 | 321 | > - `std::simd` - https://wg21.link/P1928 322 | > - Intel Optimization Reference Manual - https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html 323 | 324 | ### License 325 | 326 | > - [MIT](LICENSE) 327 | 328 |