├── .gitattributes ├── .gitignore ├── .travis.yml ├── LICENSE ├── PackedArray.c ├── PackedArray.h ├── PackedArraySIMD.c ├── README.md ├── _gnu-make └── Makefile ├── _ios-xcode ├── .gitignore ├── PackedArray-Info.plist └── PackedArray.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ └── contents.xcworkspacedata ├── _mac-xcode ├── .gitignore └── PackedArray.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ └── contents.xcworkspacedata ├── _win-vs11 ├── .gitignore ├── Common.props ├── Debug.props ├── PackedArray.sln ├── PackedArraySIMDSelfBench.vcxproj ├── PackedArraySIMDSelfTest.vcxproj ├── PackedArraySelfBench.vcxproj ├── PackedArraySelfTest.vcxproj ├── Release.props ├── x64.props └── x86.props └── benchmark ├── PackedArraySIMDSelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt ├── PackedArraySIMDSelfBench-unrolled-ipad2-cortex-a9-1GHz.txt ├── PackedArraySIMDSelfBench-unrolled-iphone5-a6-1.3GHz.txt ├── PackedArraySIMDSelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt ├── PackedArraySelfBench-reference-galaxy-note-cortex-a9-1.4GHz.txt ├── PackedArraySelfBench-reference-ipad2-cortex-a9-1GHz.txt ├── PackedArraySelfBench-reference-iphone5-a6-1.3GHz.txt ├── PackedArraySelfBench-reference-mbp-corei7-M620-2.67GHz.txt ├── PackedArraySelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt ├── PackedArraySelfBench-unrolled-ipad2-cortex-a9-1GHz.txt ├── PackedArraySelfBench-unrolled-iphone5-a6-1.3GHz.txt └── PackedArraySelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | # sources 4 | *.h text diff=cpp 5 | *.c text diff=cpp 6 | *.cpp text diff=cpp 7 | *.rb text diff=ruby 8 | *.html text diff=html 9 | *.m text diff=objc 10 | 11 | # shell scripts 12 | *.sh eol=lf 13 | 14 | # GNU Makefile 15 | Makefile text eol=lf 16 | 17 | # Autotools 18 | *.am text eol=lf 19 | 20 | # Android 21 | *.mk text eol=lf 22 | 23 | # Xcode files 24 | *.pbxproj text eol=lf 
merge=union 25 | 26 | # Visual Studio files 27 | *.sln text eol=crlf merge=union 28 | *.vcxproj text eol=crlf merge=union 29 | *.vcxproj.filters text eol=crlf merge=union 30 | *.props text eol=crlf 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | *.swp 4 | 5 | /bin 6 | PackedArray.pp.c 7 | PackedArray.cut.c 8 | PackedArraySIMD.pp.c 9 | PackedArraySIMD.cut.c 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - clang 4 | - gcc 5 | env: 6 | - TARGET=build 7 | - TARGET=test 8 | - TARGET=preprocess 9 | - TARGET=cut 10 | - TARGET=assembly 11 | script: make -j -C ./_gnu-make $TARGET 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | 1. Bla bla bla 15 | 2. Montesqieu et camembert, vive la France, zut alors! 16 | -------------------------------------------------------------------------------- /PackedArray.c: -------------------------------------------------------------------------------- 1 | // see README.md for usage instructions. 
2 | // (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz) 3 | 4 | #ifndef PACKEDARRAY_SELF 5 | #define PACKEDARRAY_SELF "PackedArray.c" 6 | #endif 7 | 8 | #ifdef PACKEDARRAY_IMPL 9 | 10 | #ifndef PACKEDARRAY_JOIN 11 | #define PACKEDARRAY_JOIN(lhs, rhs) PACKEDARRAY_JOIN_(lhs, rhs) 12 | #define PACKEDARRAY_JOIN_(lhs, rhs) PACKEDARRAY_JOIN__(lhs, rhs) 13 | #define PACKEDARRAY_JOIN__(lhs, rhs) lhs##rhs 14 | #endif // #ifndef PACKEDARRAY_JOIN 15 | 16 | #ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM 17 | #error PACKEDARRAY_IMPL_BITS_PER_ITEM undefined 18 | #endif // #ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM 19 | 20 | #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES) 21 | 22 | #ifndef PACKEDARRAY_IMPL_CASE_I 23 | #define PACKEDARRAY_IMPL_CASE_I 0 24 | #elif PACKEDARRAY_IMPL_CASE_I == 0 25 | #undef PACKEDARRAY_IMPL_CASE_I 26 | #define PACKEDARRAY_IMPL_CASE_I 1 27 | #elif PACKEDARRAY_IMPL_CASE_I == 1 28 | #undef PACKEDARRAY_IMPL_CASE_I 29 | #define PACKEDARRAY_IMPL_CASE_I 2 30 | #elif PACKEDARRAY_IMPL_CASE_I == 2 31 | #undef PACKEDARRAY_IMPL_CASE_I 32 | #define PACKEDARRAY_IMPL_CASE_I 3 33 | #elif PACKEDARRAY_IMPL_CASE_I == 3 34 | #undef PACKEDARRAY_IMPL_CASE_I 35 | #define PACKEDARRAY_IMPL_CASE_I 4 36 | #elif PACKEDARRAY_IMPL_CASE_I == 4 37 | #undef PACKEDARRAY_IMPL_CASE_I 38 | #define PACKEDARRAY_IMPL_CASE_I 5 39 | #elif PACKEDARRAY_IMPL_CASE_I == 5 40 | #undef PACKEDARRAY_IMPL_CASE_I 41 | #define PACKEDARRAY_IMPL_CASE_I 6 42 | #elif PACKEDARRAY_IMPL_CASE_I == 6 43 | #undef PACKEDARRAY_IMPL_CASE_I 44 | #define PACKEDARRAY_IMPL_CASE_I 7 45 | #elif PACKEDARRAY_IMPL_CASE_I == 7 46 | #undef PACKEDARRAY_IMPL_CASE_I 47 | #define PACKEDARRAY_IMPL_CASE_I 8 48 | #elif PACKEDARRAY_IMPL_CASE_I == 8 49 | #undef PACKEDARRAY_IMPL_CASE_I 50 | #define PACKEDARRAY_IMPL_CASE_I 9 51 | #elif PACKEDARRAY_IMPL_CASE_I == 9 52 | #undef PACKEDARRAY_IMPL_CASE_I 53 | #define PACKEDARRAY_IMPL_CASE_I 10 54 | #elif PACKEDARRAY_IMPL_CASE_I == 10 55 | #undef 
PACKEDARRAY_IMPL_CASE_I 56 | #define PACKEDARRAY_IMPL_CASE_I 11 57 | #elif PACKEDARRAY_IMPL_CASE_I == 11 58 | #undef PACKEDARRAY_IMPL_CASE_I 59 | #define PACKEDARRAY_IMPL_CASE_I 12 60 | #elif PACKEDARRAY_IMPL_CASE_I == 12 61 | #undef PACKEDARRAY_IMPL_CASE_I 62 | #define PACKEDARRAY_IMPL_CASE_I 13 63 | #elif PACKEDARRAY_IMPL_CASE_I == 13 64 | #undef PACKEDARRAY_IMPL_CASE_I 65 | #define PACKEDARRAY_IMPL_CASE_I 14 66 | #elif PACKEDARRAY_IMPL_CASE_I == 14 67 | #undef PACKEDARRAY_IMPL_CASE_I 68 | #define PACKEDARRAY_IMPL_CASE_I 15 69 | #elif PACKEDARRAY_IMPL_CASE_I == 15 70 | #undef PACKEDARRAY_IMPL_CASE_I 71 | #define PACKEDARRAY_IMPL_CASE_I 16 72 | #elif PACKEDARRAY_IMPL_CASE_I == 16 73 | #undef PACKEDARRAY_IMPL_CASE_I 74 | #define PACKEDARRAY_IMPL_CASE_I 17 75 | #elif PACKEDARRAY_IMPL_CASE_I == 17 76 | #undef PACKEDARRAY_IMPL_CASE_I 77 | #define PACKEDARRAY_IMPL_CASE_I 18 78 | #elif PACKEDARRAY_IMPL_CASE_I == 18 79 | #undef PACKEDARRAY_IMPL_CASE_I 80 | #define PACKEDARRAY_IMPL_CASE_I 19 81 | #elif PACKEDARRAY_IMPL_CASE_I == 19 82 | #undef PACKEDARRAY_IMPL_CASE_I 83 | #define PACKEDARRAY_IMPL_CASE_I 20 84 | #elif PACKEDARRAY_IMPL_CASE_I == 20 85 | #undef PACKEDARRAY_IMPL_CASE_I 86 | #define PACKEDARRAY_IMPL_CASE_I 21 87 | #elif PACKEDARRAY_IMPL_CASE_I == 21 88 | #undef PACKEDARRAY_IMPL_CASE_I 89 | #define PACKEDARRAY_IMPL_CASE_I 22 90 | #elif PACKEDARRAY_IMPL_CASE_I == 22 91 | #undef PACKEDARRAY_IMPL_CASE_I 92 | #define PACKEDARRAY_IMPL_CASE_I 23 93 | #elif PACKEDARRAY_IMPL_CASE_I == 23 94 | #undef PACKEDARRAY_IMPL_CASE_I 95 | #define PACKEDARRAY_IMPL_CASE_I 24 96 | #elif PACKEDARRAY_IMPL_CASE_I == 24 97 | #undef PACKEDARRAY_IMPL_CASE_I 98 | #define PACKEDARRAY_IMPL_CASE_I 25 99 | #elif PACKEDARRAY_IMPL_CASE_I == 25 100 | #undef PACKEDARRAY_IMPL_CASE_I 101 | #define PACKEDARRAY_IMPL_CASE_I 26 102 | #elif PACKEDARRAY_IMPL_CASE_I == 26 103 | #undef PACKEDARRAY_IMPL_CASE_I 104 | #define PACKEDARRAY_IMPL_CASE_I 27 105 | #elif PACKEDARRAY_IMPL_CASE_I == 27 106 | #undef 
PACKEDARRAY_IMPL_CASE_I 107 | #define PACKEDARRAY_IMPL_CASE_I 28 108 | #elif PACKEDARRAY_IMPL_CASE_I == 28 109 | #undef PACKEDARRAY_IMPL_CASE_I 110 | #define PACKEDARRAY_IMPL_CASE_I 29 111 | #elif PACKEDARRAY_IMPL_CASE_I == 29 112 | #undef PACKEDARRAY_IMPL_CASE_I 113 | #define PACKEDARRAY_IMPL_CASE_I 30 114 | #elif PACKEDARRAY_IMPL_CASE_I == 30 115 | #undef PACKEDARRAY_IMPL_CASE_I 116 | #define PACKEDARRAY_IMPL_CASE_I 31 117 | #elif PACKEDARRAY_IMPL_CASE_I == 31 118 | #undef PACKEDARRAY_IMPL_CASE_I 119 | #define PACKEDARRAY_IMPL_CASE_I 32 120 | #endif // #ifndef PACKEDARRAY_IMPL_CASE_I 121 | 122 | #ifndef PACKEDARRAY_IMPL_BITS_AVAILABLE 123 | #define PACKEDARRAY_IMPL_BITS_AVAILABLE (32 - ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32)) 124 | #endif 125 | #ifndef PACKEDARRAY_IMPL_START_BIT 126 | #define PACKEDARRAY_IMPL_START_BIT ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32) 127 | #endif 128 | #ifndef PACKEDARRAY_IMPL_MASK 129 | #define PACKEDARRAY_IMPL_MASK (uint32_t)((1ULL << PACKEDARRAY_IMPL_BITS_PER_ITEM) - 1) 130 | #endif 131 | 132 | #if defined(PACKEDARRAY_IMPL_PACK_CASES) 133 | 134 | #ifndef PACKEDARRAY_IMPL_PACK_CASE_BREAK 135 | #define PACKEDARRAY_IMPL_PACK_CASE_BREAK 136 | #endif 137 | 138 | case PACKEDARRAY_IMPL_CASE_I: 139 | #if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE) 140 | packed |= *in++ << PACKEDARRAY_IMPL_START_BIT; 141 | #if (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE) 142 | *out++ = packed; 143 | packed = 0; 144 | #endif 145 | #else 146 | packed |= *in << PACKEDARRAY_IMPL_START_BIT; 147 | *out++ = packed; 148 | packed = *in++ >> PACKEDARRAY_IMPL_BITS_AVAILABLE; 149 | #endif 150 | PACKEDARRAY_IMPL_PACK_CASE_BREAK 151 | 152 | #if PACKEDARRAY_IMPL_CASE_I < 31 153 | #include PACKEDARRAY_SELF 154 | #else 155 | #undef PACKEDARRAY_IMPL_CASE_I 156 | #undef PACKEDARRAY_IMPL_PACK_CASE_BREAK 157 | #undef PACKEDARRAY_IMPL_PACK_CASES 158 | #endif 159 | 160 | #elif 
defined(PACKEDARRAY_IMPL_UNPACK_CASES) // #if defined(PACKEDARRAY_IMPL_PACK_CASES) 161 | 162 | #ifndef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 163 | #define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 164 | #endif 165 | 166 | case PACKEDARRAY_IMPL_CASE_I: 167 | #if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE) 168 | *out++ = (packed >> PACKEDARRAY_IMPL_START_BIT) & PACKEDARRAY_IMPL_MASK; 169 | PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 170 | #if (PACKEDARRAY_IMPL_CASE_I < 31) && (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE) 171 | packed = *++in; 172 | #endif 173 | #else 174 | { 175 | uint32_t low, high; 176 | low = packed >> PACKEDARRAY_IMPL_START_BIT; 177 | packed = *++in; 178 | high = packed << PACKEDARRAY_IMPL_BITS_AVAILABLE; 179 | 180 | *out++ = (low | high) & PACKEDARRAY_IMPL_MASK; 181 | } 182 | PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 183 | #endif 184 | 185 | #if PACKEDARRAY_IMPL_CASE_I < 31 186 | #include PACKEDARRAY_SELF 187 | #else 188 | #undef PACKEDARRAY_IMPL_CASE_I 189 | #undef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 190 | #undef PACKEDARRAY_IMPL_UNPACK_CASES 191 | #endif 192 | 193 | #endif // #elif defined(PACKEDARRAY_IMPL_UNPACK_CASES) 194 | 195 | #else // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES) 196 | 197 | void PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count) 198 | { 199 | uint32_t startBit; 200 | uint32_t packed; 201 | const uint32_t* __restrict end; 202 | 203 | out += ((uint64_t)offset * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32; 204 | startBit = ((uint64_t)offset * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32; 205 | packed = *out & (uint32_t)((1ULL << startBit) - 1); 206 | 207 | offset = offset % 32; 208 | if (count >= 32 - offset) 209 | { 210 | int32_t n; 211 | 212 | n = (count + offset) / 32; 213 | count -= 32 * n - offset; 214 | switch (offset) 215 | { 216 | do 217 | { 218 | 
#define PACKEDARRAY_IMPL_PACK_CASES 219 | #include PACKEDARRAY_SELF 220 | } while (--n > 0); 221 | } 222 | 223 | if (count == 0) 224 | return; 225 | 226 | offset = 0; 227 | startBit = 0; 228 | } 229 | 230 | end = in + count; 231 | switch (offset) 232 | { 233 | #define PACKEDARRAY_IMPL_PACK_CASES 234 | #define PACKEDARRAY_IMPL_PACK_CASE_BREAK \ 235 | if (in == end)\ 236 | break; 237 | #include PACKEDARRAY_SELF 238 | } 239 | PACKEDARRAY_ASSERT(in == end); 240 | if ((count * PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit) % 32) 241 | { 242 | packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit - 1) % 32) + 1)) - 1); 243 | *out = packed; 244 | } 245 | } 246 | // unpacks count items, starting at item index offset, from the packed words at in into out (one item per uint32_t) 247 | void PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count) 248 | { 249 | uint32_t packed; 250 | const uint32_t* __restrict end; 251 | 252 | in += ((uint64_t)offset * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32; 253 | packed = *in; 254 | 255 | offset = offset % 32; 256 | if (count >= 32 - offset) 257 | { 258 | int32_t n; 259 | 260 | n = (count + offset) / 32; 261 | count -= 32 * n - offset; 262 | switch (offset) 263 | { 264 | do 265 | { 266 | packed = *++in; 267 | #define PACKEDARRAY_IMPL_UNPACK_CASES 268 | #include PACKEDARRAY_SELF 269 | } while (--n > 0); 270 | } 271 | 272 | if (count == 0) 273 | return; 274 | 275 | packed = *++in; 276 | offset = 0; 277 | } 278 | 279 | end = out + count; 280 | switch (offset) 281 | { 282 | #define PACKEDARRAY_IMPL_UNPACK_CASES 283 | #define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK \ 284 | if (out == end)\ 285 | break; 286 | #include PACKEDARRAY_SELF 287 | } 288 | PACKEDARRAY_ASSERT(out == end); 289 | } 290 | // release the per-width helper macros; the #ifndef guards above re-create them on the next PACKEDARRAY_IMPL_BITS_PER_ITEM pass 291 | #undef PACKEDARRAY_IMPL_BITS_PER_ITEM 292 | #undef PACKEDARRAY_IMPL_BITS_AVAILABLE 293 | #undef PACKEDARRAY_IMPL_START_BIT 294 | #undef PACKEDARRAY_IMPL_MASK 295 | 296 | #endif // #if
defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES) 297 | 298 | #else 299 | 300 | #include "PackedArray.h" 301 | 302 | #if !defined(PACKEDARRAY_ASSERT) 303 | #include <assert.h> 304 | #define PACKEDARRAY_ASSERT(expression) assert(expression) 305 | #endif 306 | 307 | #define PACKEDARRAY_IMPL 308 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 1 309 | #include PACKEDARRAY_SELF 310 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 2 311 | #include PACKEDARRAY_SELF 312 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 3 313 | #include PACKEDARRAY_SELF 314 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 4 315 | #include PACKEDARRAY_SELF 316 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 5 317 | #include PACKEDARRAY_SELF 318 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 6 319 | #include PACKEDARRAY_SELF 320 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 7 321 | #include PACKEDARRAY_SELF 322 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 8 323 | #include PACKEDARRAY_SELF 324 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 9 325 | #include PACKEDARRAY_SELF 326 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 10 327 | #include PACKEDARRAY_SELF 328 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 11 329 | #include PACKEDARRAY_SELF 330 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 12 331 | #include PACKEDARRAY_SELF 332 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 13 333 | #include PACKEDARRAY_SELF 334 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 14 335 | #include PACKEDARRAY_SELF 336 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 15 337 | #include PACKEDARRAY_SELF 338 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 16 339 | #include PACKEDARRAY_SELF 340 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 17 341 | #include PACKEDARRAY_SELF 342 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 18 343 | #include PACKEDARRAY_SELF 344 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 19 345 | #include PACKEDARRAY_SELF 346 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 20 347 | #include PACKEDARRAY_SELF 348 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 21 349 | #include PACKEDARRAY_SELF 350 | #define
PACKEDARRAY_IMPL_BITS_PER_ITEM 22 351 | #include PACKEDARRAY_SELF 352 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 23 353 | #include PACKEDARRAY_SELF 354 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 24 355 | #include PACKEDARRAY_SELF 356 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 25 357 | #include PACKEDARRAY_SELF 358 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 26 359 | #include PACKEDARRAY_SELF 360 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 27 361 | #include PACKEDARRAY_SELF 362 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 28 363 | #include PACKEDARRAY_SELF 364 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 29 365 | #include PACKEDARRAY_SELF 366 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 30 367 | #include PACKEDARRAY_SELF 368 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 31 369 | #include PACKEDARRAY_SELF 370 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 32 371 | #include PACKEDARRAY_SELF 372 | #undef PACKEDARRAY_IMPL 373 | 374 | 375 | #if !defined(PACKEDARRAY_MALLOC) || !defined(PACKEDARRAY_FREE) 376 | #include <stdlib.h> 377 | #endif 378 | 379 | #if !defined(PACKEDARRAY_MALLOC) 380 | #define PACKEDARRAY_MALLOC(size) malloc(size) 381 | #endif 382 | 383 | #if !defined(PACKEDARRAY_FREE) 384 | #define PACKEDARRAY_FREE(p) free(p) 385 | #endif 386 | 387 | #include <stddef.h> 388 | // allocates header and buffer in one PACKEDARRAY_MALLOC call for count items of bitsPerItem bits (1..32); returns NULL when the allocation fails 389 | PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count) 390 | { 391 | PackedArray* a; 392 | size_t bufferSize; 393 | 394 | PACKEDARRAY_ASSERT(bitsPerItem > 0); 395 | PACKEDARRAY_ASSERT(bitsPerItem <= 32); 396 | 397 | bufferSize = sizeof(uint32_t) * (((uint64_t)bitsPerItem * (uint64_t)count + 31) / 32); 398 | a = (PackedArray*)PACKEDARRAY_MALLOC(sizeof(PackedArray) + bufferSize); 399 | 400 | if (a != NULL) 401 | { 402 | if (bufferSize != 0) a->buffer[bufferSize / sizeof(uint32_t) - 1] = 0; // zero only the last cell; guarded because count == 0 yields no cells and index -1 would write out of bounds 403 | a->bitsPerItem = bitsPerItem; 404 | a->count = count; 405 | } 406 | 407 | return a; 408 | } 409 | // frees the single allocation (header plus buffer) through PACKEDARRAY_FREE; a must not be NULL 410 | void PackedArray_destroy(PackedArray* a) 411 | { 412 | PACKEDARRAY_ASSERT(a); 413 | PACKEDARRAY_FREE(a); 414 | } 415 | 416 | void
PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count) 417 | { 418 | PACKEDARRAY_ASSERT(a != NULL); 419 | PACKEDARRAY_ASSERT(in != NULL); 420 | 421 | switch (a->bitsPerItem) 422 | { 423 | case 1: __PackedArray_pack_1(a->buffer, offset, in, count); break; 424 | case 2: __PackedArray_pack_2(a->buffer, offset, in, count); break; 425 | case 3: __PackedArray_pack_3(a->buffer, offset, in, count); break; 426 | case 4: __PackedArray_pack_4(a->buffer, offset, in, count); break; 427 | case 5: __PackedArray_pack_5(a->buffer, offset, in, count); break; 428 | case 6: __PackedArray_pack_6(a->buffer, offset, in, count); break; 429 | case 7: __PackedArray_pack_7(a->buffer, offset, in, count); break; 430 | case 8: __PackedArray_pack_8(a->buffer, offset, in, count); break; 431 | case 9: __PackedArray_pack_9(a->buffer, offset, in, count); break; 432 | case 10: __PackedArray_pack_10(a->buffer, offset, in, count); break; 433 | case 11: __PackedArray_pack_11(a->buffer, offset, in, count); break; 434 | case 12: __PackedArray_pack_12(a->buffer, offset, in, count); break; 435 | case 13: __PackedArray_pack_13(a->buffer, offset, in, count); break; 436 | case 14: __PackedArray_pack_14(a->buffer, offset, in, count); break; 437 | case 15: __PackedArray_pack_15(a->buffer, offset, in, count); break; 438 | case 16: __PackedArray_pack_16(a->buffer, offset, in, count); break; 439 | case 17: __PackedArray_pack_17(a->buffer, offset, in, count); break; 440 | case 18: __PackedArray_pack_18(a->buffer, offset, in, count); break; 441 | case 19: __PackedArray_pack_19(a->buffer, offset, in, count); break; 442 | case 20: __PackedArray_pack_20(a->buffer, offset, in, count); break; 443 | case 21: __PackedArray_pack_21(a->buffer, offset, in, count); break; 444 | case 22: __PackedArray_pack_22(a->buffer, offset, in, count); break; 445 | case 23: __PackedArray_pack_23(a->buffer, offset, in, count); break; 446 | case 24: __PackedArray_pack_24(a->buffer, offset, in, count); 
break; 447 | case 25: __PackedArray_pack_25(a->buffer, offset, in, count); break; 448 | case 26: __PackedArray_pack_26(a->buffer, offset, in, count); break; 449 | case 27: __PackedArray_pack_27(a->buffer, offset, in, count); break; 450 | case 28: __PackedArray_pack_28(a->buffer, offset, in, count); break; 451 | case 29: __PackedArray_pack_29(a->buffer, offset, in, count); break; 452 | case 30: __PackedArray_pack_30(a->buffer, offset, in, count); break; 453 | case 31: __PackedArray_pack_31(a->buffer, offset, in, count); break; 454 | case 32: __PackedArray_pack_32(a->buffer, offset, in, count); break; 455 | } 456 | } 457 | 458 | void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count) 459 | { 460 | PACKEDARRAY_ASSERT(a != NULL); 461 | PACKEDARRAY_ASSERT(out != NULL); 462 | 463 | switch (a->bitsPerItem) 464 | { 465 | case 1: __PackedArray_unpack_1(a->buffer, offset, out, count); break; 466 | case 2: __PackedArray_unpack_2(a->buffer, offset, out, count); break; 467 | case 3: __PackedArray_unpack_3(a->buffer, offset, out, count); break; 468 | case 4: __PackedArray_unpack_4(a->buffer, offset, out, count); break; 469 | case 5: __PackedArray_unpack_5(a->buffer, offset, out, count); break; 470 | case 6: __PackedArray_unpack_6(a->buffer, offset, out, count); break; 471 | case 7: __PackedArray_unpack_7(a->buffer, offset, out, count); break; 472 | case 8: __PackedArray_unpack_8(a->buffer, offset, out, count); break; 473 | case 9: __PackedArray_unpack_9(a->buffer, offset, out, count); break; 474 | case 10: __PackedArray_unpack_10(a->buffer, offset, out, count); break; 475 | case 11: __PackedArray_unpack_11(a->buffer, offset, out, count); break; 476 | case 12: __PackedArray_unpack_12(a->buffer, offset, out, count); break; 477 | case 13: __PackedArray_unpack_13(a->buffer, offset, out, count); break; 478 | case 14: __PackedArray_unpack_14(a->buffer, offset, out, count); break; 479 | case 15: __PackedArray_unpack_15(a->buffer, offset, out, 
count); break; 480 | case 16: __PackedArray_unpack_16(a->buffer, offset, out, count); break; 481 | case 17: __PackedArray_unpack_17(a->buffer, offset, out, count); break; 482 | case 18: __PackedArray_unpack_18(a->buffer, offset, out, count); break; 483 | case 19: __PackedArray_unpack_19(a->buffer, offset, out, count); break; 484 | case 20: __PackedArray_unpack_20(a->buffer, offset, out, count); break; 485 | case 21: __PackedArray_unpack_21(a->buffer, offset, out, count); break; 486 | case 22: __PackedArray_unpack_22(a->buffer, offset, out, count); break; 487 | case 23: __PackedArray_unpack_23(a->buffer, offset, out, count); break; 488 | case 24: __PackedArray_unpack_24(a->buffer, offset, out, count); break; 489 | case 25: __PackedArray_unpack_25(a->buffer, offset, out, count); break; 490 | case 26: __PackedArray_unpack_26(a->buffer, offset, out, count); break; 491 | case 27: __PackedArray_unpack_27(a->buffer, offset, out, count); break; 492 | case 28: __PackedArray_unpack_28(a->buffer, offset, out, count); break; 493 | case 29: __PackedArray_unpack_29(a->buffer, offset, out, count); break; 494 | case 30: __PackedArray_unpack_30(a->buffer, offset, out, count); break; 495 | case 31: __PackedArray_unpack_31(a->buffer, offset, out, count); break; 496 | case 32: __PackedArray_unpack_32(a->buffer, offset, out, count); break; 497 | } 498 | } 499 | 500 | void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in) 501 | { 502 | uint32_t* __restrict out; 503 | uint32_t bitsPerItem; 504 | uint32_t startBit; 505 | uint32_t bitsAvailable; 506 | uint32_t mask; 507 | 508 | PACKEDARRAY_ASSERT(a != NULL); 509 | 510 | bitsPerItem = a->bitsPerItem; 511 | 512 | out = &a->buffer[((uint64_t)offset * (uint64_t)bitsPerItem) / 32]; 513 | startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32; 514 | 515 | bitsAvailable = 32 - startBit; 516 | 517 | mask = (uint32_t)(1ULL << bitsPerItem) - 1; 518 | PACKEDARRAY_ASSERT(0 == (~mask & in)); 519 | 520 | if (bitsPerItem <= 
bitsAvailable) 521 | { 522 | out[0] = (out[0] & ~(mask << startBit)) | (in << startBit); 523 | } 524 | else 525 | { 526 | // value spans 2 buffer cells 527 | uint32_t low, high; 528 | 529 | low = in << startBit; 530 | high = in >> bitsAvailable; 531 | 532 | out[0] = (out[0] & ~(mask << startBit)) | low; 533 | 534 | out[1] = (out[1] & ~(mask >> (32 - startBit))) | high; 535 | } 536 | } 537 | 538 | uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset) 539 | { 540 | const uint32_t* __restrict in; 541 | uint32_t bitsPerItem; 542 | uint32_t startBit; 543 | uint32_t bitsAvailable; 544 | uint32_t mask; 545 | uint32_t out; 546 | 547 | PACKEDARRAY_ASSERT(a != NULL); 548 | 549 | bitsPerItem = a->bitsPerItem; 550 | 551 | in = &a->buffer[((uint64_t)offset * (uint64_t)bitsPerItem) / 32]; 552 | startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32; 553 | 554 | bitsAvailable = 32 - startBit; 555 | 556 | mask = (uint32_t)(1ULL << bitsPerItem) - 1; 557 | 558 | if (bitsPerItem <= bitsAvailable) 559 | { 560 | out = (in[0] >> startBit) & mask; 561 | } 562 | else 563 | { 564 | // out spans 2 buffer cells 565 | uint32_t low, high; 566 | 567 | low = in[0] >> startBit; 568 | high = in[1] << (32 - startBit); 569 | 570 | out = low ^ ((low ^ high) & (mask >> bitsAvailable << bitsAvailable)); 571 | } 572 | 573 | return out; 574 | } 575 | 576 | uint32_t PackedArray_bufferSize(const PackedArray* a) 577 | { 578 | PACKEDARRAY_ASSERT(a != NULL); 579 | return (uint32_t)(((uint64_t)a->bitsPerItem * (uint64_t)a->count + 31) / 32); 580 | } 581 | 582 | #if !(defined(_MSC_VER) && _MSC_VER >= 1400) && !defined(__GNUC__) 583 | // log base 2 of an integer, aka the position of the highest bit set 584 | static uint32_t __PackedArray_log2(uint32_t v) 585 | { 586 | // references 587 | // http://aggregate.org/MAGIC 588 | // http://graphics.stanford.edu/~seander/bithacks.html 589 | 590 | static const uint32_t multiplyDeBruijnBitPosition[32] = 591 | { 592 | 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 
16, 18, 22, 25, 3, 30, 593 | 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 594 | }; 595 | 596 | v |= v >> 1; 597 | v |= v >> 2; 598 | v |= v >> 4; 599 | v |= v >> 8; 600 | v |= v >> 16; 601 | 602 | return multiplyDeBruijnBitPosition[(uint32_t)(v * 0x7C4ACDDU) >> 27]; 603 | } 604 | #endif 605 | 606 | // position of the highest bit set 607 | static int __PackedArray_highestBitSet(uint32_t v) 608 | { 609 | #if defined(_MSC_VER) && _MSC_VER >= 1400 610 | unsigned long index; 611 | return _BitScanReverse(&index, v) ? index : -1; 612 | #elif defined(__GNUC__) 613 | return v == 0 ? -1 : 31 - __builtin_clz(v); 614 | #else 615 | return v != 0 ? __PackedArray_log2(v) : -1; 616 | #endif 617 | } 618 | 619 | uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count) 620 | { 621 | uint32_t i, in_max, bitsPerItem; 622 | 623 | in_max = 0; 624 | for (i = 0; i < count; ++i) 625 | in_max = in[i] > in_max ? in[i] : in_max; 626 | 627 | bitsPerItem = __PackedArray_highestBitSet(in_max) + 1; 628 | return bitsPerItem == 0 ? 
1 : bitsPerItem; 629 | } 630 | 631 | 632 | // - 8< ------------------------------------------------------------------------ 633 | 634 | #if defined(PACKEDARRAY_SELF_TEST) && defined(PACKEDARRAY_SELF_BENCH) 635 | #error choose either PACKEDARRAY_SELF_TEST or PACKEDARRAY_SELF_BENCH 636 | #endif 637 | 638 | #if defined(PACKEDARRAY_SELF_TEST) 639 | 640 | #undef NDEBUG // we want asserts 641 | #include <assert.h> 642 | 643 | #include <stdio.h> 644 | #include <string.h> // memcmp 645 | 646 | static void PackedArray_pack_reference(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count) 647 | { 648 | uint32_t* __restrict out; 649 | uint32_t bitsPerItem; 650 | uint32_t startBit; 651 | uint32_t bitsAvailable; 652 | uint32_t mask; 653 | uint32_t packed; 654 | 655 | assert(a != NULL); 656 | assert(in != NULL); 657 | assert(count != 0); 658 | 659 | bitsPerItem = a->bitsPerItem; 660 | 661 | out = &a->buffer[(uint64_t)offset * (uint64_t)bitsPerItem / 32]; 662 | startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32; 663 | 664 | bitsAvailable = 32 - startBit; 665 | 666 | mask = (uint32_t)(1ULL << bitsPerItem) - 1; 667 | 668 | packed = *out; 669 | 670 | while (count--) 671 | { 672 | uint32_t value = *in++; 673 | 674 | assert(0 == (~mask & value)); 675 | 676 | if (bitsPerItem <= bitsAvailable) 677 | { 678 | packed = (packed & ~(mask << startBit)) | (value << startBit); 679 | 680 | startBit += bitsPerItem; 681 | bitsAvailable -= bitsPerItem; 682 | } 683 | else if (bitsAvailable == 0) 684 | { 685 | *out++ = packed; 686 | packed = *out; 687 | 688 | startBit = 0; 689 | bitsAvailable = 32; 690 | 691 | packed = (packed & ~mask) | value; 692 | 693 | startBit += bitsPerItem; 694 | bitsAvailable -= bitsPerItem; 695 | } 696 | else 697 | { 698 | // value spans 2 buffer cells 699 | uint32_t low, high; 700 | 701 | low = value << startBit; 702 | high = value >> bitsAvailable; 703 | 704 | packed = (packed & ~(mask << startBit)) | low; 705 | *out++ = packed; 706 | 707 | packed = *out; 708 | packed = (packed
& ~(mask >> (32 - startBit))) | high; 709 | 710 | startBit = (startBit + bitsPerItem) % 32; 711 | bitsAvailable = 32 - startBit; 712 | } 713 | } 714 | *out = packed; 715 | } 716 | 717 | static void PackedArray_unpack_reference(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count) 718 | { 719 | const uint32_t* __restrict in; 720 | uint32_t bitsPerItem; 721 | uint32_t startBit; 722 | uint32_t bitsAvailable; 723 | uint32_t mask; 724 | uint32_t packed; 725 | 726 | assert(a != NULL); 727 | assert(out != NULL); 728 | assert(count != 0); 729 | 730 | bitsPerItem = a->bitsPerItem; 731 | 732 | in = &a->buffer[(uint64_t)offset * (uint64_t)bitsPerItem / 32]; 733 | startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32; 734 | 735 | bitsAvailable = 32 - startBit; 736 | 737 | mask = (uint32_t)(1ULL << bitsPerItem) - 1; 738 | 739 | packed = *in; 740 | 741 | while (count--) 742 | { 743 | uint32_t value; 744 | 745 | if (bitsPerItem <= bitsAvailable) 746 | { 747 | value = (packed >> startBit) & mask; 748 | *out++ = value; 749 | 750 | startBit += bitsPerItem; 751 | bitsAvailable -= bitsPerItem; 752 | } 753 | else if (bitsAvailable == 0) 754 | { 755 | packed = *++in; 756 | value = packed & mask; 757 | *out++ = value; 758 | 759 | startBit = bitsPerItem; 760 | bitsAvailable = 32 - bitsPerItem; 761 | } 762 | else 763 | { 764 | // value spans 2 buffer cells 765 | uint32_t low, high; 766 | 767 | low = packed >> startBit; 768 | packed = *++in; 769 | high = packed << (32 - startBit); 770 | 771 | value = low ^ ((low ^ high) & (mask >> bitsAvailable << bitsAvailable)); 772 | *out++ = value; 773 | 774 | startBit = (startBit + bitsPerItem) % 32; 775 | bitsAvailable = 32 - startBit; 776 | } 777 | } 778 | } 779 | 780 | int main(void) 781 | { 782 | uint32_t bitsPerItem; 783 | 784 | printf("-- PackedArray self test -------------------------------------------------------\n"); 785 | printf("\n"); 786 | 787 | printf("sizeof(PackedArray) = %d\n", (int)sizeof(PackedArray)); 788 | 
printf("\n"); 789 | 790 | printf("1 by 1 packing / unpacking:\n"); 791 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 792 | { 793 | uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1; 794 | int count; 795 | 796 | for (count = 1; count <= 1024; ++count) 797 | { 798 | PackedArray* a1 = PackedArray_create(bitsPerItem, count); 799 | PackedArray* a2 = PackedArray_create(bitsPerItem, count); 800 | int i; 801 | 802 | assert(a1->count == (uint32_t)count); 803 | assert(a2->count == (uint32_t)count); 804 | assert(a1->bitsPerItem == bitsPerItem); 805 | assert(a2->bitsPerItem == bitsPerItem); 806 | assert(PackedArray_bufferSize(a1) == PackedArray_bufferSize(a2)); 807 | 808 | for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i) 809 | a1->buffer[i] = a2->buffer[i] = rand(); 810 | 811 | for (i = 0; i < count; ++i) 812 | { 813 | uint32_t v1, v2; 814 | 815 | v1 = rand() & mask; 816 | v2 = v1 + 1; 817 | PackedArray_pack(a1, i, &v1, 1); 818 | PackedArray_pack_reference(a2, i, &v1, 1); 819 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 820 | 821 | PackedArray_set(a1, i, v1); 822 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 823 | 824 | PackedArray_unpack(a1, i, &v2, 1); 825 | assert(v1 == v2); 826 | PackedArray_unpack_reference(a2, i, &v2, 1); 827 | assert(v1 == v2); 828 | v2 = PackedArray_get(a2, i); 829 | assert(v1 == v2); 830 | } 831 | 832 | for (i = count - 1; i >= 0; --i) 833 | { 834 | uint32_t v1, v2; 835 | 836 | v1 = rand() & mask; 837 | v2 = v1 + 1; 838 | PackedArray_pack(a1, i, &v1, 1); 839 | PackedArray_pack_reference(a2, i, &v1, 1); 840 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 841 | 842 | PackedArray_set(a1, i, v1); 843 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 844 | 845 | PackedArray_unpack(a1, i, &v2, 1); 846 | assert(v1 == v2); 847 | 
PackedArray_unpack_reference(a2, i, &v2, 1); 848 | assert(v1 == v2); 849 | v2 = PackedArray_get(a2, i); 850 | assert(v1 == v2); 851 | } 852 | 853 | PackedArray_destroy(a1); 854 | PackedArray_destroy(a2); 855 | } 856 | printf(" %2d bits per item -- success.\n", bitsPerItem); 857 | } 858 | 859 | printf("\n"); 860 | printf("bulk packing / unpacking:\n"); 861 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 862 | { 863 | uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1; 864 | int count; 865 | 866 | for (count = 1; count <= 128; ++count) 867 | { 868 | uint32_t* v1; 869 | uint32_t* v2; 870 | PackedArray* a1; 871 | PackedArray* a2; 872 | int i, j; 873 | 874 | v1 = (uint32_t*)malloc(sizeof(uint32_t) * count); 875 | assert(v1 != NULL); 876 | v2 = (uint32_t*)malloc(sizeof(uint32_t) * count); 877 | assert(v2 != NULL); 878 | 879 | a1 = PackedArray_create(bitsPerItem, count); 880 | assert(a1 != NULL); 881 | a2 = PackedArray_create(bitsPerItem, count); 882 | assert(a2 != NULL); 883 | 884 | for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i) 885 | a1->buffer[i] = a2->buffer[i] = rand(); 886 | 887 | for (i = 0; i < count; ++i) 888 | v1[i] = rand() & mask; 889 | 890 | assert(bitsPerItem >= PackedArray_computeBitsPerItem(v1, count)); 891 | 892 | for (i = 0; i < count; ++i) 893 | { 894 | for (j = 1; j <= count - i; ++j) 895 | { 896 | PackedArray_pack(a1, i, v1, j); 897 | PackedArray_pack_reference(a2, i, v1, j); 898 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 899 | 900 | PackedArray_unpack(a1, i, v2, j); 901 | assert(bitsPerItem >= PackedArray_computeBitsPerItem(v2, j)); 902 | assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0); 903 | PackedArray_unpack_reference(a2, i, v2, j); 904 | assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0); 905 | } 906 | } 907 | 908 | PackedArray_destroy(a1); 909 | PackedArray_destroy(a2); 910 | free(v1); 911 | free(v2); 912 | } 913 | printf(" %2d bits per item -- success.\n", bitsPerItem); 
914 | } 915 | 916 | return 0; 917 | } 918 | 919 | #elif defined(PACKEDARRAY_SELF_BENCH) // #if defined(PACKEDARRAY_SELF_TEST) 920 | 921 | #ifndef NDEBUG 922 | #error please define NDEBUG to inhibit asserts when compiling the benchmark 923 | #endif 924 | 925 | #include 926 | #include 927 | #include 928 | 929 | #ifndef MIN 930 | #define MIN(x, y) (((x) < (y)) ? (x) : (y)) 931 | #endif 932 | 933 | #ifndef MAX 934 | #define MAX(x, y) (((x) < (y)) ? (y) : (x)) 935 | #endif 936 | 937 | #ifdef _MSC_VER 938 | #pragma warning(push, 3) 939 | #include 940 | #pragma warning(pop) 941 | static double getChronometerTime(void) 942 | { 943 | LARGE_INTEGER frequency; 944 | LARGE_INTEGER t; 945 | 946 | QueryPerformanceFrequency(&frequency); 947 | QueryPerformanceCounter(&t); 948 | 949 | return (double)t.QuadPart / (double)frequency.QuadPart * 1000; 950 | } 951 | 952 | #else 953 | #include 954 | 955 | static double getChronometerTime() 956 | { 957 | struct timeval now = { 0 }; 958 | gettimeofday(&now, NULL); 959 | 960 | return (double)now.tv_sec + (double)now.tv_usec * 1e-6; 961 | } 962 | #endif 963 | 964 | #define LOOP_COUNT 1000 965 | static double bench_memcpy(uint32_t* in, uint32_t* out, uint32_t count) 966 | { 967 | double start, end; 968 | uint32_t i; 969 | 970 | start = getChronometerTime(); 971 | 972 | for (i = 0; i < LOOP_COUNT; ++i) 973 | memcpy(out, in, count * sizeof(uint32_t)); 974 | 975 | end = getChronometerTime(); 976 | 977 | return 1e6 * (end - start) / LOOP_COUNT; 978 | } 979 | 980 | static double bench_loopcpy(uint32_t* in, uint32_t* out, uint32_t count) 981 | { 982 | double start, end; 983 | uint32_t i; 984 | 985 | start = getChronometerTime(); 986 | 987 | for (i = 0; i < LOOP_COUNT; ++i) 988 | { 989 | uint32_t j; 990 | 991 | for (j = 0; j < count; ++j) 992 | out[j] = in[j]; 993 | } 994 | 995 | end = getChronometerTime(); 996 | 997 | return 1e6 * (end - start) / LOOP_COUNT; 998 | } 999 | 1000 | static double bench_pack(uint32_t* in, PackedArray* out, uint32_t 
count) 1001 | { 1002 | double start, end; 1003 | int i; 1004 | 1005 | start = getChronometerTime(); 1006 | 1007 | for (i = 0; i < LOOP_COUNT; ++i) 1008 | PackedArray_pack(out, 0, in, count); 1009 | 1010 | end = getChronometerTime(); 1011 | 1012 | return 1e6 * (end - start) / LOOP_COUNT; 1013 | } 1014 | 1015 | static double bench_unpack(PackedArray* in, uint32_t* out, uint32_t count) 1016 | { 1017 | double start, end; 1018 | int i; 1019 | 1020 | start = getChronometerTime(); 1021 | 1022 | for (i = 0; i < LOOP_COUNT; ++i) 1023 | PackedArray_unpack(in, 0, out, count); 1024 | 1025 | end = getChronometerTime(); 1026 | 1027 | return 1e6 * (end - start) / LOOP_COUNT; 1028 | } 1029 | 1030 | #define MAX_ELEMENT_COUNT (1 << 18) 1031 | #define LOG2_MAX_ELEMENT_COUNT 18 1032 | int main(void) 1033 | { 1034 | double start, end; 1035 | uint32_t* b1; 1036 | uint32_t* b2; 1037 | uint32_t count, bitsPerItem; 1038 | PackedArray** packed; 1039 | uint32_t i; 1040 | double* speed_memcpy; 1041 | double avg_memcpy, min_memcpy, max_memcpy; 1042 | double* speed_loopcpy; 1043 | double avg_loopcpy, min_loopcpy, max_loopcpy; 1044 | double* speed_pack[32]; 1045 | double avg_pack, min_pack, max_pack; 1046 | double* speed_unpack[32]; 1047 | double avg_unpack, min_unpack, max_unpack; 1048 | 1049 | printf("-- PackedArray self bench ------------------------------------------------------\n"); 1050 | 1051 | start = getChronometerTime(); 1052 | 1053 | b1 = (uint32_t*)malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT); 1054 | assert(b1 != NULL); 1055 | b2 = (uint32_t*)malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT); 1056 | assert(b2 != NULL); 1057 | 1058 | packed = (PackedArray**)malloc(sizeof(PackedArray*) * 32); 1059 | assert(packed != NULL); 1060 | for (i = 0; i < 32; ++i) 1061 | packed[i] = PackedArray_create(i + 1, MAX_ELEMENT_COUNT); 1062 | 1063 | for (i = 0; i < MAX_ELEMENT_COUNT; ++i) 1064 | b1[i] = rand(); 1065 | 1066 | speed_memcpy = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1067 
| assert(speed_memcpy != NULL); 1068 | avg_memcpy = 0; 1069 | min_memcpy = DBL_MAX; 1070 | max_memcpy = 0; 1071 | 1072 | printf("memcpy:\n"); 1073 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n"); 1074 | 1075 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1076 | { 1077 | double elapsed = bench_memcpy(b1, b2, count); 1078 | double speed = count * sizeof(uint32_t) / elapsed; 1079 | printf("%4d\t%8d\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1080 | 1081 | avg_memcpy += speed; 1082 | min_memcpy = MIN(min_memcpy, speed); 1083 | max_memcpy = MAX(max_memcpy, speed); 1084 | 1085 | speed_memcpy[i] = speed; 1086 | } 1087 | 1088 | avg_memcpy /= i; 1089 | 1090 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n"); 1091 | printf("%10.3f\t%10.3f\t%10.3f\n", avg_memcpy, min_memcpy, max_memcpy); 1092 | printf("\n"); 1093 | 1094 | speed_loopcpy = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1095 | assert(speed_loopcpy != NULL); 1096 | avg_loopcpy = 0; 1097 | min_loopcpy = DBL_MAX; 1098 | max_loopcpy = 0; 1099 | 1100 | printf("loopcpy:\n"); 1101 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n"); 1102 | 1103 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1104 | { 1105 | double elapsed = bench_loopcpy(b1, b2, count); 1106 | double speed = count * sizeof(uint32_t) / elapsed; 1107 | printf("%4d\t%8d\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1108 | 1109 | avg_loopcpy += speed; 1110 | min_loopcpy = MIN(min_loopcpy, speed); 1111 | max_loopcpy = MAX(max_loopcpy, speed); 1112 | 1113 | speed_loopcpy[i] = speed; 1114 | } 1115 | 1116 | avg_loopcpy /= i; 1117 | 1118 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n"); 1119 | printf("%10.3f\t%10.3f\t%10.3f\n", avg_loopcpy, min_loopcpy, max_loopcpy); 1120 | printf("\n"); 1121 | 1122 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1123 | { 1124 | avg_pack = 0; 1125 | min_pack = DBL_MAX; 1126 | max_pack = 
0; 1127 | avg_unpack = 0; 1128 | min_unpack = DBL_MAX; 1129 | max_unpack = 0; 1130 | 1131 | printf("pack:\t \t \t \t"); 1132 | printf("unpack:\t \t \t \t"); 1133 | printf("\n"); 1134 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)"); 1135 | printf("\t"); 1136 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)"); 1137 | printf("\n"); 1138 | 1139 | speed_pack[bitsPerItem - 1] = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1140 | assert(speed_pack[bitsPerItem - 1] != NULL); 1141 | speed_unpack[bitsPerItem - 1] = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1142 | assert(speed_unpack[bitsPerItem - 1] != NULL); 1143 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1144 | { 1145 | uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1; 1146 | uint32_t j; 1147 | double elapsed, speed; 1148 | 1149 | for (j = 0; j < count; ++j) 1150 | b2[j] = b1[j] & mask; 1151 | 1152 | elapsed = bench_pack(b2, packed[bitsPerItem - 1], count); 1153 | speed = count * sizeof(uint32_t) / elapsed; 1154 | printf("%4d\t%8d\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1155 | 1156 | avg_pack += speed; 1157 | min_pack = MIN(min_pack, speed); 1158 | max_pack = MAX(max_pack, speed); 1159 | 1160 | speed_pack[bitsPerItem - 1][i] = speed; 1161 | 1162 | printf("\t"); 1163 | 1164 | elapsed = bench_unpack(packed[bitsPerItem - 1], b2, count); 1165 | speed = count * sizeof(uint32_t) / elapsed; 1166 | printf("%4d\t%8d\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1167 | 1168 | avg_unpack += speed; 1169 | min_unpack = MIN(min_unpack, speed); 1170 | max_unpack = MAX(max_unpack, speed); 1171 | 1172 | speed_unpack[bitsPerItem - 1][i] = speed; 1173 | 1174 | printf("\n"); 1175 | } 1176 | assert(i == LOG2_MAX_ELEMENT_COUNT + 1); 1177 | 1178 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1179 | printf("\t\t"); 1180 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1181 | printf("\n"); 
1182 | 1183 | avg_pack /= i; 1184 | printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack); 1185 | 1186 | printf("\t\t"); 1187 | 1188 | avg_unpack /= i; 1189 | printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack); 1190 | printf("\n"); 1191 | printf("\n"); 1192 | } 1193 | 1194 | printf("\n"); 1195 | 1196 | printf("stats by bits per item\n"); 1197 | printf("pack:\t \t \t \t"); 1198 | printf("unpack:\t \t \t \t"); 1199 | printf("\n"); 1200 | printf("bits\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1201 | printf("\t"); 1202 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1203 | printf("\n"); 1204 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1205 | { 1206 | avg_pack = 0; 1207 | min_pack = DBL_MAX; 1208 | max_pack = 0; 1209 | avg_unpack = 0; 1210 | min_unpack = DBL_MAX; 1211 | max_unpack = 0; 1212 | 1213 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1214 | { 1215 | double speed; 1216 | 1217 | speed = speed_pack[bitsPerItem - 1][i]; 1218 | 1219 | avg_pack += speed; 1220 | min_pack = MIN(min_pack, speed); 1221 | max_pack = MAX(max_pack, speed); 1222 | 1223 | speed = speed_unpack[bitsPerItem - 1][i]; 1224 | 1225 | avg_unpack += speed; 1226 | min_unpack = MIN(min_unpack, speed); 1227 | max_unpack = MAX(max_unpack, speed); 1228 | } 1229 | assert(i == LOG2_MAX_ELEMENT_COUNT + 1); 1230 | 1231 | printf("%4d\t", bitsPerItem); 1232 | 1233 | avg_pack /= i; 1234 | printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack); 1235 | printf("\t"); 1236 | 1237 | avg_unpack /= i; 1238 | printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack); 1239 | printf("\n"); 1240 | } 1241 | 1242 | printf("\n"); 1243 | 1244 | printf("stats by size\n"); 1245 | printf("pack:\t \t \t \t"); 1246 | printf("unpack:\t \t \t \t"); 1247 | printf("\n"); 1248 | printf("size(B)\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1249 | printf("\t"); 1250 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1251 | printf("\n"); 1252 | for (count = 1, i = 0; 
count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1253 | { 1254 | avg_pack = 0; 1255 | min_pack = DBL_MAX; 1256 | max_pack = 0; 1257 | avg_unpack = 0; 1258 | min_unpack = DBL_MAX; 1259 | max_unpack = 0; 1260 | 1261 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1262 | { 1263 | double speed; 1264 | 1265 | speed = speed_pack[bitsPerItem - 1][i]; 1266 | 1267 | avg_pack += speed; 1268 | min_pack = MIN(min_pack, speed); 1269 | max_pack = MAX(max_pack, speed); 1270 | 1271 | speed = speed_unpack[bitsPerItem - 1][i]; 1272 | 1273 | avg_unpack += speed; 1274 | min_unpack = MIN(min_unpack, speed); 1275 | max_unpack = MAX(max_unpack, speed); 1276 | } 1277 | 1278 | printf("%7d\t", (uint32_t)sizeof(uint32_t) * count); 1279 | 1280 | avg_pack /= 32; 1281 | printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack); 1282 | printf("\t"); 1283 | 1284 | avg_unpack /= 32; 1285 | printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack); 1286 | printf("\n"); 1287 | } 1288 | 1289 | printf("\n"); 1290 | 1291 | free(b1); 1292 | free(b2); 1293 | free(speed_memcpy); 1294 | free(speed_loopcpy); 1295 | 1296 | for (i = 0; i < 32; ++i) 1297 | { 1298 | PackedArray_destroy(packed[i]); 1299 | free(speed_pack[i]); 1300 | free(speed_unpack[i]); 1301 | } 1302 | 1303 | free(packed); 1304 | 1305 | end = getChronometerTime(); 1306 | printf("total time (s): %f\n", (end - start)); 1307 | printf("\n"); 1308 | 1309 | return 0; 1310 | } 1311 | 1312 | #endif // #elif defined(PACKEDARRAY_SELF_BENCH) 1313 | 1314 | #endif // #ifdef PACKEDARRAY_IMPL 1315 | -------------------------------------------------------------------------------- /PackedArray.h: -------------------------------------------------------------------------------- 1 | #ifndef PACKEDARRAY_H 2 | #define PACKEDARRAY_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | /* 11 | 12 | PackedArray principle: 13 | . compact storage of <= 32 bits items 14 | . 
items are tightly packed into a buffer of uint32_t integers 15 | 16 | PackedArray requirements: 17 | . you must know in advance how many bits are needed to hold a single item 18 | . you must know in advance how many items you want to store 19 | . when packing, behavior is undefined if items have more than bitsPerItem bits 20 | 21 | PackedArray general in memory representation: 22 | |-------------------------------------------------- - - - 23 | | b0 | b1 | b2 | 24 | |-------------------------------------------------- - - - 25 | | i0 | i1 | i2 | i3 | i4 | i5 | i6 | i7 | i8 | i9 | 26 | |-------------------------------------------------- - - - 27 | 28 | . items are tightly packed together 29 | . several items end up inside the same buffer cell, e.g. i0, i1, i2 30 | . some items span two buffer cells, e.g. i3, i6 31 | 32 | */ 33 | 34 | struct _PackedArray 35 | { 36 | uint32_t bitsPerItem; 37 | uint32_t count; 38 | 39 | uint32_t padding[2]; 40 | #ifdef _MSC_VER 41 | #pragma warning(push) 42 | #pragma warning(disable: 4200) 43 | #endif // #ifdef _MSC_VER 44 | uint32_t buffer[]; 45 | #ifdef _MSC_VER 46 | #pragma warning(pop) 47 | #endif // #ifdef _MSC_VER 48 | }; 49 | typedef struct _PackedArray PackedArray; 50 | 51 | // creation / destruction 52 | PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count); 53 | void PackedArray_destroy(PackedArray* a); 54 | 55 | // packing / unpacking 56 | // offset is expressed in number of elements 57 | void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count); 58 | void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count); 59 | 60 | // single item access 61 | void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in); 62 | uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset); 63 | 64 | // helpers 65 | uint32_t PackedArray_bufferSize(const PackedArray* a); 66 | uint32_t PackedArray_computeBitsPerItem(const 
uint32_t* in, uint32_t count); 67 | 68 | #ifdef __cplusplus 69 | } 70 | #endif 71 | 72 | #endif // #ifndef PACKEDARRAY_H 73 | -------------------------------------------------------------------------------- /PackedArraySIMD.c: -------------------------------------------------------------------------------- 1 | // see README.md for usage instructions. 2 | // (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz) 3 | 4 | #ifndef PACKEDARRAY_SELF 5 | #define PACKEDARRAY_SELF "PackedArraySIMD.c" 6 | #endif 7 | 8 | #ifdef PACKEDARRAY_IMPL 9 | 10 | #ifndef PACKEDARRAY_JOIN 11 | #define PACKEDARRAY_JOIN(lhs, rhs) PACKEDARRAY_JOIN_(lhs, rhs) 12 | #define PACKEDARRAY_JOIN_(lhs, rhs) PACKEDARRAY_JOIN__(lhs, rhs) 13 | #define PACKEDARRAY_JOIN__(lhs, rhs) lhs##rhs 14 | #endif // #ifndef PACKEDARRAY_JOIN 15 | 16 | #ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM 17 | #error PACKEDARRAY_IMPL_BITS_PER_ITEM undefined 18 | #endif // #ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM 19 | 20 | #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES) 21 | 22 | #ifndef PACKEDARRAY_IMPL_CASE_I 23 | #define PACKEDARRAY_IMPL_CASE_I 0 24 | #elif PACKEDARRAY_IMPL_CASE_I == 0 25 | #undef PACKEDARRAY_IMPL_CASE_I 26 | #define PACKEDARRAY_IMPL_CASE_I 1 27 | #elif PACKEDARRAY_IMPL_CASE_I == 1 28 | #undef PACKEDARRAY_IMPL_CASE_I 29 | #define PACKEDARRAY_IMPL_CASE_I 2 30 | #elif PACKEDARRAY_IMPL_CASE_I == 2 31 | #undef PACKEDARRAY_IMPL_CASE_I 32 | #define PACKEDARRAY_IMPL_CASE_I 3 33 | #elif PACKEDARRAY_IMPL_CASE_I == 3 34 | #undef PACKEDARRAY_IMPL_CASE_I 35 | #define PACKEDARRAY_IMPL_CASE_I 4 36 | #elif PACKEDARRAY_IMPL_CASE_I == 4 37 | #undef PACKEDARRAY_IMPL_CASE_I 38 | #define PACKEDARRAY_IMPL_CASE_I 5 39 | #elif PACKEDARRAY_IMPL_CASE_I == 5 40 | #undef PACKEDARRAY_IMPL_CASE_I 41 | #define PACKEDARRAY_IMPL_CASE_I 6 42 | #elif PACKEDARRAY_IMPL_CASE_I == 6 43 | #undef PACKEDARRAY_IMPL_CASE_I 44 | #define PACKEDARRAY_IMPL_CASE_I 7 45 | #elif PACKEDARRAY_IMPL_CASE_I == 7 46 
| #undef PACKEDARRAY_IMPL_CASE_I 47 | #define PACKEDARRAY_IMPL_CASE_I 8 48 | #elif PACKEDARRAY_IMPL_CASE_I == 8 49 | #undef PACKEDARRAY_IMPL_CASE_I 50 | #define PACKEDARRAY_IMPL_CASE_I 9 51 | #elif PACKEDARRAY_IMPL_CASE_I == 9 52 | #undef PACKEDARRAY_IMPL_CASE_I 53 | #define PACKEDARRAY_IMPL_CASE_I 10 54 | #elif PACKEDARRAY_IMPL_CASE_I == 10 55 | #undef PACKEDARRAY_IMPL_CASE_I 56 | #define PACKEDARRAY_IMPL_CASE_I 11 57 | #elif PACKEDARRAY_IMPL_CASE_I == 11 58 | #undef PACKEDARRAY_IMPL_CASE_I 59 | #define PACKEDARRAY_IMPL_CASE_I 12 60 | #elif PACKEDARRAY_IMPL_CASE_I == 12 61 | #undef PACKEDARRAY_IMPL_CASE_I 62 | #define PACKEDARRAY_IMPL_CASE_I 13 63 | #elif PACKEDARRAY_IMPL_CASE_I == 13 64 | #undef PACKEDARRAY_IMPL_CASE_I 65 | #define PACKEDARRAY_IMPL_CASE_I 14 66 | #elif PACKEDARRAY_IMPL_CASE_I == 14 67 | #undef PACKEDARRAY_IMPL_CASE_I 68 | #define PACKEDARRAY_IMPL_CASE_I 15 69 | #elif PACKEDARRAY_IMPL_CASE_I == 15 70 | #undef PACKEDARRAY_IMPL_CASE_I 71 | #define PACKEDARRAY_IMPL_CASE_I 16 72 | #elif PACKEDARRAY_IMPL_CASE_I == 16 73 | #undef PACKEDARRAY_IMPL_CASE_I 74 | #define PACKEDARRAY_IMPL_CASE_I 17 75 | #elif PACKEDARRAY_IMPL_CASE_I == 17 76 | #undef PACKEDARRAY_IMPL_CASE_I 77 | #define PACKEDARRAY_IMPL_CASE_I 18 78 | #elif PACKEDARRAY_IMPL_CASE_I == 18 79 | #undef PACKEDARRAY_IMPL_CASE_I 80 | #define PACKEDARRAY_IMPL_CASE_I 19 81 | #elif PACKEDARRAY_IMPL_CASE_I == 19 82 | #undef PACKEDARRAY_IMPL_CASE_I 83 | #define PACKEDARRAY_IMPL_CASE_I 20 84 | #elif PACKEDARRAY_IMPL_CASE_I == 20 85 | #undef PACKEDARRAY_IMPL_CASE_I 86 | #define PACKEDARRAY_IMPL_CASE_I 21 87 | #elif PACKEDARRAY_IMPL_CASE_I == 21 88 | #undef PACKEDARRAY_IMPL_CASE_I 89 | #define PACKEDARRAY_IMPL_CASE_I 22 90 | #elif PACKEDARRAY_IMPL_CASE_I == 22 91 | #undef PACKEDARRAY_IMPL_CASE_I 92 | #define PACKEDARRAY_IMPL_CASE_I 23 93 | #elif PACKEDARRAY_IMPL_CASE_I == 23 94 | #undef PACKEDARRAY_IMPL_CASE_I 95 | #define PACKEDARRAY_IMPL_CASE_I 24 96 | #elif PACKEDARRAY_IMPL_CASE_I == 24 97 | #undef 
PACKEDARRAY_IMPL_CASE_I 98 | #define PACKEDARRAY_IMPL_CASE_I 25 99 | #elif PACKEDARRAY_IMPL_CASE_I == 25 100 | #undef PACKEDARRAY_IMPL_CASE_I 101 | #define PACKEDARRAY_IMPL_CASE_I 26 102 | #elif PACKEDARRAY_IMPL_CASE_I == 26 103 | #undef PACKEDARRAY_IMPL_CASE_I 104 | #define PACKEDARRAY_IMPL_CASE_I 27 105 | #elif PACKEDARRAY_IMPL_CASE_I == 27 106 | #undef PACKEDARRAY_IMPL_CASE_I 107 | #define PACKEDARRAY_IMPL_CASE_I 28 108 | #elif PACKEDARRAY_IMPL_CASE_I == 28 109 | #undef PACKEDARRAY_IMPL_CASE_I 110 | #define PACKEDARRAY_IMPL_CASE_I 29 111 | #elif PACKEDARRAY_IMPL_CASE_I == 29 112 | #undef PACKEDARRAY_IMPL_CASE_I 113 | #define PACKEDARRAY_IMPL_CASE_I 30 114 | #elif PACKEDARRAY_IMPL_CASE_I == 30 115 | #undef PACKEDARRAY_IMPL_CASE_I 116 | #define PACKEDARRAY_IMPL_CASE_I 31 117 | #elif PACKEDARRAY_IMPL_CASE_I == 31 118 | #undef PACKEDARRAY_IMPL_CASE_I 119 | #define PACKEDARRAY_IMPL_CASE_I 32 120 | #endif // #ifndef PACKEDARRAY_IMPL_CASE_I 121 | 122 | #ifndef PACKEDARRAY_IMPL_BITS_AVAILABLE 123 | #define PACKEDARRAY_IMPL_BITS_AVAILABLE (32 - ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32)) 124 | #endif 125 | #ifndef PACKEDARRAY_IMPL_START_BIT 126 | #define PACKEDARRAY_IMPL_START_BIT ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32) 127 | #endif 128 | 129 | #if defined(PACKEDARRAY_IMPL_PACK_CASES) 130 | 131 | #ifndef PACKEDARRAY_IMPL_PACK_CASE_BREAK 132 | #define PACKEDARRAY_IMPL_PACK_CASE_BREAK 133 | #endif 134 | 135 | case PACKEDARRAY_IMPL_CASE_I: 136 | #if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE) 137 | in_4 = PackedArray_loadu_uint32x4(in); 138 | packed = PackedArray_vsli0_uint32x4(packed, in_4, PACKEDARRAY_IMPL_START_BIT); 139 | in += 4; 140 | #if (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE) 141 | PackedArray_store_uint32x4(out, packed); 142 | out += 4; 143 | packed = PackedArray_uint32x4_zero; 144 | #endif 145 | #else 146 | in_4 = PackedArray_loadu_uint32x4(in); 147 | packed = 
PackedArray_vsli0_uint32x4(packed, in_4, PACKEDARRAY_IMPL_START_BIT); 148 | PackedArray_store_uint32x4(out, packed); 149 | out += 4; 150 | packed = PackedArray_shr_uint32x4(in_4, PACKEDARRAY_IMPL_BITS_AVAILABLE); 151 | in += 4; 152 | #endif 153 | PACKEDARRAY_IMPL_PACK_CASE_BREAK 154 | 155 | #if PACKEDARRAY_IMPL_CASE_I < 31 156 | #include PACKEDARRAY_SELF 157 | #else 158 | #undef PACKEDARRAY_IMPL_CASE_I 159 | #undef PACKEDARRAY_IMPL_PACK_CASE_BREAK 160 | #undef PACKEDARRAY_IMPL_PACK_CASES 161 | #endif 162 | 163 | #elif defined(PACKEDARRAY_IMPL_UNPACK_CASES) // #if defined(PACKEDARRAY_IMPL_PACK_CASES) 164 | 165 | #ifndef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 166 | #define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 167 | #endif 168 | 169 | case PACKEDARRAY_IMPL_CASE_I: 170 | #if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE) 171 | out_4 = PackedArray_and_uint32x4(PackedArray_shr_uint32x4(packed, PACKEDARRAY_IMPL_START_BIT), PackedArray_set_uint32x4(PACKEDARRAY_IMPL_MASK)); 172 | PackedArray_storeu_uint32x4(out, out_4); 173 | out += 4; 174 | PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 175 | #if (PACKEDARRAY_IMPL_CASE_I < 31) && (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE) 176 | in += 4; 177 | packed = PackedArray_load_uint32x4(in); 178 | #endif 179 | #else 180 | out_4 = PackedArray_shr_uint32x4(packed, PACKEDARRAY_IMPL_START_BIT); 181 | in += 4; 182 | packed = PackedArray_load_uint32x4(in); 183 | out_4 = PackedArray_vsli0_uint32x4(out_4, packed, PACKEDARRAY_IMPL_BITS_AVAILABLE); 184 | out_4 = PackedArray_and_uint32x4(out_4, PackedArray_set_uint32x4(PACKEDARRAY_IMPL_MASK)); 185 | PackedArray_storeu_uint32x4(out, out_4); 186 | out += 4; 187 | PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 188 | #endif 189 | 190 | #if PACKEDARRAY_IMPL_CASE_I < 31 191 | #include PACKEDARRAY_SELF 192 | #else 193 | #undef PACKEDARRAY_IMPL_CASE_I 194 | #undef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK 195 | #undef PACKEDARRAY_IMPL_UNPACK_CASES 196 | #endif 197 | 198 | #endif // #elif 
defined(PACKEDARRAY_IMPL_UNPACK_CASES) 199 | 200 | #else // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES) 201 | 202 | #ifndef PACKEDARRAY_IMPL_MASK 203 | #define PACKEDARRAY_IMPL_MASK (uint32_t)((1ULL << PACKEDARRAY_IMPL_BITS_PER_ITEM) - 1) 204 | #endif 205 | 206 | void PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count) 207 | { 208 | uint32_t pre, post; 209 | uint32_t* __restrict out; 210 | const uint32_t* __restrict end; 211 | uint32_t startBit; 212 | PackedArray_uint32x4_t packed, in_4, mask; 213 | uint32_t offset_4; 214 | 215 | pre = (offset + 3) / 4 * 4 - offset; 216 | pre = pre > count ? count : pre; 217 | 218 | if (pre > 0) 219 | { 220 | __PackedArray_pack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, in, pre); 221 | offset += pre; 222 | in += pre; 223 | count -= pre; 224 | } 225 | 226 | post = count % 4; 227 | count -= post; 228 | 229 | if (count > 0) 230 | { 231 | out = &buffer[(offset / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32 * 4]; 232 | startBit = (offset / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32; 233 | packed = PackedArray_load_uint32x4(out); 234 | mask = PackedArray_sub_uint32x4(PackedArray_shl_uint32x4(PackedArray_set_uint32x4(1), startBit), PackedArray_set_uint32x4(1)); 235 | packed = PackedArray_and_uint32x4(packed, mask); 236 | 237 | offset_4 = offset % 128; 238 | offset += count; 239 | 240 | if (count >= 128 - offset_4) 241 | { 242 | int32_t n; 243 | 244 | n = (count + offset_4) / 128; 245 | count -= 128 * n - offset_4; 246 | switch (offset_4 / 4) 247 | { 248 | do 249 | { 250 | #define PACKEDARRAY_IMPL_PACK_CASES 251 | #include PACKEDARRAY_SELF 252 | } while (--n > 0); 253 | } 254 | 255 | if (count == 0) 256 | goto PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post); 257 | 258 | offset_4 = 0; 259 | startBit = 0; 260 | } 261 | 
262 | end = in + count; 263 | switch (offset_4 / 4) 264 | { 265 | #define PACKEDARRAY_IMPL_PACK_CASES 266 | #define PACKEDARRAY_IMPL_PACK_CASE_BREAK \ 267 | if (in == end)\ 268 | break; 269 | #include PACKEDARRAY_SELF 270 | } 271 | PACKEDARRAY_ASSERT(in == end); 272 | if ((count / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit) % 32) 273 | { 274 | in_4 = PackedArray_loadu_uint32x4(out); 275 | mask = PackedArray_sub_uint32x4(PackedArray_shl_uint32x4(PackedArray_set_uint32x4(1), ((count / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit - 1) % 32) + 1), PackedArray_set_uint32x4(1)); 276 | in_4 = PackedArray_andnot_uint32x4(in_4, mask); 277 | packed = PackedArray_or_uint32x4(packed, in_4); 278 | PackedArray_store_uint32x4(out, packed); 279 | } 280 | } 281 | 282 | PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post): 283 | if (post > 0) 284 | __PackedArray_pack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, in, post); 285 | } 286 | 287 | void PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count) 288 | { 289 | uint32_t pre, post; 290 | const uint32_t* __restrict in; 291 | const uint32_t* __restrict end; 292 | PackedArray_uint32x4_t packed, out_4; 293 | uint32_t offset_4; 294 | 295 | pre = (offset + 3) / 4 * 4 - offset; 296 | pre = pre > count ? 
count : pre; 297 | 298 | if (pre > 0) 299 | { 300 | __PackedArray_unpack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, out, pre); 301 | offset += pre; 302 | out += pre; 303 | count -= pre; 304 | } 305 | 306 | post = count % 4; 307 | count -= post; 308 | 309 | if (count > 0) 310 | { 311 | in = &buffer[(offset / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32 * 4]; 312 | packed = PackedArray_load_uint32x4(in); 313 | 314 | offset_4 = offset % 128; 315 | offset += count; 316 | 317 | if (count >= 128 - offset_4) 318 | { 319 | int32_t n; 320 | 321 | n = (count + offset_4) / 128; 322 | count -= 128 * n - offset_4; 323 | switch (offset_4 / 4) 324 | { 325 | do 326 | { 327 | in += 4; 328 | packed = PackedArray_load_uint32x4(in); 329 | #define PACKEDARRAY_IMPL_UNPACK_CASES 330 | #include PACKEDARRAY_SELF 331 | } while (--n > 0); 332 | } 333 | 334 | if (count == 0) 335 | goto PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post); 336 | 337 | in += 4; 338 | packed = PackedArray_loadu_uint32x4(in); 339 | offset_4 = 0; 340 | } 341 | 342 | end = out + count; 343 | switch (offset_4 / 4) 344 | { 345 | #define PACKEDARRAY_IMPL_UNPACK_CASES 346 | #define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK \ 347 | if (out == end)\ 348 | break; 349 | #include PACKEDARRAY_SELF 350 | } 351 | PACKEDARRAY_ASSERT(out == end); 352 | } 353 | 354 | PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post): 355 | if (post > 0) 356 | __PackedArray_unpack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, out, post); 357 | } 358 | 359 | #undef PACKEDARRAY_IMPL_BITS_PER_ITEM 360 | #undef PACKEDARRAY_IMPL_BITS_AVAILABLE 361 | #undef PACKEDARRAY_IMPL_START_BIT 362 | #undef PACKEDARRAY_IMPL_START_MASK 363 | 364 | #endif // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES) 365 | 366 | #else 367 | 368 | #include "PackedArray.h" 369 | 370 | #if 
!defined(PACKEDARRAY_ASSERT) 371 | #include 372 | #define PACKEDARRAY_ASSERT(expression) assert(expression) 373 | #endif 374 | 375 | #include 376 | 377 | static void __PackedArray_pack_scalar(uint32_t* buffer, const uint32_t bitsPerItem, const uint32_t mask, uint32_t offset, const uint32_t* in, uint32_t count) 378 | { 379 | uint32_t* __restrict out; 380 | uint32_t startBit; 381 | uint32_t bitsAvailable; 382 | 383 | PACKEDARRAY_ASSERT(buffer != NULL); 384 | PACKEDARRAY_ASSERT(in != NULL); 385 | PACKEDARRAY_ASSERT(count != 0); 386 | 387 | while (count--) 388 | { 389 | uint32_t value = *in++; 390 | 391 | PACKEDARRAY_ASSERT(0 == (~mask & value)); 392 | 393 | out = &buffer[((uint64_t)offset / 4 * (uint64_t)bitsPerItem) / 32 * 4 + offset % 4]; 394 | startBit = ((uint64_t)offset / 4 * (uint64_t)bitsPerItem) % 32; 395 | ++offset; 396 | 397 | bitsAvailable = 32 - startBit; 398 | 399 | if (bitsPerItem <= bitsAvailable) 400 | { 401 | out[0] = (out[0] & ~(mask << startBit)) | (value << startBit); 402 | } 403 | else 404 | { 405 | // value spans 2 buffer cells 406 | uint32_t low, high; 407 | 408 | low = value << startBit; 409 | high = value >> bitsAvailable; 410 | 411 | out[0] = (out[0] & ~(mask << startBit)) | low; 412 | 413 | out[4] = (out[4] & ~(mask >> bitsAvailable)) | high; 414 | } 415 | } 416 | } 417 | 418 | static void __PackedArray_unpack_scalar(const uint32_t* buffer, const uint32_t bitsPerItem, const uint32_t mask, uint32_t offset, uint32_t* out, uint32_t count) 419 | { 420 | const uint32_t* __restrict in; 421 | uint32_t startBit; 422 | uint32_t bitsAvailable; 423 | 424 | PACKEDARRAY_ASSERT(buffer != NULL); 425 | PACKEDARRAY_ASSERT(out != NULL); 426 | PACKEDARRAY_ASSERT(count != 0); 427 | 428 | while (count--) 429 | { 430 | uint32_t value; 431 | 432 | in = &buffer[((uint64_t)offset / 4 * (uint64_t)bitsPerItem) / 32 * 4 + offset % 4]; 433 | startBit = ((uint64_t)offset / 4 * (uint64_t)bitsPerItem) % 32; 434 | ++offset; 435 | 436 | bitsAvailable = 32 - startBit; 437 | 
438 | if (bitsPerItem <= bitsAvailable) 439 | { 440 | value = (in[0] >> startBit) & mask; 441 | } 442 | else 443 | { 444 | // value spans 2 buffer cells 445 | uint32_t low, high; 446 | 447 | low = in[0] >> startBit; 448 | high = in[4] << bitsAvailable; 449 | 450 | value = (low | high) & mask; 451 | } 452 | *out++ = value; 453 | } 454 | } 455 | 456 | #if defined(__SSE2__) || defined(_M_IX86) || defined(_M_X64) 457 | 458 | #include 459 | 460 | #define PackedArray_uint32x4_t __m128i 461 | #define PackedArray_uint32x4_zero _mm_setzero_si128() 462 | #define PackedArray_set_uint32x4(i) _mm_set1_epi32(i) 463 | #define PackedArray_sub_uint32x4(lhs, rhs) _mm_sub_epi32(lhs, rhs) 464 | #define PackedArray_loadu_uint32x4(ptr) _mm_loadu_si128((const __m128i*)ptr) 465 | #define PackedArray_storeu_uint32x4(ptr, v) _mm_storeu_si128((__m128i*)ptr, v) 466 | #define PackedArray_load_uint32x4(ptr) _mm_load_si128((const __m128i*)ptr) 467 | #define PackedArray_store_uint32x4(ptr, v) _mm_store_si128((__m128i*)ptr, v) 468 | #define PackedArray_shl_uint32x4(v, shift) _mm_slli_epi32(v, shift) 469 | #define PackedArray_shr_uint32x4(v, shift) _mm_srli_epi32(v, shift) 470 | #define PackedArray_or_uint32x4(lhs, rhs) _mm_or_si128(lhs, rhs) 471 | #define PackedArray_and_uint32x4(lhs, rhs) _mm_and_si128(lhs, rhs) 472 | #define PackedArray_andnot_uint32x4(lhs, rhs) _mm_andnot_si128(rhs, lhs) 473 | // assumes dst bits are cleared at inserted bit positions 474 | #define PackedArray_vsli0_uint32x4(dst, src, shift) PackedArray_or_uint32x4(dst, PackedArray_shl_uint32x4(src, shift)) 475 | 476 | #elif defined(__ARM_NEON__) || defined(_M_ARM) 477 | 478 | #include 479 | 480 | #define PackedArray_uint32x4_t uint32x4_t 481 | #define PackedArray_uint32x4_zero vdupq_n_u32(0) 482 | #define PackedArray_set_uint32x4(i) vdupq_n_u32(i) 483 | #define PackedArray_sub_uint32x4(lhs, rhs) vsubq_u32(lhs, rhs) 484 | #define PackedArray_loadu_uint32x4(ptr) vld1q_u32((const uint32_t*)ptr) 485 | #define 
PackedArray_storeu_uint32x4(ptr, v) vst1q_u32(ptr, v) 486 | #if defined(__GNUC__) 487 | // because __builtin_assume_aligned isn't always available... 488 | 489 | typedef uint32_t __attribute__((aligned(16))) PackedArray_aligned_uint32_t; 490 | #define PackedArray_load_uint32x4(ptr) vld1q_u32((const PackedArray_aligned_uint32_t*)ptr) 491 | #define PackedArray_store_uint32x4(ptr, v) vst1q_u32((PackedArray_aligned_uint32_t*)ptr, v) 492 | #else 493 | #define PackedArray_load_uint32x4(ptr) vld1q_u32((const uint32_t*)ptr) 494 | #define PackedArray_store_uint32x4(ptr, v) vst1q_u32(ptr, v) 495 | #endif 496 | #define PackedArray_shl_uint32x4(v, shift) vshlq_u32(v, vdupq_n_s32(shift)) 497 | #define PackedArray_shr_uint32x4(v, shift) vshlq_u32(v, vdupq_n_s32(-shift)) 498 | #define PackedArray_or_uint32x4(lhs, rhs) vorrq_u32(lhs, rhs) 499 | #define PackedArray_and_uint32x4(lhs, rhs) vandq_u32(lhs, rhs) 500 | #define PackedArray_andnot_uint32x4(lhs, rhs) vbicq_u32(lhs, rhs) 501 | // assumes dst bits are cleared at inserted bit positions 502 | #define PackedArray_vsli0_uint32x4(dst, src, shift) vsliq_n_u32(dst, src, shift) 503 | 504 | #else 505 | 506 | #error unsupported SIMD platform 507 | 508 | #endif 509 | 510 | #define PACKEDARRAY_IMPL 511 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 1 512 | #include PACKEDARRAY_SELF 513 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 2 514 | #include PACKEDARRAY_SELF 515 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 3 516 | #include PACKEDARRAY_SELF 517 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 4 518 | #include PACKEDARRAY_SELF 519 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 5 520 | #include PACKEDARRAY_SELF 521 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 6 522 | #include PACKEDARRAY_SELF 523 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 7 524 | #include PACKEDARRAY_SELF 525 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 8 526 | #include PACKEDARRAY_SELF 527 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 9 528 | #include PACKEDARRAY_SELF 529 | #define 
PACKEDARRAY_IMPL_BITS_PER_ITEM 10 530 | #include PACKEDARRAY_SELF 531 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 11 532 | #include PACKEDARRAY_SELF 533 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 12 534 | #include PACKEDARRAY_SELF 535 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 13 536 | #include PACKEDARRAY_SELF 537 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 14 538 | #include PACKEDARRAY_SELF 539 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 15 540 | #include PACKEDARRAY_SELF 541 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 16 542 | #include PACKEDARRAY_SELF 543 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 17 544 | #include PACKEDARRAY_SELF 545 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 18 546 | #include PACKEDARRAY_SELF 547 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 19 548 | #include PACKEDARRAY_SELF 549 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 20 550 | #include PACKEDARRAY_SELF 551 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 21 552 | #include PACKEDARRAY_SELF 553 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 22 554 | #include PACKEDARRAY_SELF 555 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 23 556 | #include PACKEDARRAY_SELF 557 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 24 558 | #include PACKEDARRAY_SELF 559 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 25 560 | #include PACKEDARRAY_SELF 561 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 26 562 | #include PACKEDARRAY_SELF 563 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 27 564 | #include PACKEDARRAY_SELF 565 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 28 566 | #include PACKEDARRAY_SELF 567 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 29 568 | #include PACKEDARRAY_SELF 569 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 30 570 | #include PACKEDARRAY_SELF 571 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 31 572 | #include PACKEDARRAY_SELF 573 | #define PACKEDARRAY_IMPL_BITS_PER_ITEM 32 574 | #include PACKEDARRAY_SELF 575 | #undef PACKEDARRAY_IMPL 576 | 577 | 578 | #if !defined(PACKEDARRAY_ALIGNED_MALLOC) || !defined(PACKEDARRAY_FREE) 579 | #include 580 | #endif 581 | 582 | #if 
!defined(PACKEDARRAY_ALIGNED_MALLOC) 583 | #if defined (_MSC_VER) 584 | #define PACKEDARRAY_ALIGNED_MALLOC(alignment, size) _aligned_malloc(size, alignment) 585 | #elif defined (ANDROID) || defined (__ANDROID__) 586 | #define PACKEDARRAY_ALIGNED_MALLOC(alignment, size) memalign(alignment, size) 587 | #else 588 | static void* __PackedArray_aligned_malloc(size_t alignment, size_t size) 589 | { 590 | void* p = NULL; 591 | posix_memalign(&p, alignment, size); 592 | return p; 593 | } 594 | #define PACKEDARRAY_ALIGNED_MALLOC(alignment, size) __PackedArray_aligned_malloc(alignment, size) 595 | #endif 596 | #endif 597 | 598 | #if !defined(PACKEDARRAY_FREE) 599 | #if defined (_MSC_VER) 600 | #define PACKEDARRAY_FREE(p) _aligned_free(p) 601 | #else 602 | #define PACKEDARRAY_FREE(p) free(p) 603 | #endif 604 | #endif 605 | 606 | PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count) 607 | { 608 | PackedArray* a; 609 | size_t bufferSize; 610 | 611 | PACKEDARRAY_ASSERT(bitsPerItem > 0); 612 | PACKEDARRAY_ASSERT(bitsPerItem <= 32); 613 | 614 | bufferSize = sizeof(uint32_t) * (((uint64_t)count / 4 * (uint64_t)bitsPerItem + 31) / 32 * 4); 615 | bufferSize += count < 4 ? 
sizeof(uint32_t) * count : sizeof(uint32_t) * 4; 616 | a = (PackedArray*)PACKEDARRAY_ALIGNED_MALLOC(16, sizeof(PackedArray) + bufferSize); 617 | PACKEDARRAY_ASSERT((uint64_t)a->buffer % 16 == 0); 618 | 619 | if (a != NULL) 620 | { 621 | a->buffer[bufferSize / sizeof(uint32_t) - 1] = 0; 622 | a->bitsPerItem = bitsPerItem; 623 | a->count = count; 624 | } 625 | 626 | return a; 627 | } 628 | 629 | void PackedArray_destroy(PackedArray* a) 630 | { 631 | PACKEDARRAY_ASSERT(a); 632 | PACKEDARRAY_FREE(a); 633 | } 634 | 635 | void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count) 636 | { 637 | PACKEDARRAY_ASSERT(a != NULL); 638 | PACKEDARRAY_ASSERT(in != NULL); 639 | 640 | switch (a->bitsPerItem) 641 | { 642 | case 1: __PackedArray_pack_1(a->buffer, offset, in, count); break; 643 | case 2: __PackedArray_pack_2(a->buffer, offset, in, count); break; 644 | case 3: __PackedArray_pack_3(a->buffer, offset, in, count); break; 645 | case 4: __PackedArray_pack_4(a->buffer, offset, in, count); break; 646 | case 5: __PackedArray_pack_5(a->buffer, offset, in, count); break; 647 | case 6: __PackedArray_pack_6(a->buffer, offset, in, count); break; 648 | case 7: __PackedArray_pack_7(a->buffer, offset, in, count); break; 649 | case 8: __PackedArray_pack_8(a->buffer, offset, in, count); break; 650 | case 9: __PackedArray_pack_9(a->buffer, offset, in, count); break; 651 | case 10: __PackedArray_pack_10(a->buffer, offset, in, count); break; 652 | case 11: __PackedArray_pack_11(a->buffer, offset, in, count); break; 653 | case 12: __PackedArray_pack_12(a->buffer, offset, in, count); break; 654 | case 13: __PackedArray_pack_13(a->buffer, offset, in, count); break; 655 | case 14: __PackedArray_pack_14(a->buffer, offset, in, count); break; 656 | case 15: __PackedArray_pack_15(a->buffer, offset, in, count); break; 657 | case 16: __PackedArray_pack_16(a->buffer, offset, in, count); break; 658 | case 17: __PackedArray_pack_17(a->buffer, offset, in, count); 
break; 659 | case 18: __PackedArray_pack_18(a->buffer, offset, in, count); break; 660 | case 19: __PackedArray_pack_19(a->buffer, offset, in, count); break; 661 | case 20: __PackedArray_pack_20(a->buffer, offset, in, count); break; 662 | case 21: __PackedArray_pack_21(a->buffer, offset, in, count); break; 663 | case 22: __PackedArray_pack_22(a->buffer, offset, in, count); break; 664 | case 23: __PackedArray_pack_23(a->buffer, offset, in, count); break; 665 | case 24: __PackedArray_pack_24(a->buffer, offset, in, count); break; 666 | case 25: __PackedArray_pack_25(a->buffer, offset, in, count); break; 667 | case 26: __PackedArray_pack_26(a->buffer, offset, in, count); break; 668 | case 27: __PackedArray_pack_27(a->buffer, offset, in, count); break; 669 | case 28: __PackedArray_pack_28(a->buffer, offset, in, count); break; 670 | case 29: __PackedArray_pack_29(a->buffer, offset, in, count); break; 671 | case 30: __PackedArray_pack_30(a->buffer, offset, in, count); break; 672 | case 31: __PackedArray_pack_31(a->buffer, offset, in, count); break; 673 | case 32: __PackedArray_pack_32(a->buffer, offset, in, count); break; 674 | } 675 | } 676 | 677 | void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count) 678 | { 679 | PACKEDARRAY_ASSERT(a != NULL); 680 | PACKEDARRAY_ASSERT(out != NULL); 681 | 682 | switch (a->bitsPerItem) 683 | { 684 | case 1: __PackedArray_unpack_1(a->buffer, offset, out, count); break; 685 | case 2: __PackedArray_unpack_2(a->buffer, offset, out, count); break; 686 | case 3: __PackedArray_unpack_3(a->buffer, offset, out, count); break; 687 | case 4: __PackedArray_unpack_4(a->buffer, offset, out, count); break; 688 | case 5: __PackedArray_unpack_5(a->buffer, offset, out, count); break; 689 | case 6: __PackedArray_unpack_6(a->buffer, offset, out, count); break; 690 | case 7: __PackedArray_unpack_7(a->buffer, offset, out, count); break; 691 | case 8: __PackedArray_unpack_8(a->buffer, offset, out, count); break; 692 
| case 9: __PackedArray_unpack_9(a->buffer, offset, out, count); break; 693 | case 10: __PackedArray_unpack_10(a->buffer, offset, out, count); break; 694 | case 11: __PackedArray_unpack_11(a->buffer, offset, out, count); break; 695 | case 12: __PackedArray_unpack_12(a->buffer, offset, out, count); break; 696 | case 13: __PackedArray_unpack_13(a->buffer, offset, out, count); break; 697 | case 14: __PackedArray_unpack_14(a->buffer, offset, out, count); break; 698 | case 15: __PackedArray_unpack_15(a->buffer, offset, out, count); break; 699 | case 16: __PackedArray_unpack_16(a->buffer, offset, out, count); break; 700 | case 17: __PackedArray_unpack_17(a->buffer, offset, out, count); break; 701 | case 18: __PackedArray_unpack_18(a->buffer, offset, out, count); break; 702 | case 19: __PackedArray_unpack_19(a->buffer, offset, out, count); break; 703 | case 20: __PackedArray_unpack_20(a->buffer, offset, out, count); break; 704 | case 21: __PackedArray_unpack_21(a->buffer, offset, out, count); break; 705 | case 22: __PackedArray_unpack_22(a->buffer, offset, out, count); break; 706 | case 23: __PackedArray_unpack_23(a->buffer, offset, out, count); break; 707 | case 24: __PackedArray_unpack_24(a->buffer, offset, out, count); break; 708 | case 25: __PackedArray_unpack_25(a->buffer, offset, out, count); break; 709 | case 26: __PackedArray_unpack_26(a->buffer, offset, out, count); break; 710 | case 27: __PackedArray_unpack_27(a->buffer, offset, out, count); break; 711 | case 28: __PackedArray_unpack_28(a->buffer, offset, out, count); break; 712 | case 29: __PackedArray_unpack_29(a->buffer, offset, out, count); break; 713 | case 30: __PackedArray_unpack_30(a->buffer, offset, out, count); break; 714 | case 31: __PackedArray_unpack_31(a->buffer, offset, out, count); break; 715 | case 32: __PackedArray_unpack_32(a->buffer, offset, out, count); break; 716 | } 717 | } 718 | 719 | void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in) 720 | { 721 | uint32_t* 
__restrict out; 722 | uint32_t bitsPerItem; 723 | uint32_t startBit; 724 | uint32_t bitsAvailable; 725 | uint32_t mask; 726 | 727 | PACKEDARRAY_ASSERT(a != NULL); 728 | 729 | bitsPerItem = a->bitsPerItem; 730 | 731 | out = &a->buffer[4 * (((uint64_t)offset / 4 * (uint64_t)bitsPerItem) / 32) + (offset % 4)]; 732 | startBit = ((uint64_t)offset / 4 * (uint64_t)bitsPerItem) % 32; 733 | 734 | bitsAvailable = 32 - startBit; 735 | 736 | mask = (uint32_t)(1ULL << bitsPerItem) - 1; 737 | PACKEDARRAY_ASSERT(0 == (~mask & in)); 738 | 739 | if (bitsPerItem <= bitsAvailable) 740 | { 741 | out[0] = (out[0] & ~(mask << startBit)) | (in << startBit); 742 | } 743 | else 744 | { 745 | // value spans 2 buffer cells 746 | uint32_t low, high; 747 | 748 | low = in << startBit; 749 | high = in >> bitsAvailable; 750 | 751 | out[0] = (out[0] & ~(mask << startBit)) | low; 752 | 753 | out[4] = (out[4] & ~(mask >> (32 - startBit))) | high; 754 | } 755 | } 756 | 757 | uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset) 758 | { 759 | const uint32_t* __restrict in; 760 | uint32_t bitsPerItem; 761 | uint32_t startBit; 762 | uint32_t bitsAvailable; 763 | uint32_t mask; 764 | uint32_t out; 765 | 766 | PACKEDARRAY_ASSERT(a != NULL); 767 | 768 | bitsPerItem = a->bitsPerItem; 769 | 770 | in = &a->buffer[4 * (((uint64_t)offset / 4 * (uint64_t)bitsPerItem) / 32) + (offset % 4)]; 771 | startBit = ((uint64_t)offset / 4 * (uint64_t)bitsPerItem) % 32; 772 | 773 | bitsAvailable = 32 - startBit; 774 | 775 | mask = (uint32_t)(1ULL << bitsPerItem) - 1; 776 | 777 | if (bitsPerItem <= bitsAvailable) 778 | { 779 | out = (in[0] >> startBit) & mask; 780 | } 781 | else 782 | { 783 | // value spans 2 buffer cells 784 | uint32_t low, high; 785 | 786 | low = in[0] >> startBit; 787 | high = in[4] << (32 - startBit); 788 | 789 | out = low ^ ((low ^ high) & (mask >> bitsAvailable << bitsAvailable)); 790 | } 791 | 792 | return out; 793 | } 794 | 795 | uint32_t PackedArray_bufferSize(const PackedArray* a) 
796 | { 797 | size_t bufferSize; 798 | uint32_t bitsPerItem, count; 799 | 800 | PACKEDARRAY_ASSERT(a != NULL); 801 | 802 | bitsPerItem = a->bitsPerItem; 803 | count = a->count; 804 | 805 | bufferSize = ((uint64_t)count / 4 * (uint64_t)bitsPerItem + 31) / 32 * 4; 806 | bufferSize += count < 4 ? count : 4; 807 | 808 | return (uint32_t)bufferSize; 809 | } 810 | 811 | #if !(defined(_MSC_VER) && _MSC_VER >= 1400) && !defined(__GNUC__) 812 | // log base 2 of an integer, aka the position of the highest bit set 813 | static uint32_t __PackedArray_log2(uint32_t v) 814 | { 815 | // references 816 | // http://aggregate.org/MAGIC 817 | // http://graphics.stanford.edu/~seander/bithacks.html 818 | 819 | static const uint32_t multiplyDeBruijnBitPosition[32] = 820 | { 821 | 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 822 | 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 823 | }; 824 | 825 | v |= v >> 1; 826 | v |= v >> 2; 827 | v |= v >> 4; 828 | v |= v >> 8; 829 | v |= v >> 16; 830 | 831 | return multiplyDeBruijnBitPosition[(uint32_t)(v * 0x7C4ACDDU) >> 27]; 832 | } 833 | #endif 834 | 835 | // position of the highest bit set 836 | static int __PackedArray_highestBitSet(uint32_t v) 837 | { 838 | #if defined(_MSC_VER) && _MSC_VER >= 1400 839 | unsigned long index; 840 | return _BitScanReverse(&index, v) ? index : -1; 841 | #elif defined(__GNUC__) 842 | return v == 0 ? -1 : 31 - __builtin_clz(v); 843 | #else 844 | return v != 0 ? __PackedArray_log2(v) : -1; 845 | #endif 846 | } 847 | 848 | uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count) 849 | { 850 | uint32_t i, in_max, bitsPerItem; 851 | 852 | in_max = 0; 853 | for (i = 0; i < count; ++i) 854 | in_max = in[i] > in_max ? in[i] : in_max; 855 | 856 | bitsPerItem = __PackedArray_highestBitSet(in_max) + 1; 857 | return bitsPerItem == 0 ? 
1 : bitsPerItem; 858 | } 859 | 860 | 861 | // - 8< ------------------------------------------------------------------------ 862 | 863 | #if defined(PACKEDARRAY_SELF_TEST) && defined(PACKEDARRAY_SELF_BENCH) 864 | #error choose either PACKEDARRAY_SELF_TEST or PACKEDARRAY_SELF_BENCH 865 | #endif 866 | 867 | #if defined(PACKEDARRAY_SELF_TEST) 868 | 869 | #undef NDEBUG // we want asserts 870 | #include 871 | 872 | #include 873 | #include // memcmp 874 | 875 | static void PackedArray_pack_reference(PackedArray* a, uint32_t offset, const uint32_t* in, uint32_t count) 876 | { 877 | uint32_t* __restrict out; 878 | uint32_t bitsPerItem; 879 | uint32_t startBit; 880 | uint32_t bitsAvailable; 881 | uint32_t mask; 882 | 883 | assert(a != NULL); 884 | assert(in != NULL); 885 | 886 | bitsPerItem = a->bitsPerItem; 887 | 888 | mask = (uint32_t)((1ULL << bitsPerItem) - 1); 889 | 890 | while (count--) 891 | { 892 | uint32_t value = *in++; 893 | 894 | assert(0 == (~mask & value)); 895 | 896 | out = &a->buffer[((uint64_t)offset / 4 * (uint64_t)bitsPerItem) / 32 * 4 + offset % 4]; 897 | startBit = ((uint64_t)offset / 4 * (uint64_t)bitsPerItem) % 32; 898 | ++offset; 899 | 900 | bitsAvailable = 32 - startBit; 901 | 902 | if (bitsPerItem <= bitsAvailable) 903 | { 904 | out[0] = (out[0] & ~(mask << startBit)) | (value << startBit); 905 | } 906 | else 907 | { 908 | // value spans 2 buffer cells 909 | uint32_t low, high; 910 | 911 | low = value << startBit; 912 | high = value >> bitsAvailable; 913 | 914 | out[0] = (out[0] & ~(mask << startBit)) | low; 915 | 916 | out[4] = (out[4] & ~(mask >> bitsAvailable)) | high; 917 | } 918 | } 919 | } 920 | 921 | static void PackedArray_unpack_reference(const PackedArray* a, uint32_t offset, uint32_t* out, uint32_t count) 922 | { 923 | const uint32_t* __restrict in; 924 | uint32_t bitsPerItem; 925 | uint32_t startBit; 926 | uint32_t bitsAvailable; 927 | uint32_t mask; 928 | 929 | assert(a != NULL); 930 | assert(out != NULL); 931 | 932 | bitsPerItem = 
a->bitsPerItem; 933 | 934 | mask = (uint32_t)((1ULL << bitsPerItem) - 1); 935 | 936 | while (count--) 937 | { 938 | uint32_t value; 939 | 940 | in = &a->buffer[((uint64_t)offset / 4 * (uint64_t)bitsPerItem) / 32 * 4 + offset % 4]; 941 | startBit = ((uint64_t)offset / 4 * (uint64_t)bitsPerItem) % 32; 942 | ++offset; 943 | 944 | bitsAvailable = 32 - startBit; 945 | 946 | if (bitsPerItem <= bitsAvailable) 947 | { 948 | value = (in[0] >> startBit) & mask; 949 | } 950 | else 951 | { 952 | // value spans 2 buffer cells 953 | uint32_t low, high; 954 | 955 | low = in[0] >> startBit; 956 | high = in[4] << bitsAvailable; 957 | 958 | value = low ^ ((low ^ high) & (mask >> bitsAvailable << bitsAvailable)); 959 | } 960 | *out++ = value; 961 | } 962 | } 963 | 964 | int main(void) 965 | { 966 | uint32_t bitsPerItem; 967 | 968 | printf("-- PackedArray self test -------------------------------------------------------\n"); 969 | printf("\n"); 970 | 971 | printf("sizeof(PackedArray) = %d\n", (int)sizeof(PackedArray)); 972 | printf("\n"); 973 | 974 | printf("1 by 1 packing / unpacking:\n"); 975 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 976 | { 977 | uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1; 978 | int count; 979 | 980 | for (count = 1; count <= 1024; ++count) 981 | { 982 | PackedArray* a1 = PackedArray_create(bitsPerItem, count); 983 | PackedArray* a2 = PackedArray_create(bitsPerItem, count); 984 | int i; 985 | 986 | assert(a1->count == (uint32_t)count); 987 | assert(a2->count == (uint32_t)count); 988 | assert(a1->bitsPerItem == bitsPerItem); 989 | assert(a2->bitsPerItem == bitsPerItem); 990 | assert(PackedArray_bufferSize(a1) == PackedArray_bufferSize(a2)); 991 | 992 | for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i) 993 | a1->buffer[i] = a2->buffer[i] = rand(); 994 | 995 | for (i = 0; i < count; ++i) 996 | { 997 | uint32_t v1, v2; 998 | 999 | v1 = rand() & mask; 1000 | v2 = v1 + 1; 1001 | PackedArray_pack(a1, i, &v1, 1); 1002 | 
PackedArray_pack_reference(a2, i, &v1, 1); 1003 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 1004 | 1005 | PackedArray_set(a1, i, v1); 1006 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 1007 | 1008 | PackedArray_unpack(a1, i, &v2, 1); 1009 | assert(v1 == v2); 1010 | PackedArray_unpack_reference(a2, i, &v2, 1); 1011 | assert(v1 == v2); 1012 | v2 = PackedArray_get(a2, i); 1013 | assert(v1 == v2); 1014 | } 1015 | 1016 | for (i = count - 1; i >= 0; --i) 1017 | { 1018 | uint32_t v1, v2; 1019 | 1020 | v1 = rand() & mask; 1021 | v2 = v1 + 1; 1022 | PackedArray_pack(a1, i, &v1, 1); 1023 | PackedArray_pack_reference(a2, i, &v1, 1); 1024 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 1025 | 1026 | PackedArray_set(a1, i, v1); 1027 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 1028 | 1029 | PackedArray_unpack(a1, i, &v2, 1); 1030 | assert(v1 == v2); 1031 | PackedArray_unpack_reference(a2, i, &v2, 1); 1032 | assert(v1 == v2); 1033 | v2 = PackedArray_get(a2, i); 1034 | assert(v1 == v2); 1035 | } 1036 | 1037 | PackedArray_destroy(a1); 1038 | PackedArray_destroy(a2); 1039 | } 1040 | printf(" %2d bits per item -- success.\n", bitsPerItem); 1041 | } 1042 | 1043 | printf("\n"); 1044 | printf("bulk packing / unpacking:\n"); 1045 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1046 | { 1047 | uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1; 1048 | int count; 1049 | 1050 | for (count = 1; count <= 128; ++count) 1051 | { 1052 | uint32_t* v1; 1053 | uint32_t* v2; 1054 | PackedArray* a1; 1055 | PackedArray* a2; 1056 | int i, j; 1057 | 1058 | v1 = (uint32_t*)malloc(sizeof(uint32_t) * count); 1059 | assert(v1 != NULL); 1060 | v2 = (uint32_t*)malloc(sizeof(uint32_t) * count); 1061 | assert(v2 != NULL); 1062 | 1063 | a1 = PackedArray_create(bitsPerItem, count); 1064 | 
assert(a1 != NULL); 1065 | a2 = PackedArray_create(bitsPerItem, count); 1066 | assert(a2 != NULL); 1067 | 1068 | for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i) 1069 | a1->buffer[i] = a2->buffer[i] = rand(); 1070 | 1071 | for (i = 0; i < count; ++i) 1072 | v1[i] = rand() & mask; 1073 | 1074 | assert(bitsPerItem >= PackedArray_computeBitsPerItem(v1, count)); 1075 | 1076 | for (i = 0; i < count; ++i) 1077 | { 1078 | for (j = 1; j <= count - i; ++j) 1079 | { 1080 | PackedArray_pack(a1, i, v1, j); 1081 | PackedArray_pack_reference(a2, i, v1, j); 1082 | assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0); 1083 | 1084 | PackedArray_unpack(a1, i, v2, j); 1085 | assert(bitsPerItem >= PackedArray_computeBitsPerItem(v2, j)); 1086 | assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0); 1087 | PackedArray_unpack_reference(a2, i, v2, j); 1088 | assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0); 1089 | } 1090 | } 1091 | 1092 | PackedArray_destroy(a1); 1093 | PackedArray_destroy(a2); 1094 | free(v1); 1095 | free(v2); 1096 | } 1097 | printf(" %2d bits per item -- success.\n", bitsPerItem); 1098 | } 1099 | 1100 | return 0; 1101 | } 1102 | 1103 | #elif defined(PACKEDARRAY_SELF_BENCH) // #if defined(PACKEDARRAY_SELF_TEST) 1104 | 1105 | #ifndef NDEBUG 1106 | #error please define NDEBUG to inhibit asserts when compiling the benchmark 1107 | #endif 1108 | 1109 | #include 1110 | #include 1111 | #include 1112 | 1113 | #ifndef MIN 1114 | #define MIN(x, y) (((x) < (y)) ? (x) : (y)) 1115 | #endif 1116 | 1117 | #ifndef MAX 1118 | #define MAX(x, y) (((x) < (y)) ? 
(y) : (x)) 1119 | #endif 1120 | 1121 | #ifdef _MSC_VER 1122 | #pragma warning(push, 3) 1123 | #include 1124 | #pragma warning(pop) 1125 | static double getChronometerTime(void) 1126 | { 1127 | LARGE_INTEGER frequency; 1128 | LARGE_INTEGER t; 1129 | 1130 | QueryPerformanceFrequency(&frequency); 1131 | QueryPerformanceCounter(&t); 1132 | 1133 | return (double)t.QuadPart / (double)frequency.QuadPart * 1000; 1134 | } 1135 | 1136 | #else 1137 | #include 1138 | 1139 | static double getChronometerTime() 1140 | { 1141 | struct timeval now = { 0 }; 1142 | gettimeofday(&now, NULL); 1143 | 1144 | return (double)now.tv_sec + (double)now.tv_usec * 1e-6; 1145 | } 1146 | #endif 1147 | 1148 | #define LOOP_COUNT 1000 1149 | static double bench_memcpy(uint32_t* in, uint32_t* out, uint32_t count) 1150 | { 1151 | double start, end; 1152 | uint32_t i; 1153 | 1154 | start = getChronometerTime(); 1155 | 1156 | for (i = 0; i < LOOP_COUNT; ++i) 1157 | memcpy(out, in, count * sizeof(uint32_t)); 1158 | 1159 | end = getChronometerTime(); 1160 | 1161 | return 1e6 * (end - start) / LOOP_COUNT; 1162 | } 1163 | 1164 | static double bench_loopcpy(uint32_t* in, uint32_t* out, uint32_t count) 1165 | { 1166 | double start, end; 1167 | uint32_t i; 1168 | 1169 | start = getChronometerTime(); 1170 | 1171 | for (i = 0; i < LOOP_COUNT; ++i) 1172 | { 1173 | uint32_t j; 1174 | 1175 | for (j = 0; j < count; ++j) 1176 | out[j] = in[j]; 1177 | } 1178 | 1179 | end = getChronometerTime(); 1180 | 1181 | return 1e6 * (end - start) / LOOP_COUNT; 1182 | } 1183 | 1184 | static double bench_pack(uint32_t* in, PackedArray* out, uint32_t count) 1185 | { 1186 | double start, end; 1187 | int i; 1188 | 1189 | start = getChronometerTime(); 1190 | 1191 | for (i = 0; i < LOOP_COUNT; ++i) 1192 | PackedArray_pack(out, 0, in, count); 1193 | 1194 | end = getChronometerTime(); 1195 | 1196 | return 1e6 * (end - start) / LOOP_COUNT; 1197 | } 1198 | 1199 | static double bench_unpack(PackedArray* in, uint32_t* out, uint32_t count) 
1200 | { 1201 | double start, end; 1202 | int i; 1203 | 1204 | start = getChronometerTime(); 1205 | 1206 | for (i = 0; i < LOOP_COUNT; ++i) 1207 | PackedArray_unpack(in, 0, out, count); 1208 | 1209 | end = getChronometerTime(); 1210 | 1211 | return 1e6 * (end - start) / LOOP_COUNT; 1212 | } 1213 | 1214 | #define MAX_ELEMENT_COUNT (1 << 18) 1215 | #define LOG2_MAX_ELEMENT_COUNT 18 1216 | int main(void) 1217 | { 1218 | double start, end; 1219 | uint32_t* b1; 1220 | uint32_t* b2; 1221 | uint32_t count, bitsPerItem; 1222 | PackedArray** packed; 1223 | uint32_t i; 1224 | double* speed_memcpy; 1225 | double avg_memcpy, min_memcpy, max_memcpy; 1226 | double* speed_loopcpy; 1227 | double avg_loopcpy, min_loopcpy, max_loopcpy; 1228 | double* speed_pack[32]; 1229 | double avg_pack, min_pack, max_pack; 1230 | double* speed_unpack[32]; 1231 | double avg_unpack, min_unpack, max_unpack; 1232 | 1233 | printf("-- PackedArray self bench ------------------------------------------------------\n"); 1234 | 1235 | start = getChronometerTime(); 1236 | 1237 | b1 = (uint32_t*)malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT); 1238 | assert(b1 != NULL); 1239 | b2 = (uint32_t*)malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT); 1240 | assert(b2 != NULL); 1241 | 1242 | packed = (PackedArray**)malloc(sizeof(PackedArray*) * 32); 1243 | assert(packed != NULL); 1244 | for (i = 0; i < 32; ++i) 1245 | packed[i] = PackedArray_create(i + 1, MAX_ELEMENT_COUNT); 1246 | 1247 | for (i = 0; i < MAX_ELEMENT_COUNT; ++i) 1248 | b1[i] = rand(); 1249 | 1250 | speed_memcpy = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1251 | assert(speed_memcpy != NULL); 1252 | avg_memcpy = 0; 1253 | min_memcpy = DBL_MAX; 1254 | max_memcpy = 0; 1255 | 1256 | printf("memcpy:\n"); 1257 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n"); 1258 | 1259 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1260 | { 1261 | double elapsed = bench_memcpy(b1, b2, count); 1262 | double speed = count * 
sizeof(uint32_t) / elapsed; 1263 | printf("%4d\t%8d\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1264 | 1265 | avg_memcpy += speed; 1266 | min_memcpy = MIN(min_memcpy, speed); 1267 | max_memcpy = MAX(max_memcpy, speed); 1268 | 1269 | speed_memcpy[i] = speed; 1270 | } 1271 | 1272 | avg_memcpy /= i; 1273 | 1274 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n"); 1275 | printf("%10.3f\t%10.3f\t%10.3f\n", avg_memcpy, min_memcpy, max_memcpy); 1276 | printf("\n"); 1277 | 1278 | speed_loopcpy = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1279 | assert(speed_loopcpy != NULL); 1280 | avg_loopcpy = 0; 1281 | min_loopcpy = DBL_MAX; 1282 | max_loopcpy = 0; 1283 | 1284 | printf("loopcpy:\n"); 1285 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n"); 1286 | 1287 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1288 | { 1289 | double elapsed = bench_loopcpy(b1, b2, count); 1290 | double speed = count * sizeof(uint32_t) / elapsed; 1291 | printf("%4d\t%8d\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1292 | 1293 | avg_loopcpy += speed; 1294 | min_loopcpy = MIN(min_loopcpy, speed); 1295 | max_loopcpy = MAX(max_loopcpy, speed); 1296 | 1297 | speed_loopcpy[i] = speed; 1298 | } 1299 | 1300 | avg_loopcpy /= i; 1301 | 1302 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n"); 1303 | printf("%10.3f\t%10.3f\t%10.3f\n", avg_loopcpy, min_loopcpy, max_loopcpy); 1304 | printf("\n"); 1305 | 1306 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1307 | { 1308 | avg_pack = 0; 1309 | min_pack = DBL_MAX; 1310 | max_pack = 0; 1311 | avg_unpack = 0; 1312 | min_unpack = DBL_MAX; 1313 | max_unpack = 0; 1314 | 1315 | printf("pack:\t \t \t \t"); 1316 | printf("unpack:\t \t \t \t"); 1317 | printf("\n"); 1318 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)"); 1319 | printf("\t"); 1320 | printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)"); 1321 | printf("\n"); 1322 | 1323 | speed_pack[bitsPerItem - 
1] = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1324 | assert(speed_pack[bitsPerItem - 1] != NULL); 1325 | speed_unpack[bitsPerItem - 1] = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1)); 1326 | assert(speed_unpack[bitsPerItem - 1] != NULL); 1327 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1328 | { 1329 | uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1; 1330 | uint32_t j; 1331 | double elapsed, speed; 1332 | 1333 | for (j = 0; j < count; ++j) 1334 | b2[j] = b1[j] & mask; 1335 | 1336 | elapsed = bench_pack(b2, packed[bitsPerItem - 1], count); 1337 | speed = count * sizeof(uint32_t) / elapsed; 1338 | printf("%4d\t%8d\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1339 | 1340 | avg_pack += speed; 1341 | min_pack = MIN(min_pack, speed); 1342 | max_pack = MAX(max_pack, speed); 1343 | 1344 | speed_pack[bitsPerItem - 1][i] = speed; 1345 | 1346 | printf("\t"); 1347 | 1348 | elapsed = bench_unpack(packed[bitsPerItem - 1], b2, count); 1349 | speed = count * sizeof(uint32_t) / elapsed; 1350 | printf("%4d\t%8d\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed); 1351 | 1352 | avg_unpack += speed; 1353 | min_unpack = MIN(min_unpack, speed); 1354 | max_unpack = MAX(max_unpack, speed); 1355 | 1356 | speed_unpack[bitsPerItem - 1][i] = speed; 1357 | 1358 | printf("\n"); 1359 | } 1360 | printf("%d\n",i); 1361 | assert(i == LOG2_MAX_ELEMENT_COUNT + 1); 1362 | 1363 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1364 | printf("\t\t"); 1365 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1366 | printf("\n"); 1367 | 1368 | avg_pack /= i; 1369 | printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack); 1370 | 1371 | printf("\t\t"); 1372 | 1373 | avg_unpack /= i; 1374 | printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack); 1375 | printf("\n"); 1376 | printf("\n"); 1377 | } 1378 | 1379 | printf("\n"); 1380 | 1381 | printf("stats by 
bits per item\n"); 1382 | printf("pack:\t \t \t \t"); 1383 | printf("unpack:\t \t \t \t"); 1384 | printf("\n"); 1385 | printf("bits\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1386 | printf("\t"); 1387 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1388 | printf("\n"); 1389 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1390 | { 1391 | avg_pack = 0; 1392 | min_pack = DBL_MAX; 1393 | max_pack = 0; 1394 | avg_unpack = 0; 1395 | min_unpack = DBL_MAX; 1396 | max_unpack = 0; 1397 | 1398 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1399 | { 1400 | double speed; 1401 | 1402 | speed = speed_pack[bitsPerItem - 1][i]; 1403 | 1404 | avg_pack += speed; 1405 | min_pack = MIN(min_pack, speed); 1406 | max_pack = MAX(max_pack, speed); 1407 | 1408 | speed = speed_unpack[bitsPerItem - 1][i]; 1409 | 1410 | avg_unpack += speed; 1411 | min_unpack = MIN(min_unpack, speed); 1412 | max_unpack = MAX(max_unpack, speed); 1413 | } 1414 | assert(i == LOG2_MAX_ELEMENT_COUNT + 1); 1415 | 1416 | printf("%4d\t", bitsPerItem); 1417 | 1418 | avg_pack /= i; 1419 | printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack); 1420 | printf("\t"); 1421 | 1422 | avg_unpack /= i; 1423 | printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack); 1424 | printf("\n"); 1425 | } 1426 | 1427 | printf("\n"); 1428 | 1429 | printf("stats by size\n"); 1430 | printf("pack:\t \t \t \t"); 1431 | printf("unpack:\t \t \t \t"); 1432 | printf("\n"); 1433 | printf("size(B)\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1434 | printf("\t"); 1435 | printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)"); 1436 | printf("\n"); 1437 | for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i) 1438 | { 1439 | avg_pack = 0; 1440 | min_pack = DBL_MAX; 1441 | max_pack = 0; 1442 | avg_unpack = 0; 1443 | min_unpack = DBL_MAX; 1444 | max_unpack = 0; 1445 | 1446 | for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem) 1447 | { 1448 | double speed; 1449 | 1450 | speed = speed_pack[bitsPerItem - 
1][i]; 1451 | 1452 | avg_pack += speed; 1453 | min_pack = MIN(min_pack, speed); 1454 | max_pack = MAX(max_pack, speed); 1455 | 1456 | speed = speed_unpack[bitsPerItem - 1][i]; 1457 | 1458 | avg_unpack += speed; 1459 | min_unpack = MIN(min_unpack, speed); 1460 | max_unpack = MAX(max_unpack, speed); 1461 | } 1462 | 1463 | printf("%7d\t", (uint32_t)sizeof(uint32_t) * count); 1464 | 1465 | avg_pack /= 32; 1466 | printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack); 1467 | printf("\t"); 1468 | 1469 | avg_unpack /= 32; 1470 | printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack); 1471 | printf("\n"); 1472 | } 1473 | 1474 | printf("\n"); 1475 | 1476 | free(b1); 1477 | free(b2); 1478 | free(speed_memcpy); 1479 | free(speed_loopcpy); 1480 | 1481 | for (i = 0; i < 32; ++i) 1482 | { 1483 | PackedArray_destroy(packed[i]); 1484 | free(speed_pack[i]); 1485 | free(speed_unpack[i]); 1486 | } 1487 | 1488 | free(packed); 1489 | 1490 | end = getChronometerTime(); 1491 | printf("total time (s): %f\n", (end - start)); 1492 | printf("\n"); 1493 | 1494 | return 0; 1495 | } 1496 | 1497 | #endif // #elif defined(PACKEDARRAY_SELF_BENCH) 1498 | 1499 | #endif // #ifdef PACKEDARRAY_IMPL 1500 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PackedArray: random access array of tightly packed unsigned integers 2 | [![Build Status](https://travis-ci.org/gpakosz/PackedArray.png?branch=master)](https://travis-ci.org/gpakosz/PackedArray) 3 | ## TLDR 4 | 5 | *PackedArray comes to the rescue when you're in a desperate need for an uint9_t 6 | or uint17_t array.* 7 | 8 | ## What? 
9 | 10 | When you want to hold an unordered sequence of unsigned integers in memory, 11 | the C programming language lets you choose among 4 data types: 12 | 13 | - `uint8_t` 14 | - `uint16_t` 15 | - `uint32_t` 16 | - `uint64_t` 17 | 18 | If your numbers are within the [0, 100000] range, only 17 bits per integer are 19 | needed since 2^17 = 131072. However, you can't use an array of 20 | `uint16_t` because 16 bits are not enough to store numbers between 65536 and 21 | 100000. When you use the next available type, `uint32_t`, you're wasting 15 bits 22 | per integer which represents a 47% overhead in terms of storage requirements. 23 | 24 | `PackedArray` saves memory by packing integers/items together at the bit-level: 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
b0b1b2...
i0i1i2i3i4i5i6i7i8i9...
47 | 48 | A `PackedArray` is backed by a `uint32_t` buffer. Several items end up being 49 | stored inside the same buffer cell, e.g. i0, i1, and i2. Some items span two 50 | buffer cells, e.g. i3 and i7. `PackedArray` is responsible for 51 | encoding/decoding items into/from the storage buffer. 52 | 53 | `PackedArraySIMD` is a `PackedArray` variant that makes use of SSE2 or NEON 54 | instructions. 55 | 56 | Going SIMD processes integers 4 by 4 but imposes an interleaved layout in the 57 | storage buffer. 58 | 59 | `PackedArraySIMD` interleaved layout, 13 bits per item: 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 |
b0b1b2b3...
i0i4i8ai1i5i9ai2i6i10ai3i7i11ai8b...
86 | 87 | As a consequence, the data layout of `PackedArraySIMD` isn't compatible with its 88 | non-SIMD counterpart. In other words, you cannot use `PackedArray` to unpack 89 | data packed with `PackedArraySIMD` or the other way around. 90 | 91 | It is also worth noting that the implementations of `PackedArraySIMD_pack` and 92 | `PackedArraySIMD_unpack` require more plumbing than their non-SIMD counterparts. 93 | Additional computations are needed to determine and adjust a data window that can 94 | be processed 4 by 4 with SIMD instructions. 95 | 96 | `PackedArray` and `PackedArraySIMD` are released under the WTFPL v2 license. 97 | 98 | For more information, see the [PackedArray announcement on my personal website]. 99 | 100 | [PackedArray announcement on my personal website]: http://pempek.net/articles/2013/08/03/packedarray-random-access-array-tightly-packed-unsigned-integers/ 101 | 102 | ## Why? 103 | 104 | `PackedArray` is designed as a drop-in replacement for an unsigned integer 105 | array. I couldn't find such a data structure in the wild, so I implemented one. 106 | 107 | Instead of writing: 108 | 109 | uint32_t* a = (uint32_t*)malloc(sizeof(uint32_t) * count); 110 | ... 111 | value = a[i]; 112 | ... 113 | a[j] = value; 114 | 115 | You write: 116 | 117 | PackedArray* a = PackedArray_create(bitsPerItem, count); 118 | ... 119 | value = PackedArray_get(a, i); 120 | ... 121 | PackedArray_set(a, j, value); 122 | 123 | The `PackedArray_computeBitsPerItem` helper scans a `uint32_t` array and returns 124 | the number of bits needed to create a `PackedArray` capable of holding its 125 | content. 126 | 127 | There are also `PackedArray_pack` and `PackedArray_unpack` that operate on 128 | several items in a row. Those two could really have been named 129 | `PackedArray_write` and `PackedArray_read` but I decided "pack" / "unpack" 130 | better conveys that something is happening under the hood.
131 | 132 | // bulk packing / unpacking 133 | PackedArray_pack(a, j, in, count); 134 | PackedArray_unpack(a, j, out, count); 135 | 136 | // the following are semantically equivalent 137 | PackedArray_set(a, j, value); 138 | PackedArray_pack(a, j, &value, 1); 139 | 140 | value = PackedArray_get(a, i); 141 | PackedArray_unpack(a, i, &value, 1); 142 | 143 | -------------------------------------------------------------------------------- 144 | 145 | ## Compiling 146 | 147 | In order to use `PackedArray` or `PackedArraySIMD` in your own project, you just 148 | have to bring in the two `PackedArray.h` and `PackedArray.c` (or 149 | `PackedArraySIMD.c`) files. It's that simple. 150 | 151 | You can customize `PackedArray.c`'s behavior by defining the following macros: 152 | 153 | - `PACKEDARRAY_ASSERT` 154 | - `PACKEDARRAY_MALLOC` 155 | - `PACKEDARRAY_FREE` 156 | 157 | You can customize `PackedArraySIMD.c`'s behavior by defining the following 158 | macros: 159 | 160 | - `PACKEDARRAY_ASSERT` 161 | - `PACKEDARRAY_ALIGNED_MALLOC` 162 | - `PACKEDARRAY_FREE` 163 | 164 | `PackedArray.c` and `PackedArraySIMD.c` can compile themselves into either a 165 | test program or a micro-benchmark. For that, you have to define one of the 166 | following preprocessor macros: 167 | 168 | - `PACKEDARRAY_SELF_TEST` 169 | - `PACKEDARRAY_SELF_BENCH` 170 | 171 | For example, from the command line: 172 | 173 | $ cc -o PackedArraySelfTest -DPACKEDARRAY_SELF_TEST -O2 -g PackedArray.c 174 | $ cc -o PackedArraySelfBench -DPACKEDARRAY_SELF_BENCH -DNDEBUG -O2 -g PackedArray.c 175 | 176 | $ cc -o PackedArraySIMDSelfTest -DPACKEDARRAY_SELF_TEST -O2 -g PackedArraySIMD.c 177 | $ cc -o PackedArraySIMDSelfBench -DPACKEDARRAY_SELF_BENCH -DNDEBUG -O2 -g PackedArraySIMD.c 178 | 179 | ### Compiling for Windows 180 | 181 | There is a Visual Studio 2012 solution in the `_win-vs11/` folder.
182 | 183 | ### Compiling for Linux or Mac 184 | 185 | There is a GNU Make 3.81 `Makefile` in the `_gnu-make/` folder: 186 | 187 | $ make -C _gnu-make/ 188 | 189 | ### Compiling for Mac 190 | 191 | See above if you want to compile from the command line. Otherwise there is an Xcode 192 | project located in the `_mac-xcode/` folder. 193 | 194 | ### Compiling for iOS 195 | 196 | There is an Xcode project located in the `_ios-xcode/` folder. 197 | 198 | If you prefer compiling from the command line and deploying to a jailbroken device 199 | through SSH, use: 200 | 201 | $ make -C _gnu-make/ binsubdir=ios CC="$(xcrun --sdk iphoneos --find clang) -isysroot $(xcrun --sdk iphoneos --show-sdk-path) -arch armv7 -arch armv7s -arch arm64" postbuild="codesign -s 'iPhone Developer'" 202 | 203 | ### Compiling for Android 204 | 205 | You will have to install the Android NDK, and point the `$NDK_ROOT` environment 206 | variable to the NDK path: e.g. `export NDK_ROOT=/opt/android-ndk` (without a 207 | trailing `/` character). 208 | 209 | Next, the easy way is to make a standalone Android toolchain with the following 210 | command: 211 | 212 | $ $NDK_ROOT/build/tools/make-standalone-toolchain.sh --system=$(uname -s | tr [A-Z] [a-z])-$(uname -m) --platform=android-3 --toolchain=arm-linux-androideabi-clang3.3 --install-dir=/tmp/android-clang 213 | 214 | Now you can compile the self test and self benchmark programs by running: 215 | 216 | $ make -C _gnu-make/ binsubdir=android CC=/tmp/android-clang/bin/clang CFLAGS='-march=armv7-a -mfloat-abi=softfp -mfpu=neon -O2' 217 | 218 | -------------------------------------------------------------------------------- 219 | 220 | ## Implementation details, what the hell is going on?
221 | 222 | First, in `PackedArray.c` or `PackedArraySIMD.c`, everything that comes below 223 | the `- 8< ----` marker is the code for the self test and self micro-benchmark 224 | programs and can be discarded if you really want to: 225 | 226 | If you want to cut down your anxiety, you can use the provided GNU Makefile and 227 | invoke: 228 | 229 | $ make -C _gnu-make/ cut 230 | 231 | This produces the `PackedArray.cut.c` and `PackedArraySIMD.cut.c` files. 232 | 233 | You may also be troubled by `PackedArray.c` and `PackedArraySIMD.c` including 234 | themselves with `#include PACKEDARRAY_SELF`. By combining preprocessing tricks 235 | and including themselves, `PackedArray.c` and `PackedArraySIMD.c` 236 | "generate the code" for the unrolled pack and unpack implementations. 237 | 238 | By default `PACKEDARRAY_SELF` is defined to `"PackedArray.c"` which assumes the 239 | compiler is going to look for the file in the same directory as the file from 240 | which the `#include` statement is being evaluated. This helps compiling when the 241 | build system refers to the source files with relative paths. Depending on your 242 | compiler/build system combination you may want to override `PACKEDARRAY_SELF` to 243 | `__FILE__`. 244 | 245 | If you want to see the generated code, you can use the provided GNU Makefile and 246 | invoke: 247 | 248 | $ make -C _gnu-make/ preprocess 249 | 250 | This produces the `PackedArray.pp.c` and `PackedArraySIMD.pp.c` files. 251 | 252 | 253 | -------------------------------------------------------------------------------- 254 | 255 | If you find `PackedArray` or `PackedArraySIMD` useful and decide to use it in 256 | your own projects please drop me a line [@gpakosz]. 257 | 258 | If you use it in a commercial project, consider using [Gittip]. 
259 | 260 | [@gpakosz]: https://twitter.com/gpakosz 261 | [Gittip]: https://www.gittip.com/gpakosz/ 262 | -------------------------------------------------------------------------------- /_gnu-make/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build test preprocess cut assembly clean 2 | 3 | # directories 4 | ifeq ($(realpath .),) 5 | $(error your version of Make doesn't support $$(realpath names...) - please use GNU Make 3.81 or later) 6 | endif 7 | 8 | ifeq ($(platform),) 9 | __uname_s := $(shell sh -c 'uname -s 2>/dev/null | tr [A-Z] [a-z] || echo unknown-platform') 10 | __uname_m := $(shell sh -c 'uname -m 2>/dev/null | tr [A-Z] [a-z] || echo unknown-architecture') 11 | 12 | ifeq ($(__uname_s),linux) 13 | override platform := linux 14 | override architecture := $(__uname_m) 15 | endif 16 | ifeq ($(__uname_s),darwin) 17 | override platform := mac 18 | override architecture := $(__uname_m) 19 | endif 20 | endif 21 | ifeq ($(architecture),) 22 | override architecture := unknown-architecture 23 | endif 24 | 25 | prefix := $(realpath ..) 26 | srcdir := $(realpath ..) 
27 | buildir := $(realpath .)/build 28 | binsubdir := $(platform)-$(architecture) 29 | bindir := $(prefix)/bin/$(binsubdir) 30 | 31 | CFLAGS := -O2 -g 32 | 33 | define _generate_rules 34 | .PHONY: build-$(1) 35 | build: build-$(1) 36 | build-$(1): $(bindir)/$(1)SelfTest $(bindir)/$(1)SelfBench 37 | 38 | $(bindir)/$(1)SelfTest: $(srcdir)/$(1).c $(srcdir)/PackedArray.h 39 | mkdir -p $$(@D) 40 | $(CC) -o $$@ -DPACKEDARRAY_SELF_TEST -std=c99 -pedantic $(CFLAGS) $$< 41 | $$(if $(postbuild),$(postbuild) $$@) 42 | 43 | $(bindir)/$(1)SelfBench: $(srcdir)/$(1).c $(srcdir)/PackedArray.h 44 | mkdir -p $$(@D) 45 | $(CC) -o $$@ -DPACKEDARRAY_SELF_BENCH -DNDEBUG -std=c99 -pedantic $(CFLAGS) $$< 46 | $$(if $(postbuild),$(postbuild) $$@) 47 | 48 | .PHONY: assembly-$(1) 49 | assembly: assembly-$(1) 50 | assembly-$(1): $(bindir)/$(1)SelfTest.s $(bindir)/$(1)SelfBench.s 51 | 52 | $(bindir)/$(1)SelfTest.s: $(srcdir)/$(1).c $(srcdir)/PackedArray.h 53 | @echo Generating $$(@F) assembly listing 54 | @mkdir -p $$(@D) 55 | @$(CC) -o $$@ -S -fverbose-asm -g -DPACKEDARRAY_SELF_TEST -std=c99 -pedantic $(CFLAGS) $$< 56 | 57 | $(bindir)/$(1)SelfBench.s: $(srcdir)/$(1).c $(srcdir)/PackedArray.h 58 | @echo Generating $$(@F) assembly listing 59 | @mkdir -p $$(@D) 60 | @$(CC) -o $$@ -S -fverbose-asm -g -DPACKEDARRAY_SELF_BENCH -DNDEBUG -std=c99 -pedantic $(CFLAGS) $$< 61 | 62 | .PHONY: test-$(1) 63 | test : test-$(1) 64 | test-$(1): build-$(1) 65 | $(bindir)/$(1)SelfTest 66 | 67 | .PHONY: preprocess-$(1) 68 | preprocess: preprocess-$(1) 69 | preprocess-$(1): $(srcdir)/$(1).c 70 | @echo Preprocessing $(1).c to $(1).pp.c 71 | $$(eval tmp := $$(shell mktemp -t $(1)XXX)) 72 | @sed -E\ 73 | -e 's/#include (<|").+(>|")/pp(&)/'\ 74 | $(srcdir)/$(1).c > $$(tmp) 75 | @$(CPP) $(CPPFLAGS) -DPACKEDARRAY_SELF=__FILE__ $(CFLAGS) -xc $$(tmp) | sed -E\ 76 | -e '/^# /d'\ 77 | -e 's/[ \t]*$$$$//'\ 78 | -e 's/pp\((.*)\)/\1/'\ 79 | | cat -s > $(srcdir)/$(1).pp.c 80 | @rm -f $$(tmp) 81 | 82 | .PHONY: cut-$(1) 83 | cut: 
cut-$(1) 84 | cut-$(1): $(srcdir)/$(1).c 85 | @echo Cutting down your anxiety to $(1).cut.c 86 | @sed -n '/- 8<.*/q;p' $(srcdir)/$(1).c > $(srcdir)/$(1).cut.c 87 | endef 88 | 89 | $(foreach p,PackedArray PackedArraySIMD, $(eval $(call _generate_rules,$(p)))) 90 | 91 | clean: 92 | rm -rf $(buildir) 93 | rm -rf $(bindir) 94 | rm -rf $(srcdir)/*.pp.c 95 | rm -rf $(srcdir)/*.cut.c 96 | -------------------------------------------------------------------------------- /_ios-xcode/.gitignore: -------------------------------------------------------------------------------- 1 | xcuserdata/ 2 | xcshareddata/ 3 | -------------------------------------------------------------------------------- /_ios-xcode/PackedArray-Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleDisplayName 8 | ${PRODUCT_NAME} 9 | CFBundleExecutable 10 | ${EXECUTABLE_NAME} 11 | CFBundleIdentifier 12 | net.pempek.${PRODUCT_NAME:rfc1034identifier} 13 | CFBundleInfoDictionaryVersion 14 | 6.0 15 | CFBundleName 16 | ${PRODUCT_NAME} 17 | CFBundlePackageType 18 | APPL 19 | CFBundleShortVersionString 20 | 1.0 21 | CFBundleSignature 22 | ???? 23 | CFBundleVersion 24 | 1.0 25 | LSRequiresIPhoneOS 26 | 27 | UIRequiredDeviceCapabilities 28 | 29 | armv7 30 | 31 | UISupportedInterfaceOrientations 32 | 33 | UIInterfaceOrientationPortrait 34 | UIInterfaceOrientationLandscapeLeft 35 | UIInterfaceOrientationLandscapeRight 36 | 37 | UISupportedInterfaceOrientations~ipad 38 | 39 | UIInterfaceOrientationPortrait 40 | UIInterfaceOrientationPortraitUpsideDown 41 | UIInterfaceOrientationLandscapeLeft 42 | UIInterfaceOrientationLandscapeRight 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /_ios-xcode/PackedArray.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 46; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 1A98F9EC17A406A700BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98F9EB17A406A700BF09FF /* PackedArray.c */; }; 11 | 1A98F9F017A408F000BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98F9EB17A406A700BF09FF /* PackedArray.c */; }; 12 | 1AC5A25517AD052200249A68 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */; }; 13 | 1AC5A25617AD052400249A68 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */; }; 14 | /* End PBXBuildFile section */ 15 | 16 | /* Begin PBXFileReference section */ 17 | 1A98F9C917A4018400BF09FF /* PackedArraySelfTest.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySelfTest.app; sourceTree = BUILT_PRODUCTS_DIR; }; 18 | 1A98F9EB17A406A700BF09FF /* PackedArray.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = PackedArray.c; sourceTree = ""; }; 19 | 1A98F9F617A408F000BF09FF /* PackedArraySelfBench.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySelfBench.app; sourceTree = BUILT_PRODUCTS_DIR; }; 20 | 1A98FA1A17A4267A00BF09FF /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PackedArray.h; sourceTree = ""; }; 21 | 1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = PackedArraySIMD.c; sourceTree = ""; }; 22 | 1AC5A24917AD04A600249A68 /* PackedArraySIMDSelfTest.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySIMDSelfTest.app; sourceTree = BUILT_PRODUCTS_DIR; }; 23 | 1AC5A25317AD04A800249A68 
/* PackedArraySIMDSelfBench.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySIMDSelfBench.app; sourceTree = BUILT_PRODUCTS_DIR; }; 24 | /* End PBXFileReference section */ 25 | 26 | /* Begin PBXFrameworksBuildPhase section */ 27 | 1A98F9C617A4018400BF09FF /* Frameworks */ = { 28 | isa = PBXFrameworksBuildPhase; 29 | buildActionMask = 2147483647; 30 | files = ( 31 | ); 32 | runOnlyForDeploymentPostprocessing = 0; 33 | }; 34 | 1A98F9F117A408F000BF09FF /* Frameworks */ = { 35 | isa = PBXFrameworksBuildPhase; 36 | buildActionMask = 2147483647; 37 | files = ( 38 | ); 39 | runOnlyForDeploymentPostprocessing = 0; 40 | }; 41 | 1AC5A24417AD04A600249A68 /* Frameworks */ = { 42 | isa = PBXFrameworksBuildPhase; 43 | buildActionMask = 2147483647; 44 | files = ( 45 | ); 46 | runOnlyForDeploymentPostprocessing = 0; 47 | }; 48 | 1AC5A24E17AD04A800249A68 /* Frameworks */ = { 49 | isa = PBXFrameworksBuildPhase; 50 | buildActionMask = 2147483647; 51 | files = ( 52 | ); 53 | runOnlyForDeploymentPostprocessing = 0; 54 | }; 55 | /* End PBXFrameworksBuildPhase section */ 56 | 57 | /* Begin PBXGroup section */ 58 | 1A98F9C017A4018400BF09FF = { 59 | isa = PBXGroup; 60 | children = ( 61 | 1A98F9ED17A406AE00BF09FF /* PackedArray */, 62 | 1A98F9CA17A4018400BF09FF /* Products */, 63 | ); 64 | sourceTree = ""; 65 | }; 66 | 1A98F9CA17A4018400BF09FF /* Products */ = { 67 | isa = PBXGroup; 68 | children = ( 69 | 1A98F9C917A4018400BF09FF /* PackedArraySelfTest.app */, 70 | 1A98F9F617A408F000BF09FF /* PackedArraySelfBench.app */, 71 | 1AC5A24917AD04A600249A68 /* PackedArraySIMDSelfTest.app */, 72 | 1AC5A25317AD04A800249A68 /* PackedArraySIMDSelfBench.app */, 73 | ); 74 | name = Products; 75 | sourceTree = ""; 76 | }; 77 | 1A98F9ED17A406AE00BF09FF /* PackedArray */ = { 78 | isa = PBXGroup; 79 | children = ( 80 | 1A98F9EB17A406A700BF09FF /* PackedArray.c */, 81 | 1A98FA1A17A4267A00BF09FF /* PackedArray.h */, 82 | 1AC5A24017AD049E00249A68 
/* PackedArraySIMD.c */, 83 | ); 84 | name = PackedArray; 85 | path = ..; 86 | sourceTree = ""; 87 | }; 88 | /* End PBXGroup section */ 89 | 90 | /* Begin PBXNativeTarget section */ 91 | 1A98F9C817A4018400BF09FF /* PackedArraySelfTest */ = { 92 | isa = PBXNativeTarget; 93 | buildConfigurationList = 1A98F9E617A4018400BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */; 94 | buildPhases = ( 95 | 1A98F9C517A4018400BF09FF /* Sources */, 96 | 1A98F9C617A4018400BF09FF /* Frameworks */, 97 | 1A98F9C717A4018400BF09FF /* Resources */, 98 | ); 99 | buildRules = ( 100 | ); 101 | dependencies = ( 102 | ); 103 | name = PackedArraySelfTest; 104 | productName = PackedArray; 105 | productReference = 1A98F9C917A4018400BF09FF /* PackedArraySelfTest.app */; 106 | productType = "com.apple.product-type.application"; 107 | }; 108 | 1A98F9EE17A408F000BF09FF /* PackedArraySelfBench */ = { 109 | isa = PBXNativeTarget; 110 | buildConfigurationList = 1A98F9F317A408F000BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */; 111 | buildPhases = ( 112 | 1A98F9EF17A408F000BF09FF /* Sources */, 113 | 1A98F9F117A408F000BF09FF /* Frameworks */, 114 | 1A98F9F217A408F000BF09FF /* Resources */, 115 | ); 116 | buildRules = ( 117 | ); 118 | dependencies = ( 119 | ); 120 | name = PackedArraySelfBench; 121 | productName = PackedArray; 122 | productReference = 1A98F9F617A408F000BF09FF /* PackedArraySelfBench.app */; 123 | productType = "com.apple.product-type.application"; 124 | }; 125 | 1AC5A24117AD04A600249A68 /* PackedArraySIMDSelfTest */ = { 126 | isa = PBXNativeTarget; 127 | buildConfigurationList = 1AC5A24617AD04A600249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */; 128 | buildPhases = ( 129 | 1AC5A24217AD04A600249A68 /* Sources */, 130 | 1AC5A24417AD04A600249A68 /* Frameworks */, 131 | 1AC5A24517AD04A600249A68 /* Resources */, 132 | ); 133 | buildRules = ( 134 | ); 135 | dependencies = ( 136 | ); 137 | name = 
PackedArraySIMDSelfTest; 138 | productName = PackedArray; 139 | productReference = 1AC5A24917AD04A600249A68 /* PackedArraySIMDSelfTest.app */; 140 | productType = "com.apple.product-type.application"; 141 | }; 142 | 1AC5A24B17AD04A800249A68 /* PackedArraySIMDSelfBench */ = { 143 | isa = PBXNativeTarget; 144 | buildConfigurationList = 1AC5A25017AD04A800249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */; 145 | buildPhases = ( 146 | 1AC5A24C17AD04A800249A68 /* Sources */, 147 | 1AC5A24E17AD04A800249A68 /* Frameworks */, 148 | 1AC5A24F17AD04A800249A68 /* Resources */, 149 | ); 150 | buildRules = ( 151 | ); 152 | dependencies = ( 153 | ); 154 | name = PackedArraySIMDSelfBench; 155 | productName = PackedArray; 156 | productReference = 1AC5A25317AD04A800249A68 /* PackedArraySIMDSelfBench.app */; 157 | productType = "com.apple.product-type.application"; 158 | }; 159 | /* End PBXNativeTarget section */ 160 | 161 | /* Begin PBXProject section */ 162 | 1A98F9C117A4018400BF09FF /* Project object */ = { 163 | isa = PBXProject; 164 | attributes = { 165 | LastUpgradeCheck = 0460; 166 | ORGANIZATIONNAME = "Gregory Pakosz"; 167 | }; 168 | buildConfigurationList = 1A98F9C417A4018400BF09FF /* Build configuration list for PBXProject "PackedArray" */; 169 | compatibilityVersion = "Xcode 3.2"; 170 | developmentRegion = English; 171 | hasScannedForEncodings = 0; 172 | knownRegions = ( 173 | en, 174 | ); 175 | mainGroup = 1A98F9C017A4018400BF09FF; 176 | productRefGroup = 1A98F9CA17A4018400BF09FF /* Products */; 177 | projectDirPath = ""; 178 | projectRoot = ""; 179 | targets = ( 180 | 1A98F9C817A4018400BF09FF /* PackedArraySelfTest */, 181 | 1A98F9EE17A408F000BF09FF /* PackedArraySelfBench */, 182 | 1AC5A24117AD04A600249A68 /* PackedArraySIMDSelfTest */, 183 | 1AC5A24B17AD04A800249A68 /* PackedArraySIMDSelfBench */, 184 | ); 185 | }; 186 | /* End PBXProject section */ 187 | 188 | /* Begin PBXResourcesBuildPhase section */ 189 | 1A98F9C717A4018400BF09FF /* 
Resources */ = { 190 | isa = PBXResourcesBuildPhase; 191 | buildActionMask = 2147483647; 192 | files = ( 193 | ); 194 | runOnlyForDeploymentPostprocessing = 0; 195 | }; 196 | 1A98F9F217A408F000BF09FF /* Resources */ = { 197 | isa = PBXResourcesBuildPhase; 198 | buildActionMask = 2147483647; 199 | files = ( 200 | ); 201 | runOnlyForDeploymentPostprocessing = 0; 202 | }; 203 | 1AC5A24517AD04A600249A68 /* Resources */ = { 204 | isa = PBXResourcesBuildPhase; 205 | buildActionMask = 2147483647; 206 | files = ( 207 | ); 208 | runOnlyForDeploymentPostprocessing = 0; 209 | }; 210 | 1AC5A24F17AD04A800249A68 /* Resources */ = { 211 | isa = PBXResourcesBuildPhase; 212 | buildActionMask = 2147483647; 213 | files = ( 214 | ); 215 | runOnlyForDeploymentPostprocessing = 0; 216 | }; 217 | /* End PBXResourcesBuildPhase section */ 218 | 219 | /* Begin PBXSourcesBuildPhase section */ 220 | 1A98F9C517A4018400BF09FF /* Sources */ = { 221 | isa = PBXSourcesBuildPhase; 222 | buildActionMask = 2147483647; 223 | files = ( 224 | 1A98F9EC17A406A700BF09FF /* PackedArray.c in Sources */, 225 | ); 226 | runOnlyForDeploymentPostprocessing = 0; 227 | }; 228 | 1A98F9EF17A408F000BF09FF /* Sources */ = { 229 | isa = PBXSourcesBuildPhase; 230 | buildActionMask = 2147483647; 231 | files = ( 232 | 1A98F9F017A408F000BF09FF /* PackedArray.c in Sources */, 233 | ); 234 | runOnlyForDeploymentPostprocessing = 0; 235 | }; 236 | 1AC5A24217AD04A600249A68 /* Sources */ = { 237 | isa = PBXSourcesBuildPhase; 238 | buildActionMask = 2147483647; 239 | files = ( 240 | 1AC5A25517AD052200249A68 /* PackedArraySIMD.c in Sources */, 241 | ); 242 | runOnlyForDeploymentPostprocessing = 0; 243 | }; 244 | 1AC5A24C17AD04A800249A68 /* Sources */ = { 245 | isa = PBXSourcesBuildPhase; 246 | buildActionMask = 2147483647; 247 | files = ( 248 | 1AC5A25617AD052400249A68 /* PackedArraySIMD.c in Sources */, 249 | ); 250 | runOnlyForDeploymentPostprocessing = 0; 251 | }; 252 | /* End PBXSourcesBuildPhase section */ 253 | 254 | /* Begin 
XCBuildConfiguration section */ 255 | 1A98F9E417A4018400BF09FF /* Debug */ = { 256 | isa = XCBuildConfiguration; 257 | buildSettings = { 258 | ALWAYS_SEARCH_USER_PATHS = NO; 259 | ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)"; 260 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 261 | CLANG_CXX_LIBRARY = "libc++"; 262 | CLANG_WARN_CONSTANT_CONVERSION = YES; 263 | CLANG_WARN_EMPTY_BODY = YES; 264 | CLANG_WARN_ENUM_CONVERSION = YES; 265 | CLANG_WARN_INT_CONVERSION = YES; 266 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 267 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 268 | COPY_PHASE_STRIP = NO; 269 | GCC_C_LANGUAGE_STANDARD = gnu99; 270 | GCC_DYNAMIC_NO_PIC = NO; 271 | GCC_OPTIMIZATION_LEVEL = 0; 272 | GCC_SYMBOLS_PRIVATE_EXTERN = NO; 273 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 274 | GCC_WARN_UNINITIALIZED_AUTOS = YES; 275 | GCC_WARN_UNUSED_VARIABLE = YES; 276 | INFOPLIST_FILE = "PackedArray-Info.plist"; 277 | IPHONEOS_DEPLOYMENT_TARGET = 7.0; 278 | ONLY_ACTIVE_ARCH = YES; 279 | PRODUCT_NAME = "$(TARGET_NAME)"; 280 | SDKROOT = iphoneos; 281 | TARGETED_DEVICE_FAMILY = "1,2"; 282 | }; 283 | name = Debug; 284 | }; 285 | 1A98F9E517A4018400BF09FF /* Release */ = { 286 | isa = XCBuildConfiguration; 287 | buildSettings = { 288 | ALWAYS_SEARCH_USER_PATHS = NO; 289 | ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)"; 290 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 291 | CLANG_CXX_LIBRARY = "libc++"; 292 | CLANG_WARN_CONSTANT_CONVERSION = YES; 293 | CLANG_WARN_EMPTY_BODY = YES; 294 | CLANG_WARN_ENUM_CONVERSION = YES; 295 | CLANG_WARN_INT_CONVERSION = YES; 296 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 297 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 298 | COPY_PHASE_STRIP = YES; 299 | GCC_C_LANGUAGE_STANDARD = gnu99; 300 | GCC_OPTIMIZATION_LEVEL = 2; 301 | GCC_PREPROCESSOR_DEFINITIONS = NDEBUG; 302 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 303 | GCC_WARN_UNINITIALIZED_AUTOS = YES; 304 | GCC_WARN_UNUSED_VARIABLE = YES; 305 | INFOPLIST_FILE = "PackedArray-Info.plist"; 
306 | IPHONEOS_DEPLOYMENT_TARGET = 7.0; 307 | OTHER_CFLAGS = "-DNS_BLOCK_ASSERTIONS=1"; 308 | PRODUCT_NAME = "$(TARGET_NAME)"; 309 | SDKROOT = iphoneos; 310 | TARGETED_DEVICE_FAMILY = "1,2"; 311 | VALIDATE_PRODUCT = YES; 312 | }; 313 | name = Release; 314 | }; 315 | 1A98F9E717A4018400BF09FF /* Debug */ = { 316 | isa = XCBuildConfiguration; 317 | buildSettings = { 318 | GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST; 319 | WRAPPER_EXTENSION = app; 320 | }; 321 | name = Debug; 322 | }; 323 | 1A98F9E817A4018400BF09FF /* Release */ = { 324 | isa = XCBuildConfiguration; 325 | buildSettings = { 326 | GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST; 327 | WRAPPER_EXTENSION = app; 328 | }; 329 | name = Release; 330 | }; 331 | 1A98F9F417A408F000BF09FF /* Debug */ = { 332 | isa = XCBuildConfiguration; 333 | buildSettings = { 334 | GCC_PREPROCESSOR_DEFINITIONS = ( 335 | PACKEDARRAY_SELF_BENCH, 336 | NDEBUG, 337 | "$(inherited)", 338 | ); 339 | WRAPPER_EXTENSION = app; 340 | }; 341 | name = Debug; 342 | }; 343 | 1A98F9F517A408F000BF09FF /* Release */ = { 344 | isa = XCBuildConfiguration; 345 | buildSettings = { 346 | GCC_PREPROCESSOR_DEFINITIONS = ( 347 | PACKEDARRAY_SELF_BENCH, 348 | "$(inherited)", 349 | ); 350 | WRAPPER_EXTENSION = app; 351 | }; 352 | name = Release; 353 | }; 354 | 1AC5A24717AD04A600249A68 /* Debug */ = { 355 | isa = XCBuildConfiguration; 356 | buildSettings = { 357 | ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)"; 358 | GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST; 359 | WRAPPER_EXTENSION = app; 360 | }; 361 | name = Debug; 362 | }; 363 | 1AC5A24817AD04A600249A68 /* Release */ = { 364 | isa = XCBuildConfiguration; 365 | buildSettings = { 366 | ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)"; 367 | GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST; 368 | WRAPPER_EXTENSION = app; 369 | }; 370 | name = Release; 371 | }; 372 | 1AC5A25117AD04A800249A68 /* Debug */ = { 373 | isa = XCBuildConfiguration; 374 | buildSettings = { 375 | 
GCC_PREPROCESSOR_DEFINITIONS = ( 376 | PACKEDARRAY_SELF_BENCH, 377 | NDEBUG, 378 | "$(inherited)", 379 | ); 380 | WRAPPER_EXTENSION = app; 381 | }; 382 | name = Debug; 383 | }; 384 | 1AC5A25217AD04A800249A68 /* Release */ = { 385 | isa = XCBuildConfiguration; 386 | buildSettings = { 387 | GCC_PREPROCESSOR_DEFINITIONS = ( 388 | PACKEDARRAY_SELF_BENCH, 389 | "$(inherited)", 390 | ); 391 | WRAPPER_EXTENSION = app; 392 | }; 393 | name = Release; 394 | }; 395 | /* End XCBuildConfiguration section */ 396 | 397 | /* Begin XCConfigurationList section */ 398 | 1A98F9C417A4018400BF09FF /* Build configuration list for PBXProject "PackedArray" */ = { 399 | isa = XCConfigurationList; 400 | buildConfigurations = ( 401 | 1A98F9E417A4018400BF09FF /* Debug */, 402 | 1A98F9E517A4018400BF09FF /* Release */, 403 | ); 404 | defaultConfigurationIsVisible = 0; 405 | defaultConfigurationName = Release; 406 | }; 407 | 1A98F9E617A4018400BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */ = { 408 | isa = XCConfigurationList; 409 | buildConfigurations = ( 410 | 1A98F9E717A4018400BF09FF /* Debug */, 411 | 1A98F9E817A4018400BF09FF /* Release */, 412 | ); 413 | defaultConfigurationIsVisible = 0; 414 | defaultConfigurationName = Release; 415 | }; 416 | 1A98F9F317A408F000BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */ = { 417 | isa = XCConfigurationList; 418 | buildConfigurations = ( 419 | 1A98F9F417A408F000BF09FF /* Debug */, 420 | 1A98F9F517A408F000BF09FF /* Release */, 421 | ); 422 | defaultConfigurationIsVisible = 0; 423 | defaultConfigurationName = Release; 424 | }; 425 | 1AC5A24617AD04A600249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */ = { 426 | isa = XCConfigurationList; 427 | buildConfigurations = ( 428 | 1AC5A24717AD04A600249A68 /* Debug */, 429 | 1AC5A24817AD04A600249A68 /* Release */, 430 | ); 431 | defaultConfigurationIsVisible = 0; 432 | defaultConfigurationName = Release; 433 | }; 434 | 
1AC5A25017AD04A800249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */ = { 435 | isa = XCConfigurationList; 436 | buildConfigurations = ( 437 | 1AC5A25117AD04A800249A68 /* Debug */, 438 | 1AC5A25217AD04A800249A68 /* Release */, 439 | ); 440 | defaultConfigurationIsVisible = 0; 441 | defaultConfigurationName = Release; 442 | }; 443 | /* End XCConfigurationList section */ 444 | }; 445 | rootObject = 1A98F9C117A4018400BF09FF /* Project object */; 446 | } 447 | -------------------------------------------------------------------------------- /_ios-xcode/PackedArray.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /_mac-xcode/.gitignore: -------------------------------------------------------------------------------- 1 | xcuserdata/ 2 | xcshareddata/ 3 | -------------------------------------------------------------------------------- /_mac-xcode/PackedArray.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 46; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 1A98FA1017A424D700BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98FA0E17A424D700BF09FF /* PackedArray.c */; }; 11 | 1A98FA1317A4262700BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98FA0E17A424D700BF09FF /* PackedArray.c */; }; 12 | 1AEF60E217AD035900CA6B64 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */; }; 13 | 1AEF60E317AD035A00CA6B64 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */; }; 14 | /* End PBXBuildFile section */ 15 | 16 | /* Begin PBXCopyFilesBuildPhase section */ 17 | 1A98F9FF17A4249200BF09FF /* CopyFiles */ = { 18 | isa = PBXCopyFilesBuildPhase; 19 | buildActionMask = 2147483647; 20 | dstPath = /usr/share/man/man1/; 21 | dstSubfolderSpec = 0; 22 | files = ( 23 | ); 24 | runOnlyForDeploymentPostprocessing = 1; 25 | }; 26 | 1A98FA1517A4262700BF09FF /* CopyFiles */ = { 27 | isa = PBXCopyFilesBuildPhase; 28 | buildActionMask = 2147483647; 29 | dstPath = /usr/share/man/man1/; 30 | dstSubfolderSpec = 0; 31 | files = ( 32 | ); 33 | runOnlyForDeploymentPostprocessing = 1; 34 | }; 35 | 1AEF60D417AD02BA00CA6B64 /* CopyFiles */ = { 36 | isa = PBXCopyFilesBuildPhase; 37 | buildActionMask = 2147483647; 38 | dstPath = /usr/share/man/man1/; 39 | dstSubfolderSpec = 0; 40 | files = ( 41 | ); 42 | runOnlyForDeploymentPostprocessing = 1; 43 | }; 44 | 1AEF60DD17AD02BD00CA6B64 /* CopyFiles */ = { 45 | isa = PBXCopyFilesBuildPhase; 46 | buildActionMask = 2147483647; 47 | dstPath = /usr/share/man/man1/; 48 | dstSubfolderSpec = 0; 49 | files = ( 50 | ); 51 | runOnlyForDeploymentPostprocessing = 1; 52 | }; 53 | /* End PBXCopyFilesBuildPhase section */ 54 | 55 | /* Begin PBXFileReference section */ 56 | 1A98FA0117A4249200BF09FF /* PackedArraySelfTest */ 
= {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySelfTest; sourceTree = BUILT_PRODUCTS_DIR; }; 57 | 1A98FA0E17A424D700BF09FF /* PackedArray.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = PackedArray.c; sourceTree = ""; }; 58 | 1A98FA0F17A424D700BF09FF /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PackedArray.h; sourceTree = ""; }; 59 | 1A98FA1917A4262700BF09FF /* PackedArraySelfBench */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySelfBench; sourceTree = BUILT_PRODUCTS_DIR; }; 60 | 1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = PackedArraySIMD.c; sourceTree = ""; }; 61 | 1AEF60D817AD02BA00CA6B64 /* PackedArraySIMDSelfTest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySIMDSelfTest; sourceTree = BUILT_PRODUCTS_DIR; }; 62 | 1AEF60E117AD02BD00CA6B64 /* PackedArraySIMDSelfBench */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySIMDSelfBench; sourceTree = BUILT_PRODUCTS_DIR; }; 63 | /* End PBXFileReference section */ 64 | 65 | /* Begin PBXFrameworksBuildPhase section */ 66 | 1A98F9FE17A4249200BF09FF /* Frameworks */ = { 67 | isa = PBXFrameworksBuildPhase; 68 | buildActionMask = 2147483647; 69 | files = ( 70 | ); 71 | runOnlyForDeploymentPostprocessing = 0; 72 | }; 73 | 1A98FA1417A4262700BF09FF /* Frameworks */ = { 74 | isa = PBXFrameworksBuildPhase; 75 | buildActionMask = 2147483647; 76 | files = ( 77 | ); 78 | runOnlyForDeploymentPostprocessing = 0; 79 | }; 80 | 1AEF60D317AD02BA00CA6B64 /* Frameworks */ = { 81 | isa = PBXFrameworksBuildPhase; 82 | buildActionMask = 2147483647; 83 | files = ( 84 | ); 85 | 
runOnlyForDeploymentPostprocessing = 0; 86 | }; 87 | 1AEF60DC17AD02BD00CA6B64 /* Frameworks */ = { 88 | isa = PBXFrameworksBuildPhase; 89 | buildActionMask = 2147483647; 90 | files = ( 91 | ); 92 | runOnlyForDeploymentPostprocessing = 0; 93 | }; 94 | /* End PBXFrameworksBuildPhase section */ 95 | 96 | /* Begin PBXGroup section */ 97 | 1A98F9F817A4249200BF09FF = { 98 | isa = PBXGroup; 99 | children = ( 100 | 1A98FA0D17A424BE00BF09FF /* PackedArray */, 101 | 1A98FA0217A4249200BF09FF /* Products */, 102 | ); 103 | sourceTree = ""; 104 | }; 105 | 1A98FA0217A4249200BF09FF /* Products */ = { 106 | isa = PBXGroup; 107 | children = ( 108 | 1A98FA0117A4249200BF09FF /* PackedArraySelfTest */, 109 | 1A98FA1917A4262700BF09FF /* PackedArraySelfBench */, 110 | 1AEF60D817AD02BA00CA6B64 /* PackedArraySIMDSelfTest */, 111 | 1AEF60E117AD02BD00CA6B64 /* PackedArraySIMDSelfBench */, 112 | ); 113 | name = Products; 114 | sourceTree = ""; 115 | }; 116 | 1A98FA0D17A424BE00BF09FF /* PackedArray */ = { 117 | isa = PBXGroup; 118 | children = ( 119 | 1A98FA0E17A424D700BF09FF /* PackedArray.c */, 120 | 1A98FA0F17A424D700BF09FF /* PackedArray.h */, 121 | 1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */, 122 | ); 123 | name = PackedArray; 124 | path = ..; 125 | sourceTree = ""; 126 | }; 127 | /* End PBXGroup section */ 128 | 129 | /* Begin PBXNativeTarget section */ 130 | 1A98FA0017A4249200BF09FF /* PackedArraySelfTest */ = { 131 | isa = PBXNativeTarget; 132 | buildConfigurationList = 1A98FA0A17A4249200BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */; 133 | buildPhases = ( 134 | 1A98F9FD17A4249200BF09FF /* Sources */, 135 | 1A98F9FE17A4249200BF09FF /* Frameworks */, 136 | 1A98F9FF17A4249200BF09FF /* CopyFiles */, 137 | ); 138 | buildRules = ( 139 | ); 140 | dependencies = ( 141 | ); 142 | name = PackedArraySelfTest; 143 | productName = PackedArray; 144 | productReference = 1A98FA0117A4249200BF09FF /* PackedArraySelfTest */; 145 | productType = 
"com.apple.product-type.tool"; 146 | }; 147 | 1A98FA1117A4262700BF09FF /* PackedArraySelfBench */ = { 148 | isa = PBXNativeTarget; 149 | buildConfigurationList = 1A98FA1617A4262700BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */; 150 | buildPhases = ( 151 | 1A98FA1217A4262700BF09FF /* Sources */, 152 | 1A98FA1417A4262700BF09FF /* Frameworks */, 153 | 1A98FA1517A4262700BF09FF /* CopyFiles */, 154 | ); 155 | buildRules = ( 156 | ); 157 | dependencies = ( 158 | ); 159 | name = PackedArraySelfBench; 160 | productName = PackedArray; 161 | productReference = 1A98FA1917A4262700BF09FF /* PackedArraySelfBench */; 162 | productType = "com.apple.product-type.tool"; 163 | }; 164 | 1AEF60D017AD02BA00CA6B64 /* PackedArraySIMDSelfTest */ = { 165 | isa = PBXNativeTarget; 166 | buildConfigurationList = 1AEF60D517AD02BA00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */; 167 | buildPhases = ( 168 | 1AEF60D117AD02BA00CA6B64 /* Sources */, 169 | 1AEF60D317AD02BA00CA6B64 /* Frameworks */, 170 | 1AEF60D417AD02BA00CA6B64 /* CopyFiles */, 171 | ); 172 | buildRules = ( 173 | ); 174 | dependencies = ( 175 | ); 176 | name = PackedArraySIMDSelfTest; 177 | productName = PackedArray; 178 | productReference = 1AEF60D817AD02BA00CA6B64 /* PackedArraySIMDSelfTest */; 179 | productType = "com.apple.product-type.tool"; 180 | }; 181 | 1AEF60D917AD02BD00CA6B64 /* PackedArraySIMDSelfBench */ = { 182 | isa = PBXNativeTarget; 183 | buildConfigurationList = 1AEF60DE17AD02BD00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */; 184 | buildPhases = ( 185 | 1AEF60DA17AD02BD00CA6B64 /* Sources */, 186 | 1AEF60DC17AD02BD00CA6B64 /* Frameworks */, 187 | 1AEF60DD17AD02BD00CA6B64 /* CopyFiles */, 188 | ); 189 | buildRules = ( 190 | ); 191 | dependencies = ( 192 | ); 193 | name = PackedArraySIMDSelfBench; 194 | productName = PackedArray; 195 | productReference = 1AEF60E117AD02BD00CA6B64 /* PackedArraySIMDSelfBench */; 
196 | productType = "com.apple.product-type.tool"; 197 | }; 198 | /* End PBXNativeTarget section */ 199 | 200 | /* Begin PBXProject section */ 201 | 1A98F9F917A4249200BF09FF /* Project object */ = { 202 | isa = PBXProject; 203 | attributes = { 204 | LastUpgradeCheck = 0460; 205 | ORGANIZATIONNAME = "Gregory Pakosz"; 206 | }; 207 | buildConfigurationList = 1A98F9FC17A4249200BF09FF /* Build configuration list for PBXProject "PackedArray" */; 208 | compatibilityVersion = "Xcode 3.2"; 209 | developmentRegion = English; 210 | hasScannedForEncodings = 0; 211 | knownRegions = ( 212 | en, 213 | ); 214 | mainGroup = 1A98F9F817A4249200BF09FF; 215 | productRefGroup = 1A98FA0217A4249200BF09FF /* Products */; 216 | projectDirPath = ""; 217 | projectRoot = ""; 218 | targets = ( 219 | 1A98FA0017A4249200BF09FF /* PackedArraySelfTest */, 220 | 1A98FA1117A4262700BF09FF /* PackedArraySelfBench */, 221 | 1AEF60D017AD02BA00CA6B64 /* PackedArraySIMDSelfTest */, 222 | 1AEF60D917AD02BD00CA6B64 /* PackedArraySIMDSelfBench */, 223 | ); 224 | }; 225 | /* End PBXProject section */ 226 | 227 | /* Begin PBXSourcesBuildPhase section */ 228 | 1A98F9FD17A4249200BF09FF /* Sources */ = { 229 | isa = PBXSourcesBuildPhase; 230 | buildActionMask = 2147483647; 231 | files = ( 232 | 1A98FA1017A424D700BF09FF /* PackedArray.c in Sources */, 233 | ); 234 | runOnlyForDeploymentPostprocessing = 0; 235 | }; 236 | 1A98FA1217A4262700BF09FF /* Sources */ = { 237 | isa = PBXSourcesBuildPhase; 238 | buildActionMask = 2147483647; 239 | files = ( 240 | 1A98FA1317A4262700BF09FF /* PackedArray.c in Sources */, 241 | ); 242 | runOnlyForDeploymentPostprocessing = 0; 243 | }; 244 | 1AEF60D117AD02BA00CA6B64 /* Sources */ = { 245 | isa = PBXSourcesBuildPhase; 246 | buildActionMask = 2147483647; 247 | files = ( 248 | 1AEF60E217AD035900CA6B64 /* PackedArraySIMD.c in Sources */, 249 | ); 250 | runOnlyForDeploymentPostprocessing = 0; 251 | }; 252 | 1AEF60DA17AD02BD00CA6B64 /* Sources */ = { 253 | isa = PBXSourcesBuildPhase; 254 
| buildActionMask = 2147483647; 255 | files = ( 256 | 1AEF60E317AD035A00CA6B64 /* PackedArraySIMD.c in Sources */, 257 | ); 258 | runOnlyForDeploymentPostprocessing = 0; 259 | }; 260 | /* End PBXSourcesBuildPhase section */ 261 | 262 | /* Begin XCBuildConfiguration section */ 263 | 1A98FA0817A4249200BF09FF /* Debug */ = { 264 | isa = XCBuildConfiguration; 265 | buildSettings = { 266 | ALWAYS_SEARCH_USER_PATHS = NO; 267 | ARCHS = "$(ARCHS_STANDARD_64_BIT)"; 268 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 269 | CLANG_CXX_LIBRARY = "libc++"; 270 | CLANG_WARN_CONSTANT_CONVERSION = YES; 271 | CLANG_WARN_EMPTY_BODY = YES; 272 | CLANG_WARN_ENUM_CONVERSION = YES; 273 | CLANG_WARN_INT_CONVERSION = YES; 274 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 275 | COPY_PHASE_STRIP = NO; 276 | GCC_C_LANGUAGE_STANDARD = gnu99; 277 | GCC_DYNAMIC_NO_PIC = NO; 278 | GCC_ENABLE_OBJC_EXCEPTIONS = YES; 279 | GCC_OPTIMIZATION_LEVEL = 0; 280 | GCC_SYMBOLS_PRIVATE_EXTERN = NO; 281 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 282 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 283 | GCC_WARN_UNINITIALIZED_AUTOS = YES; 284 | GCC_WARN_UNUSED_VARIABLE = YES; 285 | MACOSX_DEPLOYMENT_TARGET = 10.8; 286 | ONLY_ACTIVE_ARCH = YES; 287 | PRODUCT_NAME = "$(TARGET_NAME)"; 288 | SDKROOT = macosx; 289 | }; 290 | name = Debug; 291 | }; 292 | 1A98FA0917A4249200BF09FF /* Release */ = { 293 | isa = XCBuildConfiguration; 294 | buildSettings = { 295 | ALWAYS_SEARCH_USER_PATHS = NO; 296 | ARCHS = "$(ARCHS_STANDARD_64_BIT)"; 297 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 298 | CLANG_CXX_LIBRARY = "libc++"; 299 | CLANG_WARN_CONSTANT_CONVERSION = YES; 300 | CLANG_WARN_EMPTY_BODY = YES; 301 | CLANG_WARN_ENUM_CONVERSION = YES; 302 | CLANG_WARN_INT_CONVERSION = YES; 303 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 304 | COPY_PHASE_STRIP = YES; 305 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 306 | GCC_C_LANGUAGE_STANDARD = gnu99; 307 | GCC_ENABLE_OBJC_EXCEPTIONS = YES; 308 | GCC_OPTIMIZATION_LEVEL = 2; 309 | 
GCC_PREPROCESSOR_DEFINITIONS = NDEBUG; 310 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 311 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 312 | GCC_WARN_UNINITIALIZED_AUTOS = YES; 313 | GCC_WARN_UNUSED_VARIABLE = YES; 314 | MACOSX_DEPLOYMENT_TARGET = 10.8; 315 | PRODUCT_NAME = "$(TARGET_NAME)"; 316 | SDKROOT = macosx; 317 | }; 318 | name = Release; 319 | }; 320 | 1A98FA0B17A4249200BF09FF /* Debug */ = { 321 | isa = XCBuildConfiguration; 322 | buildSettings = { 323 | GCC_PREPROCESSOR_DEFINITIONS = ( 324 | PACKEDARRAY_SELF_TEST, 325 | "$(inherited)", 326 | ); 327 | }; 328 | name = Debug; 329 | }; 330 | 1A98FA0C17A4249200BF09FF /* Release */ = { 331 | isa = XCBuildConfiguration; 332 | buildSettings = { 333 | GCC_PREPROCESSOR_DEFINITIONS = ( 334 | PACKEDARRAY_SELF_TEST, 335 | "$(inherited)", 336 | ); 337 | }; 338 | name = Release; 339 | }; 340 | 1A98FA1717A4262700BF09FF /* Debug */ = { 341 | isa = XCBuildConfiguration; 342 | buildSettings = { 343 | GCC_PREPROCESSOR_DEFINITIONS = ( 344 | PACKEDARRAY_SELF_BENCH, 345 | NDEBUG, 346 | "$(inherited)", 347 | ); 348 | }; 349 | name = Debug; 350 | }; 351 | 1A98FA1817A4262700BF09FF /* Release */ = { 352 | isa = XCBuildConfiguration; 353 | buildSettings = { 354 | GCC_PREPROCESSOR_DEFINITIONS = ( 355 | PACKEDARRAY_SELF_BENCH, 356 | NDEBUG, 357 | "$(inherited)", 358 | ); 359 | }; 360 | name = Release; 361 | }; 362 | 1AEF60D617AD02BA00CA6B64 /* Debug */ = { 363 | isa = XCBuildConfiguration; 364 | buildSettings = { 365 | GCC_PREPROCESSOR_DEFINITIONS = ( 366 | PACKEDARRAY_SELF_TEST, 367 | "$(inherited)", 368 | ); 369 | }; 370 | name = Debug; 371 | }; 372 | 1AEF60D717AD02BA00CA6B64 /* Release */ = { 373 | isa = XCBuildConfiguration; 374 | buildSettings = { 375 | GCC_PREPROCESSOR_DEFINITIONS = ( 376 | PACKEDARRAY_SELF_TEST, 377 | "$(inherited)", 378 | ); 379 | }; 380 | name = Release; 381 | }; 382 | 1AEF60DF17AD02BD00CA6B64 /* Debug */ = { 383 | isa = XCBuildConfiguration; 384 | buildSettings = { 385 | GCC_PREPROCESSOR_DEFINITIONS = ( 386 | 
PACKEDARRAY_SELF_BENCH, 387 | NDEBUG, 388 | "$(inherited)", 389 | ); 390 | }; 391 | name = Debug; 392 | }; 393 | 1AEF60E017AD02BD00CA6B64 /* Release */ = { 394 | isa = XCBuildConfiguration; 395 | buildSettings = { 396 | GCC_PREPROCESSOR_DEFINITIONS = ( 397 | PACKEDARRAY_SELF_BENCH, 398 | NDEBUG, 399 | "$(inherited)", 400 | ); 401 | }; 402 | name = Release; 403 | }; 404 | /* End XCBuildConfiguration section */ 405 | 406 | /* Begin XCConfigurationList section */ 407 | 1A98F9FC17A4249200BF09FF /* Build configuration list for PBXProject "PackedArray" */ = { 408 | isa = XCConfigurationList; 409 | buildConfigurations = ( 410 | 1A98FA0817A4249200BF09FF /* Debug */, 411 | 1A98FA0917A4249200BF09FF /* Release */, 412 | ); 413 | defaultConfigurationIsVisible = 0; 414 | defaultConfigurationName = Release; 415 | }; 416 | 1A98FA0A17A4249200BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */ = { 417 | isa = XCConfigurationList; 418 | buildConfigurations = ( 419 | 1A98FA0B17A4249200BF09FF /* Debug */, 420 | 1A98FA0C17A4249200BF09FF /* Release */, 421 | ); 422 | defaultConfigurationIsVisible = 0; 423 | defaultConfigurationName = Release; 424 | }; 425 | 1A98FA1617A4262700BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */ = { 426 | isa = XCConfigurationList; 427 | buildConfigurations = ( 428 | 1A98FA1717A4262700BF09FF /* Debug */, 429 | 1A98FA1817A4262700BF09FF /* Release */, 430 | ); 431 | defaultConfigurationIsVisible = 0; 432 | defaultConfigurationName = Release; 433 | }; 434 | 1AEF60D517AD02BA00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */ = { 435 | isa = XCConfigurationList; 436 | buildConfigurations = ( 437 | 1AEF60D617AD02BA00CA6B64 /* Debug */, 438 | 1AEF60D717AD02BA00CA6B64 /* Release */, 439 | ); 440 | defaultConfigurationIsVisible = 0; 441 | defaultConfigurationName = Release; 442 | }; 443 | 1AEF60DE17AD02BD00CA6B64 /* Build configuration list for PBXNativeTarget 
"PackedArraySIMDSelfBench" */ = { 444 | isa = XCConfigurationList; 445 | buildConfigurations = ( 446 | 1AEF60DF17AD02BD00CA6B64 /* Debug */, 447 | 1AEF60E017AD02BD00CA6B64 /* Release */, 448 | ); 449 | defaultConfigurationIsVisible = 0; 450 | defaultConfigurationName = Release; 451 | }; 452 | /* End XCConfigurationList section */ 453 | }; 454 | rootObject = 1A98F9F917A4249200BF09FF /* Project object */; 455 | } 456 | -------------------------------------------------------------------------------- /_mac-xcode/PackedArray.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /_win-vs11/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /ipch 3 | 4 | *.suo 5 | *.sdf 6 | *.opensdf 7 | *.user 8 | *.sln.docstates 9 | -------------------------------------------------------------------------------- /_win-vs11/Common.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | ..\bin\Windows$(PLATFORM_SUFFIX)-$(ARCH)$(CONF_SUFFIX) 5 | ..\lib\Windows$(PLATFORM_SUFFIX)-$(ARCH)-vs11$(CONF_SUFFIX) 6 | build\$(ProjectName)-win$(PLATFORM_SUFFIX)-$(ARCH)-vs11$(CONF_SUFFIX) 7 | 8 | 9 | $(BUILD_DIR)\ 10 | $(BIN_DIR)\ 11 | false 12 | 13 | 14 | 15 | _CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 16 | true 17 | /we4013 /wd4820 /we4289 /wd4342 /wd4347 /wd4514 /we4545 /we4546 /we4547 /we4548 /we4549 /we4619 /we4623 /we4625 /we4626 /wd4710 /we4836 /we4905 /we4906 /we4928 /we4946 /wd4986 /wd4711 /wd4350 18 | NotUsing 19 | true 20 | EnableAllWarnings 21 | 22 | 23 | true 24 | Console 25 | /time %(AdditionalOptions) 26 | 27 | 28 | 29 | 30 | $(BIN_DIR) 31 | 32 | 33 | $(LIB_DIR) 34 | 35 | 36 | $(BUILD_DIR) 37 | 38 | 39 | 
-------------------------------------------------------------------------------- /_win-vs11/Debug.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -Debug 5 | 6 | 7 | 8 | Disabled 9 | Level3 10 | ProgramDatabase 11 | MultiThreadedDebug 12 | true 13 | OnlyExplicitInline 14 | 15 | 16 | 17 | 18 | $(CONF_SUFFIX) 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /_win-vs11/PackedArray.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Express 2012 for Windows Desktop 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySelfTest", "PackedArraySelfTest.vcxproj", "{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}" 5 | EndProject 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySelfBench", "PackedArraySelfBench.vcxproj", "{686B991E-01AD-4433-897E-DFD5E751DAF5}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySIMDSelfBench", "PackedArraySIMDSelfBench.vcxproj", "{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySIMDSelfTest", "PackedArraySIMDSelfTest.vcxproj", "{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug|Win32 = Debug|Win32 15 | Debug|x64 = Debug|x64 16 | Release|Win32 = Release|Win32 17 | Release|x64 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|Win32.ActiveCfg = Debug|Win32 21 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|Win32.Build.0 = Debug|Win32 22 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|x64.ActiveCfg = Debug|x64 23 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|x64.Build.0 = Debug|x64 24 | 
{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|Win32.ActiveCfg = Release|Win32 25 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|Win32.Build.0 = Release|Win32 26 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|x64.ActiveCfg = Release|x64 27 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|x64.Build.0 = Release|x64 28 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|Win32.ActiveCfg = Debug|Win32 29 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|Win32.Build.0 = Debug|Win32 30 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|x64.ActiveCfg = Debug|x64 31 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|x64.Build.0 = Debug|x64 32 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|Win32.ActiveCfg = Release|Win32 33 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|Win32.Build.0 = Release|Win32 34 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|x64.ActiveCfg = Release|x64 35 | {686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|x64.Build.0 = Release|x64 36 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|Win32.ActiveCfg = Debug|Win32 37 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|Win32.Build.0 = Debug|Win32 38 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|x64.ActiveCfg = Debug|x64 39 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|x64.Build.0 = Debug|x64 40 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|Win32.ActiveCfg = Release|Win32 41 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|Win32.Build.0 = Release|Win32 42 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|x64.ActiveCfg = Release|x64 43 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|x64.Build.0 = Release|x64 44 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|Win32.ActiveCfg = Debug|Win32 45 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|Win32.Build.0 = Debug|Win32 46 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|x64.ActiveCfg = Debug|x64 47 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|x64.Build.0 = Debug|x64 48 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|Win32.ActiveCfg = Release|Win32 49 | 
{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|Win32.Build.0 = Release|Win32 50 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|x64.ActiveCfg = Release|x64 51 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|x64.Build.0 = Release|x64 52 | EndGlobalSection 53 | GlobalSection(SolutionProperties) = preSolution 54 | HideSolutionNode = FALSE 55 | EndGlobalSection 56 | EndGlobal 57 | -------------------------------------------------------------------------------- /_win-vs11/PackedArraySIMDSelfBench.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D} 23 | Win32Proj 24 | PackedArraySIMD 25 | 26 | 27 | 28 | Application 29 | true 30 | v110 31 | 32 | 33 | Application 34 | true 35 | v110 36 | 37 | 38 | Application 39 | false 40 | v110 41 | true 42 | 43 | 44 | Application 45 | false 46 | v110 47 | true 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions) 84 | 85 | 86 | 87 | 88 | PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions) 89 | 90 | 91 | 92 | 93 | PACKEDARRAY_SELF_BENCH;NDEBUG%(PreprocessorDefinitions) 94 | 95 | 96 | 97 | 98 | PACKEDARRAY_SELF_BENCH;NDEBUG%(PreprocessorDefinitions) 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /_win-vs11/PackedArraySIMDSelfTest.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848} 23 | Win32Proj 24 | 
PackedArraySIMD 25 | 26 | 27 | 28 | Application 29 | true 30 | v110 31 | 32 | 33 | Application 34 | true 35 | v110 36 | 37 | 38 | Application 39 | false 40 | v110 41 | true 42 | 43 | 44 | Application 45 | false 46 | v110 47 | true 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 84 | 85 | 86 | 87 | 88 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 89 | 90 | 91 | 92 | 93 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 94 | 95 | 96 | 97 | 98 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /_win-vs11/PackedArraySelfBench.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {686B991E-01AD-4433-897E-DFD5E751DAF5} 23 | Win32Proj 24 | PackedArray 25 | 26 | 27 | 28 | Application 29 | true 30 | v110 31 | 32 | 33 | Application 34 | true 35 | v110 36 | 37 | 38 | Application 39 | false 40 | v110 41 | true 42 | 43 | 44 | Application 45 | false 46 | v110 47 | true 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions) 84 | 85 | 86 | 87 | 88 | PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions) 89 | 90 | 91 | 92 | 93 | PACKEDARRAY_SELF_BENCH;NDEBUG%(PreprocessorDefinitions) 94 | 95 | 96 | 97 | 98 | PACKEDARRAY_SELF_BENCH;NDEBUG%(PreprocessorDefinitions) 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 
-------------------------------------------------------------------------------- /_win-vs11/PackedArraySelfTest.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81} 23 | Win32Proj 24 | PackedArray 25 | 26 | 27 | 28 | Application 29 | true 30 | v110 31 | 32 | 33 | Application 34 | true 35 | v110 36 | 37 | 38 | Application 39 | false 40 | v110 41 | true 42 | 43 | 44 | Application 45 | false 46 | v110 47 | true 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 84 | 85 | 86 | 87 | 88 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 89 | 90 | 91 | 92 | 93 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 94 | 95 | 96 | 97 | 98 | PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions) 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /_win-vs11/Release.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | false 8 | 9 | 10 | 11 | MaxSpeed 12 | AnySuitable 13 | true 14 | Speed 15 | true 16 | true 17 | true 18 | NDEBUG;%(PreprocessorDefinitions) 19 | true 20 | Level3 21 | ProgramDatabase 22 | MultiThreaded 23 | false 24 | /d2Zi+ %(AdditionalOptions) 25 | 26 | 27 | UseLinkTimeCodeGeneration 28 | true 29 | 30 | 31 | true 32 | 33 | 34 | 35 | 36 | $(CONF_SUFFIX) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /_win-vs11/x64.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | x64 5 | 6 | 7 | 8 | MachineX64 9 | 10 | 11 | 
12 | 13 | $(ARCH) 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /_win-vs11/x86.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | x86 5 | 6 | 7 | 8 | MachineX86 9 | 10 | 11 | StreamingSIMDExtensions2 12 | 13 | 14 | 15 | 16 | $(ARCH) 17 | 18 | 19 | --------------------------------------------------------------------------------