├── LICENSE ├── README.md ├── images ├── graph1.png ├── graph2.png ├── graph3.png ├── graph4.png ├── radix1.png └── radix2.png └── src ├── bench.c ├── blitsort.c ├── blitsort.h ├── crumsort.c ├── crumsort.h ├── extra_tests.c ├── fluxsort.c ├── fluxsort.h ├── gridsort.c ├── gridsort.h ├── quadsort.c ├── quadsort.h ├── skipsort.c ├── skipsort.h ├── wolfsort.c └── wolfsort.h /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /images/graph1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/wolfsort/56ad38959aeeae01c54dcb668363132f51a75e47/images/graph1.png -------------------------------------------------------------------------------- /images/graph2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/wolfsort/56ad38959aeeae01c54dcb668363132f51a75e47/images/graph2.png -------------------------------------------------------------------------------- /images/graph3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/wolfsort/56ad38959aeeae01c54dcb668363132f51a75e47/images/graph3.png -------------------------------------------------------------------------------- /images/graph4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/wolfsort/56ad38959aeeae01c54dcb668363132f51a75e47/images/graph4.png -------------------------------------------------------------------------------- /images/radix1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/wolfsort/56ad38959aeeae01c54dcb668363132f51a75e47/images/radix1.png -------------------------------------------------------------------------------- /images/radix2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/wolfsort/56ad38959aeeae01c54dcb668363132f51a75e47/images/radix2.png -------------------------------------------------------------------------------- /src/bench.c: -------------------------------------------------------------------------------- 1 | /* 2 | To 
compile use either: 3 | 4 | gcc -O3 bench.c 5 | 6 | or 7 | 8 | clang -O3 bench.c 9 | 10 | or 11 | 12 | g++ -O3 bench.c 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #define cmp(a,b) (*(a) > *(b)) // uncomment for faster primitive comparisons 24 | 25 | const char *sorts[] = { "*", "quadsort", "gridsort", "blitsort", "fluxsort", "skipsort", "crumsort", "wolfsort", "sort::std" }; 26 | 27 | //#define SKIP_STRINGS 28 | //#define SKIP_DOUBLES 29 | //#define SKIP_LONGS 30 | 31 | #if __has_include("blitsort.h") 32 | #include "blitsort.h" // curl "https://raw.githubusercontent.com/scandum/blitsort/master/src/blitsort.{c,h}" -o "blitsort.#1" 33 | #endif 34 | #if __has_include("crumsort.h") 35 | #include "crumsort.h" // curl "https://raw.githubusercontent.com/scandum/crumsort/master/src/crumsort.{c,h}" -o "crumsort.#1" 36 | #endif 37 | #if __has_include("dripsort.h") 38 | #include "dripsort.h" 39 | #endif 40 | #if __has_include("flowsort.h") 41 | #include "flowsort.h" 42 | #endif 43 | #if __has_include("fluxsort.h") 44 | #include "fluxsort.h" // curl "https://raw.githubusercontent.com/scandum/fluxsort/master/src/fluxsort.{c,h}" -o "fluxsort.#1" 45 | #endif 46 | #if __has_include("gridsort.h") 47 | #include "gridsort.h" // curl "https://raw.githubusercontent.com/scandum/gridsort/master/src/gridsort.{c,h}" -o "gridsort.#1" 48 | #endif 49 | #if __has_include("octosort.h") 50 | #include "octosort.h" // curl "https://raw.githubusercontent.com/scandum/octosort/master/src/octosort.{c,h}" -o "octosort.#1" 51 | #endif 52 | #if __has_include("piposort.h") 53 | #include "piposort.h" // curl "https://raw.githubusercontent.com/scandum/piposort/master/src/piposort.{c,h}" -o "piposort.#1" 54 | #endif 55 | #if __has_include("quadsort.h") 56 | #include "quadsort.h" // curl "https://raw.githubusercontent.com/scandum/quadsort/master/src/quadsort.{c,h}" -o "quadsort.#1" 57 | #endif 58 | #if __has_include("skipsort.h") 59 
| #include "skipsort.h" 60 | #endif 61 | #if __has_include("wolfsort.h") 62 | #include "wolfsort.h" // curl "https://raw.githubusercontent.com/scandum/wolfsort/master/src/wolfsort.{c,h}" -o "wolfsort.#1" 63 | #endif 64 | 65 | #if __has_include("rhsort.c") 66 | #define RHSORT_C 67 | #include "rhsort.c" // curl https://raw.githubusercontent.com/mlochbaum/rhsort/master/rhsort.c > rhsort.c 68 | #endif 69 | 70 | #ifdef __GNUG__ 71 | #include 72 | #if __has_include("pdqsort.h") 73 | #include "pdqsort.h" // curl https://raw.githubusercontent.com/orlp/pdqsort/master/pdqsort.h > pdqsort.h 74 | #endif 75 | #if __has_include("ska_sort.hpp") 76 | #define SKASORT_HPP 77 | #include "ska_sort.hpp" // curl https://raw.githubusercontent.com/skarupke/ska_sort/master/ska_sort.hpp > ska_sort.hpp 78 | #endif 79 | #if __has_include("timsort.hpp") 80 | #include "timsort.hpp" // curl https://raw.githubusercontent.com/timsort/cpp-TimSort/master/include/gfx/timsort.hpp > timsort.hpp 81 | #endif 82 | #endif 83 | 84 | #if __has_include("antiqsort.c") 85 | #include "antiqsort.c" 86 | #endif 87 | 88 | //typedef int CMPFUNC (const void *a, const void *b); 89 | 90 | typedef void SRTFUNC(void *array, size_t nmemb, size_t size, CMPFUNC *cmpf); 91 | 92 | 93 | // Comment out or remove __attribute__ ((noinline)) and comparisons++ for full 94 | // throttle. 
Like so: #define COMPARISON_PP //comparisons++ 95 | 96 | size_t comparisons; 97 | 98 | #define COMPARISON_PP comparisons++ 99 | 100 | #define NO_INLINE __attribute__ ((noinline)) 101 | 102 | // primitive type comparison functions 103 | 104 | NO_INLINE int cmp_int(const void * a, const void * b) 105 | { 106 | COMPARISON_PP; 107 | 108 | return *(int *) a - *(int *) b; 109 | 110 | // const int l = *(const int *)a; 111 | // const int r = *(const int *)b; 112 | 113 | // return l - r; 114 | // return l > r; 115 | // return (l > r) - (l < r); 116 | } 117 | 118 | NO_INLINE int cmp_rev(const void * a, const void * b) 119 | { 120 | int fa = *(int *)a; 121 | int fb = *(int *)b; 122 | 123 | COMPARISON_PP; 124 | 125 | return fb - fa; 126 | } 127 | 128 | NO_INLINE int cmp_stable(const void * a, const void * b) 129 | { 130 | int fa = *(int *)a; 131 | int fb = *(int *)b; 132 | 133 | COMPARISON_PP; 134 | 135 | return fa / 100000 - fb / 100000; 136 | } 137 | 138 | NO_INLINE int cmp_long(const void * a, const void * b) 139 | { 140 | const long long fa = *(const long long *) a; 141 | const long long fb = *(const long long *) b; 142 | 143 | COMPARISON_PP; 144 | 145 | return (fa > fb) - (fa < fb); 146 | // return (fa > fb); 147 | } 148 | 149 | NO_INLINE int cmp_float(const void * a, const void * b) // three-way comparator for float elements: returns 1, 0 or -1 150 | { 151 | return (*(float *) a > *(float *) b) - (*(float *) a < *(float *) b); // compare instead of subtracting: the float difference converted to int reported values less than 1.0 apart as equal 152 | } 153 | 154 | NO_INLINE int cmp_long_double(const void * a, const void * b) 155 | { 156 | const long double fa = *(const long double *) a; 157 | const long double fb = *(const long double *) b; 158 | 159 | COMPARISON_PP; 160 | 161 | return (fa > fb) - (fa < fb); 162 | 163 | /* if (isnan(fa) || isnan(fb)) 164 | { 165 | return isnan(fa) - isnan(fb); 166 | } 167 | 168 | return (fa > fb); 169 | */ 170 | } 171 | 172 | // pointer comparison functions 173 | 174 | NO_INLINE int cmp_str(const void * a, const void * b) 175 | { 176 | COMPARISON_PP; 177 | 178 | return strcmp(*(const char **) a, *(const char **) b); 179 | } 180 | 181 | NO_INLINE int 
cmp_int_ptr(const void * a, const void * b) 182 | { 183 | const int *fa = *(const int **) a; 184 | const int *fb = *(const int **) b; 185 | 186 | COMPARISON_PP; 187 | 188 | return (*fa > *fb) - (*fa < *fb); 189 | } 190 | 191 | NO_INLINE int cmp_long_ptr(const void * a, const void * b) 192 | { 193 | const long long *fa = *(const long long **) a; 194 | const long long *fb = *(const long long **) b; 195 | 196 | COMPARISON_PP; 197 | 198 | return (*fa > *fb) - (*fa < *fb); 199 | } 200 | 201 | NO_INLINE int cmp_long_double_ptr(const void * a, const void * b) 202 | { 203 | const long double *fa = *(const long double **) a; 204 | const long double *fb = *(const long double **) b; 205 | 206 | COMPARISON_PP; 207 | 208 | return (*fa > *fb) - (*fa < *fb); 209 | } 210 | 211 | // c++ comparison functions 212 | 213 | #ifdef __GNUG__ 214 | 215 | NO_INLINE bool cpp_cmp_int(const int &a, const int &b) 216 | { 217 | COMPARISON_PP; 218 | 219 | return a < b; 220 | } 221 | 222 | NO_INLINE bool cpp_cmp_str(char const* const a, char const* const b) 223 | { 224 | COMPARISON_PP; 225 | 226 | return strcmp(a, b) < 0; 227 | } 228 | 229 | #endif 230 | 231 | long long utime() 232 | { 233 | struct timeval now_time; 234 | 235 | gettimeofday(&now_time, NULL); 236 | 237 | return now_time.tv_sec * 1000000LL + now_time.tv_usec; 238 | } 239 | 240 | void seed_rand(unsigned long long seed) 241 | { 242 | srand(seed); 243 | } 244 | 245 | void test_sort(void *array, void *unsorted, void *valid, int minimum, int maximum, int samples, int repetitions, SRTFUNC *srt, const char *name, const char *desc, size_t size, CMPFUNC *cmpf) 246 | { 247 | long long start, end, total, best, average_time, average_comp; 248 | char temp[100]; 249 | static char compare = 0; 250 | long long *ptla = (long long *) array, *ptlv = (long long *) valid; 251 | long double *ptda = (long double *) array, *ptdv = (long double *) valid; 252 | int *pta = (int *) array, *ptv = (int *) valid, rep, sam, max, cnt, name32; 253 | 254 | #ifdef 
SKASORT_HPP 255 | void *swap; 256 | #endif 257 | 258 | if (*name == '*') 259 | { 260 | if (!strcmp(desc, "random order") || !strcmp(desc, "random 1-4") || !strcmp(desc, "random 4") || !strcmp(desc, "random string") || !strcmp(desc, "random 10")) 261 | { 262 | if (comparisons) 263 | { 264 | compare = 1; 265 | printf("%s\n", "| Name | Items | Type | Best | Average | Compares | Samples | Distribution |"); 266 | printf("%s\n", "| --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |"); 267 | } 268 | else 269 | { 270 | printf("%s\n", "| Name | Items | Type | Best | Average | Loops | Samples | Distribution |"); 271 | printf("%s\n", "| --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |"); 272 | } 273 | } 274 | else 275 | { 276 | printf("%s\n", "| | | | | | | | |"); 277 | } 278 | return; 279 | } 280 | 281 | name32 = name[0] + (name[1] ? name[1] * 32 : 0) + (name[2] ? name[2] * 1024 : 0); 282 | 283 | best = average_time = average_comp = 0; 284 | 285 | if (minimum == 7 && maximum == 7) 286 | { 287 | pta = (int *) unsorted; 288 | printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d\e[0m\n", pta[0], pta[1], pta[2], pta[3], pta[4], pta[5], pta[6]); 289 | pta = (int *) array; 290 | } 291 | 292 | for (sam = 0 ; sam < samples ; sam++) 293 | { 294 | total = average_comp = 0; 295 | max = minimum; 296 | 297 | start = utime(); 298 | 299 | for (rep = repetitions - 1 ; rep >= 0 ; rep--) 300 | { 301 | memcpy(array, (char *) unsorted + maximum * rep * size, max * size); 302 | 303 | comparisons = 0; 304 | 305 | // edit char *sorts to add / remove sorts 306 | 307 | switch (name32) 308 | { 309 | #ifdef BLITSORT_H 310 | case 'b' + 'l' * 32 + 'i' * 1024: blitsort(array, max, size, cmpf); break; 311 | #endif 312 | #ifdef CRUMSORT_H 313 | case 'c' + 'r' * 32 + 'u' * 1024: crumsort(array, max, size, cmpf); break; 314 | #endif 315 | #ifdef DRIPSORT_H 316 | case 'd' + 'r' * 32 + 'i' * 1024: dripsort(array, max, size, 
cmpf); break; 317 | #endif 318 | #ifdef FLOWSORT_H 319 | case 'f' + 'l' * 32 + 'o' * 1024: flowsort(array, max, size, cmpf); break; 320 | #endif 321 | #ifdef FLUXSORT_H 322 | case 'f' + 'l' * 32 + 'u' * 1024: fluxsort(array, max, size, cmpf); break; 323 | case 's' + '_' * 32 + 'f' * 1024: fluxsort_size(array, max, size, cmpf); break; 324 | 325 | #endif 326 | #ifdef GRIDSORT_H 327 | case 'g' + 'r' * 32 + 'i' * 1024: gridsort(array, max, size, cmpf); break; 328 | #endif 329 | #ifdef OCTOSORT_H 330 | case 'o' + 'c' * 32 + 't' * 1024: octosort(array, max, size, cmpf); break; 331 | #endif 332 | #ifdef PIPOSORT_H 333 | case 'p' + 'i' * 32 + 'p' * 1024: piposort(array, max, size, cmpf); break; 334 | #endif 335 | #ifdef QUADSORT_H 336 | case 'q' + 'u' * 32 + 'a' * 1024: quadsort(array, max, size, cmpf); break; 337 | case 's' + '_' * 32 + 'q' * 1024: quadsort_size(array, max, size, cmpf); break; 338 | #endif 339 | #ifdef SKIPSORT_H 340 | case 's' + 'k' * 32 + 'i' * 1024: skipsort(array, max, size, cmpf); break; 341 | #endif 342 | #ifdef WOLFSORT_H 343 | case 'w' + 'o' * 32 + 'l' * 1024: wolfsort(array, max, size, cmpf); break; 344 | #endif 345 | case 'q' + 's' * 32 + 'o' * 1024: qsort(array, max, size, cmpf); break; 346 | 347 | #ifdef RHSORT_C 348 | case 'r' + 'h' * 32 + 's' * 1024: if (size == sizeof(int)) rhsort32(pta, max); else return; break; 349 | #endif 350 | 351 | #ifdef __GNUG__ 352 | case 's' + 'o' * 32 + 'r' * 1024: if (size == sizeof(int)) std::sort(pta, pta + max); else if (size == sizeof(long long)) std::sort(ptla, ptla + max); else std::sort(ptda, ptda + max); break; 353 | case 's' + 't' * 32 + 'a' * 1024: if (size == sizeof(int)) std::stable_sort(pta, pta + max); else if (size == sizeof(long long)) std::stable_sort(ptla, ptla + max); else std::stable_sort(ptda, ptda + max); break; 354 | 355 | #ifdef PDQSORT_H 356 | case 'p' + 'd' * 32 + 'q' * 1024: if (size == sizeof(int)) pdqsort(pta, pta + max); else if (size == sizeof(long long)) pdqsort(ptla, ptla + max); 
else pdqsort(ptda, ptda + max); break; 357 | #endif 358 | #ifdef SKASORT_HPP 359 | case 's' + 'k' * 32 + 'a' * 1024: swap = malloc(max * size); if (size == sizeof(int)) ska_sort_copy(pta, pta + max, (int *) swap); else if (size == sizeof(long long)) ska_sort_copy(ptla, ptla + max, (long long *) swap); else repetitions = 0; free(swap); break; 360 | #endif 361 | #ifdef GFX_TIMSORT_HPP 362 | case 't' + 'i' * 32 + 'm' * 1024: if (size == sizeof(int)) gfx::timsort(pta, pta + max, cpp_cmp_int); else if (size == sizeof(long long)) gfx::timsort(ptla, ptla + max); else gfx::timsort(ptda, ptda + max); break; 363 | #endif 364 | #endif 365 | default: 366 | switch (name32) 367 | { 368 | case 's' + 'o' * 32 + 'r' * 1024: 369 | case 's' + 't' * 32 + 'a' * 1024: 370 | case 'p' + 'd' * 32 + 'q' * 1024: 371 | case 'r' + 'h' * 32 + 's' * 1024: 372 | case 's' + 'k' * 32 + 'a' * 1024: 373 | case 't' + 'i' * 32 + 'm' * 1024: 374 | printf("unknown sort: %s (compile with g++ instead of gcc?)\n", name); 375 | return; 376 | default: 377 | printf("unknown sort: %s\n", name); 378 | return; 379 | } 380 | } 381 | average_comp += comparisons; 382 | 383 | if (minimum < maximum && ++max > maximum) 384 | { 385 | max = minimum; 386 | } 387 | } 388 | end = utime(); 389 | 390 | total = end - start; 391 | 392 | if (!best || total < best) 393 | { 394 | best = total; 395 | } 396 | average_time += total; 397 | } 398 | 399 | if (minimum == 7 && maximum == 7) 400 | { 401 | printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d\e[0m\n", pta[0], pta[1], pta[2], pta[3], pta[4], pta[5], pta[6]); 402 | } 403 | 404 | if (repetitions == 0) 405 | { 406 | return; 407 | } 408 | 409 | average_time /= samples; 410 | 411 | if (cmpf == cmp_stable) 412 | { 413 | for (cnt = 1 ; cnt < maximum ; cnt++) 414 | { 415 | if (pta[cnt - 1] > pta[cnt]) 416 | { 417 | sprintf(temp, "\e[1;31m%16s\e[0m", "unstable"); 418 | desc = temp; 419 | break; 420 | } 421 | } 422 | } 423 | 424 | if (compare) 425 | { 426 | if (repetitions <= 1) 427 | { 
428 | printf("|%10s |%9d | %4d |%9f |%9f |%10d | %7d | %16s |\e[0m\n", name, maximum, (int) size * 8, best / 1000000.0, average_time / 1000000.0, (int) comparisons, samples, desc); 429 | } 430 | else 431 | { 432 | printf("|%10s |%9d | %4d |%9f |%9f |%10.1f | %7d | %16s |\e[0m\n", name, maximum, (int) size * 8, best / 1000000.0, average_time / 1000000.0, (float) average_comp / repetitions, samples, desc); 433 | } 434 | } 435 | else 436 | { 437 | printf("|%10s | %8d | %4d | %f | %f | %9d | %7d | %16s |\e[0m\n", name, maximum, (int) size * 8, best / 1000000.0, average_time / 1000000.0, repetitions, samples, desc); 438 | } 439 | 440 | if (minimum != maximum || cmpf == cmp_stable) 441 | { 442 | return; 443 | } 444 | 445 | for (cnt = 1 ; cnt < maximum ; cnt++) 446 | { 447 | if (cmpf == cmp_str) 448 | { 449 | char **ptsa = (char **) array; 450 | if (strcmp((char *) ptsa[cnt - 1], (char *) ptsa[cnt]) > 0) 451 | { 452 | printf("%17s: not properly sorted at index %d. (%s vs %s\n", name, cnt, (char *) ptsa[cnt - 1], (char *) ptsa[cnt]); 453 | break; 454 | } 455 | } 456 | else if (size == sizeof(int *) && cmpf == cmp_long_double_ptr) 457 | { 458 | long double **pptda = (long double **) array; 459 | 460 | if (cmp_long_double_ptr(&pptda[cnt - 1], &pptda[cnt]) > 0) 461 | { 462 | printf("%17s: not properly sorted at index %d. (%Lf vs %Lf\n", name, cnt, *pptda[cnt - 1], *pptda[cnt]); 463 | break; 464 | } 465 | } 466 | else if (cmpf == cmp_long_ptr) 467 | { 468 | long long **pptla = (long long **) array; 469 | 470 | if (cmp_long_ptr(&pptla[cnt - 1], &pptla[cnt]) > 0) 471 | { 472 | printf("%17s: not properly sorted at index %d. (%lld vs %lld\n", name, cnt, *pptla[cnt - 1], *pptla[cnt]); 473 | break; 474 | } 475 | } 476 | else if (cmpf == cmp_int_ptr) 477 | { 478 | int **pptia = (int **) array; 479 | 480 | if (cmp_int_ptr(&pptia[cnt - 1], &pptia[cnt]) > 0) 481 | { 482 | printf("%17s: not properly sorted at index %d. 
(%d vs %d\n", name, cnt, *pptia[cnt - 1], *pptia[cnt]); 483 | break; 484 | } 485 | } 486 | else if (size == sizeof(int)) 487 | { 488 | if (pta[cnt - 1] > pta[cnt]) 489 | { 490 | printf("%17s: not properly sorted at index %d. (%d vs %d\n", name, cnt, pta[cnt - 1], pta[cnt]); 491 | break; 492 | } 493 | if (pta[cnt - 1] == pta[cnt]) 494 | { 495 | // printf("%17s: Found a repeat value at index %d. (%d)\n", name, cnt, pta[cnt]); 496 | } 497 | } 498 | else if (size == sizeof(long long)) 499 | { 500 | if (ptla[cnt - 1] > ptla[cnt]) 501 | { 502 | printf("%17s: not properly sorted at index %d. (%lld vs %lld\n", name, cnt, ptla[cnt - 1], ptla[cnt]); 503 | break; 504 | } 505 | } 506 | else if (size == sizeof(long double)) 507 | { 508 | if (cmp_long_double(&ptda[cnt - 1], &ptda[cnt]) > 0) 509 | { 510 | printf("%17s: not properly sorted at index %d. (%Lf vs %Lf\n", name, cnt, ptda[cnt - 1], ptda[cnt]); 511 | break; 512 | } 513 | } 514 | } 515 | 516 | for (cnt = 1 ; cnt < maximum ; cnt++) 517 | { 518 | if (size == sizeof(int)) 519 | { 520 | if (pta[cnt] != ptv[cnt]) 521 | { 522 | printf(" validate: array[%d] != valid[%d]. (%d vs %d\n", cnt, cnt, pta[cnt], ptv[cnt]); 523 | break; 524 | } 525 | } 526 | else if (size == sizeof(long long)) 527 | { 528 | if (ptla[cnt] != ptlv[cnt]) 529 | { 530 | if (cmpf == cmp_str) 531 | { 532 | char **ptsa = (char **) array; 533 | char **ptsv = (char **) valid; 534 | 535 | printf(" validate: array[%d] != valid[%d]. (%s vs %s) %s\n", cnt, cnt, (char *) ptsa[cnt], (char *) ptsv[cnt], !strcmp((char *) ptsa[cnt], (char *) ptsv[cnt]) ? "\e[1;31munstable\e[0m" : ""); 536 | break; 537 | } 538 | if (cmpf == cmp_long_ptr) 539 | { 540 | long long **ptla = (long long **) array; 541 | long long **ptlv = (long long **) valid; 542 | 543 | printf(" validate: array[%d] != valid[%d]. (%lld vs %lld) %s\n", cnt, cnt, *ptla[cnt], *ptlv[cnt], (*ptla[cnt] == *ptlv[cnt]) ? 
"\e[1;31munstable\e[0m" : ""); 544 | break; 545 | } 546 | if (cmpf == cmp_int_ptr) 547 | { 548 | int **ptia = (int **) array; 549 | int **ptiv = (int **) valid; 550 | 551 | printf(" validate: array[%d] != valid[%d]. (%d vs %d) %s\n", cnt, cnt, *ptia[cnt], *ptiv[cnt], (*ptia[cnt] == *ptiv[cnt]) ? "\e[1;31munstable\e[0m" : ""); 552 | break; 553 | } 554 | 555 | printf(" validate: array[%d] != valid[%d]. (%lld vs %lld\n", cnt, cnt, ptla[cnt], ptlv[cnt]); 556 | break; 557 | } 558 | } 559 | else if (size == sizeof(long double)) 560 | { 561 | if (ptda[cnt] != ptdv[cnt]) 562 | { 563 | printf(" validate: array[%d] != valid[%d]. (%Lf vs %Lf\n", cnt, cnt, ptda[cnt], ptdv[cnt]); 564 | break; 565 | } 566 | } 567 | } 568 | } 569 | 570 | void validate() 571 | { 572 | int seed = time(NULL); 573 | int cnt, val, max = 1000; 574 | 575 | int *a_array, *r_array, *v_array; 576 | 577 | seed_rand(seed); 578 | 579 | a_array = (int *) malloc(max * sizeof(int)); 580 | r_array = (int *) malloc(max * sizeof(int)); 581 | v_array = (int *) malloc(max * sizeof(int)); 582 | 583 | for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand(); 584 | 585 | for (cnt = 0 ; cnt < max ; cnt++) 586 | { 587 | memcpy(a_array, r_array, cnt * sizeof(int)); 588 | memcpy(v_array, r_array, cnt * sizeof(int)); 589 | 590 | quadsort_prim(a_array, cnt, sizeof(int)); 591 | qsort(v_array, cnt, sizeof(int), cmp_int); 592 | 593 | for (val = 0 ; val < cnt ; val++) 594 | { 595 | if (val && v_array[val - 1] > v_array[val]) {printf("\e[1;31mvalidate rand: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); return;} 596 | if (a_array[val] != v_array[val]) {printf("\e[1;31mvalidate rand: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); return;} 597 | } 598 | } 599 | 600 | // ascending saw 601 | 602 | for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = cnt % (max / 5); 603 | 604 | for (cnt = 0 ; cnt < max ; cnt += 7) 605 | { 606 | memcpy(a_array, r_array, cnt * sizeof(int)); 607 | memcpy(v_array, 
r_array, cnt * sizeof(int)); 608 | 609 | quadsort(a_array, cnt, sizeof(int), cmp_int); 610 | qsort(v_array, cnt, sizeof(int), cmp_int); 611 | 612 | for (val = 0 ; val < cnt ; val++) 613 | { 614 | if (val && v_array[val - 1] > v_array[val]) {printf("\e[1;31mvalidate ascending saw: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); return;} 615 | if (a_array[val] != v_array[val]) {printf("\e[1;31mvalidate ascending saw: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); return;} 616 | } 617 | } 618 | 619 | // descending saw 620 | 621 | for (cnt = 0 ; cnt < max ; cnt++) 622 | { 623 | r_array[cnt] = (max - cnt + 1) % (max / 11); 624 | } 625 | 626 | for (cnt = 1 ; cnt < max ; cnt += 7) 627 | { 628 | memcpy(a_array, r_array, cnt * sizeof(int)); 629 | memcpy(v_array, r_array, cnt * sizeof(int)); 630 | 631 | quadsort(a_array, cnt, sizeof(int), cmp_int); 632 | qsort(v_array, cnt, sizeof(int), cmp_int); 633 | 634 | for (val = 0 ; val < cnt ; val++) 635 | { 636 | if (val && v_array[val - 1] > v_array[val]) {printf("\e[1;31mvalidate descending saw: seed %d: size: %d Not properly sorted at index %d.\n\n", seed, cnt, val); return;} 637 | if (a_array[val] != v_array[val]) {printf("\e[1;31mvalidate descending saw: seed %d: size: %d Not verified at index %d.\n\n", seed, cnt, val); return;} 638 | } 639 | } 640 | 641 | // random half 642 | 643 | for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = (cnt < max / 2) ? 
cnt : rand(); 644 | 645 | for (cnt = 1 ; cnt < max ; cnt += 7) 646 | { 647 | memcpy(a_array, r_array, cnt * sizeof(int)); 648 | memcpy(v_array, r_array, cnt * sizeof(int)); 649 | 650 | quadsort(a_array, cnt, sizeof(int), cmp_int); 651 | qsort(v_array, cnt, sizeof(int), cmp_int); 652 | 653 | for (val = 0 ; val < cnt ; val++) 654 | { 655 | if (val && v_array[val - 1] > v_array[val]) {printf("\e[1;31mvalidate rand tail: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); return;} 656 | if (a_array[val] != v_array[val]) {printf("\e[1;31mvalidate rand tail: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); return;} 657 | } 658 | } 659 | free(a_array); 660 | free(r_array); 661 | free(v_array); 662 | } 663 | 664 | unsigned int bit_reverse(unsigned int x) // returns x with the order of its 32 bits reversed (masks below are 32-bit) 665 | { 666 | x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); 667 | x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2)); 668 | x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4)); 669 | x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8)); 670 | 671 | return((x >> 16) | (x << 16)); // final stage swaps the two 16-bit halves; both shifts must be 16 672 | } 673 | 674 | void run_test(void *a_array, void *r_array, void *v_array, int minimum, int maximum, int samples, int repetitions, int copies, const char *desc, size_t size, CMPFUNC *cmpf) 675 | { 676 | int cnt, rep; 677 | 678 | memcpy(v_array, r_array, maximum * size); 679 | 680 | for (rep = 0 ; rep < copies ; rep++) 681 | { 682 | memcpy((char *) r_array + rep * maximum * size, v_array, maximum * size); 683 | } 684 | quadsort(v_array, maximum, size, cmpf); 685 | 686 | for (cnt = 0 ; (size_t) cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 687 | { 688 | test_sort(a_array, r_array, v_array, minimum, maximum, samples, repetitions, qsort, sorts[cnt], desc, size, cmpf); 689 | } 690 | } 691 | 692 | void range_test(int max, int samples, int repetitions, int seed) 693 | { 694 | int cnt, last; 695 | int mem = max * 10 > 32768 * 64 ? 
max * 10 : 32768 * 64; 696 | char dist[40]; 697 | 698 | int *a_array = (int *) malloc(max * sizeof(int)); 699 | int *r_array = (int *) malloc(mem * sizeof(int)); 700 | int *v_array = (int *) malloc(max * sizeof(int)); 701 | 702 | srand(seed); 703 | 704 | for (cnt = 0 ; cnt < mem ; cnt++) 705 | { 706 | r_array[cnt] = rand(); 707 | } 708 | 709 | if (max <= 4096) 710 | { 711 | for (last = 1, samples = 32768*4, repetitions = 4 ; repetitions <= max ; repetitions *= 2, samples /= 2) 712 | { 713 | if (max >= repetitions) 714 | { 715 | sprintf(dist, "random %d-%d", last, repetitions); 716 | 717 | memcpy(v_array, r_array, repetitions * sizeof(int)); 718 | quadsort(v_array, repetitions, sizeof(int), cmp_int); 719 | 720 | for (cnt = 0 ; (size_t) cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 721 | { 722 | test_sort(a_array, r_array, v_array, last, repetitions, 50, samples, qsort, sorts[cnt], dist, sizeof(int), cmp_int); 723 | } 724 | last = repetitions + 1; 725 | } 726 | } 727 | free(a_array); 728 | free(r_array); 729 | free(v_array); 730 | return; 731 | } 732 | 733 | if (max == 10000000) 734 | { 735 | repetitions = 10000000; 736 | 737 | for (max = 10 ; max <= 10000000 ; max *= 10) 738 | { 739 | repetitions /= 10; 740 | 741 | memcpy(v_array, r_array, max * sizeof(int)); 742 | quadsort_prim(v_array, max, sizeof(int)); 743 | 744 | sprintf(dist, "random %d", max); 745 | 746 | for (cnt = 0 ; (size_t) cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 747 | { 748 | test_sort(a_array, r_array, v_array, max, max, 10, repetitions, qsort, sorts[cnt], dist, sizeof(int), cmp_int); 749 | } 750 | } 751 | } 752 | else 753 | { 754 | for (samples = 32768*4, repetitions = 4 ; samples > 0 ; repetitions *= 2, samples /= 2) 755 | { 756 | if (max >= repetitions) 757 | { 758 | memcpy(v_array, r_array, repetitions * sizeof(int)); 759 | quadsort(v_array, repetitions, sizeof(int), cmp_int); 760 | 761 | sprintf(dist, "random %d", repetitions); 762 | 763 | for (cnt = 0 ; (size_t) cnt < sizeof(sorts) / 
sizeof(char *) ; cnt++) 764 | { 765 | test_sort(a_array, r_array, v_array, repetitions, repetitions, 100, samples, qsort, sorts[cnt], dist, sizeof(int), cmp_int); 766 | } 767 | } 768 | } 769 | } 770 | free(a_array); 771 | free(r_array); 772 | free(v_array); 773 | return; 774 | } 775 | 776 | #define VAR int 777 | 778 | int main(int argc, char **argv) 779 | { 780 | int max = 100000; 781 | int samples = 10; 782 | int repetitions = 1; 783 | int seed = 0; 784 | int cnt, mem; 785 | VAR *a_array, *r_array, *v_array, sum; 786 | 787 | if (argc >= 1 && argv[1] && *argv[1]) 788 | { 789 | max = atoi(argv[1]); 790 | } 791 | 792 | if (argc >= 2 && argv[2] && *argv[2]) 793 | { 794 | samples = atoi(argv[2]); 795 | } 796 | 797 | if (argc >= 3 && argv[3] && *argv[3]) 798 | { 799 | repetitions = atoi(argv[3]); 800 | } 801 | 802 | if (argc >= 4 && argv[4] && *argv[4]) 803 | { 804 | seed = atoi(argv[4]); 805 | } 806 | 807 | validate(); 808 | 809 | seed = seed ? seed : time(NULL); 810 | 811 | printf("Info: int = %lu, long long = %lu, long double = %lu\n\n", sizeof(int) * 8, sizeof(long long) * 8, sizeof(long double) * 8); 812 | 813 | printf("Benchmark: array size: %d, samples: %d, repetitions: %d, seed: %d\n\n", max, samples, repetitions, seed); 814 | 815 | if (repetitions == 0) 816 | { 817 | range_test(max, samples, repetitions, seed); 818 | return 0; 819 | } 820 | 821 | mem = max * repetitions; 822 | 823 | #ifndef SKIP_STRINGS 824 | #ifndef cmp 825 | 826 | // C string 827 | 828 | { 829 | char **sa_array = (char **) malloc(max * sizeof(char **)); 830 | char **sr_array = (char **) malloc(mem * sizeof(char **)); 831 | char **sv_array = (char **) malloc(max * sizeof(char **)); 832 | 833 | char *buffer = (char *) malloc(mem * 16); 834 | 835 | seed_rand(seed); 836 | 837 | for (cnt = 0 ; cnt < mem ; cnt++) 838 | { 839 | sprintf(buffer + cnt * 16, "%X", rand() % 1000000); 840 | 841 | sr_array[cnt] = buffer + cnt * 16; 842 | } 843 | run_test(sa_array, sr_array, sv_array, max, max, samples, 
repetitions, 0, "random string", sizeof(char **), cmp_str); 844 | 845 | free(sa_array); 846 | free(sr_array); 847 | free(sv_array); 848 | 849 | free(buffer); 850 | } 851 | 852 | // long double table 853 | 854 | { 855 | long double **da_array = (long double **) malloc(max * sizeof(long double *)); 856 | long double **dr_array = (long double **) malloc(mem * sizeof(long double *)); 857 | long double **dv_array = (long double **) malloc(max * sizeof(long double *)); 858 | 859 | long double *buffer = (long double *) malloc(mem * sizeof(long double)); 860 | 861 | if (da_array == NULL || dr_array == NULL || dv_array == NULL) 862 | { 863 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno)); 864 | 865 | return 0; 866 | } 867 | 868 | seed_rand(seed); 869 | 870 | for (cnt = 0 ; cnt < mem ; cnt++) 871 | { 872 | buffer[cnt] = (long double) rand(); 873 | buffer[cnt] += (long double) ((unsigned long long) rand() << 32ULL); 874 | 875 | dr_array[cnt] = buffer + cnt; 876 | } 877 | run_test(da_array, dr_array, dv_array, max, max, samples, repetitions, 0, "random double", sizeof(long double *), cmp_long_double_ptr); 878 | 879 | free(da_array); 880 | free(dr_array); 881 | free(dv_array); 882 | 883 | free(buffer); 884 | } 885 | 886 | // long long table 887 | 888 | { 889 | long long **la_array = (long long **) malloc(max * sizeof(long long *)); 890 | long long **lr_array = (long long **) malloc(mem * sizeof(long long *)); 891 | long long **lv_array = (long long **) malloc(max * sizeof(long long *)); 892 | 893 | long long *buffer = (long long *) malloc(mem * sizeof(long long)); 894 | 895 | if (la_array == NULL || lr_array == NULL || lv_array == NULL) 896 | { 897 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno)); 898 | 899 | return 0; 900 | } 901 | 902 | seed_rand(seed); 903 | 904 | for (cnt = 0 ; cnt < mem ; cnt++) 905 | { 906 | buffer[cnt] = (long long) rand(); 907 | buffer[cnt] += (long long) ((unsigned long long) 
rand() << 32ULL); 908 | 909 | lr_array[cnt] = buffer + cnt; 910 | } 911 | run_test(la_array, lr_array, lv_array, max, max, samples, repetitions, 0, "random long", sizeof(long long *), cmp_long_ptr); 912 | 913 | 914 | free(la_array); 915 | free(lr_array); 916 | free(lv_array); 917 | 918 | free(buffer); 919 | } 920 | 921 | // int table 922 | 923 | { 924 | int **la_array = (int **) malloc(max * sizeof(int *)); 925 | int **lr_array = (int **) malloc(mem * sizeof(int *)); 926 | int **lv_array = (int **) malloc(max * sizeof(int *)); 927 | 928 | int *buffer = (int *) malloc(mem * sizeof(int)); 929 | 930 | if (la_array == NULL || lr_array == NULL || lv_array == NULL) 931 | { 932 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno)); 933 | 934 | return 0; 935 | } 936 | 937 | seed_rand(seed); 938 | 939 | for (cnt = 0 ; cnt < mem ; cnt++) 940 | { 941 | buffer[cnt] = rand(); 942 | 943 | lr_array[cnt] = buffer + cnt; 944 | } 945 | run_test(la_array, lr_array, lv_array, max, max, samples, repetitions, 0, "random int", sizeof(int *), cmp_int_ptr); 946 | 947 | free(la_array); 948 | free(lr_array); 949 | free(lv_array); 950 | 951 | free(buffer); 952 | 953 | printf("\n"); 954 | } 955 | #endif 956 | #endif 957 | // 128 bit 958 | 959 | #ifndef SKIP_DOUBLES 960 | long double *da_array = (long double *) malloc(max * sizeof(long double)); 961 | long double *dr_array = (long double *) malloc(mem * sizeof(long double)); 962 | long double *dv_array = (long double *) malloc(max * sizeof(long double)); 963 | 964 | if (da_array == NULL || dr_array == NULL || dv_array == NULL) 965 | { 966 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno)); 967 | 968 | return 0; 969 | } 970 | 971 | seed_rand(seed); 972 | 973 | for (cnt = 0 ; cnt < mem ; cnt++) 974 | { 975 | dr_array[cnt] = (long double) rand(); 976 | dr_array[cnt] += (long double) ((unsigned long long) rand() << 32ULL); 977 | dr_array[cnt] += 1.0L / 3.0L; 978 | } 979 | 980 | 
memcpy(dv_array, dr_array, max * sizeof(long double)); 981 | quadsort(dv_array, max, sizeof(long double), cmp_long_double); 982 | 983 | for (cnt = 0 ; (size_t) cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 984 | { 985 | test_sort(da_array, dr_array, dv_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(long double), cmp_long_double); 986 | } 987 | #ifndef cmp 988 | #ifdef QUADSORT_H 989 | test_sort(da_array, dr_array, dv_array, max, max, samples, repetitions, qsort, "s_quadsort", "random order", sizeof(long double), cmp_long_double_ptr); 990 | #endif 991 | #endif 992 | free(da_array); 993 | free(dr_array); 994 | free(dv_array); 995 | 996 | printf("\n"); 997 | #endif 998 | // 64 bit 999 | 1000 | #ifndef SKIP_LONGS 1001 | long long *la_array = (long long *) malloc(max * sizeof(long long)); 1002 | long long *lr_array = (long long *) malloc(mem * sizeof(long long)); 1003 | long long *lv_array = (long long *) malloc(max * sizeof(long long)); 1004 | 1005 | if (la_array == NULL || lr_array == NULL || lv_array == NULL) 1006 | { 1007 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno)); 1008 | 1009 | return 0; 1010 | } 1011 | 1012 | seed_rand(seed); 1013 | 1014 | for (cnt = 0 ; cnt < mem ; cnt++) 1015 | { 1016 | lr_array[cnt] = rand(); 1017 | lr_array[cnt] += (unsigned long long) rand() << 32ULL; 1018 | } 1019 | 1020 | memcpy(lv_array, lr_array, max * sizeof(long long)); 1021 | quadsort(lv_array, max, sizeof(long long), cmp_long); 1022 | 1023 | for (cnt = 0 ; (size_t) cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 1024 | { 1025 | test_sort(la_array, lr_array, lv_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(long long), cmp_long); 1026 | } 1027 | 1028 | free(la_array); 1029 | free(lr_array); 1030 | free(lv_array); 1031 | 1032 | printf("\n"); 1033 | #endif 1034 | // 32 bit 1035 | 1036 | a_array = (VAR *) malloc(max * sizeof(VAR)); 1037 | r_array = (VAR *) malloc(mem * sizeof(VAR)); 
1038 | v_array = (VAR *) malloc(max * sizeof(VAR)); 1039 | 1040 | int quad0 = 0; 1041 | int nmemb = max; 1042 | int half1 = nmemb / 2; 1043 | int half2 = nmemb - half1; 1044 | int quad1 = half1 / 2; 1045 | int quad2 = half1 - quad1; 1046 | int quad3 = half2 / 2; 1047 | int quad4 = half2 - quad3; 1048 | 1049 | int span3 = quad1 + quad2 + quad3; 1050 | 1051 | // random 1052 | 1053 | seed_rand(seed); 1054 | 1055 | for (cnt = 0 ; cnt < mem ; cnt++) 1056 | { 1057 | r_array[cnt] = rand(); 1058 | } 1059 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "random order", sizeof(VAR), cmp_int); 1060 | 1061 | // random % 100 1062 | 1063 | for (cnt = 0 ; cnt < mem ; cnt++) 1064 | { 1065 | r_array[cnt] = rand() % 100; 1066 | } 1067 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "random % 100", sizeof(VAR), cmp_int); 1068 | 1069 | // ascending 1070 | 1071 | for (cnt = sum = 0 ; cnt < mem ; cnt++) 1072 | { 1073 | r_array[cnt] = sum; sum += rand() % 5; 1074 | } 1075 | 1076 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "ascending order", sizeof(VAR), cmp_int); 1077 | 1078 | // ascending saw 1079 | 1080 | for (cnt = 0 ; cnt < max ; cnt++) 1081 | { 1082 | r_array[cnt] = rand(); 1083 | } 1084 | 1085 | quadsort(r_array + quad0, quad1, sizeof(VAR), cmp_int); 1086 | quadsort(r_array + quad1, quad2, sizeof(VAR), cmp_int); 1087 | quadsort(r_array + half1, quad3, sizeof(VAR), cmp_int); 1088 | quadsort(r_array + span3, quad4, sizeof(VAR), cmp_int); 1089 | 1090 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "ascending saw", sizeof(VAR), cmp_int); 1091 | 1092 | // pipe organ 1093 | 1094 | for (cnt = 0 ; cnt < max ; cnt++) 1095 | { 1096 | r_array[cnt] = rand(); 1097 | } 1098 | 1099 | quadsort(r_array + quad0, half1, sizeof(VAR), cmp_int); 1100 | qsort(r_array + half1, half2, sizeof(VAR), cmp_rev); 1101 | 1102 | for (cnt = half1 + 1 ; cnt < max ; cnt++) 1103 | { 1104 | if (r_array[cnt] 
>= r_array[cnt - 1]) 1105 | { 1106 | r_array[cnt] = r_array[cnt - 1] - 1; // guarantee the run is strictly descending 1107 | } 1108 | } 1109 | 1110 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "pipe organ", sizeof(VAR), cmp_int); 1111 | 1112 | // descending 1113 | 1114 | for (cnt = 0, sum = mem * 10 ; cnt < mem ; cnt++) 1115 | { 1116 | r_array[cnt] = sum; sum -= 1 + rand() % 5; 1117 | } 1118 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "descending order", sizeof(VAR), cmp_int); 1119 | 1120 | // descending saw 1121 | 1122 | for (cnt = 0 ; cnt < max ; cnt++) 1123 | { 1124 | r_array[cnt] = rand(); 1125 | } 1126 | 1127 | qsort(r_array + quad0, quad1, sizeof(VAR), cmp_rev); 1128 | qsort(r_array + quad1, quad2, sizeof(VAR), cmp_rev); 1129 | qsort(r_array + half1, quad3, sizeof(VAR), cmp_rev); 1130 | qsort(r_array + span3, quad4, sizeof(VAR), cmp_rev); 1131 | 1132 | for (cnt = 1 ; cnt < max ; cnt++) 1133 | { 1134 | if (cnt == quad1 || cnt == half1 || cnt == span3) continue; 1135 | 1136 | if (r_array[cnt] >= r_array[cnt - 1]) 1137 | { 1138 | r_array[cnt] = r_array[cnt - 1] - 1; // guarantee the run is strictly descending 1139 | } 1140 | } 1141 | 1142 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "descending saw", sizeof(VAR), cmp_int); 1143 | 1144 | 1145 | // random tail 25% 1146 | 1147 | for (cnt = 0 ; cnt < max ; cnt++) 1148 | { 1149 | r_array[cnt] = rand(); 1150 | } 1151 | quadsort(r_array, span3, sizeof(VAR), cmp_int); 1152 | 1153 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "random tail", sizeof(VAR), cmp_int); 1154 | 1155 | // random 50% 1156 | 1157 | for (cnt = 0 ; cnt < max ; cnt++) 1158 | { 1159 | r_array[cnt] = rand(); 1160 | } 1161 | quadsort(r_array, half1, sizeof(VAR), cmp_int); 1162 | 1163 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "random half", sizeof(VAR), cmp_int); 1164 | 1165 
| // tiles 1166 | 1167 | for (cnt = 0 ; cnt < mem ; cnt++) 1168 | { 1169 | if (cnt % 2 == 0) 1170 | { 1171 | r_array[cnt] = 16777216 + cnt; 1172 | } 1173 | else 1174 | { 1175 | r_array[cnt] = 33554432 + cnt; 1176 | } 1177 | } 1178 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "ascending tiles", sizeof(VAR), cmp_int); 1179 | 1180 | // bit-reversal 1181 | 1182 | for (cnt = 0 ; cnt < mem ; cnt++) 1183 | { 1184 | r_array[cnt] = bit_reverse(cnt); 1185 | } 1186 | run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "bit reversal", sizeof(VAR), cmp_int); 1187 | 1188 | #ifndef cmp 1189 | #ifdef ANTIQSORT 1190 | test_antiqsort; 1191 | #endif 1192 | #endif 1193 | 1194 | #define QUAD_DEBUG 1195 | #if __has_include("extra_tests.c") 1196 | #include "extra_tests.c" 1197 | #endif 1198 | 1199 | free(a_array); 1200 | free(r_array); 1201 | free(v_array); 1202 | 1203 | return 0; 1204 | } 1205 | -------------------------------------------------------------------------------- /src/blitsort.c: -------------------------------------------------------------------------------- 1 | // blitsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #define BLIT_AUX 512 // set to 0 for sqrt(n) cache size 4 | #define BLIT_OUT 96 // should be smaller or equal to BLIT_AUX 5 | 6 | void FUNC(blit_partition)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp); 7 | 8 | void FUNC(blit_analyze)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 9 | { 10 | unsigned char loop, asum, bsum, csum, dsum; 11 | unsigned int astreaks, bstreaks, cstreaks, dstreaks; 12 | size_t quad1, quad2, quad3, quad4, half1, half2; 13 | size_t cnt, abalance, bbalance, cbalance, dbalance; 14 | VAR *pta, *ptb, *ptc, *ptd; 15 | 16 | half1 = nmemb / 2; 17 | quad1 = half1 / 2; 18 | quad2 = half1 - quad1; 19 | half2 = nmemb - half1; 20 | quad3 = half2 / 2; 21 | quad4 = half2 - quad3; 22 | 23 | pta = array; 24 | ptb = array + quad1; 25 | ptc = array + 
half1; 26 | ptd = array + half1 + quad3; 27 | 28 | astreaks = bstreaks = cstreaks = dstreaks = 0; 29 | abalance = bbalance = cbalance = dbalance = 0; 30 | 31 | for (cnt = nmemb ; cnt > 132 ; cnt -= 128) 32 | { 33 | for (asum = bsum = csum = dsum = 0, loop = 32 ; loop ; loop--) 34 | { 35 | asum += cmp(pta, pta + 1) > 0; pta++; 36 | bsum += cmp(ptb, ptb + 1) > 0; ptb++; 37 | csum += cmp(ptc, ptc + 1) > 0; ptc++; 38 | dsum += cmp(ptd, ptd + 1) > 0; ptd++; 39 | } 40 | abalance += asum; astreaks += asum = (asum == 0) | (asum == 32); 41 | bbalance += bsum; bstreaks += bsum = (bsum == 0) | (bsum == 32); 42 | cbalance += csum; cstreaks += csum = (csum == 0) | (csum == 32); 43 | dbalance += dsum; dstreaks += dsum = (dsum == 0) | (dsum == 32); 44 | 45 | if (cnt > 516 && asum + bsum + csum + dsum == 0) 46 | { 47 | abalance += 48; pta += 96; 48 | bbalance += 48; ptb += 96; 49 | cbalance += 48; ptc += 96; 50 | dbalance += 48; ptd += 96; 51 | cnt -= 384; 52 | } 53 | } 54 | 55 | for ( ; cnt > 7 ; cnt -= 4) 56 | { 57 | abalance += cmp(pta, pta + 1) > 0; pta++; 58 | bbalance += cmp(ptb, ptb + 1) > 0; ptb++; 59 | cbalance += cmp(ptc, ptc + 1) > 0; ptc++; 60 | dbalance += cmp(ptd, ptd + 1) > 0; ptd++; 61 | } 62 | 63 | if (quad1 < quad2) {bbalance += cmp(ptb, ptb + 1) > 0; ptb++;} 64 | if (quad1 < quad3) {cbalance += cmp(ptc, ptc + 1) > 0; ptc++;} 65 | if (quad1 < quad4) {dbalance += cmp(ptd, ptd + 1) > 0; ptd++;} 66 | 67 | cnt = abalance + bbalance + cbalance + dbalance; 68 | 69 | if (cnt == 0) 70 | { 71 | if (cmp(pta, pta + 1) <= 0 && cmp(ptb, ptb + 1) <= 0 && cmp(ptc, ptc + 1) <= 0) 72 | { 73 | return; 74 | } 75 | } 76 | 77 | asum = quad1 - abalance == 1; 78 | bsum = quad2 - bbalance == 1; 79 | csum = quad3 - cbalance == 1; 80 | dsum = quad4 - dbalance == 1; 81 | 82 | if (asum | bsum | csum | dsum) 83 | { 84 | unsigned char span1 = (asum && bsum) * (cmp(pta, pta + 1) > 0); 85 | unsigned char span2 = (bsum && csum) * (cmp(ptb, ptb + 1) > 0); 86 | unsigned char span3 = (csum && dsum) 
* (cmp(ptc, ptc + 1) > 0); 87 | 88 | switch (span1 | span2 * 2 | span3 * 4) 89 | { 90 | case 0: break; 91 | case 1: FUNC(quad_reversal)(array, ptb); abalance = bbalance = 0; break; 92 | case 2: FUNC(quad_reversal)(pta + 1, ptc); bbalance = cbalance = 0; break; 93 | case 3: FUNC(quad_reversal)(array, ptc); abalance = bbalance = cbalance = 0; break; 94 | case 4: FUNC(quad_reversal)(ptb + 1, ptd); cbalance = dbalance = 0; break; 95 | case 5: FUNC(quad_reversal)(array, ptb); 96 | FUNC(quad_reversal)(ptb + 1, ptd); abalance = bbalance = cbalance = dbalance = 0; break; 97 | case 6: FUNC(quad_reversal)(pta + 1, ptd); bbalance = cbalance = dbalance = 0; break; 98 | case 7: FUNC(quad_reversal)(array, ptd); return; 99 | } 100 | 101 | if (asum && abalance) {FUNC(quad_reversal)(array, pta); abalance = 0;} 102 | if (bsum && bbalance) {FUNC(quad_reversal)(pta + 1, ptb); bbalance = 0;} 103 | if (csum && cbalance) {FUNC(quad_reversal)(ptb + 1, ptc); cbalance = 0;} 104 | if (dsum && dbalance) {FUNC(quad_reversal)(ptc + 1, ptd); dbalance = 0;} 105 | } 106 | 107 | #ifdef cmp 108 | cnt = nmemb / 256; // more than 50% ordered 109 | #else 110 | cnt = nmemb / 512; // more than 25% ordered 111 | #endif 112 | asum = astreaks > cnt; 113 | bsum = bstreaks > cnt; 114 | csum = cstreaks > cnt; 115 | dsum = dstreaks > cnt; 116 | 117 | #ifndef cmp 118 | if (quad1 > QUAD_CACHE) 119 | { 120 | asum = bsum = csum = dsum = 1; 121 | } 122 | #endif 123 | switch (asum + bsum * 2 + csum * 4 + dsum * 8) 124 | { 125 | case 0: 126 | FUNC(blit_partition)(array, swap, swap_size, nmemb, cmp); 127 | return; 128 | case 1: 129 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 130 | FUNC(blit_partition)(pta + 1, swap, swap_size, quad2 + half2, cmp); 131 | break; 132 | case 2: 133 | FUNC(blit_partition)(array, swap, swap_size, quad1, cmp); 134 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 135 | FUNC(blit_partition)(ptb + 1, swap, swap_size, half2, cmp); 136 | 
break; 137 | case 3: 138 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 139 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 140 | FUNC(blit_partition)(ptb + 1, swap, swap_size, half2, cmp); 141 | break; 142 | case 4: 143 | FUNC(blit_partition)(array, swap, swap_size, half1, cmp); 144 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 145 | FUNC(blit_partition)(ptc + 1, swap, swap_size, quad4, cmp); 146 | break; 147 | case 8: 148 | FUNC(blit_partition)(array, swap, swap_size, half1 + quad3, cmp); 149 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 150 | break; 151 | case 9: 152 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 153 | FUNC(blit_partition)(pta + 1, swap, swap_size, quad2 + quad3, cmp); 154 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 155 | break; 156 | case 12: 157 | FUNC(blit_partition)(array, swap, swap_size, half1, cmp); 158 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 159 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 160 | break; 161 | case 5: 162 | case 6: 163 | case 7: 164 | case 10: 165 | case 11: 166 | case 13: 167 | case 14: 168 | case 15: 169 | if (asum) 170 | { 171 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 172 | } 173 | else FUNC(blit_partition)(array, swap, swap_size, quad1, cmp); 174 | if (bsum) 175 | { 176 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 177 | } 178 | else FUNC(blit_partition)(pta + 1, swap, swap_size, quad2, cmp); 179 | if (csum) 180 | { 181 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 182 | } 183 | else FUNC(blit_partition)(ptb + 1, swap, swap_size, quad3, cmp); 184 | if (dsum) 185 | { 186 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 187 | } 188 | else FUNC(blit_partition)(ptc + 1, swap, 
swap_size, quad4, cmp); 189 | break; 190 | } 191 | 192 | if (cmp(pta, pta + 1) <= 0) 193 | { 194 | if (cmp(ptc, ptc + 1) <= 0) 195 | { 196 | if (cmp(ptb, ptb + 1) <= 0) 197 | { 198 | return; 199 | } 200 | } 201 | else 202 | { 203 | FUNC(rotate_merge_block)(array + half1, swap, swap_size, quad3, quad4, cmp); 204 | } 205 | } 206 | else 207 | { 208 | FUNC(rotate_merge_block)(array, swap, swap_size, quad1, quad2, cmp); 209 | 210 | if (cmp(ptc, ptc + 1) > 0) 211 | { 212 | FUNC(rotate_merge_block)(array + half1, swap, swap_size, quad3, quad4, cmp); 213 | } 214 | } 215 | FUNC(rotate_merge_block)(array, swap, swap_size, half1, half2, cmp); 216 | } 217 | 218 | // The next 4 functions are used for pivot selection 219 | 220 | VAR FUNC(blit_binary_median)(VAR *pta, VAR *ptb, size_t len, CMPFUNC *cmp) 221 | { 222 | while (len /= 2) 223 | { 224 | if (cmp(pta + len, ptb + len) <= 0) pta += len; else ptb += len; 225 | } 226 | return cmp(pta, ptb) > 0 ? *pta : *ptb; 227 | } 228 | 229 | void FUNC(blit_trim_four)(VAR *pta, CMPFUNC *cmp) 230 | { 231 | VAR swap; 232 | size_t x; 233 | 234 | x = cmp(pta, pta + 1) > 0; swap = pta[!x]; pta[0] = pta[x]; pta[1] = swap; pta += 2; 235 | x = cmp(pta, pta + 1) > 0; swap = pta[!x]; pta[0] = pta[x]; pta[1] = swap; pta -= 2; 236 | 237 | x = (cmp(pta, pta + 2) <= 0) * 2; pta[2] = pta[x]; pta++; 238 | x = (cmp(pta, pta + 2) > 0) * 2; pta[0] = pta[x]; 239 | } 240 | 241 | VAR FUNC(blit_median_of_nine)(VAR *array, VAR *swap, size_t nmemb, CMPFUNC *cmp) 242 | { 243 | VAR *pta; 244 | size_t x, y, z; 245 | 246 | z = nmemb / 9; 247 | 248 | pta = array; 249 | 250 | for (x = 0 ; x < 9 ; x++) 251 | { 252 | swap[x] = *pta; 253 | 254 | pta += z; 255 | } 256 | 257 | FUNC(blit_trim_four)(swap, cmp); 258 | FUNC(blit_trim_four)(swap + 4, cmp); 259 | 260 | swap[0] = swap[5]; 261 | swap[3] = swap[8]; 262 | 263 | FUNC(blit_trim_four)(swap, cmp); 264 | 265 | swap[0] = swap[6]; 266 | 267 | x = cmp(swap + 0, swap + 1) > 0; 268 | y = cmp(swap + 0, swap + 2) > 0; 269 | z = 
cmp(swap + 1, swap + 2) > 0; 270 | 271 | return swap[(x == y) + (y ^ z)]; 272 | } 273 | 274 | VAR FUNC(blit_median_of_cbrt)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, int *generic, CMPFUNC *cmp) 275 | { 276 | VAR *pta, *pts; 277 | size_t cnt, div, cbrt; 278 | 279 | for (cbrt = 32 ; nmemb > cbrt * cbrt * cbrt && cbrt < swap_size ; cbrt *= 2) {} 280 | 281 | div = nmemb / cbrt; 282 | 283 | pta = array; // + (size_t) &div / 16 % div; // for a non-deterministic offset 284 | pts = swap; 285 | 286 | for (cnt = 0 ; cnt < cbrt ; cnt++) 287 | { 288 | pts[cnt] = *pta; 289 | 290 | pta += div; 291 | } 292 | cbrt /= 2; 293 | 294 | FUNC(quadsort_swap)(pts, pts + cbrt * 2, cbrt, cbrt, cmp); 295 | FUNC(quadsort_swap)(pts + cbrt, pts + cbrt * 2, cbrt, cbrt, cmp); 296 | 297 | *generic = (cmp(pts + cbrt * 2 - 1, pts) <= 0) & (cmp(pts + cbrt - 1, pts) <= 0); 298 | 299 | return FUNC(blit_binary_median)(pts, pts + cbrt, cbrt, cmp); 300 | } 301 | 302 | // As per suggestion by Marshall Lochbaum to improve generic data handling 303 | 304 | size_t FUNC(blit_reverse_partition)(VAR *array, VAR *swap, VAR *piv, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 305 | { 306 | if (nmemb > swap_size) 307 | { 308 | size_t l, r, h = nmemb / 2; 309 | 310 | l = FUNC(blit_reverse_partition)(array + 0, swap, piv, swap_size, h, cmp); 311 | r = FUNC(blit_reverse_partition)(array + h, swap, piv, swap_size, nmemb - h, cmp); 312 | 313 | FUNC(trinity_rotation)(array + l, swap, swap_size, h - l + r, h - l); 314 | 315 | return l + r; 316 | } 317 | #if !defined __clang__ 318 | size_t cnt, val, m = 0; 319 | VAR *pta = array; 320 | 321 | for (cnt = nmemb / 4 ; cnt ; cnt--) 322 | { 323 | val = cmp(piv, pta) > 0; swap[-m] = array[m] = *pta++; m += val; swap++; 324 | val = cmp(piv, pta) > 0; swap[-m] = array[m] = *pta++; m += val; swap++; 325 | val = cmp(piv, pta) > 0; swap[-m] = array[m] = *pta++; m += val; swap++; 326 | val = cmp(piv, pta) > 0; swap[-m] = array[m] = *pta++; m += val; swap++; 327 | } 328 | 
// Partitions array[0, nmemb) around *piv and returns m, the number of
// elements for which cmp(element, piv) <= 0. On return those m elements
// occupy array[0, m) and the remaining elements occupy array[m, nmemb);
// both groups keep their original relative order.
//
// swap is scratch space of swap_size elements. Inputs larger than swap_size
// are split in half, partitioned recursively, and stitched together with
// trinity_rotation.
size_t FUNC(blit_default_partition)(VAR *array, VAR *swap, VAR *piv, size_t swap_size, size_t nmemb, CMPFUNC *cmp)
{
	if (nmemb > swap_size)
	{
		size_t l, r, h = nmemb / 2;

		l = FUNC(blit_default_partition)(array + 0, swap, piv, swap_size, h, cmp);
		r = FUNC(blit_default_partition)(array + h, swap, piv, swap_size, nmemb - h, cmp);

		// array[l, h) holds the left half's "greater" elements and
		// array[h, h + r) the right half's "lesser" elements: rotate the
		// span so the two groups trade places.
		FUNC(trinity_rotation)(array + l, swap, swap_size, h - l + r, h - l);

		return l + r;
	}
#if !defined __clang__
	// Branchless variant: every element is written to BOTH destinations and
	// only the counter m decides which copy survives. swap is advanced once
	// per element, so swap[-m] addresses slot (processed - m) of the original
	// buffer, the next free slot for elements greater than the pivot, while
	// array[m] is the next free slot for elements not greater than it.
	size_t cnt, val, m = 0;
	VAR *pta = array;

	// main loop, unrolled four times
	for (cnt = nmemb / 4 ; cnt ; cnt--)
	{
		val = cmp(pta, piv) <= 0; swap[-m] = array[m] = *pta++; m += val; swap++;
		val = cmp(pta, piv) <= 0; swap[-m] = array[m] = *pta++; m += val; swap++;
		val = cmp(pta, piv) <= 0; swap[-m] = array[m] = *pta++; m += val; swap++;
		val = cmp(pta, piv) <= 0; swap[-m] = array[m] = *pta++; m += val; swap++;
	}

	// remaining 0-3 elements
	for (cnt = nmemb % 4 ; cnt ; cnt--)
	{
		val = cmp(pta, piv) <= 0; swap[-m] = array[m] = *pta++; m += val; swap++;
	}
	// rewind swap to the start of the buffer
	swap -= nmemb;
#else
	// clang variant: select the destination pointer with a conditional
	// expression and write each element once.
	size_t cnt, m;
	VAR *tmp, *ptx = array, *pta = array, *pts = swap;

	// main loop, unrolled four times
	for (cnt = nmemb / 4 ; cnt ; cnt--)
	{
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
	}

	// remaining 0-3 elements
	for (cnt = nmemb % 4 ; cnt ; cnt--)
	{
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
	}
	m = pta - array;
#endif
	// append the elements parked in swap behind the m kept in place
	memcpy(array + m, swap, sizeof(VAR) * (nmemb - m));

	return m;
}
FUNC(blit_partition)(array + a_size, swap, swap_size, s_size, cmp); 462 | } 463 | nmemb = a_size; 464 | 465 | if (s_size <= a_size / 16 || a_size <= BLIT_OUT) break; 466 | 467 | max = piv; 468 | } 469 | FUNC(quadsort_swap)(array, swap, swap_size, nmemb, cmp); 470 | } 471 | 472 | void FUNC(blitsort)(void *array, size_t nmemb, CMPFUNC *cmp) 473 | { 474 | if (nmemb <= 132) 475 | { 476 | FUNC(quadsort)(array, nmemb, cmp); 477 | } 478 | else 479 | { 480 | VAR *pta = (VAR *) array; 481 | #if BLIT_AUX 482 | size_t swap_size = BLIT_AUX; 483 | #else 484 | size_t swap_size = 1 << 19; 485 | 486 | while (nmemb / swap_size < swap_size / 128) 487 | { 488 | swap_size /= 4; 489 | } 490 | #endif 491 | VAR swap[swap_size]; 492 | 493 | FUNC(blit_analyze)(pta, swap, swap_size, nmemb, cmp); 494 | } 495 | } 496 | 497 | void FUNC(blitsort_swap)(void *array, void *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 498 | { 499 | if (nmemb <= 132) 500 | { 501 | FUNC(quadsort_swap)(array, swap, swap_size, nmemb, cmp); 502 | } 503 | else 504 | { 505 | VAR *pta = (VAR *) array; 506 | VAR *pts = (VAR *) swap; 507 | 508 | FUNC(blit_analyze)(pta, pts, swap_size, nmemb, cmp); 509 | } 510 | } 511 | 512 | #undef BLIT_AUX 513 | #undef BLIT_OUT 514 | -------------------------------------------------------------------------------- /src/blitsort.h: -------------------------------------------------------------------------------- 1 | // blitsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #ifndef BLITSORT_H 4 | #define BLITSORT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | typedef int CMPFUNC (const void *a, const void *b); 15 | 16 | //#define cmp(a,b) (*(a) > *(b)) 17 | 18 | #ifndef QUADSORT_H 19 | #include "quadsort.h" 20 | #endif 21 | 22 | // When sorting an array of pointers, like a string array, the QUAD_CACHE needs 23 | // to be set for proper performance when sorting large arrays. 
24 | // quadsort_prim() can be used to sort arrays of 32 and 64 bit integers 25 | // without a comparison function or cache restrictions. 26 | 27 | // With a 6 MB L3 cache a value of 262144 works well. 28 | 29 | #ifdef cmp 30 | #define QUAD_CACHE 4294967295 31 | #else 32 | //#define QUAD_CACHE 131072 33 | #define QUAD_CACHE 262144 34 | //#define QUAD_CACHE 524288 35 | //#define QUAD_CACHE 4294967295 36 | #endif 37 | 38 | ////////////////////////////////////////////////////////// 39 | // ┌───────────────────────────────────────────────────┐// 40 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 41 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 42 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 43 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 44 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 45 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 46 | // └───────────────────────────────────────────────────┘// 47 | ////////////////////////////////////////////////////////// 48 | 49 | #define VAR int 50 | #define FUNC(NAME) NAME##32 51 | 52 | #include "blitsort.c" 53 | 54 | #undef VAR 55 | #undef FUNC 56 | 57 | // blitsort_prim 58 | 59 | #define VAR int 60 | #define FUNC(NAME) NAME##_int32 61 | #ifndef cmp 62 | #define cmp(a,b) (*(a) > *(b)) 63 | #include "blitsort.c" 64 | #undef cmp 65 | #else 66 | #include "blitsort.c" 67 | #endif 68 | #undef VAR 69 | #undef FUNC 70 | 71 | #define VAR unsigned int 72 | #define FUNC(NAME) NAME##_uint32 73 | #ifndef cmp 74 | #define cmp(a,b) (*(a) > *(b)) 75 | #include "blitsort.c" 76 | #undef cmp 77 | #else 78 | #include "blitsort.c" 79 | #endif 80 | #undef VAR 81 | #undef FUNC 82 | 83 | ////////////////////////////////////////////////////////// 84 | // ┌───────────────────────────────────────────────────┐// 85 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 86 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 87 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 88 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 89 | // │ └█████┌┘ ██│ 
██████┌┘██████┐ ██│ │// 90 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 91 | // └───────────────────────────────────────────────────┘// 92 | ////////////////////////////////////////////////////////// 93 | 94 | #define VAR long long 95 | #define FUNC(NAME) NAME##64 96 | 97 | #include "blitsort.c" 98 | 99 | #undef VAR 100 | #undef FUNC 101 | 102 | // blitsort_prim 103 | 104 | #define VAR long long 105 | #define FUNC(NAME) NAME##_int64 106 | #ifndef cmp 107 | #define cmp(a,b) (*(a) > *(b)) 108 | #include "blitsort.c" 109 | #undef cmp 110 | #else 111 | #include "blitsort.c" 112 | #endif 113 | #undef VAR 114 | #undef FUNC 115 | 116 | #define VAR unsigned long long 117 | #define FUNC(NAME) NAME##_uint64 118 | #ifndef cmp 119 | #define cmp(a,b) (*(a) > *(b)) 120 | #include "blitsort.c" 121 | #undef cmp 122 | #else 123 | #include "blitsort.c" 124 | #endif 125 | #undef VAR 126 | #undef FUNC 127 | 128 | // This section is outside of 32/64 bit pointer territory, so no cache checks 129 | // necessary, unless sorting 32+ byte structures. 
130 | 131 | #undef QUAD_CACHE 132 | #define QUAD_CACHE 4294967295 133 | 134 | ////////////////////////////////////////////////////////// 135 | //┌────────────────────────────────────────────────────┐// 136 | //│ █████┐ ██████┐ ██████┐████████┐ │// 137 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 138 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 139 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 140 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 141 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 142 | //└────────────────────────────────────────────────────┘// 143 | ////////////////////////////////////////////////////////// 144 | 145 | #define VAR char 146 | #define FUNC(NAME) NAME##8 147 | 148 | #include "blitsort.c" 149 | 150 | #undef VAR 151 | #undef FUNC 152 | 153 | ////////////////////////////////////////////////////////// 154 | //┌────────────────────────────────────────────────────┐// 155 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 156 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 157 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 158 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 159 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 160 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 161 | //└────────────────────────────────────────────────────┘// 162 | ////////////////////////////////////////////////////////// 163 | 164 | #define VAR short 165 | #define FUNC(NAME) NAME##16 166 | 167 | #include "blitsort.c" 168 | 169 | #undef VAR 170 | #undef FUNC 171 | 172 | ////////////////////////////////////////////////////////// 173 | //┌────────────────────────────────────────────────────┐// 174 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 175 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 176 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │// 177 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 178 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 179 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 180 | //└────────────────────────────────────────────────────┘// 181 | 
////////////////////////////////////////////////////////// 182 | 183 | // 128 reflects the name, though the actual size is 80, 96, or 128 bits, 184 | // depending on platform. 185 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 186 | #define VAR long double 187 | #define FUNC(NAME) NAME##128 188 | #include "blitsort.c" 189 | #undef VAR 190 | #undef FUNC 191 | #endif 192 | 193 | /////////////////////////////////////////////////////////// 194 | //┌─────────────────────────────────────────────────────┐// 195 | //│ ██████┐██┐ ██┐███████┐████████┐ ██████┐ ███┐ ███┐│// 196 | //│██┌────┘██│ ██│██┌────┘└──██┌──┘██┌───██┐████┐████││// 197 | //│██│ ██│ ██│███████┐ ██│ ██│ ██│██┌███┌██││// 198 | //│██│ ██│ ██│└────██│ ██│ ██│ ██│██│└█┌┘██││// 199 | //│└██████┐└██████┌┘███████│ ██│ └██████┌┘██│ └┘ ██││// 200 | //│ └─────┘ └─────┘ └──────┘ └─┘ └─────┘ └─┘ └─┘│// 201 | //└─────────────────────────────────────────────────────┘// 202 | /////////////////////////////////////////////////////////// 203 | 204 | /* 205 | typedef struct {char bytes[32];} struct256; 206 | #define VAR struct256 207 | #define FUNC(NAME) NAME##256 208 | 209 | #include "blitsort.c" 210 | 211 | #undef VAR 212 | #undef FUNC 213 | */ 214 | 215 | ///////////////////////////////////////////////////////////////////////////// 216 | //┌────────────────────────────────────────────────────────────────────────┐// 217 | //│ ██████┐ ██┐ ██████┐████████┐███████┐ ██████┐ ██████┐ ████████┐ │// 218 | //│ ██┌──██┐██│ └─██┌─┘└──██┌──┘██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 219 | //│ ██████┌┘██│ ██│ ██│ ███████┐██│ ██│██████┌┘ ██│ │// 220 | //│ ██┌──██┐██│ ██│ ██│ └────██│██│ ██│██┌──██┐ ██│ │// 221 | //│ ██████┌┘███████┐██████┐ ██│ ███████│└██████┌┘██│ ██│ ██│ │// 222 | //│ └─────┘ └──────┘└─────┘ └─┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 223 | //└────────────────────────────────────────────────────────────────────────┘// 224 | ///////////////////////////////////////////////////////////////////////////// 225 | 226 | void blitsort(void *array, 
size_t nmemb, size_t size, CMPFUNC *cmp) 227 | { 228 | if (nmemb < 2) 229 | { 230 | return; 231 | } 232 | 233 | switch (size) 234 | { 235 | case sizeof(char): 236 | blitsort8(array, nmemb, cmp); 237 | return; 238 | 239 | case sizeof(short): 240 | blitsort16(array, nmemb, cmp); 241 | return; 242 | 243 | case sizeof(int): 244 | blitsort32(array, nmemb, cmp); 245 | return; 246 | 247 | case sizeof(long long): 248 | blitsort64(array, nmemb, cmp); 249 | return; 250 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 251 | case sizeof(long double): 252 | blitsort128(array, nmemb, cmp); 253 | return; 254 | #endif 255 | // case sizeof(struct256): 256 | // blitsort256(array, nmemb, cmp); 257 | return; 258 | 259 | default: 260 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 261 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 262 | #else 263 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long)); 264 | #endif 265 | // qsort(array, nmemb, size, cmp); 266 | } 267 | } 268 | 269 | // suggested size values for primitives: 270 | 271 | // case 0: unsigned char 272 | // case 1: signed char 273 | // case 2: signed short 274 | // case 3: unsigned short 275 | // case 4: signed int 276 | // case 5: unsigned int 277 | // case 6: float 278 | // case 7: double 279 | // case 8: signed long long 280 | // case 9: unsigned long long 281 | // case ?: long double, use sizeof(long double): 282 | 283 | void blitsort_prim(void *array, size_t nmemb, size_t size) 284 | { 285 | if (nmemb < 2) 286 | { 287 | return; 288 | } 289 | 290 | switch (size) 291 | { 292 | case 4: 293 | blitsort_int32(array, nmemb, NULL); 294 | return; 295 | case 5: 296 | blitsort_uint32(array, nmemb, NULL); 297 | return; 298 | case 8: 299 | blitsort_int64(array, nmemb, NULL); 300 | return; 301 | case 9: 302 | blitsort_uint64(array, nmemb, NULL); 303 | return; 304 | default: 305 | assert(size == sizeof(int) || size 
== sizeof(int) + 1 || size == sizeof(long long) || size == sizeof(long long) + 1); 306 | return; 307 | } 308 | } 309 | 310 | #undef QUAD_CACHE 311 | 312 | #endif 313 | -------------------------------------------------------------------------------- /src/crumsort.c: -------------------------------------------------------------------------------- 1 | // crumsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #define CRUM_AUX 512 4 | #define CRUM_OUT 96 5 | 6 | void FUNC(fulcrum_partition)(VAR *array, VAR *swap, VAR *max, size_t swap_size, size_t nmemb, CMPFUNC *cmp); 7 | 8 | void FUNC(crum_analyze)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 9 | { 10 | unsigned char loop, asum, bsum, csum, dsum; 11 | unsigned int astreaks, bstreaks, cstreaks, dstreaks; 12 | size_t quad1, quad2, quad3, quad4, half1, half2; 13 | size_t cnt, abalance, bbalance, cbalance, dbalance; 14 | VAR *pta, *ptb, *ptc, *ptd; 15 | 16 | half1 = nmemb / 2; 17 | quad1 = half1 / 2; 18 | quad2 = half1 - quad1; 19 | half2 = nmemb - half1; 20 | quad3 = half2 / 2; 21 | quad4 = half2 - quad3; 22 | 23 | pta = array; 24 | ptb = array + quad1; 25 | ptc = array + half1; 26 | ptd = array + half1 + quad3; 27 | 28 | astreaks = bstreaks = cstreaks = dstreaks = 0; 29 | abalance = bbalance = cbalance = dbalance = 0; 30 | 31 | for (cnt = nmemb ; cnt > 132 ; cnt -= 128) 32 | { 33 | for (asum = bsum = csum = dsum = 0, loop = 32 ; loop ; loop--) 34 | { 35 | asum += cmp(pta, pta + 1) > 0; pta++; 36 | bsum += cmp(ptb, ptb + 1) > 0; ptb++; 37 | csum += cmp(ptc, ptc + 1) > 0; ptc++; 38 | dsum += cmp(ptd, ptd + 1) > 0; ptd++; 39 | } 40 | abalance += asum; astreaks += asum = (asum == 0) | (asum == 32); 41 | bbalance += bsum; bstreaks += bsum = (bsum == 0) | (bsum == 32); 42 | cbalance += csum; cstreaks += csum = (csum == 0) | (csum == 32); 43 | dbalance += dsum; dstreaks += dsum = (dsum == 0) | (dsum == 32); 44 | 45 | if (cnt > 516 && asum + bsum + csum + dsum == 0) 46 | { 47 | abalance += 48; 
pta += 96; 48 | bbalance += 48; ptb += 96; 49 | cbalance += 48; ptc += 96; 50 | dbalance += 48; ptd += 96; 51 | cnt -= 384; 52 | } 53 | } 54 | 55 | for ( ; cnt > 7 ; cnt -= 4) 56 | { 57 | abalance += cmp(pta, pta + 1) > 0; pta++; 58 | bbalance += cmp(ptb, ptb + 1) > 0; ptb++; 59 | cbalance += cmp(ptc, ptc + 1) > 0; ptc++; 60 | dbalance += cmp(ptd, ptd + 1) > 0; ptd++; 61 | } 62 | 63 | if (quad1 < quad2) {bbalance += cmp(ptb, ptb + 1) > 0; ptb++;} 64 | if (quad1 < quad3) {cbalance += cmp(ptc, ptc + 1) > 0; ptc++;} 65 | if (quad1 < quad4) {dbalance += cmp(ptd, ptd + 1) > 0; ptd++;} 66 | 67 | cnt = abalance + bbalance + cbalance + dbalance; 68 | 69 | if (cnt == 0) 70 | { 71 | if (cmp(pta, pta + 1) <= 0 && cmp(ptb, ptb + 1) <= 0 && cmp(ptc, ptc + 1) <= 0) 72 | { 73 | return; 74 | } 75 | } 76 | 77 | asum = quad1 - abalance == 1; 78 | bsum = quad2 - bbalance == 1; 79 | csum = quad3 - cbalance == 1; 80 | dsum = quad4 - dbalance == 1; 81 | 82 | if (asum | bsum | csum | dsum) 83 | { 84 | unsigned char span1 = (asum && bsum) * (cmp(pta, pta + 1) > 0); 85 | unsigned char span2 = (bsum && csum) * (cmp(ptb, ptb + 1) > 0); 86 | unsigned char span3 = (csum && dsum) * (cmp(ptc, ptc + 1) > 0); 87 | 88 | switch (span1 | span2 * 2 | span3 * 4) 89 | { 90 | case 0: break; 91 | case 1: FUNC(quad_reversal)(array, ptb); abalance = bbalance = 0; break; 92 | case 2: FUNC(quad_reversal)(pta + 1, ptc); bbalance = cbalance = 0; break; 93 | case 3: FUNC(quad_reversal)(array, ptc); abalance = bbalance = cbalance = 0; break; 94 | case 4: FUNC(quad_reversal)(ptb + 1, ptd); cbalance = dbalance = 0; break; 95 | case 5: FUNC(quad_reversal)(array, ptb); 96 | FUNC(quad_reversal)(ptb + 1, ptd); abalance = bbalance = cbalance = dbalance = 0; break; 97 | case 6: FUNC(quad_reversal)(pta + 1, ptd); bbalance = cbalance = dbalance = 0; break; 98 | case 7: FUNC(quad_reversal)(array, ptd); return; 99 | } 100 | 101 | if (asum && abalance) {FUNC(quad_reversal)(array, pta); abalance = 0;} 102 | if (bsum && 
bbalance) {FUNC(quad_reversal)(pta + 1, ptb); bbalance = 0;} 103 | if (csum && cbalance) {FUNC(quad_reversal)(ptb + 1, ptc); cbalance = 0;} 104 | if (dsum && dbalance) {FUNC(quad_reversal)(ptc + 1, ptd); dbalance = 0;} 105 | } 106 | 107 | #ifdef cmp 108 | cnt = nmemb / 256; // switch to quadsort if at least 50% ordered 109 | #else 110 | cnt = nmemb / 512; // switch to quadsort if at least 25% ordered 111 | #endif 112 | asum = astreaks > cnt; 113 | bsum = bstreaks > cnt; 114 | csum = cstreaks > cnt; 115 | dsum = dstreaks > cnt; 116 | 117 | #ifndef cmp 118 | if (quad1 > QUAD_CACHE) 119 | { 120 | asum = bsum = csum = dsum = 1; 121 | } 122 | #endif 123 | switch (asum + bsum * 2 + csum * 4 + dsum * 8) 124 | { 125 | case 0: 126 | FUNC(fulcrum_partition)(array, swap, NULL, swap_size, nmemb, cmp); 127 | return; 128 | case 1: 129 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 130 | FUNC(fulcrum_partition)(pta + 1, swap, NULL, swap_size, quad2 + half2, cmp); 131 | break; 132 | case 2: 133 | FUNC(fulcrum_partition)(array, swap, NULL, swap_size, quad1, cmp); 134 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 135 | FUNC(fulcrum_partition)(ptb + 1, swap, NULL, swap_size, half2, cmp); 136 | break; 137 | case 3: 138 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 139 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 140 | FUNC(fulcrum_partition)(ptb + 1, swap, NULL, swap_size, half2, cmp); 141 | break; 142 | case 4: 143 | FUNC(fulcrum_partition)(array, swap, NULL, swap_size, half1, cmp); 144 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 145 | FUNC(fulcrum_partition)(ptc + 1, swap, NULL, swap_size, quad4, cmp); 146 | break; 147 | case 8: 148 | FUNC(fulcrum_partition)(array, swap, NULL, swap_size, half1 + quad3, cmp); 149 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 150 | break; 151 | case 9: 152 | if (abalance) 
FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 153 | FUNC(fulcrum_partition)(pta + 1, swap, NULL, swap_size, quad2 + quad3, cmp); 154 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 155 | break; 156 | case 12: 157 | FUNC(fulcrum_partition)(array, swap, NULL, swap_size, half1, cmp); 158 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 159 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 160 | break; 161 | case 5: 162 | case 6: 163 | case 7: 164 | case 10: 165 | case 11: 166 | case 13: 167 | case 14: 168 | case 15: 169 | if (asum) 170 | { 171 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 172 | } 173 | else FUNC(fulcrum_partition)(array, swap, NULL, swap_size, quad1, cmp); 174 | if (bsum) 175 | { 176 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 177 | } 178 | else FUNC(fulcrum_partition)(pta + 1, swap, NULL, swap_size, quad2, cmp); 179 | if (csum) 180 | { 181 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 182 | } 183 | else FUNC(fulcrum_partition)(ptb + 1, swap, NULL, swap_size, quad3, cmp); 184 | if (dsum) 185 | { 186 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 187 | } 188 | else FUNC(fulcrum_partition)(ptc + 1, swap, NULL, swap_size, quad4, cmp); 189 | break; 190 | } 191 | 192 | if (cmp(pta, pta + 1) <= 0) 193 | { 194 | if (cmp(ptc, ptc + 1) <= 0) 195 | { 196 | if (cmp(ptb, ptb + 1) <= 0) 197 | { 198 | return; 199 | } 200 | } 201 | else 202 | { 203 | FUNC(rotate_merge_block)(array + half1, swap, swap_size, quad3, quad4, cmp); 204 | } 205 | } 206 | else 207 | { 208 | FUNC(rotate_merge_block)(array, swap, swap_size, quad1, quad2, cmp); 209 | 210 | if (cmp(ptc, ptc + 1) > 0) 211 | { 212 | FUNC(rotate_merge_block)(array + half1, swap, swap_size, quad3, quad4, cmp); 213 | } 214 | } 215 | FUNC(rotate_merge_block)(array, swap, swap_size, half1, half2, cmp); 216 | } 217 | 
218 | // The next 4 functions are used for pivot selection 219 | 220 | VAR *FUNC(crum_binary_median)(VAR *pta, VAR *ptb, size_t len, CMPFUNC *cmp) 221 | { 222 | while (len /= 2) 223 | { 224 | if (cmp(pta + len, ptb + len) <= 0) pta += len; else ptb += len; 225 | } 226 | return cmp(pta, ptb) > 0 ? pta : ptb; 227 | } 228 | 229 | VAR *FUNC(crum_median_of_cbrt)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, int *generic, CMPFUNC *cmp) 230 | { 231 | VAR *pta, *piv; 232 | size_t cnt, cbrt, div; 233 | 234 | for (cbrt = 32 ; nmemb > cbrt * cbrt * cbrt && cbrt < swap_size ; cbrt *= 2) {} 235 | 236 | div = nmemb / cbrt; 237 | 238 | pta = array + nmemb - 1 - (size_t) &div / 64 % div; 239 | piv = array + cbrt; 240 | 241 | for (cnt = cbrt ; cnt ; cnt--) 242 | { 243 | swap[0] = *--piv; *piv = *pta; *pta = swap[0]; 244 | 245 | pta -= div; 246 | } 247 | 248 | cbrt /= 2; 249 | 250 | FUNC(quadsort_swap)(piv, swap, swap_size, cbrt, cmp); 251 | FUNC(quadsort_swap)(piv + cbrt, swap, swap_size, cbrt, cmp); 252 | 253 | *generic = (cmp(piv + cbrt * 2 - 1, piv) <= 0) & (cmp(piv + cbrt - 1, piv) <= 0); 254 | 255 | return FUNC(crum_binary_median)(piv, piv + cbrt, cbrt, cmp); 256 | } 257 | 258 | size_t FUNC(crum_median_of_three)(VAR *array, size_t v0, size_t v1, size_t v2, CMPFUNC *cmp) 259 | { 260 | size_t v[3] = {v0, v1, v2}; 261 | char x, y, z; 262 | 263 | x = cmp(array + v0, array + v1) > 0; 264 | y = cmp(array + v0, array + v2) > 0; 265 | z = cmp(array + v1, array + v2) > 0; 266 | 267 | return v[(x == y) + (y ^ z)]; 268 | } 269 | 270 | VAR *FUNC(crum_median_of_nine)(VAR *array, size_t nmemb, CMPFUNC *cmp) 271 | { 272 | size_t x, y, z, div = nmemb / 16; 273 | 274 | x = FUNC(crum_median_of_three)(array, div * 2, div * 1, div * 4, cmp); 275 | y = FUNC(crum_median_of_three)(array, div * 8, div * 6, div * 10, cmp); 276 | z = FUNC(crum_median_of_three)(array, div * 14, div * 12, div * 15, cmp); 277 | 278 | return array + FUNC(crum_median_of_three)(array, x, y, z, cmp); 279 | } 280 | 
281 | size_t FUNC(fulcrum_default_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *piv, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 282 | { 283 | size_t i, cnt, val, m = 0; 284 | VAR *ptl, *ptr, *pta, *tpa; 285 | 286 | memcpy(swap, array, 32 * sizeof(VAR)); 287 | memcpy(swap + 32, array + nmemb - 32, 32 * sizeof(VAR)); 288 | 289 | ptl = array; 290 | ptr = array + nmemb - 1; 291 | 292 | pta = array + 32; 293 | tpa = array + nmemb - 33; 294 | 295 | cnt = nmemb / 16 - 4; 296 | 297 | while (1) 298 | { 299 | if (pta - ptl - m <= 48) 300 | { 301 | if (cnt-- == 0) break; 302 | 303 | for (i = 16 ; i ; i--) 304 | { 305 | val = cmp(pta, piv) <= 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 306 | } 307 | } 308 | if (pta - ptl - m >= 16) 309 | { 310 | if (cnt-- == 0) break; 311 | 312 | for (i = 16 ; i ; i--) 313 | { 314 | val = cmp(tpa, piv) <= 0; ptl[m] = ptr[m] = *tpa--; m += val; ptr--; 315 | } 316 | } 317 | } 318 | 319 | if (pta - ptl - m <= 48) 320 | { 321 | for (cnt = nmemb % 16 ; cnt ; cnt--) 322 | { 323 | val = cmp(pta, piv) <= 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 324 | } 325 | } 326 | else 327 | { 328 | for (cnt = nmemb % 16 ; cnt ; cnt--) 329 | { 330 | val = cmp(tpa, piv) <= 0; ptl[m] = ptr[m] = *tpa--; m += val; ptr--; 331 | } 332 | } 333 | pta = swap; 334 | 335 | for (cnt = 16 ; cnt ; cnt--) 336 | { 337 | val = cmp(pta, piv) <= 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 338 | val = cmp(pta, piv) <= 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 339 | val = cmp(pta, piv) <= 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 340 | val = cmp(pta, piv) <= 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 341 | } 342 | return m; 343 | } 344 | 345 | // As per suggestion by Marshall Lochbaum to improve generic data handling by mimicking dual-pivot quicksort 346 | 347 | size_t FUNC(fulcrum_reverse_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *piv, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 348 | { 349 | size_t i, cnt, val, m = 0; 350 | VAR *ptl, *ptr, *pta, *tpa; 
351 | 352 | memcpy(swap, array, 32 * sizeof(VAR)); 353 | memcpy(swap + 32, array + nmemb - 32, 32 * sizeof(VAR)); 354 | 355 | ptl = array; 356 | ptr = array + nmemb - 1; 357 | 358 | pta = array + 32; 359 | tpa = array + nmemb - 33; 360 | 361 | cnt = nmemb / 16 - 4; 362 | 363 | while (1) 364 | { 365 | if (pta - ptl - m <= 48) 366 | { 367 | if (cnt-- == 0) break; 368 | 369 | for (i = 16 ; i ; i--) 370 | { 371 | val = cmp(piv, pta) > 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 372 | } 373 | } 374 | if (pta - ptl - m >= 16) 375 | { 376 | if (cnt-- == 0) break; 377 | 378 | for (i = 16 ; i ; i--) 379 | { 380 | val = cmp(piv, tpa) > 0; ptl[m] = ptr[m] = *tpa--; m += val; ptr--; 381 | } 382 | } 383 | } 384 | 385 | if (pta - ptl - m <= 48) 386 | { 387 | for (cnt = nmemb % 16 ; cnt ; cnt--) 388 | { 389 | val = cmp(piv, pta) > 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 390 | } 391 | } 392 | else 393 | { 394 | for (cnt = nmemb % 16 ; cnt ; cnt--) 395 | { 396 | val = cmp(piv, tpa) > 0; ptl[m] = ptr[m] = *tpa--; m += val; ptr--; 397 | } 398 | } 399 | pta = swap; 400 | 401 | for (cnt = 16 ; cnt ; cnt--) 402 | { 403 | val = cmp(piv, pta) > 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 404 | val = cmp(piv, pta) > 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 405 | val = cmp(piv, pta) > 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 406 | val = cmp(piv, pta) > 0; ptl[m] = ptr[m] = *pta++; m += val; ptr--; 407 | } 408 | return m; 409 | } 410 | 411 | void FUNC(fulcrum_partition)(VAR *array, VAR *swap, VAR *max, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 412 | { 413 | size_t a_size, s_size; 414 | VAR *ptp, piv; 415 | int generic = 0; 416 | 417 | while (1) 418 | { 419 | if (nmemb <= 2048) 420 | { 421 | ptp = FUNC(crum_median_of_nine)(array, nmemb, cmp); 422 | } 423 | else 424 | { 425 | ptp = FUNC(crum_median_of_cbrt)(array, swap, swap_size, nmemb, &generic, cmp); 426 | 427 | if (generic) break; 428 | } 429 | piv = *ptp; 430 | 431 | if (max && cmp(max, &piv) <= 0) 432 | { 433 | 
a_size = FUNC(fulcrum_reverse_partition)(array, swap, array, &piv, swap_size, nmemb, cmp); 434 | s_size = nmemb - a_size; 435 | nmemb = a_size; 436 | 437 | if (s_size <= a_size / 32 || a_size <= CRUM_OUT) break; 438 | 439 | max = NULL; 440 | continue; 441 | } 442 | *ptp = array[--nmemb]; 443 | 444 | a_size = FUNC(fulcrum_default_partition)(array, swap, array, &piv, swap_size, nmemb, cmp); 445 | s_size = nmemb - a_size; 446 | 447 | ptp = array + a_size; array[nmemb] = *ptp; *ptp = piv; 448 | 449 | if (a_size <= s_size / 32 || s_size <= CRUM_OUT) 450 | { 451 | FUNC(quadsort_swap)(ptp + 1, swap, swap_size, s_size, cmp); 452 | } 453 | else 454 | { 455 | FUNC(fulcrum_partition)(ptp + 1, swap, max, swap_size, s_size, cmp); 456 | } 457 | nmemb = a_size; 458 | 459 | if (s_size <= a_size / 32 || a_size <= CRUM_OUT) 460 | { 461 | if (a_size <= CRUM_OUT) break; 462 | 463 | a_size = FUNC(fulcrum_reverse_partition)(array, swap, array, &piv, swap_size, nmemb, cmp); 464 | s_size = nmemb - a_size; 465 | nmemb = a_size; 466 | 467 | if (s_size <= a_size / 32 || a_size <= CRUM_OUT) break; 468 | 469 | max = NULL; 470 | continue; 471 | } 472 | max = ptp; 473 | } 474 | FUNC(quadsort_swap)(array, swap, swap_size, nmemb, cmp); 475 | } 476 | 477 | void FUNC(crumsort)(void *array, size_t nmemb, CMPFUNC *cmp) 478 | { 479 | if (nmemb <= 256) 480 | { 481 | VAR swap[nmemb]; 482 | 483 | FUNC(quadsort_swap)(array, swap, nmemb, nmemb, cmp); 484 | 485 | return; 486 | } 487 | VAR *pta = (VAR *) array; 488 | #if CRUM_AUX 489 | size_t swap_size = CRUM_AUX; 490 | #else 491 | size_t swap_size = 128; 492 | 493 | while (swap_size * swap_size <= nmemb) 494 | { 495 | swap_size *= 4; 496 | } 497 | #endif 498 | VAR swap[swap_size]; 499 | 500 | FUNC(crum_analyze)(pta, swap, swap_size, nmemb, cmp); 501 | } 502 | 503 | void FUNC(crumsort_swap)(void *array, void *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 504 | { 505 | if (nmemb <= 256) 506 | { 507 | FUNC(quadsort_swap)(array, swap, swap_size, nmemb, 
cmp); 508 | } 509 | else 510 | { 511 | VAR *pta = (VAR *) array; 512 | VAR *pts = (VAR *) swap; 513 | 514 | FUNC(crum_analyze)(pta, pts, swap_size, nmemb, cmp); 515 | } 516 | } 517 | -------------------------------------------------------------------------------- /src/crumsort.h: -------------------------------------------------------------------------------- 1 | // crumsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #ifndef CRUMSORT_H 4 | #define CRUMSORT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | typedef int CMPFUNC (const void *a, const void *b); 15 | 16 | //#define cmp(a,b) (*(a) > *(b)) 17 | 18 | #ifndef QUADSORT_H 19 | #include "quadsort.h" 20 | #endif 21 | 22 | // When sorting an array of pointers, like a string array, the QUAD_CACHE needs 23 | // to be set for proper performance when sorting large arrays. 24 | // crumsort_prim() can be used to sort arrays of 32 and 64 bit integers 25 | // without a comparison function or cache restrictions. 26 | 27 | // With a 6 MB L3 cache a value of 262144 works well. 
28 | 29 | #ifdef cmp 30 | #define QUAD_CACHE 4294967295 31 | #else 32 | //#define QUAD_CACHE 131072 33 | #define QUAD_CACHE 262144 34 | //#define QUAD_CACHE 524288 35 | //#define QUAD_CACHE 4294967295 36 | #endif 37 | 38 | ////////////////////////////////////////////////////////// 39 | // ┌───────────────────────────────────────────────────┐// 40 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 41 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 42 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 43 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 44 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 45 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 46 | // └───────────────────────────────────────────────────┘// 47 | ////////////////////////////////////////////////////////// 48 | 49 | #define VAR int 50 | #define FUNC(NAME) NAME##32 51 | 52 | #include "crumsort.c" 53 | 54 | #undef VAR 55 | #undef FUNC 56 | 57 | // crumsort_prim 58 | 59 | #define VAR int 60 | #define FUNC(NAME) NAME##_int32 61 | #ifndef cmp 62 | #define cmp(a,b) (*(a) > *(b)) 63 | #include "crumsort.c" 64 | #undef cmp 65 | #else 66 | #include "crumsort.c" 67 | #endif 68 | #undef VAR 69 | #undef FUNC 70 | 71 | #define VAR unsigned int 72 | #define FUNC(NAME) NAME##_uint32 73 | #ifndef cmp 74 | #define cmp(a,b) (*(a) > *(b)) 75 | #include "crumsort.c" 76 | #undef cmp 77 | #else 78 | #include "crumsort.c" 79 | #endif 80 | #undef VAR 81 | #undef FUNC 82 | 83 | ////////////////////////////////////////////////////////// 84 | // ┌───────────────────────────────────────────────────┐// 85 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 86 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 87 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 88 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 89 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 90 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 91 | // └───────────────────────────────────────────────────┘// 92 | ////////////////////////////////////////////////////////// 93 | 94 | 
#define VAR long long 95 | #define FUNC(NAME) NAME##64 96 | 97 | #include "crumsort.c" 98 | 99 | #undef VAR 100 | #undef FUNC 101 | 102 | // crumsort_prim 103 | 104 | #define VAR long long 105 | #define FUNC(NAME) NAME##_int64 106 | #ifndef cmp 107 | #define cmp(a,b) (*(a) > *(b)) 108 | #include "crumsort.c" 109 | #undef cmp 110 | #else 111 | #include "crumsort.c" 112 | #endif 113 | #undef VAR 114 | #undef FUNC 115 | 116 | #define VAR unsigned long long 117 | #define FUNC(NAME) NAME##_uint64 118 | #ifndef cmp 119 | #define cmp(a,b) (*(a) > *(b)) 120 | #include "crumsort.c" 121 | #undef cmp 122 | #else 123 | #include "crumsort.c" 124 | #endif 125 | #undef VAR 126 | #undef FUNC 127 | 128 | // This section is outside of 32/64 bit pointer territory, so no cache checks 129 | // necessary, unless sorting 32+ byte structures. 130 | 131 | #undef QUAD_CACHE 132 | #define QUAD_CACHE 4294967295 133 | 134 | ////////////////////////////////////////////////////////// 135 | //┌────────────────────────────────────────────────────┐// 136 | //│ █████┐ ██████┐ ██████┐████████┐ │// 137 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 138 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 139 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 140 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 141 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 142 | //└────────────────────────────────────────────────────┘// 143 | ////////////////////////////////////////////////////////// 144 | 145 | #define VAR char 146 | #define FUNC(NAME) NAME##8 147 | 148 | #include "crumsort.c" 149 | 150 | #undef VAR 151 | #undef FUNC 152 | 153 | ////////////////////////////////////////////////////////// 154 | //┌────────────────────────────────────────────────────┐// 155 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 156 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 157 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 158 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 159 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 160 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 161 | 
//└────────────────────────────────────────────────────┘// 162 | ////////////////////////////////////////////////////////// 163 | 164 | #define VAR short 165 | #define FUNC(NAME) NAME##16 166 | 167 | #include "crumsort.c" 168 | 169 | #undef VAR 170 | #undef FUNC 171 | 172 | ////////////////////////////////////////////////////////// 173 | //┌────────────────────────────────────────────────────┐// 174 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 175 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 176 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │// 177 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 178 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 179 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 180 | //└────────────────────────────────────────────────────┘// 181 | ////////////////////////////////////////////////////////// 182 | 183 | // 128 reflects the name, though the actual size of a long double is 64, 80, 184 | // 96, or 128 bits, depending on platform. 
185 | 186 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 187 | #define VAR long double 188 | #define FUNC(NAME) NAME##128 189 | #include "crumsort.c" 190 | #undef VAR 191 | #undef FUNC 192 | #endif 193 | 194 | /////////////////////////////////////////////////////////// 195 | //┌─────────────────────────────────────────────────────┐// 196 | //│ ██████┐██┐ ██┐███████┐████████┐ ██████┐ ███┐ ███┐│// 197 | //│██┌────┘██│ ██│██┌────┘└──██┌──┘██┌───██┐████┐████││// 198 | //│██│ ██│ ██│███████┐ ██│ ██│ ██│██┌███┌██││// 199 | //│██│ ██│ ██│└────██│ ██│ ██│ ██│██│└█┌┘██││// 200 | //│└██████┐└██████┌┘███████│ ██│ └██████┌┘██│ └┘ ██││// 201 | //│ └─────┘ └─────┘ └──────┘ └─┘ └─────┘ └─┘ └─┘│// 202 | //└─────────────────────────────────────────────────────┘// 203 | /////////////////////////////////////////////////////////// 204 | 205 | /* 206 | typedef struct {char bytes[32];} struct256; 207 | #define VAR struct256 208 | #define FUNC(NAME) NAME##256 209 | 210 | #include "crumsort.c" 211 | 212 | #undef VAR 213 | #undef FUNC 214 | */ 215 | 216 | ////////////////////////////////////////////////////////////////////////// 217 | //┌─────────────────────────────────────────────────────────────────────┐// 218 | //│ ██████┐██████┐ ██┐ ██┐███┐ ███┐███████┐ ██████┐ ██████┐ ████████┐│// 219 | //│██┌────┘██┌──██┐██│ ██│████┐████│██┌────┘██┌───██┐██┌──██┐└──██┌──┘│// 220 | //│██│ ██████┌┘██│ ██│██┌███┌██│███████┐██│ ██│██████┌┘ ██│ │// 221 | //│██│ ██┌──██┐██│ ██│██│└█┌┘██│└────██│██│ ██│██┌──██┐ ██│ │// 222 | //│└██████┐██│ ██│└██████┌┘██│ └┘ ██│███████│└██████┌┘██│ ██│ ██│ │// 223 | //│ └─────┘└─┘ └─┘ └─────┘ └─┘ └─┘└──────┘ └─────┘ └─┘ └─┘ └─┘ │// 224 | //└─────────────────────────────────────────────────────────────────────┘// 225 | ////////////////////////////////////////////////////////////////////////// 226 | 227 | void crumsort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 228 | { 229 | if (nmemb < 2) 230 | { 231 | return; 232 | } 233 | 234 | switch (size) 235 | { 236 | case 
sizeof(char): 237 | crumsort8(array, nmemb, cmp); 238 | return; 239 | 240 | case sizeof(short): 241 | crumsort16(array, nmemb, cmp); 242 | return; 243 | 244 | case sizeof(int): 245 | crumsort32(array, nmemb, cmp); 246 | return; 247 | 248 | case sizeof(long long): 249 | crumsort64(array, nmemb, cmp); 250 | return; 251 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 252 | case sizeof(long double): 253 | crumsort128(array, nmemb, cmp); 254 | return; 255 | #endif 256 | // case sizeof(struct256): 257 | // crumsort256(array, nmemb, cmp); 258 | return; 259 | 260 | default: 261 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 262 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 263 | #else 264 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long)); 265 | #endif 266 | // qsort(array, nmemb, size, cmp); 267 | } 268 | } 269 | 270 | // suggested size values for primitives: 271 | 272 | // case 0: unsigned char 273 | // case 1: signed char 274 | // case 2: signed short 275 | // case 3: unsigned short 276 | // case 4: signed int 277 | // case 5: unsigned int 278 | // case 6: float 279 | // case 7: double 280 | // case 8: signed long long 281 | // case 9: unsigned long long 282 | // case ?: long double, use sizeof(long double): 283 | 284 | void crumsort_prim(void *array, size_t nmemb, size_t size) 285 | { 286 | if (nmemb < 2) 287 | { 288 | return; 289 | } 290 | 291 | switch (size) 292 | { 293 | case 4: 294 | crumsort_int32(array, nmemb, NULL); 295 | return; 296 | case 5: 297 | crumsort_uint32(array, nmemb, NULL); 298 | return; 299 | case 8: 300 | crumsort_int64(array, nmemb, NULL); 301 | return; 302 | case 9: 303 | crumsort_uint64(array, nmemb, NULL); 304 | return; 305 | default: 306 | assert(size == sizeof(int) || size == sizeof(int) + 1 || size == sizeof(long long) || size == sizeof(long long) + 1); 307 | return; 308 | } 309 | } 310 | 311 | #undef QUAD_CACHE 
#endif

--------------------------------------------------------------------------------
/src/extra_tests.c:
--------------------------------------------------------------------------------
#ifdef QUAD_DEBUG

	// Extra benchmark distributions, only compiled when QUAD_DEBUG is defined.
	// This is a fragment, not a translation unit: it uses cnt, mem, max,
	// samples, repetitions, a_array, r_array, v_array, quad0, quad1, half1,
	// span3, VAR, cmp_int and run_test() from whatever scope includes it.
	// Each section fills r_array with one distribution and hands it to
	// run_test() under the quoted label.
	// NOTE(review): the first four sections fill up to `mem` and pass 0 as the
	// eighth run_test() argument; the later ones fill up to `max` and pass
	// `repetitions` - confirm against run_test() in the including file.

	// random % 4: only the four values 0..3

	for (cnt = 0 ; cnt < mem ; cnt++)
	{
		r_array[cnt] = rand() % 4;
	}
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "random % 4", sizeof(VAR), cmp_int);

	// semi random: the integer expression is ((rand() % 8) / 7) * rand(), so an
	// element is a random value only when the first draw is 7 modulo 8, else 0

	for (cnt = 0 ; cnt < mem ; cnt++)
	{
		r_array[cnt] = rand() % 8 / 7 * rand();
	}
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "semi random", sizeof(VAR), cmp_int);

	// random signal: a rising ramp over the first half and a falling ramp over
	// the second half, each with 0..15 of noise added

	for (cnt = 0 ; cnt < mem ; cnt++)
	{
		if (cnt < mem / 2)
		{
			r_array[cnt] = cnt + rand() % 16;
		}
		else
		{
			r_array[cnt] = mem - cnt + rand() % 16;
		}
	}
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "random signal", sizeof(VAR), cmp_int);

	// exponential (label only: the values are cnt * cnt modulo 10000, i.e. a
	// quadratic sequence that wraps around)

	for (cnt = 0 ; cnt < mem ; cnt++)
	{
		r_array[cnt] = (size_t) (cnt * cnt) % 10000; //(1 << 30);
	}
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, 0, "exponential", sizeof(VAR), cmp_int);

	// random fragments -- Make array 92% sorted
	// Four runs are pre-sorted, starting at quad0, quad1, half1 and span3, each
	// quad1 / 100 * 98 elements long.
	// NOTE(review): that is 98% of quad1 per run, not the 92% stated above, and
	// the test is labelled "chaos fragments" - confirm which figure is intended.

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		r_array[cnt] = rand();
	}
	quadsort(r_array + quad0, quad1 / 100 * 98, sizeof(VAR), cmp_int);
	quadsort(r_array + quad1, quad1 / 100 * 98, sizeof(VAR), cmp_int);
	quadsort(r_array + half1, quad1 / 100 * 98, sizeof(VAR), cmp_int);
	quadsort(r_array + span3, quad1 / 100 * 98, sizeof(VAR), cmp_int);

	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "chaos fragments", sizeof(VAR), cmp_int);

	// Make array 12% sorted, this tends to make timsort/powersort slower than fully random
	// Six short runs are pre-sorted, each quad1 * 2 / 100 elements long.
	// NOTE(review): six runs of 2% of quad1 total 12% of quad1; whether that is
	// 12% of the whole array depends on how quad1 is defined - confirm.

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		r_array[cnt] = rand();
	}
	quadsort(r_array + quad0 / 1, quad1 * 2 / 100, sizeof(VAR), cmp_int);
	quadsort(r_array + quad1 / 2, quad1 * 2 / 100, sizeof(VAR), cmp_int);
	quadsort(r_array + quad1 / 1, quad1 * 2 / 100, sizeof(VAR), cmp_int);
	quadsort(r_array + half1 / 1, quad1 * 2 / 100, sizeof(VAR), cmp_int);
	quadsort(r_array + span3 / 2, quad1 * 2 / 100, sizeof(VAR), cmp_int);
	quadsort(r_array + span3 / 1, quad1 * 2 / 100, sizeof(VAR), cmp_int);

	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "order fragments", sizeof(VAR), cmp_int);

	// Make array 95% generic: an element is random when rand() % 20 == 0,
	// otherwise it is the constant 1000000000

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		if (rand() % 20 == 0)
		{
			r_array[cnt] = rand();
		}
		else
		{
			r_array[cnt] = 1000000000;
		}
	}
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "95% generic", sizeof(VAR), cmp_int);

	// Three saws: random data with three consecutive runs of max / 3 elements,
	// each sorted on its own

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		r_array[cnt] = rand();
	}
	quadsort(r_array, max / 3, sizeof(VAR), cmp_int);
	quadsort(r_array + max / 3, max / 3, sizeof(VAR), cmp_int);
	quadsort(r_array + max / 3 * 2, max / 3, sizeof(VAR), cmp_int);

	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "three saws", sizeof(VAR), cmp_int);

	// various combinations of reverse and ascending order data
	// (disabled; the code below also uses nmemb, half2, quad2, quad3, quad4 and
	// cmp_rev, none of which are used by the live sections above)
/*
	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad0, half1, sizeof(VAR), cmp_int);
	quadsort(r_array + half1, half2, sizeof(VAR), cmp_int);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "aaaaa aaaaa", sizeof(VAR), cmp_int);

	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad1 / 2, nmemb - quad1 / 2, sizeof(VAR), cmp_int);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "raaaaaaaaaa", sizeof(VAR), cmp_int);

	size_t span2 = quad2 + quad3 + quad4;

	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad1, span2, sizeof(VAR), cmp_int);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "rr aaaaaaaa", sizeof(VAR), cmp_int);

	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad0, quad1, sizeof(VAR), cmp_int);
	quadsort(r_array + half1, half2, sizeof(VAR), cmp_int);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "aa rr aaaaa", sizeof(VAR), cmp_int);

	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad0, half1, sizeof(VAR), cmp_int);
	quadsort(r_array + span3, quad4, sizeof(VAR), cmp_int);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "aaaaa rr aa", sizeof(VAR), cmp_int);

	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad0, nmemb, sizeof(VAR), cmp_int);
	qsort(r_array + quad0, half1, sizeof(VAR), cmp_rev);
	qsort(r_array + half1, half2, sizeof(VAR), cmp_rev);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "rrrrr rrrrr", sizeof(VAR), cmp_int);

	for (cnt = 0 ; cnt < max ; cnt++) r_array[cnt] = rand();
	quadsort(r_array + quad0, nmemb, sizeof(VAR), cmp_int);
	qsort(r_array + quad0, quad1, sizeof(VAR), cmp_rev);
	qsort(r_array + quad1, quad2, sizeof(VAR), cmp_rev);
	qsort(r_array + half1, quad3, sizeof(VAR), cmp_rev);
	qsort(r_array + span3, quad4, sizeof(VAR), cmp_rev);
	run_test(a_array, r_array, v_array, max, max, samples, repetitions, repetitions, "rr rr rr rr", sizeof(VAR), cmp_int);
*/
#endif

--------------------------------------------------------------------------------
/src/fluxsort.c:
--------------------------------------------------------------------------------
// fluxsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com

// Size threshold used by the partition routines later in this file: ranges of
// at most FLUX_OUT elements are handed to quadsort_swap instead of being
// partitioned further.
#define FLUX_OUT 96

// Forward declaration: flux_analyze calls flux_partition before its definition.
// (The fourth parameter is named piv in the definition.)
void FUNC(flux_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *ptp, size_t nmemb, CMPFUNC *cmp);

// Determine whether to use mergesort or quicksort
//
// The array is treated as four quarters of quad1..quad4 elements. Adjacent
// pairs of every quarter are compared in lock-step and two statistics are
// kept per quarter:
//   balance - how many adjacent pairs had cmp() > 0 (partly estimated when
//             stretches are skipped, see below)
//   streaks - how many 32-pair windows were entirely in order or entirely
//             out of order
// With those the function
//   1. returns at once when nothing was out of order,
//   2. reverses quarters in which every pair was out of order,
//   3. sorts each quarter with quadsort_swap (enough streaks) or
//      flux_partition (otherwise), treating neighbouring flux_partition
//      quarters as one range,
//   4. merges the four sorted quarters through swap back into array.
//
// array:     elements to sort, in place
// swap:      scratch buffer; the calls below use it up to swap + nmemb
// swap_size: forwarded unchanged to quadsort_swap
// nmemb:     element count; the callers in this file only pass nmemb > 132
// cmp:       comparison function (or macro, see the #ifdef cmp blocks)

void FUNC(flux_analyze)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp)
{
	unsigned char loop, asum, bsum, csum, dsum;
	unsigned int astreaks, bstreaks, cstreaks, dstreaks;
	size_t quad1, quad2, quad3, quad4, half1, half2;
	size_t cnt, abalance, bbalance, cbalance, dbalance;
	VAR *pta, *ptb, *ptc, *ptd;

	half1 = nmemb / 2;
	quad1 = half1 / 2;
	quad2 = half1 - quad1;
	half2 = nmemb - half1;
	quad3 = half2 / 2;
	quad4 = half2 - quad3;

	// one cursor per quarter
	pta = array;
	ptb = array + quad1;
	ptc = array + half1;
	ptd = array + half1 + quad3;

	astreaks = bstreaks = cstreaks = dstreaks = 0;
	abalance = bbalance = cbalance = dbalance = 0;

	// a quarter longer than quad1 gets one comparison up front, so that all
	// four cursors have the same number of pairs left for the shared loops
	if (quad1 < quad2) {bbalance += cmp(ptb, ptb + 1) > 0; ptb++;}
	if (quad1 < quad3) {cbalance += cmp(ptc, ptc + 1) > 0; ptc++;}
	if (quad1 < quad4) {dbalance += cmp(ptd, ptd + 1) > 0; ptd++;}

	// bulk scan: 32 pairs per quarter (128 comparisons) per iteration
	for (cnt = nmemb ; cnt > 132 ; cnt -= 128)
	{
		for (asum = bsum = csum = dsum = 0, loop = 32 ; loop ; loop--)
		{
			asum += cmp(pta, pta + 1) > 0; pta++;
			bsum += cmp(ptb, ptb + 1) > 0; ptb++;
			csum += cmp(ptc, ptc + 1) > 0; ptc++;
			dsum += cmp(ptd, ptd + 1) > 0; ptd++;
		}
		// add the window count to the balance, then reuse ?sum as a 0/1 flag
		// that is set when the window was a pure streak (0 or 32 of 32)
		abalance += asum; astreaks += asum = (asum == 0) | (asum == 32);
		bbalance += bsum; bstreaks += bsum = (bsum == 0) | (bsum == 32);
		cbalance += csum; cstreaks += csum = (csum == 0) | (csum == 32);
		dbalance += dsum; dstreaks += dsum = (dsum == 0) | (dsum == 32);

		// no quarter produced a streak: skip the next 96 elements of each
		// quarter and book half of them (48) as out of order
		if (cnt > 516 && asum + bsum + csum + dsum == 0)
		{
			abalance += 48; pta += 96;
			bbalance += 48; ptb += 96;
			cbalance += 48; ptc += 96;
			dbalance += 48; ptd += 96;
			cnt -= 384;
		}
	}

	// tail scan: one pair per quarter per iteration
	for ( ; cnt > 7 ; cnt -= 4)
	{
		abalance += cmp(pta, pta + 1) > 0; pta++;
		bbalance += cmp(ptb, ptb + 1) > 0; ptb++;
		cbalance += cmp(ptc, ptc + 1) > 0; ptc++;
		dbalance += cmp(ptd, ptd + 1) > 0; ptd++;
	}

	// From here on the code treats pta, ptb, ptc and ptd as the last element of
	// quarters 1..4, so cmp(pta, pta + 1) tests the boundary between quarter 1
	// and quarter 2, and so on.

	cnt = abalance + bbalance + cbalance + dbalance;

	if (cnt == 0)
	{
		// nothing out of order inside the quarters; if the three boundaries are
		// in order as well there is nothing to do
		if (cmp(pta, pta + 1) <= 0 && cmp(ptb, ptb + 1) <= 0 && cmp(ptc, ptc + 1) <= 0)
		{
			return;
		}
	}

	// ?sum now flags a quarter whose balance equals its number of adjacent
	// pairs (quadN - 1), i.e. every counted pair was out of order
	asum = quad1 - abalance == 1;
	bsum = quad2 - bbalance == 1;
	csum = quad3 - cbalance == 1;
	dsum = quad4 - dbalance == 1;

	if (asum | bsum | csum | dsum)
	{
		// spanN is set when two neighbouring quarters are both flagged and
		// their shared boundary is out of order too, so they can be reversed
		// as a single range
		unsigned char span1 = (asum && bsum) * (cmp(pta, pta + 1) > 0);
		unsigned char span2 = (bsum && csum) * (cmp(ptb, ptb + 1) > 0);
		unsigned char span3 = (csum && dsum) * (cmp(ptc, ptc + 1) > 0);

		// quad_reversal is defined elsewhere; presumably it reverses the range
		// between its two pointer arguments, both inclusive - TODO confirm
		switch (span1 | span2 * 2 | span3 * 4)
		{
			case 0: break;
			case 1: FUNC(quad_reversal)(array, ptb); abalance = bbalance = 0; break;
			case 2: FUNC(quad_reversal)(pta + 1, ptc); bbalance = cbalance = 0; break;
			case 3: FUNC(quad_reversal)(array, ptc); abalance = bbalance = cbalance = 0; break;
			case 4: FUNC(quad_reversal)(ptb + 1, ptd); cbalance = dbalance = 0; break;
			case 5: FUNC(quad_reversal)(array, ptb);
				FUNC(quad_reversal)(ptb + 1, ptd); abalance = bbalance = cbalance = dbalance = 0; break;
			case 6: FUNC(quad_reversal)(pta + 1, ptd); bbalance = cbalance = dbalance = 0; break;
			case 7: FUNC(quad_reversal)(array, ptd); return;
		}
		// flagged quarters that were not part of a joined span are reversed on
		// their own; a zero balance marks a quarter as already handled
		if (asum && abalance) {FUNC(quad_reversal)(array, pta); abalance = 0;}
		if (bsum && bbalance) {FUNC(quad_reversal)(pta + 1, ptb); bbalance = 0;}
		if (csum && cbalance) {FUNC(quad_reversal)(ptb + 1, ptc); cbalance = 0;}
		if (dsum && dbalance) {FUNC(quad_reversal)(ptc + 1, ptd); dbalance = 0;}
	}

#ifdef cmp
	cnt = nmemb / 256; // switch to quadsort if at least 50% ordered
#else
	cnt = nmemb / 512; // switch to quadsort if at least 25% ordered
#endif
	// ?sum now selects the sorter per quarter: 1 = quadsort_swap, 0 = flux_partition
	asum = astreaks > cnt;
	bsum = bstreaks > cnt;
	csum = cstreaks > cnt;
	dsum = dstreaks > cnt;

#ifndef cmp
	// with a comparison function, quarters larger than QUAD_CACHE always take
	// the quadsort_swap path
	if (quad1 > QUAD_CACHE)
	{
		asum = bsum = csum = dsum = 1;
	}
#endif

	// The case value is a bit mask of the quarters that go to quadsort_swap
	// (1 = first ... 8 = fourth). A quadsort_swap call is skipped when the
	// quarter's balance is zero. Neighbouring flux_partition quarters are
	// partitioned together as one range.
	switch (asum + bsum * 2 + csum * 4 + dsum * 8)
	{
		case 0:
			// the whole array is partitioned in one go; no merge is needed
			FUNC(flux_partition)(array, swap, array, swap + nmemb, nmemb, cmp);
			return;
		case 1:
			if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp);
			FUNC(flux_partition)(pta + 1, swap, pta + 1, swap + quad2 + half2, quad2 + half2, cmp);
			break;
		case 2:
			FUNC(flux_partition)(array, swap, array, swap + quad1, quad1, cmp);
			if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp);
			FUNC(flux_partition)(ptb + 1, swap, ptb + 1, swap + half2, half2, cmp);
			break;
		case 3:
			if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp);
			if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp);
			FUNC(flux_partition)(ptb + 1, swap, ptb + 1, swap + half2, half2, cmp);
			break;
		case 4:
			FUNC(flux_partition)(array, swap, array, swap + half1, half1, cmp);
			if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp);
			FUNC(flux_partition)(ptc + 1, swap, ptc + 1, swap + quad4, quad4, cmp);
			break;
		case 8:
			FUNC(flux_partition)(array, swap, array, swap + half1 + quad3, half1 + quad3, cmp);
			if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp);
			break;
		case 9:
			if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp);
			FUNC(flux_partition)(pta + 1, swap, pta + 1, swap + quad2 + quad3, quad2 + quad3, cmp);
			if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp);
			break;
		case 12:
			FUNC(flux_partition)(array, swap, array, swap + half1, half1, cmp);
			if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp);
			if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp);
			break;
		// remaining combinations: every quarter is handled on its own
		case 5:
		case 6:
		case 7:
		case 10:
		case 11:
		case 13:
		case 14:
		case 15:
			if (asum)
			{
				if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp);
			}
			else FUNC(flux_partition)(array, swap, array, swap + quad1, quad1, cmp);
			if (bsum)
			{
				if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp);
			}
			else FUNC(flux_partition)(pta + 1, swap, pta + 1, swap + quad2, quad2, cmp);
			if (csum)
			{
				if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp);
			}
			else FUNC(flux_partition)(ptb + 1, swap, ptb + 1, swap + quad3, quad3, cmp);
			if (dsum)
			{
				if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp);
			}
			else FUNC(flux_partition)(ptc + 1, swap, ptc + 1, swap + quad4, quad4, cmp);
			break;
	}

	// Merge step. A boundary that is already in order lets the two neighbours
	// be copied to swap as they are; otherwise they are merged into swap.
	// cross_merge is defined elsewhere; presumably its arguments are
	// (destination, source, left length, right length, cmp) - TODO confirm.
	if (cmp(pta, pta + 1) <= 0)
	{
		if (cmp(ptc, ptc + 1) <= 0)
		{
			if (cmp(ptb, ptb + 1) <= 0)
			{
				// all three boundaries are in order: the array is sorted
				return;
			}
			memcpy(swap, array, nmemb * sizeof(VAR));
		}
		else
		{
			FUNC(cross_merge)(swap + half1, array + half1, quad3, quad4, cmp);
			memcpy(swap, array, half1 * sizeof(VAR));
		}
	}
	else
	{
		if (cmp(ptc, ptc + 1) <= 0)
		{
			memcpy(swap + half1, array + half1, half2 * sizeof(VAR));
			FUNC(cross_merge)(swap, array, quad1, quad2, cmp);
		}
		else
		{
			FUNC(cross_merge)(swap + half1, ptb + 1, quad3, quad4, cmp);
			FUNC(cross_merge)(swap, array, quad1, quad2, cmp);
		}
	}
	// final merge of the two halves from swap back into array
	FUNC(cross_merge)(array, swap, half1, half2, cmp);
}

// The next 4 
// functions are used for pivot selection

// Walk two ranges of len elements with halving steps: each step compares the
// elements len positions ahead of both cursors and advances the cursor whose
// element compared <= 0 against the other. Returns the larger of the two
// elements the cursors end on. median_of_cbrt calls it on two sorted sample
// halves to obtain a pivot.

VAR FUNC(binary_median)(VAR *pta, VAR *ptb, size_t len, CMPFUNC *cmp)
{
	while (len /= 2)
	{
		if (cmp(pta + len, ptb + len) <= 0) pta += len; else ptb += len;
	}
	return cmp(pta, ptb) > 0 ? *pta : *ptb;
}

// Discard the smallest and the largest of the four elements at pta[0..3]:
// afterwards pta[1] and pta[2] hold the two middle values. pta[0] and pta[3]
// are left with whatever the pair-ordering step put there.

void FUNC(trim_four)(VAR *pta, CMPFUNC *cmp)
{
	VAR swap;
	size_t x;

	// order the pairs (0,1) and (2,3) without branching
	x = cmp(pta, pta + 1) > 0; swap = pta[!x]; pta[0] = pta[x]; pta[1] = swap; pta += 2;
	x = cmp(pta, pta + 1) > 0; swap = pta[!x]; pta[0] = pta[x]; pta[1] = swap; pta -= 2;

	// slot 2 becomes the larger of the two pair minima (drops the overall minimum)
	x = (cmp(pta, pta + 2) <= 0) * 2; pta[2] = pta[x]; pta++;
	// slot 1 becomes the smaller of the two pair maxima (drops the overall maximum)
	x = (cmp(pta, pta + 2) > 0) * 2; pta[0] = pta[x];
}

// Pick a pivot from nine elements sampled at a stride of nmemb / 9, starting at
// array[0]. The samples are copied to a local buffer; array is not modified.

VAR FUNC(median_of_nine)(VAR *array, size_t nmemb, CMPFUNC *cmp)
{
	VAR *pta, swap[9];
	size_t x, y, z;

	z = nmemb / 9;

	pta = array;

	for (x = 0 ; x < 9 ; x++)
	{
		swap[x] = *pta;

		pta += z;
	}

	// trim samples 0..3 and 4..7; the survivors sit at [1], [2] and [5], [6]
	FUNC(trim_four)(swap, cmp);
	FUNC(trim_four)(swap + 4, cmp);

	// regroup survivor [5] and the ninth sample with [1], [2] and trim again
	swap[0] = swap[5];
	swap[3] = swap[8];

	FUNC(trim_four)(swap, cmp);

	// bring in survivor [6] and take the median of swap[0..2]
	swap[0] = swap[6];

	x = cmp(swap + 0, swap + 1) > 0;
	y = cmp(swap + 0, swap + 2) > 0;
	z = cmp(swap + 1, swap + 2) > 0;

	return swap[(x == y) + (y ^ z)];
}

// Pick a pivot from a larger sample. The sample count starts at 32 and doubles
// until its cube is at least nmemb.
//
// array, swap: the two buffers; the samples are written to the start of
//              whichever one ptx does not point at
// ptx:         start of the nmemb input elements
// generic:     set to 1 when the last sample of each sorted half compares <= 0
//              against the first sample, else 0; the caller then falls back to
//              quadsort_swap
// Returns the pivot value.

VAR FUNC(median_of_cbrt)(VAR *array, VAR *swap, VAR *ptx, size_t nmemb, int *generic, CMPFUNC *cmp)
{
	VAR *pta, *pts;
	size_t cnt, div, cbrt;

	for (cbrt = 32 ; nmemb > cbrt * cbrt * cbrt ; cbrt *= 2) {}

	div = nmemb / cbrt;

	// the start offset is derived from the address of a local variable,
	// presumably as a cheap source of variation between calls
	pta = ptx + (size_t) &div / 16 % div;
	pts = ptx == array ? swap : array;

	for (cnt = 0 ; cnt < cbrt ; cnt++)
	{
		pts[cnt] = *pta;

		pta += div;
	}
	cbrt /= 2;

	// sort both sample halves on their own, using the space behind them as scratch
	FUNC(quadsort_swap)(pts, pts + cbrt * 2, cbrt, cbrt, cmp);
	FUNC(quadsort_swap)(pts + cbrt, pts + cbrt * 2, cbrt, cbrt, cmp);

	*generic = (cmp(pts + cbrt * 2 - 1, pts) <= 0) & (cmp(pts + cbrt - 1, pts) <= 0);

	return FUNC(binary_median)(pts, pts + cbrt, cbrt, cmp);
}

// As per suggestion by Marshall Lochbaum to improve generic data handling by mimicking dual-pivot quicksort
//
// Partition the nmemb elements at ptx on "strictly less than *piv": elements
// with cmp(piv, element) > 0 are packed at the front of array, all others are
// collected in swap and copied back behind them. Only the front part is sorted
// further, with quadsort_swap when it is small or dominant and flux_partition
// otherwise; the back part is left as copied.
// NOTE(review): leaving the back part untouched presumably relies on the
// caller only using this when those elements are all equal to the pivot -
// confirm against the call sites in flux_partition.

void FUNC(flux_reverse_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *piv, size_t nmemb, CMPFUNC *cmp)
{
	size_t a_size, s_size;

#if !defined __clang__
	{
		size_t cnt, m, val;
		VAR *pts = swap;

		// Branchless form: every element is stored both at array[m] and at
		// pts[-m]; m only advances for elements below the pivot, pts advances
		// for every element, so pts[-m] is the next free slot in swap.
		for (m = 0, cnt = nmemb / 8 ; cnt ; cnt--)
		{
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
		}

		for (cnt = nmemb % 8 ; cnt ; cnt--)
		{
			val = cmp(piv, ptx) > 0; pts[-m] = array[m] = *ptx++; m += val; pts++;
		}
		a_size = m;
		s_size = nmemb - a_size;
	}
#else
	{
		size_t cnt;
		VAR *tmp, *pta = array, *pts = swap;

		// clang form: choose the destination cursor, then store once
		for (cnt = nmemb / 8 ; cnt ; cnt--)
		{
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
		}

		for (cnt = nmemb % 8 ; cnt ; cnt--)
		{
			tmp = cmp(piv, ptx) > 0 ? pta++ : pts++; *tmp = *ptx++;
		}
		a_size = pta - array;
		s_size = pts - swap;
	}
#endif
	// append the not-below-pivot elements behind the below-pivot ones
	memcpy(array + a_size, swap, s_size * sizeof(VAR));

	if (s_size <= a_size / 16 || a_size <= FLUX_OUT)
	{
		FUNC(quadsort_swap)(array, swap, a_size, a_size, cmp);
		return;
	}
	FUNC(flux_partition)(array, swap, array, piv, a_size, cmp);
}

// Partition the nmemb elements at ptx on "<= *piv": elements with
// cmp(element, piv) <= 0 are packed at the front of array, the others are
// written to swap and normally stay there.
//
// Returns the number of elements placed in array. The exception is a long
// leading run (more than nmemb / 4 elements, tracked in blocks of 8) combined
// with a non-empty swap part: then the swap part is copied back, both parts are
// sorted with quadsort_swap, and 0 is returned to tell the caller the whole
// range is finished.

size_t FUNC(flux_default_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *piv, size_t nmemb, CMPFUNC *cmp)
{
	size_t run = 0, a = 0, m = 0;

#if !defined __clang__
	size_t val;

	// Branchless form, see flux_reverse_partition; here swap itself is advanced
	// and rewound after the loops.
	for (a = 8 ; a <= nmemb ; a += 8)
	{
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;

		// every element so far went to array: extend the leading run
		if (m == a) run = a;
	}

	for (a = nmemb % 8 ; a ; a--)
	{
		val = cmp(ptx, piv) <= 0; swap[-m] = array[m] = *ptx++; m += val; swap++;
	}
	swap -= nmemb;
#else
	VAR *tmp, *pta = array, *pts = swap;

	for (a = 8 ; a <= nmemb ; a += 8)
	{
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;

		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;

		// NOTE(review): this form extends the run while either side is still
		// empty, whereas the non-clang form requires all elements so far to
		// have gone to array - confirm the difference is intended
		if (pta == array || pts == swap) run = a;
	}

	for (a = nmemb % 8 ; a ; a--)
	{
		tmp = cmp(ptx, piv) <= 0 ? pta++ : pts++; *tmp = *ptx++;
	}
	m = pta - array;
#endif

	if (run <= nmemb / 4)
	{
		return m;
	}

	if (m == nmemb)
	{
		return m;
	}

	a = nmemb - m;

	memcpy(array + m, swap, a * sizeof(VAR));

	FUNC(quadsort_swap)(array + m, swap, a, a, cmp);
	FUNC(quadsort_swap)(array, swap, m, m, cmp);

	return 0;
}

// Quicksort driver. Sorts the nmemb elements at ptx; the result ends up in
// array.
//
// array: destination of the sorted range
// swap:  scratch buffer
// ptx:   where the input currently is (array, or swap for the recursive call)
// piv:   one past the slot for the next pivot; pivots are stored downwards from
//        here (callers pass an address inside swap), so piv[1] is the pivot of
//        the enclosing step
// The part above the pivot is handled by recursion or quadsort_swap, the part
// at or below it by the next loop iteration.

void FUNC(flux_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *piv, size_t nmemb, CMPFUNC *cmp)
{
	size_t a_size = 0, s_size;
	int generic = 0;

	while (1)
	{
		--piv;

		if (nmemb <= 2048)
		{
			*piv = FUNC(median_of_nine)(ptx, nmemb, cmp);
		}
		else
		{
			*piv = FUNC(median_of_cbrt)(array, swap, ptx, nmemb, &generic, cmp);

			if (generic)
			{
				// the samples were flagged as generic: give up on
				// partitioning and sort the range with quadsort_swap
				if (ptx == swap)
				{
					memcpy(array, swap, nmemb * sizeof(VAR));
				}
				FUNC(quadsort_swap)(array, swap, nmemb, nmemb, cmp);
				return;
			}
		}

		// not the first pass and the previous pivot compares <= 0 against the
		// new one: switch to the strictly-less partition
		if (a_size && cmp(piv + 1, piv) <= 0)
		{
			FUNC(flux_reverse_partition)(array, swap, array, piv, nmemb, cmp);
			return;
		}
		a_size = FUNC(flux_default_partition)(array, swap, ptx, piv, nmemb, cmp);
		s_size = nmemb - a_size;

		// part above the pivot, currently held in swap
		if (a_size <= s_size / 32 || s_size <= FLUX_OUT)
		{
			if (a_size == 0)
			{
				// flux_default_partition already sorted the whole range
				return;
			}
			if (s_size == 0)
			{
				// nothing above the pivot
				FUNC(flux_reverse_partition)(array, swap, array, piv, a_size, cmp);
				return;
			}
			memcpy(array + a_size, swap, s_size * sizeof(VAR));
			FUNC(quadsort_swap)(array + a_size, swap, s_size, s_size, cmp);
		}
		else
		{
			// input is in swap, output goes to array + a_size
			FUNC(flux_partition)(array + a_size, swap, swap, piv, s_size, cmp);
		}

		// part at or below the pivot, already in array
		if (s_size <= a_size / 32 || a_size <= FLUX_OUT)
		{
			if (a_size <= FLUX_OUT)
			{
				FUNC(quadsort_swap)(array, swap, a_size, a_size, cmp);
			}
			else
			{
				FUNC(flux_reverse_partition)(array, swap, array, piv, a_size, cmp);
			}
			return;
		}
		// continue with the lower part in place of a recursive call
		nmemb = a_size;
		ptx = array;
	}
}

// Typed entry point. Arrays of at most 132 elements go straight to quadsort;
// larger ones get a scratch buffer of nmemb elements, which is freed before
// returning. If the buffer cannot be allocated the array is sorted with
// quadsort instead.

void FUNC(fluxsort)(void *array, size_t nmemb, CMPFUNC *cmp)
{
	if (nmemb <= 132)
	{
		FUNC(quadsort)(array, nmemb, cmp);
	}
	else
	{
		VAR *pta = (VAR *) array;
		VAR *swap = (VAR *) malloc(nmemb * sizeof(VAR));

		if (swap == NULL)
		{
			FUNC(quadsort)(array, nmemb, cmp);
			return;
		}
		FUNC(flux_analyze)(pta, swap, nmemb, nmemb, cmp);

		free(swap);
	}
}

// Same as FUNC(fluxsort), but with a scratch buffer supplied by the caller.
// swap_size is forwarded to quadsort_swap and flux_analyze.
// NOTE(review): flux_analyze uses swap up to swap + nmemb, so the buffer
// presumably has to hold at least nmemb elements - confirm.

void FUNC(fluxsort_swap)(void *array, void *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp)
{
	if (nmemb <= 132)
	{
		FUNC(quadsort_swap)(array, swap, swap_size, nmemb, cmp);
	}
	else
	{
		VAR *pta = (VAR *) array;
		VAR *pts = (VAR *) swap;

		FUNC(flux_analyze)(pta, pts, swap_size, nmemb, cmp);
	}
}

--------------------------------------------------------------------------------
/src/fluxsort.h:
--------------------------------------------------------------------------------
// fluxsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com

#ifndef FLUXSORT_H
#define 
FLUXSORT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | typedef int CMPFUNC (const void *a, const void *b); 14 | 15 | //#define cmp(a,b) (*(a) > *(b)) 16 | 17 | #ifndef QUADSORT_H 18 | #include "quadsort.h" 19 | #endif 20 | 21 | // When sorting an array of 32/64 bit pointers, like a string array, QUAD_CACHE 22 | // needs to be adjusted in quadsort.h and here for proper performance when 23 | // sorting large arrays. 24 | 25 | #ifdef cmp 26 | #define QUAD_CACHE 4294967295 27 | #else 28 | //#define QUAD_CACHE 131072 29 | #define QUAD_CACHE 262144 30 | //#define QUAD_CACHE 524288 31 | //#define QUAD_CACHE 4294967295 32 | #endif 33 | 34 | ////////////////////////////////////////////////////////// 35 | // ┌───────────────────────────────────────────────────┐// 36 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 37 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 38 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 39 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 40 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 41 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 42 | // └───────────────────────────────────────────────────┘// 43 | ////////////////////////////////////////////////////////// 44 | 45 | #define VAR int 46 | #define FUNC(NAME) NAME##32 47 | 48 | #include "fluxsort.c" 49 | 50 | #undef VAR 51 | #undef FUNC 52 | 53 | // fluxsort_prim 54 | 55 | #define VAR int 56 | #define FUNC(NAME) NAME##_int32 57 | #ifndef cmp 58 | #define cmp(a,b) (*(a) > *(b)) 59 | #include "fluxsort.c" 60 | #undef cmp 61 | #else 62 | #include "fluxsort.c" 63 | #endif 64 | #undef VAR 65 | #undef FUNC 66 | 67 | #define VAR unsigned int 68 | #define FUNC(NAME) NAME##_uint32 69 | #ifndef cmp 70 | #define cmp(a,b) (*(a) > *(b)) 71 | #include "fluxsort.c" 72 | #undef cmp 73 | #else 74 | #include "fluxsort.c" 75 | #endif 76 | #undef VAR 77 | #undef FUNC 78 | 79 | ////////////////////////////////////////////////////////// 80 | // 
┌───────────────────────────────────────────────────┐// 81 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 82 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 83 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 84 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 85 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 86 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 87 | // └───────────────────────────────────────────────────┘// 88 | ////////////////////////////////////////////////////////// 89 | 90 | #define VAR long long 91 | #define FUNC(NAME) NAME##64 92 | 93 | #include "fluxsort.c" 94 | 95 | #undef VAR 96 | #undef FUNC 97 | 98 | // fluxsort_prim 99 | 100 | #define VAR long long 101 | #define FUNC(NAME) NAME##_int64 102 | #ifndef cmp 103 | #define cmp(a,b) (*(a) > *(b)) 104 | #include "fluxsort.c" 105 | #undef cmp 106 | #else 107 | #include "fluxsort.c" 108 | #endif 109 | #undef VAR 110 | #undef FUNC 111 | 112 | #define VAR unsigned long long 113 | #define FUNC(NAME) NAME##_uint64 114 | #ifndef cmp 115 | #define cmp(a,b) (*(a) > *(b)) 116 | #include "fluxsort.c" 117 | #undef cmp 118 | #else 119 | #include "fluxsort.c" 120 | #endif 121 | #undef VAR 122 | #undef FUNC 123 | 124 | // This section is outside of 32/64 bit pointer territory, so no cache checks 125 | // necessary, unless sorting 32+ byte structures. 
126 | 127 | #undef QUAD_CACHE 128 | #define QUAD_CACHE 4294967295 129 | 130 | ////////////////////////////////////////////////////////// 131 | //┌────────────────────────────────────────────────────┐// 132 | //│ █████┐ ██████┐ ██████┐████████┐ │// 133 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 134 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 135 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 136 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 137 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 138 | //└────────────────────────────────────────────────────┘// 139 | ////////////////////////////////////////////////////////// 140 | 141 | #define VAR char 142 | #define FUNC(NAME) NAME##8 143 | 144 | #include "fluxsort.c" 145 | 146 | #undef VAR 147 | #undef FUNC 148 | 149 | ////////////////////////////////////////////////////////// 150 | //┌────────────────────────────────────────────────────┐// 151 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 152 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 153 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 154 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 155 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 156 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 157 | //└────────────────────────────────────────────────────┘// 158 | ////////////////////////////////////////////////////////// 159 | 160 | #define VAR short 161 | #define FUNC(NAME) NAME##16 162 | 163 | #include "fluxsort.c" 164 | 165 | #undef VAR 166 | #undef FUNC 167 | 168 | 169 | 170 | ////////////////////////////////////////////////////////// 171 | //┌────────────────────────────────────────────────────┐// 172 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 173 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 174 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │// 175 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 176 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 177 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 178 | //└────────────────────────────────────────────────────┘// 179 | 
////////////////////////////////////////////////////////// 180 | 181 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 182 | #define VAR long double 183 | #define FUNC(NAME) NAME##128 184 | #include "fluxsort.c" 185 | #undef VAR 186 | #undef FUNC 187 | #endif 188 | 189 | ////////////////////////////////////////////////////////////////////////// 190 | //┌────────────────────────────────────────────────────────────────────┐// 191 | //│███████┐██┐ ██┐ ██┐██┐ ██┐███████┐ ██████┐ ██████┐ ████████┐ │// 192 | //│██┌────┘██│ ██│ ██│└██┐██┌┘██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 193 | //│█████┐ ██│ ██│ ██│ └███┌┘ ███████┐██│ ██│██████┌┘ ██│ │// 194 | //│██┌──┘ ██│ ██│ ██│ ██┌██┐ └────██│██│ ██│██┌──██┐ ██│ │// 195 | //│██│ ███████┐└██████┌┘██┌┘ ██┐███████│└██████┌┘██│ ██│ ██│ │// 196 | //│└─┘ └──────┘ └─────┘ └─┘ └─┘└──────┘ └─────┘ └─┘ └─┘ └─┘ │// 197 | //└────────────────────────────────────────────────────────────────────┘// 198 | ////////////////////////////////////////////////////////////////////////// 199 | 200 | void fluxsort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 201 | { 202 | if (nmemb < 2) 203 | { 204 | return; 205 | } 206 | 207 | switch (size) 208 | { 209 | case sizeof(char): 210 | fluxsort8(array, nmemb, cmp); 211 | return; 212 | 213 | case sizeof(short): 214 | fluxsort16(array, nmemb, cmp); 215 | return; 216 | 217 | case sizeof(int): 218 | fluxsort32(array, nmemb, cmp); 219 | return; 220 | 221 | case sizeof(long long): 222 | fluxsort64(array, nmemb, cmp); 223 | return; 224 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 225 | case sizeof(long double): 226 | fluxsort128(array, nmemb, cmp); 227 | return; 228 | #endif 229 | 230 | default: 231 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 232 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 233 | #else 234 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long)); 235 | #endif 236 | } 237 | } 
238 | 239 | // This must match quadsort_prim() 240 | 241 | void fluxsort_prim(void *array, size_t nmemb, size_t size) 242 | { 243 | if (nmemb < 2) 244 | { 245 | return; 246 | } 247 | 248 | switch (size) 249 | { 250 | case 4: 251 | fluxsort_int32(array, nmemb, NULL); 252 | return; 253 | case 5: 254 | fluxsort_uint32(array, nmemb, NULL); 255 | return; 256 | case 8: 257 | fluxsort_int64(array, nmemb, NULL); 258 | return; 259 | case 9: 260 | fluxsort_uint64(array, nmemb, NULL); 261 | return; 262 | default: 263 | assert(size == sizeof(int) || size == sizeof(int) + 1 || size == sizeof(long long) || size == sizeof(long long) + 1); 264 | return; 265 | } 266 | } 267 | 268 | // Sort arrays of structures, the comparison function must be by reference. 269 | 270 | void fluxsort_size(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 271 | { 272 | char **pti, *pta, *pts; 273 | size_t index, offset; 274 | 275 | pta = (char *) array; 276 | pti = (char **) malloc(nmemb * sizeof(char *)); 277 | 278 | assert(pti != NULL); 279 | 280 | for (index = offset = 0 ; index < nmemb ; index++) 281 | { 282 | pti[index] = pta + offset; 283 | 284 | offset += size; 285 | } 286 | 287 | switch (sizeof(size_t)) 288 | { 289 | case 4: fluxsort32(pti, nmemb, cmp); break; 290 | case 8: fluxsort64(pti, nmemb, cmp); break; 291 | } 292 | 293 | pts = (char *) malloc(nmemb * size); 294 | 295 | assert(pts != NULL); 296 | 297 | for (index = 0 ; index < nmemb ; index++) 298 | { 299 | memcpy(pts, pti[index], size); 300 | 301 | pts += size; 302 | } 303 | pts -= nmemb * size; 304 | 305 | memcpy(array, pts, nmemb * size); 306 | 307 | free(pti); 308 | free(pts); 309 | } 310 | 311 | #undef QUAD_CACHE 312 | 313 | #endif 314 | -------------------------------------------------------------------------------- /src/gridsort.c: -------------------------------------------------------------------------------- 1 | // gridsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | STRUCT(x_node) 4 | { 5 | VAR *swap; 6 | 
size_t y_size; 7 | size_t y; 8 | VAR *y_base; 9 | STRUCT(y_node) **y_axis; 10 | }; 11 | 12 | STRUCT(y_node) 13 | { 14 | size_t z_size; 15 | VAR *z_axis1; 16 | VAR *z_axis2; 17 | }; 18 | 19 | STRUCT(x_node) *FUNC(create_grid)(VAR *array, size_t nmemb, CMPFUNC *cmp) 20 | { 21 | STRUCT(x_node) *x_node = (STRUCT(x_node) *) malloc(sizeof(STRUCT(x_node))); 22 | STRUCT(y_node) *y_node; 23 | 24 | for (BSC_Z = BSC_X ; BSC_Z * BSC_Z / 4 < nmemb ; BSC_Z *= 4); 25 | 26 | x_node->swap = (VAR *) malloc(BSC_Z * 2 * sizeof(VAR)); 27 | 28 | x_node->y_base = (VAR *) malloc(BSC_Z * sizeof(VAR)); 29 | 30 | x_node->y_axis = (STRUCT(y_node) **) malloc(BSC_Z * sizeof(STRUCT(y_node) *)); 31 | 32 | FUNC(quadsort_swap)(array, x_node->swap, BSC_Z * 2, BSC_Z * 2, cmp); 33 | 34 | for (int cnt = 0 ; cnt < 2 ; cnt++) 35 | { 36 | y_node = (STRUCT(y_node) *) malloc(sizeof(STRUCT(y_node))); 37 | 38 | y_node->z_axis1 = (VAR *) malloc(BSC_Z * sizeof(VAR)); 39 | memcpy(y_node->z_axis1, array + cnt * BSC_Z, BSC_Z * sizeof(VAR)); 40 | 41 | y_node->z_axis2 = (VAR *) malloc(BSC_Z * sizeof(VAR)); 42 | 43 | y_node->z_size = 0; 44 | 45 | x_node->y_axis[cnt] = y_node; 46 | x_node->y_base[cnt] = y_node->z_axis1[0]; 47 | } 48 | x_node->y_size = 2; 49 | x_node->y = 0; 50 | 51 | return x_node; 52 | } 53 | 54 | // used by destroy_grid 55 | 56 | // y_node->z_axis1 should be sorted and of BSC_Z size. 57 | // y_node->z_axis2 should be unsorted and of y_node->z_size size. 
58 | 59 | void FUNC(twin_merge_cpy)(STRUCT(x_node) *x_node, VAR *dest, STRUCT(y_node) *y_node, CMPFUNC *cmp) 60 | { 61 | VAR *ptl = y_node->z_axis1; 62 | VAR *ptr = y_node->z_axis2; 63 | size_t nmemb1 = BSC_Z; 64 | size_t nmemb2 = y_node->z_size; 65 | VAR *tpl = y_node->z_axis1 + nmemb1 - 1; 66 | VAR *tpr = y_node->z_axis2 + nmemb2 - 1; 67 | VAR *ptd = dest; 68 | VAR *tpd = dest + nmemb1 + nmemb2 - 1; 69 | size_t loop, x, y; 70 | 71 | FUNC(quadsort_swap)(ptr, x_node->swap, nmemb2, nmemb2, cmp); 72 | 73 | while (1) 74 | { 75 | if (tpl - ptl > 8) 76 | { 77 | ptl8_ptr: if (cmp(ptl + 7, ptr) <= 0) 78 | { 79 | memcpy(ptd, ptl, 8 * sizeof(VAR)); ptd += 8; ptl += 8; 80 | 81 | if (tpl - ptl > 8) {goto ptl8_ptr;} continue; 82 | } 83 | 84 | tpl8_tpr: if (cmp(tpl - 7, tpr) > 0) 85 | { 86 | tpd -= 7; tpl -= 7; memcpy(tpd--, tpl--, 8 * sizeof(VAR)); 87 | 88 | if (tpl - ptl > 8) {goto tpl8_tpr;} continue; 89 | } 90 | } 91 | 92 | if (tpr - ptr > 8) 93 | { 94 | ptl_ptr8: if (cmp(ptl, ptr + 7) > 0) 95 | { 96 | memcpy(ptd, ptr, 8 * sizeof(VAR)); ptd += 8; ptr += 8; 97 | 98 | if (tpr - ptr > 8) {goto ptl_ptr8;} continue; 99 | } 100 | 101 | tpl_tpr8: if (cmp(tpl, tpr - 7) <= 0) 102 | { 103 | tpd -= 7; tpr -= 7; memcpy(tpd--, tpr--, 8 * sizeof(VAR)); 104 | 105 | if (tpr - ptr > 8) {goto tpl_tpr8;} continue; 106 | } 107 | } 108 | 109 | if (tpd - ptd < 16) 110 | { 111 | break; 112 | } 113 | 114 | loop = 8; do 115 | { 116 | head_branchless_merge(ptd, x, ptl, ptr, cmp); 117 | tail_branchless_merge(tpd, y, tpl, tpr, cmp); 118 | } 119 | while (--loop); 120 | } 121 | 122 | while (tpl - ptl > 1 && tpr - ptr > 1) 123 | { 124 | if (cmp(ptl + 1, ptr) <= 0) 125 | { 126 | *ptd++ = *ptl++; *ptd++ = *ptl++; 127 | } 128 | else if (cmp(ptl, ptr + 1) > 0) 129 | { 130 | *ptd++ = *ptr++; *ptd++ = *ptr++; 131 | } 132 | else 133 | { 134 | x = cmp(ptl, ptr) <= 0; y = !x; ptd[x] = *ptr; ptr += 1; ptd[y] = *ptl; ptl += 1; ptd += 2; 135 | x = cmp(ptl, ptr) <= 0; y = !x; ptd[x] = *ptr; ptr += y; ptd[y] = *ptl; 
ptl += x; ptd++; 136 | } 137 | } 138 | 139 | while (ptl <= tpl && ptr <= tpr) 140 | { 141 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 142 | } 143 | while (ptl <= tpl) 144 | { 145 | *ptd++ = *ptl++; 146 | } 147 | while (ptr <= tpr) 148 | { 149 | *ptd++ = *ptr++; 150 | } 151 | } 152 | 153 | void FUNC(parity_twin_merge)(VAR *ptl, VAR *ptr, VAR *ptd, VAR *tpd, size_t block, CMPFUNC *cmp) 154 | { 155 | VAR *tpl, *tpr; 156 | #if !defined __clang__ 157 | unsigned char x, y; 158 | #endif 159 | tpl = ptl + block - 1; 160 | tpr = ptr + block - 1; 161 | 162 | for (block-- ; block ; block--) 163 | { 164 | head_branchless_merge(ptd, x, ptl, ptr, cmp); 165 | tail_branchless_merge(tpd, y, tpl, tpr, cmp); 166 | } 167 | *ptd = cmp(ptl, ptr) <= 0 ? *ptl : *ptr; 168 | *tpd = cmp(tpl, tpr) > 0 ? *tpl : *tpr; 169 | } 170 | 171 | // merge two sorted arrays across two buckets 172 | // [AB][AB] --> [AA][ ] + [BB][ ] 173 | 174 | void FUNC(twin_merge)(STRUCT(x_node) *x_node, STRUCT(y_node) *y_node1, STRUCT(y_node) *y_node2, CMPFUNC *cmp) 175 | { 176 | VAR *pta, *ptb, *tpa, *tpb, *pts; 177 | 178 | FUNC(quadsort_swap)(y_node1->z_axis2, x_node->swap, BSC_Z, BSC_Z, cmp); 179 | 180 | pta = y_node1->z_axis1; 181 | ptb = y_node1->z_axis2; 182 | tpa = pta + BSC_Z - 1; 183 | tpb = ptb + BSC_Z - 1; 184 | 185 | if (cmp(tpa, ptb) <= 0) 186 | { 187 | pts = y_node1->z_axis2; 188 | y_node1->z_axis2 = y_node2->z_axis1; 189 | y_node2->z_axis1 = pts; 190 | 191 | return; 192 | } 193 | 194 | if (cmp(pta, tpb) > 0) 195 | { 196 | pts = y_node1->z_axis1; 197 | y_node1->z_axis1 = y_node1->z_axis2; 198 | y_node1->z_axis2 = y_node2->z_axis1; 199 | y_node2->z_axis1 = pts; 200 | 201 | return; 202 | } 203 | 204 | FUNC(parity_twin_merge)(pta, ptb, y_node2->z_axis2, y_node2->z_axis1 + BSC_Z - 1, BSC_Z, cmp); 205 | 206 | pta = y_node1->z_axis1; y_node1->z_axis1 = y_node2->z_axis2; y_node2->z_axis2 = pta; 207 | } 208 | 209 | void FUNC(destroy_grid)(STRUCT(x_node) *x_node, VAR *array, CMPFUNC *cmp) 210 | { 211 | 
STRUCT(y_node) *y_node; 212 | size_t y, z; 213 | 214 | for (y = z = 0 ; y < x_node->y_size ; y++) 215 | { 216 | y_node = x_node->y_axis[y]; 217 | 218 | if (y_node->z_size) 219 | { 220 | FUNC(twin_merge_cpy)(x_node, &array[z], y_node, cmp); 221 | } 222 | else 223 | { 224 | memcpy(&array[z], y_node->z_axis1, BSC_Z * sizeof(VAR)); 225 | } 226 | z += BSC_Z + y_node->z_size; 227 | 228 | free(y_node->z_axis1); 229 | free(y_node->z_axis2); 230 | 231 | free(y_node); 232 | } 233 | free(x_node->y_axis); 234 | free(x_node->y_base); 235 | free(x_node->swap); 236 | 237 | free(x_node); 238 | } 239 | 240 | size_t FUNC(adaptive_binary_search)(STRUCT(x_node) *x_node, VAR *array, VAR key, CMPFUNC *cmp) 241 | { 242 | static unsigned int run; 243 | size_t top, mid; 244 | VAR *base = array; 245 | 246 | if (!run) 247 | { 248 | top = x_node->y_size; 249 | 250 | goto monobound; 251 | } 252 | 253 | if (x_node->y == x_node->y_size - 1) 254 | { 255 | if (cmp(base + x_node->y, &key) <= 0) 256 | { 257 | return x_node->y; 258 | } 259 | top = x_node->y; 260 | 261 | goto monobound; 262 | } 263 | 264 | if (x_node->y == 0) 265 | { 266 | base++; 267 | 268 | if (cmp(base, &key) > 0) 269 | { 270 | return 0; 271 | } 272 | top = x_node->y_size - 1; 273 | 274 | goto monobound; 275 | } 276 | 277 | base += x_node->y; 278 | 279 | if (cmp(base, &key) <= 0) 280 | { 281 | if (cmp(base + 1, &key) > 0) 282 | { 283 | goto end; 284 | } 285 | base++; 286 | top = x_node->y_size - x_node->y - 1; 287 | 288 | } 289 | else 290 | { 291 | base--; 292 | 293 | if (cmp(base, &key) <= 0) 294 | { 295 | goto end; 296 | } 297 | top = x_node->y - 1; 298 | base = array; 299 | } 300 | 301 | monobound: 302 | 303 | while (top > 1) 304 | { 305 | mid = top / 2; 306 | 307 | if (cmp(base + mid, &key) <= 0) 308 | { 309 | base += mid; 310 | } 311 | top -= mid; 312 | } 313 | 314 | end: 315 | 316 | top = base - array; 317 | 318 | run = x_node->y == top; 319 | 320 | return x_node->y = top; 321 | } 322 | 323 | void 
FUNC(insert_y_node)(STRUCT(x_node) *x_node, size_t y) 324 | { 325 | size_t end = ++x_node->y_size; 326 | 327 | if (x_node->y_size % BSC_Z == 0) 328 | { 329 | x_node->y_base = (VAR *) realloc(x_node->y_base, (x_node->y_size + BSC_Z) * sizeof(VAR)); 330 | x_node->y_axis = (STRUCT(y_node) **) realloc(x_node->y_axis, (x_node->y_size + BSC_Z) * sizeof(STRUCT(y_node) *)); 331 | } 332 | 333 | while (y < --end) 334 | { 335 | x_node->y_axis[end] = x_node->y_axis[end - 1]; 336 | x_node->y_base[end] = x_node->y_base[end - 1]; 337 | } 338 | x_node->y_axis[y] = (STRUCT(y_node) *) malloc(sizeof(STRUCT(y_node))); 339 | 340 | x_node->y_axis[y]->z_axis1 = (VAR *) malloc(BSC_Z * sizeof(VAR)); 341 | x_node->y_axis[y]->z_axis2 = (VAR *) malloc(BSC_Z * sizeof(VAR)); 342 | } 343 | 344 | void FUNC(split_y_node)(STRUCT(x_node) *x_node, size_t y1, size_t y2, CMPFUNC *cmp) 345 | { 346 | STRUCT(y_node) *y_node1, *y_node2; 347 | 348 | FUNC(insert_y_node)(x_node, y2); 349 | 350 | y_node1 = x_node->y_axis[y1]; 351 | y_node2 = x_node->y_axis[y2]; 352 | 353 | FUNC(twin_merge)(x_node, y_node1, y_node2, cmp); 354 | 355 | y_node1->z_size = y_node2->z_size = 0; 356 | 357 | x_node->y_base[y1] = y_node1->z_axis1[0]; 358 | x_node->y_base[y2] = y_node2->z_axis1[0]; 359 | } 360 | 361 | void FUNC(insert_z_node)(STRUCT(x_node) *x_node, VAR key, CMPFUNC *cmp) 362 | { 363 | STRUCT(y_node) *y_node; 364 | size_t y; 365 | 366 | y = FUNC(adaptive_binary_search)(x_node, x_node->y_base, key, cmp); 367 | 368 | y_node = x_node->y_axis[y]; 369 | 370 | y_node->z_axis2[y_node->z_size++] = key; 371 | 372 | if (y_node->z_size == BSC_Z) 373 | { 374 | FUNC(split_y_node)(x_node, y, y + 1, cmp); 375 | } 376 | } 377 | 378 | 379 | ///////////////////////////////////////////////////////////////////////////// 380 | //┌───────────────────────────────────────────────────────────────────────┐// 381 | //│ ██████┐ ██████┐ ██████┐██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │// 382 | //│ ██┌────┘ 
██┌──██┐└─██┌─┘██┌──██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 383 | //│ ██│ ███┐██████┌┘ ██│ ██│ ██│███████┐██│ ██│██████┌┘ ██│ │// 384 | //│ ██│ ██│██┌──██┐ ██│ ██│ ██│└────██│██│ ██│██┌──██┐ ██│ │// 385 | //│ └██████┌┘██│ ██│██████┐██████┌┘███████│└██████┌┘██│ ██│ ██│ │// 386 | //│ └─────┘ └─┘ └─┘└─────┘└─────┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 387 | //└───────────────────────────────────────────────────────────────────────┘// 388 | ///////////////////////////////////////////////////////////////////////////// 389 | 390 | void FUNC(gridsort)(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 391 | { 392 | size_t cnt = nmemb; 393 | VAR *pta = (VAR *) array; 394 | 395 | STRUCT(x_node) *grid = FUNC(create_grid)(pta, cnt, cmp); 396 | 397 | pta += BSC_Z * 2; 398 | cnt -= BSC_Z * 2; 399 | 400 | while (cnt--) 401 | { 402 | FUNC(insert_z_node)(grid, *pta++, cmp); 403 | } 404 | 405 | FUNC(destroy_grid)(grid, (VAR *) array, cmp); 406 | } 407 | -------------------------------------------------------------------------------- /src/gridsort.h: -------------------------------------------------------------------------------- 1 | // gridsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #ifndef GRIDSORT_H 4 | #define GRIDSORT_H 5 | 6 | //#define cmp(a,b) (*(a) > *(b)) 7 | 8 | #ifndef QUADSORT_H 9 | #include "quadsort.h" 10 | #endif 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | typedef int CMPFUNC (const void *a, const void *b); 18 | 19 | #define BSC_X 32 20 | #define BSC_Y 2 21 | 22 | size_t BSC_Z; 23 | 24 | ////////////////////////////////////////////////////////// 25 | //┌────────────────────────────────────────────────────┐// 26 | //│ █████┐ ██████┐ ██████┐████████┐ │// 27 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 28 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 29 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 30 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 31 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 32 | //└────────────────────────────────────────────────────┘// 33 | 
////////////////////////////////////////////////////////// 34 | 35 | #undef VAR 36 | #undef FUNC 37 | #undef STRUCT 38 | 39 | #define VAR char 40 | #define FUNC(NAME) NAME##8 41 | #define STRUCT(NAME) struct NAME##8 42 | 43 | #include "gridsort.c" 44 | 45 | ////////////////////////////////////////////////////////// 46 | //┌────────────────────────────────────────────────────┐// 47 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 48 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 49 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 50 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 51 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 52 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 53 | //└────────────────────────────────────────────────────┘// 54 | ////////////////////////////////////////////////////////// 55 | 56 | #undef VAR 57 | #undef FUNC 58 | #undef STRUCT 59 | 60 | #define VAR short 61 | #define FUNC(NAME) NAME##16 62 | #define STRUCT(NAME) struct NAME##16 63 | 64 | #include "gridsort.c" 65 | 66 | ////////////////////////////////////////////////////////// 67 | // ┌───────────────────────────────────────────────────┐// 68 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 69 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 70 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 71 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 72 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 73 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 74 | // └───────────────────────────────────────────────────┘// 75 | ////////////////////////////////////////////////////////// 76 | 77 | #undef VAR 78 | #undef FUNC 79 | #undef STRUCT 80 | 81 | #define VAR int 82 | #define FUNC(NAME) NAME##32 83 | #define STRUCT(NAME) struct NAME##32 84 | 85 | #include "gridsort.c" 86 | 87 | ////////////////////////////////////////////////////////// 88 | // ┌───────────────────────────────────────────────────┐// 89 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 90 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 91 | // │ 
██████┐ ███████│ ██████┌┘ ██│ ██│ │// 92 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 93 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 94 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 95 | // └───────────────────────────────────────────────────┘// 96 | ////////////////////////////////////////////////////////// 97 | 98 | #undef VAR 99 | #undef FUNC 100 | #undef STRUCT 101 | 102 | #define VAR long long 103 | #define FUNC(NAME) NAME##64 104 | #define STRUCT(NAME) struct NAME##64 105 | 106 | #include "gridsort.c" 107 | 108 | ////////////////////////////////////////////////////////// 109 | //┌────────────────────────────────────────────────────┐// 110 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 111 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 112 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │// 113 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 114 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 115 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 116 | //└────────────────────────────────────────────────────┘// 117 | ////////////////////////////////////////////////////////// 118 | 119 | #undef VAR 120 | #undef FUNC 121 | #undef STRUCT 122 | 123 | #define VAR long double 124 | #define FUNC(NAME) NAME##128 125 | #define STRUCT(NAME) struct NAME##128 126 | 127 | #include "gridsort.c" 128 | 129 | ///////////////////////////////////////////////////////////////////////////// 130 | //┌───────────────────────────────────────────────────────────────────────┐// 131 | //│ ██████┐ ██████┐ ██████┐██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │// 132 | //│ ██┌────┘ ██┌──██┐└─██┌─┘██┌──██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 133 | //│ ██│ ███┐██████┌┘ ██│ ██│ ██│███████┐██│ ██│██████┌┘ ██│ │// 134 | //│ ██│ ██│██┌──██┐ ██│ ██│ ██│└────██│██│ ██│██┌──██┐ ██│ │// 135 | //│ └██████┌┘██│ ██│██████┐██████┌┘███████│└██████┌┘██│ ██│ ██│ │// 136 | //│ └─────┘ └─┘ └─┘└─────┘└─────┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 137 | 
//└───────────────────────────────────────────────────────────────────────┘// 138 | ///////////////////////////////////////////////////////////////////////////// 139 | 140 | void gridsort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 141 | { 142 | if (nmemb < BSC_X * BSC_X) 143 | { 144 | return quadsort(array, nmemb, size, cmp); 145 | } 146 | 147 | switch (size) 148 | { 149 | case sizeof(char): 150 | return gridsort8(array, nmemb, size, cmp); 151 | 152 | case sizeof(short): 153 | return gridsort16(array, nmemb, size, cmp); 154 | 155 | case sizeof(int): 156 | return gridsort32(array, nmemb, size, cmp); 157 | 158 | case sizeof(long long): 159 | return gridsort64(array, nmemb, size, cmp); 160 | 161 | case sizeof(long double): 162 | return gridsort128(array, nmemb, size, cmp); 163 | 164 | default: 165 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 166 | } 167 | } 168 | 169 | #undef VAR 170 | #undef FUNC 171 | #undef STRUCT 172 | 173 | #endif 174 | -------------------------------------------------------------------------------- /src/quadsort.c: -------------------------------------------------------------------------------- 1 | // quadsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | // the next seven functions are used for sorting 0 to 31 elements 4 | 5 | void FUNC(parity_swap_four)(VAR *array, CMPFUNC *cmp) 6 | { 7 | VAR tmp, *pta = array; 8 | size_t x; 9 | 10 | branchless_swap(pta, tmp, x, cmp); pta += 2; 11 | branchless_swap(pta, tmp, x, cmp); pta--; 12 | 13 | if (cmp(pta, pta + 1) > 0) 14 | { 15 | tmp = pta[0]; pta[0] = pta[1]; pta[1] = tmp; pta--; 16 | 17 | branchless_swap(pta, tmp, x, cmp); pta += 2; 18 | branchless_swap(pta, tmp, x, cmp); pta--; 19 | branchless_swap(pta, tmp, x, cmp); 20 | } 21 | } 22 | 23 | void FUNC(parity_swap_five)(VAR *array, CMPFUNC *cmp) 24 | { 25 | VAR tmp, *pta = array; 26 | size_t x, y; 27 | 28 | branchless_swap(pta, tmp, x, 
cmp); pta += 2; 29 | branchless_swap(pta, tmp, x, cmp); pta -= 1; 30 | branchless_swap(pta, tmp, x, cmp); pta += 2; 31 | branchless_swap(pta, tmp, y, cmp); pta = array; 32 | 33 | if (x + y) 34 | { 35 | branchless_swap(pta, tmp, x, cmp); pta += 2; 36 | branchless_swap(pta, tmp, x, cmp); pta -= 1; 37 | branchless_swap(pta, tmp, x, cmp); pta += 2; 38 | branchless_swap(pta, tmp, x, cmp); pta = array; 39 | branchless_swap(pta, tmp, x, cmp); pta += 2; 40 | branchless_swap(pta, tmp, x, cmp); pta -= 1; 41 | } 42 | } 43 | 44 | void FUNC(parity_swap_six)(VAR *array, VAR *swap, CMPFUNC *cmp) 45 | { 46 | VAR tmp, *pta = array, *ptl, *ptr; 47 | size_t x, y; 48 | 49 | branchless_swap(pta, tmp, x, cmp); pta++; 50 | branchless_swap(pta, tmp, x, cmp); pta += 3; 51 | branchless_swap(pta, tmp, x, cmp); pta--; 52 | branchless_swap(pta, tmp, x, cmp); pta = array; 53 | 54 | if (cmp(pta + 2, pta + 3) <= 0) 55 | { 56 | branchless_swap(pta, tmp, x, cmp); pta += 4; 57 | branchless_swap(pta, tmp, x, cmp); 58 | return; 59 | } 60 | x = cmp(pta, pta + 1) > 0; y = !x; swap[0] = pta[x]; swap[1] = pta[y]; swap[2] = pta[2]; pta += 4; 61 | x = cmp(pta, pta + 1) > 0; y = !x; swap[4] = pta[x]; swap[5] = pta[y]; swap[3] = pta[-1]; 62 | 63 | pta = array; ptl = swap; ptr = swap + 3; 64 | 65 | head_branchless_merge(pta, x, ptl, ptr, cmp); 66 | head_branchless_merge(pta, x, ptl, ptr, cmp); 67 | head_branchless_merge(pta, x, ptl, ptr, cmp); 68 | 69 | pta = array + 5; ptl = swap + 2; ptr = swap + 5; 70 | 71 | tail_branchless_merge(pta, y, ptl, ptr, cmp); 72 | tail_branchless_merge(pta, y, ptl, ptr, cmp); 73 | *pta = cmp(ptl, ptr) > 0 ? 
*ptl : *ptr; 74 | } 75 | 76 | void FUNC(parity_swap_seven)(VAR *array, VAR *swap, CMPFUNC *cmp) 77 | { 78 | VAR tmp, *pta = array, *ptl, *ptr; 79 | size_t x, y; 80 | 81 | branchless_swap(pta, tmp, x, cmp); pta += 2; 82 | branchless_swap(pta, tmp, x, cmp); pta += 2; 83 | branchless_swap(pta, tmp, x, cmp); pta -= 3; 84 | branchless_swap(pta, tmp, y, cmp); pta += 2; 85 | branchless_swap(pta, tmp, x, cmp); pta += 2; y += x; 86 | branchless_swap(pta, tmp, x, cmp); pta -= 1; y += x; 87 | 88 | if (y == 0) return; 89 | 90 | branchless_swap(pta, tmp, x, cmp); pta = array; 91 | 92 | x = cmp(pta, pta + 1) > 0; swap[0] = pta[x]; swap[1] = pta[!x]; swap[2] = pta[2]; pta += 3; 93 | x = cmp(pta, pta + 1) > 0; swap[3] = pta[x]; swap[4] = pta[!x]; pta += 2; 94 | x = cmp(pta, pta + 1) > 0; swap[5] = pta[x]; swap[6] = pta[!x]; 95 | 96 | pta = array; ptl = swap; ptr = swap + 3; 97 | 98 | head_branchless_merge(pta, x, ptl, ptr, cmp); 99 | head_branchless_merge(pta, x, ptl, ptr, cmp); 100 | head_branchless_merge(pta, x, ptl, ptr, cmp); 101 | 102 | pta = array + 6; ptl = swap + 2; ptr = swap + 6; 103 | 104 | tail_branchless_merge(pta, y, ptl, ptr, cmp); 105 | tail_branchless_merge(pta, y, ptl, ptr, cmp); 106 | tail_branchless_merge(pta, y, ptl, ptr, cmp); 107 | *pta = cmp(ptl, ptr) > 0 ? 
*ptl : *ptr; 108 | } 109 | 110 | void FUNC(tiny_sort)(VAR *array, VAR *swap, size_t nmemb, CMPFUNC *cmp) 111 | { 112 | VAR tmp; 113 | size_t x; 114 | 115 | switch (nmemb) 116 | { 117 | case 0: 118 | case 1: 119 | return; 120 | case 2: 121 | branchless_swap(array, tmp, x, cmp); 122 | return; 123 | case 3: 124 | branchless_swap(array, tmp, x, cmp); array++; 125 | branchless_swap(array, tmp, x, cmp); array--; 126 | branchless_swap(array, tmp, x, cmp); 127 | return; 128 | case 4: 129 | FUNC(parity_swap_four)(array, cmp); 130 | return; 131 | case 5: 132 | FUNC(parity_swap_five)(array, cmp); 133 | return; 134 | case 6: 135 | FUNC(parity_swap_six)(array, swap, cmp); 136 | return; 137 | case 7: 138 | FUNC(parity_swap_seven)(array, swap, cmp); 139 | return; 140 | } 141 | } 142 | 143 | // left must be equal or one smaller than right 144 | 145 | void FUNC(parity_merge)(VAR *dest, VAR *from, size_t left, size_t right, CMPFUNC *cmp) 146 | { 147 | VAR *ptl, *ptr, *tpl, *tpr, *tpd, *ptd; 148 | #if !defined __clang__ 149 | size_t x, y; 150 | #endif 151 | ptl = from; 152 | ptr = from + left; 153 | ptd = dest; 154 | tpl = ptr - 1; 155 | tpr = tpl + right; 156 | tpd = dest + left + right - 1; 157 | 158 | if (left < right) 159 | { 160 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 161 | } 162 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 163 | 164 | #if !defined cmp && !defined __clang__ // cache limit workaround for gcc 165 | if (left > QUAD_CACHE) 166 | { 167 | while (--left) 168 | { 169 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 170 | *tpd-- = cmp(tpl, tpr) > 0 ? *tpl-- : *tpr--; 171 | } 172 | } 173 | else 174 | #endif 175 | { 176 | while (--left) 177 | { 178 | head_branchless_merge(ptd, x, ptl, ptr, cmp); 179 | tail_branchless_merge(tpd, y, tpl, tpr, cmp); 180 | } 181 | } 182 | *tpd = cmp(tpl, tpr) > 0 ? 
*tpl : *tpr; 183 | } 184 | 185 | void FUNC(tail_swap)(VAR *array, VAR *swap, size_t nmemb, CMPFUNC *cmp) 186 | { 187 | if (nmemb < 8) 188 | { 189 | FUNC(tiny_sort)(array, swap, nmemb, cmp); 190 | return; 191 | } 192 | size_t quad1, quad2, quad3, quad4, half1, half2; 193 | 194 | half1 = nmemb / 2; 195 | quad1 = half1 / 2; 196 | quad2 = half1 - quad1; 197 | half2 = nmemb - half1; 198 | quad3 = half2 / 2; 199 | quad4 = half2 - quad3; 200 | 201 | VAR *pta = array; 202 | 203 | FUNC(tail_swap)(pta, swap, quad1, cmp); pta += quad1; 204 | FUNC(tail_swap)(pta, swap, quad2, cmp); pta += quad2; 205 | FUNC(tail_swap)(pta, swap, quad3, cmp); pta += quad3; 206 | FUNC(tail_swap)(pta, swap, quad4, cmp); 207 | 208 | if (cmp(array + quad1 - 1, array + quad1) <= 0 && cmp(array + half1 - 1, array + half1) <= 0 && cmp(pta - 1, pta) <= 0) 209 | { 210 | return; 211 | } 212 | FUNC(parity_merge)(swap, array, quad1, quad2, cmp); 213 | FUNC(parity_merge)(swap + half1, array + half1, quad3, quad4, cmp); 214 | FUNC(parity_merge)(array, swap, half1, half2, cmp); 215 | } 216 | 217 | // the next three functions create sorted blocks of 32 elements 218 | 219 | void FUNC(quad_reversal)(VAR *pta, VAR *ptz) 220 | { 221 | VAR *ptb, *pty, tmp1, tmp2; 222 | 223 | size_t loop = (ptz - pta) / 2; 224 | 225 | ptb = pta + loop; 226 | pty = ptz - loop; 227 | 228 | if (loop % 2 == 0) 229 | { 230 | tmp2 = *ptb; *ptb-- = *pty; *pty++ = tmp2; loop--; 231 | } 232 | 233 | loop /= 2; 234 | 235 | do 236 | { 237 | tmp1 = *pta; *pta++ = *ptz; *ptz-- = tmp1; 238 | tmp2 = *ptb; *ptb-- = *pty; *pty++ = tmp2; 239 | } 240 | while (loop--); 241 | } 242 | 243 | void FUNC(quad_swap_merge)(VAR *array, VAR *swap, CMPFUNC *cmp) 244 | { 245 | VAR *pts, *ptl, *ptr; 246 | #if !defined __clang__ 247 | size_t x; 248 | #endif 249 | parity_merge_two(array + 0, swap + 0, x, ptl, ptr, pts, cmp); 250 | parity_merge_two(array + 4, swap + 4, x, ptl, ptr, pts, cmp); 251 | 252 | parity_merge_four(swap, array, x, ptl, ptr, pts, cmp); 253 | } 254 
| 255 | void FUNC(tail_merge)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t block, CMPFUNC *cmp); 256 | 257 | size_t FUNC(quad_swap)(VAR *array, size_t nmemb, CMPFUNC *cmp) 258 | { 259 | VAR tmp, swap[32]; 260 | size_t count; 261 | VAR *pta, *pts; 262 | unsigned char v1, v2, v3, v4, x; 263 | pta = array; 264 | 265 | count = nmemb / 8; 266 | 267 | while (count--) 268 | { 269 | v1 = cmp(pta + 0, pta + 1) > 0; 270 | v2 = cmp(pta + 2, pta + 3) > 0; 271 | v3 = cmp(pta + 4, pta + 5) > 0; 272 | v4 = cmp(pta + 6, pta + 7) > 0; 273 | 274 | switch (v1 + v2 * 2 + v3 * 4 + v4 * 8) 275 | { 276 | case 0: 277 | if (cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 5, pta + 6) <= 0) 278 | { 279 | goto ordered; 280 | } 281 | FUNC(quad_swap_merge)(pta, swap, cmp); 282 | break; 283 | 284 | case 15: 285 | if (cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 286 | { 287 | pts = pta; 288 | goto reversed; 289 | } 290 | 291 | default: 292 | not_ordered: 293 | x = !v1; tmp = pta[x]; pta[0] = pta[v1]; pta[1] = tmp; pta += 2; 294 | x = !v2; tmp = pta[x]; pta[0] = pta[v2]; pta[1] = tmp; pta += 2; 295 | x = !v3; tmp = pta[x]; pta[0] = pta[v3]; pta[1] = tmp; pta += 2; 296 | x = !v4; tmp = pta[x]; pta[0] = pta[v4]; pta[1] = tmp; pta -= 6; 297 | 298 | FUNC(quad_swap_merge)(pta, swap, cmp); 299 | } 300 | pta += 8; 301 | 302 | continue; 303 | 304 | ordered: 305 | 306 | pta += 8; 307 | 308 | if (count--) 309 | { 310 | if ((v1 = cmp(pta + 0, pta + 1) > 0) | (v2 = cmp(pta + 2, pta + 3) > 0) | (v3 = cmp(pta + 4, pta + 5) > 0) | (v4 = cmp(pta + 6, pta + 7) > 0)) 311 | { 312 | if (v1 + v2 + v3 + v4 == 4 && cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 313 | { 314 | pts = pta; 315 | goto reversed; 316 | } 317 | goto not_ordered; 318 | } 319 | if (cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 5, pta + 6) <= 0) 320 | { 321 | goto ordered; 322 | } 323 | 
FUNC(quad_swap_merge)(pta, swap, cmp); 324 | pta += 8; 325 | continue; 326 | } 327 | break; 328 | 329 | reversed: 330 | 331 | pta += 8; 332 | 333 | if (count--) 334 | { 335 | if ((v1 = cmp(pta + 0, pta + 1) <= 0) | (v2 = cmp(pta + 2, pta + 3) <= 0) | (v3 = cmp(pta + 4, pta + 5) <= 0) | (v4 = cmp(pta + 6, pta + 7) <= 0)) 336 | { 337 | // not reversed 338 | } 339 | else 340 | { 341 | if (cmp(pta - 1, pta) > 0 && cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 342 | { 343 | goto reversed; 344 | } 345 | } 346 | FUNC(quad_reversal)(pts, pta - 1); 347 | 348 | if (v1 + v2 + v3 + v4 == 4 && cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 5, pta + 6) <= 0) 349 | { 350 | goto ordered; 351 | } 352 | if (v1 + v2 + v3 + v4 == 0 && cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 353 | { 354 | pts = pta; 355 | goto reversed; 356 | } 357 | 358 | x = !v1; tmp = pta[v1]; pta[0] = pta[x]; pta[1] = tmp; pta += 2; 359 | x = !v2; tmp = pta[v2]; pta[0] = pta[x]; pta[1] = tmp; pta += 2; 360 | x = !v3; tmp = pta[v3]; pta[0] = pta[x]; pta[1] = tmp; pta += 2; 361 | x = !v4; tmp = pta[v4]; pta[0] = pta[x]; pta[1] = tmp; pta -= 6; 362 | 363 | if (cmp(pta + 1, pta + 2) > 0 || cmp(pta + 3, pta + 4) > 0 || cmp(pta + 5, pta + 6) > 0) 364 | { 365 | FUNC(quad_swap_merge)(pta, swap, cmp); 366 | } 367 | pta += 8; 368 | continue; 369 | } 370 | 371 | switch (nmemb % 8) 372 | { 373 | case 7: if (cmp(pta + 5, pta + 6) <= 0) break; 374 | case 6: if (cmp(pta + 4, pta + 5) <= 0) break; 375 | case 5: if (cmp(pta + 3, pta + 4) <= 0) break; 376 | case 4: if (cmp(pta + 2, pta + 3) <= 0) break; 377 | case 3: if (cmp(pta + 1, pta + 2) <= 0) break; 378 | case 2: if (cmp(pta + 0, pta + 1) <= 0) break; 379 | case 1: if (cmp(pta - 1, pta + 0) <= 0) break; 380 | case 0: 381 | FUNC(quad_reversal)(pts, pta + nmemb % 8 - 1); 382 | 383 | if (pts == array) 384 | { 385 | return 1; 386 | } 387 | goto reverse_end; 388 | } 389 | 
FUNC(quad_reversal)(pts, pta - 1); 390 | break; 391 | } 392 | FUNC(tail_swap)(pta, swap, nmemb % 8, cmp); 393 | 394 | reverse_end: 395 | 396 | pta = array; 397 | 398 | for (count = nmemb / 32 ; count-- ; pta += 32) 399 | { 400 | if (cmp(pta + 7, pta + 8) <= 0 && cmp(pta + 15, pta + 16) <= 0 && cmp(pta + 23, pta + 24) <= 0) 401 | { 402 | continue; 403 | } 404 | FUNC(parity_merge)(swap, pta, 8, 8, cmp); 405 | FUNC(parity_merge)(swap + 16, pta + 16, 8, 8, cmp); 406 | FUNC(parity_merge)(pta, swap, 16, 16, cmp); 407 | } 408 | 409 | if (nmemb % 32 > 8) 410 | { 411 | FUNC(tail_merge)(pta, swap, 32, nmemb % 32, 8, cmp); 412 | } 413 | return 0; 414 | } 415 | 416 | // The next six functions are quad merge support routines 417 | 418 | void FUNC(cross_merge)(VAR *dest, VAR *from, size_t left, size_t right, CMPFUNC *cmp) 419 | { 420 | VAR *ptl, *tpl, *ptr, *tpr, *ptd, *tpd; 421 | size_t loop; 422 | #if !defined __clang__ 423 | size_t x, y; 424 | #endif 425 | ptl = from; 426 | ptr = from + left; 427 | tpl = ptr - 1; 428 | tpr = tpl + right; 429 | 430 | if (left + 1 >= right && right >= left && left >= 32) 431 | { 432 | if (cmp(ptl + 15, ptr) > 0 && cmp(ptl, ptr + 15) <= 0 && cmp(tpl, tpr - 15) > 0 && cmp(tpl - 15, tpr) <= 0) 433 | { 434 | FUNC(parity_merge)(dest, from, left, right, cmp); 435 | return; 436 | } 437 | } 438 | ptd = dest; 439 | tpd = dest + left + right - 1; 440 | 441 | while (1) 442 | { 443 | if (tpl - ptl > 8) 444 | { 445 | ptl8_ptr: if (cmp(ptl + 7, ptr) <= 0) 446 | { 447 | memcpy(ptd, ptl, 8 * sizeof(VAR)); ptd += 8; ptl += 8; 448 | 449 | if (tpl - ptl > 8) {goto ptl8_ptr;} continue; 450 | } 451 | 452 | tpl8_tpr: if (cmp(tpl - 7, tpr) > 0) 453 | { 454 | tpd -= 7; tpl -= 7; memcpy(tpd--, tpl--, 8 * sizeof(VAR)); 455 | 456 | if (tpl - ptl > 8) {goto tpl8_tpr;} continue; 457 | } 458 | } 459 | 460 | if (tpr - ptr > 8) 461 | { 462 | ptl_ptr8: if (cmp(ptl, ptr + 7) > 0) 463 | { 464 | memcpy(ptd, ptr, 8 * sizeof(VAR)); ptd += 8; ptr += 8; 465 | 466 | if (tpr - ptr > 8) 
{goto ptl_ptr8;} continue; 467 | } 468 | 469 | tpl_tpr8: if (cmp(tpl, tpr - 7) <= 0) 470 | { 471 | tpd -= 7; tpr -= 7; memcpy(tpd--, tpr--, 8 * sizeof(VAR)); 472 | 473 | if (tpr - ptr > 8) {goto tpl_tpr8;} continue; 474 | } 475 | } 476 | 477 | if (tpd - ptd < 16) 478 | { 479 | break; 480 | } 481 | 482 | #if !defined cmp && !defined __clang__ 483 | if (left > QUAD_CACHE) 484 | { 485 | loop = 8; do 486 | { 487 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 488 | *tpd-- = cmp(tpl, tpr) > 0 ? *tpl-- : *tpr--; 489 | } 490 | while (--loop); 491 | } 492 | else 493 | #endif 494 | { 495 | loop = 8; do 496 | { 497 | head_branchless_merge(ptd, x, ptl, ptr, cmp); 498 | tail_branchless_merge(tpd, y, tpl, tpr, cmp); 499 | } 500 | while (--loop); 501 | } 502 | } 503 | 504 | while (ptl <= tpl && ptr <= tpr) 505 | { 506 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 507 | } 508 | while (ptl <= tpl) 509 | { 510 | *ptd++ = *ptl++; 511 | } 512 | while (ptr <= tpr) 513 | { 514 | *ptd++ = *ptr++; 515 | } 516 | } 517 | 518 | void FUNC(quad_merge_block)(VAR *array, VAR *swap, size_t block, CMPFUNC *cmp) 519 | { 520 | VAR *pt1, *pt2, *pt3; 521 | size_t block_x_2 = block * 2; 522 | 523 | pt1 = array + block; 524 | pt2 = pt1 + block; 525 | pt3 = pt2 + block; 526 | 527 | switch ((cmp(pt1 - 1, pt1) <= 0) | (cmp(pt3 - 1, pt3) <= 0) * 2) 528 | { 529 | case 0: 530 | FUNC(cross_merge)(swap, array, block, block, cmp); 531 | FUNC(cross_merge)(swap + block_x_2, pt2, block, block, cmp); 532 | break; 533 | case 1: 534 | memcpy(swap, array, block_x_2 * sizeof(VAR)); 535 | FUNC(cross_merge)(swap + block_x_2, pt2, block, block, cmp); 536 | break; 537 | case 2: 538 | FUNC(cross_merge)(swap, array, block, block, cmp); 539 | memcpy(swap + block_x_2, pt2, block_x_2 * sizeof(VAR)); 540 | break; 541 | case 3: 542 | if (cmp(pt2 - 1, pt2) <= 0) 543 | return; 544 | memcpy(swap, array, block_x_2 * 2 * sizeof(VAR)); 545 | } 546 | FUNC(cross_merge)(array, swap, block_x_2, block_x_2, cmp); 547 | } 548 | 549 | 
// Bottom-up merge driver. Starting from sorted runs of `block` elements it
// repeatedly merges four runs at a time with quad_merge_block() for as long
// as the quadrupled block fits both in the array and in the swap buffer;
// the elements left over after the last full quadruple are merged by
// tail_merge(). A final tail_merge() pass runs over the whole array.
// Returns half of the first quadrupled block size that was not processed;
// the caller passes this on to rotate_merge() as its starting block size.
size_t FUNC(quad_merge)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t block, CMPFUNC *cmp)
{
	VAR *pta, *pte;

	pte = array + nmemb;

	block *= 4;

	while (block <= nmemb && block <= swap_size)
	{
		pta = array;

		do
		{
			FUNC(quad_merge_block)(pta, swap, block / 4, cmp);

			pta += block;
		}
		while (pta + block <= pte);

		FUNC(tail_merge)(pta, swap, swap_size, pte - pta, block / 4, cmp);

		block *= 4;
	}

	FUNC(tail_merge)(array, swap, swap_size, nmemb, block / 4, cmp);

	return block / 2;
}

// Merges array[0 .. block) with array[block .. nmemb) front to back.
// Only the left run is copied to swap, so swap must hold `block` elements;
// swap_size is accepted for signature symmetry but not read here.
// Returns immediately when there is no right run or the runs are already in
// order across their border.
void FUNC(partial_forward_merge)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t block, CMPFUNC *cmp)
{
	VAR *ptl, *ptr, *tpl, *tpr;
	size_t x;

	if (nmemb == block)
	{
		return;
	}

	ptr = array + block;
	tpr = array + nmemb - 1;

	if (cmp(ptr - 1, ptr) <= 0)
	{
		return;
	}

	memcpy(swap, array, block * sizeof(VAR));

	ptl = swap;
	tpl = swap + block - 1;

	// two elements per step while both runs have more than two left
	while (ptl < tpl - 1 && ptr < tpr - 1)
	{
		ptr2: if (cmp(ptl, ptr + 1) > 0)
		{
			*array++ = *ptr++; *array++ = *ptr++;

			if (ptr < tpr - 1) {goto ptr2;} break;
		}
		if (cmp(ptl + 1, ptr) <= 0)
		{
			*array++ = *ptl++; *array++ = *ptl++;

			if (ptl < tpl - 1) {goto ptl2;} break;
		}

		goto cross_swap;

		ptl2: if (cmp(ptl + 1, ptr) <= 0)
		{
			*array++ = *ptl++; *array++ = *ptl++;

			if (ptl < tpl - 1) {goto ptl2;} break;
		}

		if (cmp(ptl, ptr + 1) > 0)
		{
			*array++ = *ptr++; *array++ = *ptr++;

			if (ptr < tpr - 1) {goto ptr2;} break;
		}

		cross_swap:

		// the two heads interleave: emit them in order, then one more element
		x = cmp(ptl, ptr) <= 0; array[x] = *ptr; ptr += 1; array[!x] = *ptl; ptl += 1; array += 2;
		head_branchless_merge(array, x, ptl, ptr, cmp);
	}

	while (ptl <= tpl && ptr <= tpr)
	{
		*array++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++;
	}

	// whatever is left of the right run is already in its final place
	while (ptl <= tpl)
	{
		*array++ = *ptl++;
	}
}

// Merges array[0 .. block) with array[block .. nmemb) back to front.
// When the whole range fits in swap and the right run has at least 64
// elements, the merge is done by cross_merge() into swap and copied back.
// Otherwise only the right run is copied to swap (swap must then hold
// nmemb - block elements) and merged from the tail in steps of 16, then 2,
// then 1. Returns immediately when there is no right run or the runs are
// already in order across their border.
void FUNC(partial_backward_merge)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t block, CMPFUNC *cmp)
{
	VAR *tpl, *tpa, *tpr;
	size_t right, loop, x;

	if (nmemb == block)
	{
		return;
	}

	tpl = array + block - 1;
	tpa = array + nmemb - 1;

	if (cmp(tpl, tpl + 1) <= 0)
	{
		return;
	}

	right = nmemb - block;

	if (nmemb <= swap_size && right >= 64)
	{
		FUNC(cross_merge)(swap, array, block, right, cmp);

		memcpy(array, swap, nmemb * sizeof(VAR));

		return;
	}

	memcpy(swap, array + block, right * sizeof(VAR));

	tpr = swap + right - 1;

	while (tpl > array + 16 && tpr > swap + 16)
	{
		tpl_tpr16: if (cmp(tpl, tpr - 15) <= 0)
		{
			loop = 16; do *tpa-- = *tpr--; while (--loop);

			if (tpr > swap + 16) {goto tpl_tpr16;} break;
		}

		tpl16_tpr: if (cmp(tpl - 15, tpr) > 0)
		{
			loop = 16; do *tpa-- = *tpl--; while (--loop);

			if (tpl > array + 16) {goto tpl16_tpr;} break;
		}
		loop = 8; do
		{
			if (cmp(tpl, tpr - 1) <= 0)
			{
				*tpa-- = *tpr--; *tpa-- = *tpr--;
			}
			else if (cmp(tpl - 1, tpr) > 0)
			{
				*tpa-- = *tpl--; *tpa-- = *tpl--;
			}
			else
			{
				x = cmp(tpl, tpr) <= 0; tpa--; tpa[x] = *tpr; tpr -= 1; tpa[!x] = *tpl; tpl -= 1; tpa--;
				tail_branchless_merge(tpa, x, tpl, tpr, cmp);
			}
		}
		while (--loop);
	}

	while (tpr > swap + 1 && tpl > array + 1)
	{
		tpr2: if (cmp(tpl, tpr - 1) <= 0)
		{
			*tpa-- = *tpr--; *tpa-- = *tpr--;

			if (tpr > swap + 1) {goto tpr2;} break;
		}

		if (cmp(tpl - 1, tpr) > 0)
		{
			*tpa-- = *tpl--; *tpa-- = *tpl--;

			if (tpl > array + 1) {goto tpl2;} break;
		}
		goto cross_swap;

		tpl2: if (cmp(tpl - 1, tpr) > 0)
		{
			*tpa-- = *tpl--; *tpa-- = *tpl--;

			if (tpl > array + 1) {goto tpl2;} break;
		}

		if (cmp(tpl, tpr - 1) <= 0)
		{
			*tpa-- = *tpr--; *tpa-- = *tpr--;

			if (tpr > swap + 1) {goto tpr2;} break;
		}
		cross_swap:

		// the two tails interleave: emit them in order, then one more element
		x = cmp(tpl, tpr) <= 0; tpa--; tpa[x] = *tpr; tpr -= 1; tpa[!x] = *tpl; tpl -= 1; tpa--;
		tail_branchless_merge(tpa, x, tpl, tpr, cmp);
	}

	while (tpr >= swap && tpl >= array)
	{
		*tpa-- = cmp(tpl, tpr) > 0 ? *tpl-- : *tpr--;
	}

	// whatever is left of the left run is already in its final place
	while (tpr >= swap)
	{
		*tpa-- = *tpr--;
	}
}

// Bottom-up pairwise merge of runs of `block` elements using
// partial_backward_merge(), doubling the block size each pass. Stops once a
// single run covers the array or the block no longer fits in swap.
void FUNC(tail_merge)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t block, CMPFUNC *cmp)
{
	VAR *pta, *pte;

	pte = array + nmemb;

	while (block < nmemb && block <= swap_size)
	{
		for (pta = array ; pta + block < pte ; pta += block * 2)
		{
			if (pta + block * 2 < pte)
			{
				FUNC(partial_backward_merge)(pta, swap, swap_size, block * 2, block, cmp);

				continue;
			}
			// last pair: the right run may be shorter than block
			FUNC(partial_backward_merge)(pta, swap, swap_size, pte - pta, block, cmp);

			break;
		}
		block *= 2;
	}
}

// the next four functions provide in-place rotate merge support

// Rotates array[0 .. nmemb) so that the first `left` elements end up behind
// the remaining nmemb - left elements. At most 65536 elements of swap are
// used. Strategy, in order of preference:
//   1. the smaller side fits in swap: copy it out, memmove the other side;
//   2. the size difference ("bridge") fits in swap and exceeds 3: bridge rotation;
//   3. otherwise: three in-place exchange loops that need no buffer.
// Equal halves are exchanged element by element.
void FUNC(trinity_rotation)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t left)
{
	VAR temp;
	size_t bridge, right = nmemb - left;

	if (swap_size > 65536)
	{
		swap_size = 65536;
	}

	if (left < right)
	{
		if (left <= swap_size)
		{
			memcpy(swap, array, left * sizeof(VAR));
			memmove(array, array + left, right * sizeof(VAR));
			memcpy(array + right, swap, left * sizeof(VAR));
		}
		else
		{
			VAR *pta, *ptb, *ptc, *ptd;

			pta = array;
			ptb = pta + left;

			bridge = right - left;

			if (bridge <= swap_size && bridge > 3)
			{
				ptc = pta + right;
				ptd = ptc + left;

				memcpy(swap, ptb, bridge * sizeof(VAR));

				while (left--)
				{
					*--ptc = *--ptd; *ptd = *--ptb;
				}
				memcpy(pta, swap, bridge * sizeof(VAR));
			}
			else
			{
				ptc = ptb;
				ptd = ptc + right;

				bridge = left / 2;

				while (bridge--)
				{
					temp = *--ptb; *ptb = *pta; *pta++ = *ptc; *ptc++ = *--ptd; *ptd = temp;
				}

				bridge = (ptd - ptc) / 2;

				while (bridge--)
				{
					temp = *ptc; *ptc++ = *--ptd; *ptd = *pta; *pta++ = temp;
				}

				bridge = (ptd - pta) / 2;

				while (bridge--)
				{
					temp = *pta; *pta++ = *--ptd; *ptd = temp;
				}
			}
		}
	}
	else if (right < left)
	{
		if (right <= swap_size)
		{
			memcpy(swap, array + left, right * sizeof(VAR));
			memmove(array + right, array, left * sizeof(VAR));
			memcpy(array, swap, right * sizeof(VAR));
		}
		else
		{
			VAR *pta, *ptb, *ptc, *ptd;

			pta = array;
			ptb = pta + left;

			bridge = left - right;

			if (bridge <= swap_size && bridge > 3)
			{
				ptc = pta + right;
				ptd = ptc + left;

				memcpy(swap, ptc, bridge * sizeof(VAR));

				while (right--)
				{
					*ptc++ = *pta; *pta++ = *ptb++;
				}
				memcpy(ptd - bridge, swap, bridge * sizeof(VAR));
			}
			else
			{
				ptc = ptb;
				ptd = ptc + right;

				bridge = right / 2;

				while (bridge--)
				{
					temp = *--ptb; *ptb = *pta; *pta++ = *ptc; *ptc++ = *--ptd; *ptd = temp;
				}

				bridge = (ptb - pta) / 2;

				while (bridge--)
				{
					temp = *--ptb; *ptb = *pta; *pta++ = *--ptd; *ptd = temp;
				}

				bridge = (ptd - pta) / 2;

				while (bridge--)
				{
					temp = *pta; *pta++ = *--ptd; *ptd = temp;
				}
			}
		}
	}
	else
	{
		VAR *pta, *ptb;

		pta = array;
		ptb = pta + left;

		while (left--)
		{
			temp = *pta; *pta++ = *ptb; *ptb++ = temp;
		}
	}
}

// Binary search over array[0 .. top), which the caller is expected to keep
// sorted. Returns the index at which the trailing stretch of elements that
// *value does not exceed (cmp(value, element) <= 0) begins, i.e. how many
// leading elements sort strictly before *value. top must be at least 1: the
// final probe reads end[-1].
size_t FUNC(monobound_binary_first)(VAR *array, VAR *value, size_t top, CMPFUNC *cmp)
{
	VAR *end;
	size_t mid;

	end = array + top;

	while (top > 1)
	{
		mid = top / 2;

		if (cmp(value, end - mid) <= 0)
		{
			end -= mid;
		}
		top -= mid;
	}

	if (cmp(value, end - 1) <= 0)
	{
		end--;
	}
	return (end - array);
}

// Merges array[0 .. lblock) with the following `right` elements using
// rotations, for ranges that do not fit in swap. The left run is split in
// two halves; a binary search finds how many right-hand elements sort before
// the second half, those are rotated in front of it, and the two resulting
// sub-problems are merged with a buffered merge when they fit in swap or by
// recursion otherwise.
void FUNC(rotate_merge_block)(VAR *array, VAR *swap, size_t swap_size, size_t lblock, size_t right, CMPFUNC *cmp)
{
	size_t left, rblock, unbalanced;

	if (cmp(array + lblock - 1, array + lblock) <= 0)
	{
		return;
	}

	rblock = lblock / 2;
	lblock -= rblock;

	left = FUNC(monobound_binary_first)(array + lblock + rblock, array + lblock, right, cmp);

	right -= left;

	// [ lblock ] [ rblock ] [ left ] [ right ]

	if (left)
	{
		if (lblock + left <= swap_size)
		{
			memcpy(swap, array, lblock * sizeof(VAR));
			memcpy(swap + lblock, array + lblock + rblock, left * sizeof(VAR));
			memmove(array + lblock + left, array + lblock, rblock * sizeof(VAR));

			FUNC(cross_merge)(array, swap, lblock, left, cmp);
		}
		else
		{
			FUNC(trinity_rotation)(array + lblock, swap, swap_size, rblock + left, rblock);

			unbalanced = (left * 2 < lblock) | (lblock * 2 < left);

			if (unbalanced && left <= swap_size)
			{
				FUNC(partial_backward_merge)(array, swap, swap_size, lblock + left, lblock, cmp);
			}
			else if (unbalanced && lblock <= swap_size)
			{
				FUNC(partial_forward_merge)(array, swap, swap_size, lblock + left, lblock, cmp);
			}
			else
			{
				FUNC(rotate_merge_block)(array, swap, swap_size, lblock, left, cmp);
			}
		}
	}

	if (right)
	{
		unbalanced = (right * 2 < rblock) | (rblock * 2 < right);

		if ((unbalanced && right <= swap_size) || right + rblock <= swap_size)
		{
			FUNC(partial_backward_merge)(array + lblock + left, swap, swap_size, rblock + right, rblock, cmp);
		}
		else if (unbalanced && rblock <= swap_size)
		{
			FUNC(partial_forward_merge)(array + lblock + left, swap, swap_size, rblock + right, rblock, cmp);
		}
		else
		{
			FUNC(rotate_merge_block)(array + lblock + left, swap, swap_size, rblock, right, cmp);
		}
	}
}

// Final merge stage: bottom-up pairwise merge of runs of `block` elements
// with rotate_merge_block(), doubling the block until one run covers the
// array. When only two runs remain and the right one fits in swap, a single
// partial_backward_merge() finishes the job.
void FUNC(rotate_merge)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, size_t block, CMPFUNC *cmp)
{
	VAR *pta, *pte;

	pte = array + nmemb;

	if (nmemb <= block * 2 && nmemb - block <= swap_size)
	{
		FUNC(partial_backward_merge)(array, swap, swap_size, nmemb, block, cmp);

		return;
	}

	while (block < nmemb)
	{
		for (pta = array ; pta + block < pte ; pta += block * 2)
		{
			if (pta + block * 2 < pte)
			{
				FUNC(rotate_merge_block)(pta, swap, swap_size, block, block, cmp);

				continue;
			}
			// last pair: the right run may be shorter than block
			FUNC(rotate_merge_block)(pta, swap, swap_size, block, pte - pta - block, cmp);

			break;
		}
		block *= 2;
	}
}

///////////////////////////////////////////////////////////////////////////////
//┌─────────────────────────────────────────────────────────────────────────┐//
//│ ██████┐ ██┐ ██┐ █████┐ ██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │//
//│ ██┌───██┐██│
██│██┌──██┐██┌──██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 1058 | //│ ██│ ██│██│ ██│███████│██│ ██│███████┐██│ ██│██████┌┘ ██│ │// 1059 | //│ ██│▄▄ ██│██│ ██│██┌──██│██│ ██│└────██│██│ ██│██┌──██┐ ██│ │// 1060 | //│ └██████┌┘└██████┌┘██│ ██│██████┌┘███████│└██████┌┘██│ ██│ ██│ │// 1061 | //│ └──▀▀─┘ └─────┘ └─┘ └─┘└─────┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 1062 | //└─────────────────────────────────────────────────────────────────────────┘// 1063 | /////////////////////////////////////////////////////////////////////////////// 1064 | 1065 | void FUNC(quadsort)(void *array, size_t nmemb, CMPFUNC *cmp) 1066 | { 1067 | VAR *pta = (VAR *) array; 1068 | 1069 | if (nmemb < 32) 1070 | { 1071 | VAR swap[nmemb]; 1072 | 1073 | FUNC(tail_swap)(pta, swap, nmemb, cmp); 1074 | } 1075 | else if (FUNC(quad_swap)(pta, nmemb, cmp) == 0) 1076 | { 1077 | VAR *swap = NULL; 1078 | size_t block, swap_size = nmemb; 1079 | 1080 | if (nmemb > 4194304) for (swap_size = 4194304 ; swap_size * 8 <= nmemb ; swap_size *= 4) {} 1081 | 1082 | swap = (VAR *) malloc(swap_size * sizeof(VAR)); 1083 | 1084 | if (swap == NULL) 1085 | { 1086 | VAR stack[512]; 1087 | 1088 | block = FUNC(quad_merge)(pta, stack, 512, nmemb, 32, cmp); 1089 | 1090 | FUNC(rotate_merge)(pta, stack, 512, nmemb, block, cmp); 1091 | 1092 | return; 1093 | } 1094 | block = FUNC(quad_merge)(pta, swap, swap_size, nmemb, 32, cmp); 1095 | 1096 | FUNC(rotate_merge)(pta, swap, swap_size, nmemb, block, cmp); 1097 | 1098 | free(swap); 1099 | } 1100 | } 1101 | 1102 | void FUNC(quadsort_swap)(void *array, void *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 1103 | { 1104 | VAR *pta = (VAR *) array; 1105 | VAR *pts = (VAR *) swap; 1106 | 1107 | if (nmemb <= 96) 1108 | { 1109 | FUNC(tail_swap)(pta, pts, nmemb, cmp); 1110 | } 1111 | else if (FUNC(quad_swap)(pta, nmemb, cmp) == 0) 1112 | { 1113 | size_t block = FUNC(quad_merge)(pta, pts, swap_size, nmemb, 32, cmp); 1114 | 1115 | FUNC(rotate_merge)(pta, pts, swap_size, nmemb, block, cmp); 1116 | 
} 1117 | } 1118 | -------------------------------------------------------------------------------- /src/quadsort.h: -------------------------------------------------------------------------------- 1 | // quadsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #ifndef QUADSORT_H 4 | #define QUADSORT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | //#include 14 | 15 | typedef int CMPFUNC (const void *a, const void *b); 16 | 17 | //#define cmp(a,b) (*(a) > *(b)) 18 | 19 | 20 | // When sorting an array of pointers, like a string array, the QUAD_CACHE needs 21 | // to be set for proper performance when sorting large arrays. 22 | // quadsort_prim() can be used to sort arrays of 32 and 64 bit integers 23 | // without a comparison function or cache restrictions. 24 | 25 | // With a 6 MB L3 cache a value of 262144 works well. 26 | 27 | #ifdef cmp 28 | #define QUAD_CACHE 4294967295 29 | #else 30 | //#define QUAD_CACHE 131072 31 | #define QUAD_CACHE 262144 32 | //#define QUAD_CACHE 524288 33 | //#define QUAD_CACHE 4294967295 34 | #endif 35 | 36 | // utilize branchless ternary operations in clang 37 | 38 | #if !defined __clang__ 39 | #define head_branchless_merge(ptd, x, ptl, ptr, cmp) \ 40 | x = cmp(ptl, ptr) <= 0; \ 41 | *ptd = *ptl; \ 42 | ptl += x; \ 43 | ptd[x] = *ptr; \ 44 | ptr += !x; \ 45 | ptd++; 46 | #else 47 | #define head_branchless_merge(ptd, x, ptl, ptr, cmp) \ 48 | *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; 49 | #endif 50 | 51 | #if !defined __clang__ 52 | #define tail_branchless_merge(tpd, y, tpl, tpr, cmp) \ 53 | y = cmp(tpl, tpr) <= 0; \ 54 | *tpd = *tpl; \ 55 | tpl -= !y; \ 56 | tpd--; \ 57 | tpd[y] = *tpr; \ 58 | tpr -= y; 59 | #else 60 | #define tail_branchless_merge(tpd, x, tpl, tpr, cmp) \ 61 | *tpd-- = cmp(tpl, tpr) > 0 ? 
*tpl-- : *tpr--; 62 | #endif 63 | 64 | // guarantee small parity merges are inlined with minimal overhead 65 | 66 | #define parity_merge_two(array, swap, x, ptl, ptr, pts, cmp) \ 67 | ptl = array; ptr = array + 2; pts = swap; \ 68 | head_branchless_merge(pts, x, ptl, ptr, cmp); \ 69 | *pts = cmp(ptl, ptr) <= 0 ? *ptl : *ptr; \ 70 | \ 71 | ptl = array + 1; ptr = array + 3; pts = swap + 3; \ 72 | tail_branchless_merge(pts, x, ptl, ptr, cmp); \ 73 | *pts = cmp(ptl, ptr) > 0 ? *ptl : *ptr; 74 | 75 | #define parity_merge_four(array, swap, x, ptl, ptr, pts, cmp) \ 76 | ptl = array + 0; ptr = array + 4; pts = swap; \ 77 | head_branchless_merge(pts, x, ptl, ptr, cmp); \ 78 | head_branchless_merge(pts, x, ptl, ptr, cmp); \ 79 | head_branchless_merge(pts, x, ptl, ptr, cmp); \ 80 | *pts = cmp(ptl, ptr) <= 0 ? *ptl : *ptr; \ 81 | \ 82 | ptl = array + 3; ptr = array + 7; pts = swap + 7; \ 83 | tail_branchless_merge(pts, x, ptl, ptr, cmp); \ 84 | tail_branchless_merge(pts, x, ptl, ptr, cmp); \ 85 | tail_branchless_merge(pts, x, ptl, ptr, cmp); \ 86 | *pts = cmp(ptl, ptr) > 0 ? *ptl : *ptr; 87 | 88 | 89 | #if !defined __clang__ 90 | #define branchless_swap(pta, swap, x, cmp) \ 91 | x = cmp(pta, pta + 1) > 0; \ 92 | swap = pta[!x]; \ 93 | pta[0] = pta[x]; \ 94 | pta[1] = swap; 95 | #else 96 | #define branchless_swap(pta, swap, x, cmp) \ 97 | x = 0; \ 98 | swap = cmp(pta, pta + 1) > 0 ? 
pta[x++] : pta[1]; \ 99 | pta[0] = pta[x]; \ 100 | pta[1] = swap; 101 | #endif 102 | 103 | #define swap_branchless(pta, swap, x, y, cmp) \ 104 | x = cmp(pta, pta + 1) > 0; \ 105 | y = !x; \ 106 | swap = pta[y]; \ 107 | pta[0] = pta[x]; \ 108 | pta[1] = swap; 109 | 110 | ////////////////////////////////////////////////////////// 111 | // ┌───────────────────────────────────────────────────┐// 112 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 113 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 114 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 115 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 116 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 117 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 118 | // └───────────────────────────────────────────────────┘// 119 | ////////////////////////////////////////////////////////// 120 | 121 | #define VAR int 122 | #define FUNC(NAME) NAME##32 123 | 124 | #include "quadsort.c" 125 | 126 | #undef VAR 127 | #undef FUNC 128 | 129 | // quadsort_prim 130 | 131 | #define VAR int 132 | #define FUNC(NAME) NAME##_int32 133 | #ifndef cmp 134 | #define cmp(a,b) (*(a) > *(b)) 135 | #include "quadsort.c" 136 | #undef cmp 137 | #else 138 | #include "quadsort.c" 139 | #endif 140 | #undef VAR 141 | #undef FUNC 142 | 143 | #define VAR unsigned int 144 | #define FUNC(NAME) NAME##_uint32 145 | #ifndef cmp 146 | #define cmp(a,b) (*(a) > *(b)) 147 | #include "quadsort.c" 148 | #undef cmp 149 | #else 150 | #include "quadsort.c" 151 | #endif 152 | #undef VAR 153 | #undef FUNC 154 | 155 | ////////////////////////////////////////////////////////// 156 | // ┌───────────────────────────────────────────────────┐// 157 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 158 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 159 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 160 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 161 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 162 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 163 | // 
└───────────────────────────────────────────────────┘// 164 | ////////////////////////////////////////////////////////// 165 | 166 | #define VAR long long 167 | #define FUNC(NAME) NAME##64 168 | 169 | #include "quadsort.c" 170 | 171 | #undef VAR 172 | #undef FUNC 173 | 174 | // quadsort_prim 175 | 176 | #define VAR long long 177 | #define FUNC(NAME) NAME##_int64 178 | #ifndef cmp 179 | #define cmp(a,b) (*(a) > *(b)) 180 | #include "quadsort.c" 181 | #undef cmp 182 | #else 183 | #include "quadsort.c" 184 | #endif 185 | #undef VAR 186 | #undef FUNC 187 | 188 | #define VAR unsigned long long 189 | #define FUNC(NAME) NAME##_uint64 190 | #ifndef cmp 191 | #define cmp(a,b) (*(a) > *(b)) 192 | #include "quadsort.c" 193 | #undef cmp 194 | #else 195 | #include "quadsort.c" 196 | #endif 197 | #undef VAR 198 | #undef FUNC 199 | 200 | // This section is outside of 32/64 bit pointer territory, so no cache checks 201 | // necessary, unless sorting 32+ byte structures. 202 | 203 | #undef QUAD_CACHE 204 | #define QUAD_CACHE 4294967295 205 | 206 | ////////////////////////////////////////////////////////// 207 | //┌────────────────────────────────────────────────────┐// 208 | //│ █████┐ ██████┐ ██████┐████████┐ │// 209 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 210 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 211 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 212 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 213 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 214 | //└────────────────────────────────────────────────────┘// 215 | ////////////////////////////////////////////////////////// 216 | 217 | #define VAR char 218 | #define FUNC(NAME) NAME##8 219 | 220 | #include "quadsort.c" 221 | 222 | #undef VAR 223 | #undef FUNC 224 | 225 | ////////////////////////////////////////////////////////// 226 | //┌────────────────────────────────────────────────────┐// 227 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 228 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 229 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 230 | //│ ██│ 
██┌──██┐ ██┌──██┐ ██│ ██│ │// 231 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 232 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 233 | //└────────────────────────────────────────────────────┘// 234 | ////////////////////////////////////////////////////////// 235 | 236 | #define VAR short 237 | #define FUNC(NAME) NAME##16 238 | 239 | #include "quadsort.c" 240 | 241 | #undef VAR 242 | #undef FUNC 243 | 244 | ////////////////////////////////////////////////////////// 245 | //┌────────────────────────────────────────────────────┐// 246 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 247 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 248 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │// 249 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 250 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 251 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 252 | //└────────────────────────────────────────────────────┘// 253 | ////////////////////////////////////////////////////////// 254 | 255 | // 128 reflects the name, though the actual size of a long double is 64, 80, 256 | // 96, or 128 bits, depending on platform. 
257 | 258 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 259 | #define VAR long double 260 | #define FUNC(NAME) NAME##128 261 | #include "quadsort.c" 262 | #undef VAR 263 | #undef FUNC 264 | #endif 265 | 266 | /////////////////////////////////////////////////////////// 267 | //┌─────────────────────────────────────────────────────┐// 268 | //│ ██████┐██┐ ██┐███████┐████████┐ ██████┐ ███┐ ███┐│// 269 | //│██┌────┘██│ ██│██┌────┘└──██┌──┘██┌───██┐████┐████││// 270 | //│██│ ██│ ██│███████┐ ██│ ██│ ██│██┌███┌██││// 271 | //│██│ ██│ ██│└────██│ ██│ ██│ ██│██│└█┌┘██││// 272 | //│└██████┐└██████┌┘███████│ ██│ └██████┌┘██│ └┘ ██││// 273 | //│ └─────┘ └─────┘ └──────┘ └─┘ └─────┘ └─┘ └─┘│// 274 | //└─────────────────────────────────────────────────────┘// 275 | /////////////////////////////////////////////////////////// 276 | 277 | /* 278 | typedef struct {char bytes[32];} struct256; 279 | #define VAR struct256 280 | #define FUNC(NAME) NAME##256 281 | 282 | #include "quadsort.c" 283 | 284 | #undef VAR 285 | #undef FUNC 286 | */ 287 | 288 | /////////////////////////////////////////////////////////////////////////////// 289 | //┌─────────────────────────────────────────────────────────────────────────┐// 290 | //│ ██████┐ ██┐ ██┐ █████┐ ██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │// 291 | //│ ██┌───██┐██│ ██│██┌──██┐██┌──██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 292 | //│ ██│ ██│██│ ██│███████│██│ ██│███████┐██│ ██│██████┌┘ ██│ │// 293 | //│ ██│▄▄ ██│██│ ██│██┌──██│██│ ██│└────██│██│ ██│██┌──██┐ ██│ │// 294 | //│ └██████┌┘└██████┌┘██│ ██│██████┌┘███████│└██████┌┘██│ ██│ ██│ │// 295 | //│ └──▀▀─┘ └─────┘ └─┘ └─┘└─────┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 296 | //└─────────────────────────────────────────────────────────────────────────┘// 297 | /////////////////////////////////////////////////////////////////////////////// 298 | 299 | 300 | void quadsort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 301 | { 302 | if (nmemb < 2) 303 | { 304 | return; 305 | } 306 | 307 | switch 
(size) 308 | { 309 | case sizeof(char): 310 | quadsort8(array, nmemb, cmp); 311 | return; 312 | 313 | case sizeof(short): 314 | quadsort16(array, nmemb, cmp); 315 | return; 316 | 317 | case sizeof(int): 318 | quadsort32(array, nmemb, cmp); 319 | return; 320 | 321 | case sizeof(long long): 322 | quadsort64(array, nmemb, cmp); 323 | return; 324 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 325 | case sizeof(long double): 326 | quadsort128(array, nmemb, cmp); 327 | return; 328 | #endif 329 | // case sizeof(struct256): 330 | // quadsort256(array, nmemb, cmp); 331 | // return; 332 | 333 | default: 334 | #if (DBL_MANT_DIG < LDBL_MANT_DIG) 335 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 336 | #else 337 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long)); 338 | #endif 339 | // qsort(array, nmemb, size, cmp); 340 | } 341 | } 342 | 343 | // suggested size values for primitives: 344 | 345 | // case 0: unsigned char 346 | // case 1: signed char 347 | // case 2: signed short 348 | // case 3: unsigned short 349 | // case 4: signed int 350 | // case 5: unsigned int 351 | // case 6: float 352 | // case 7: double 353 | // case 8: signed long long 354 | // case 9: unsigned long long 355 | // case ?: long double, use sizeof(long double): 356 | 357 | void quadsort_prim(void *array, size_t nmemb, size_t size) 358 | { 359 | if (nmemb < 2) 360 | { 361 | return; 362 | } 363 | 364 | switch (size) 365 | { 366 | case 4: 367 | quadsort_int32(array, nmemb, NULL); 368 | return; 369 | case 5: 370 | quadsort_uint32(array, nmemb, NULL); 371 | return; 372 | case 8: 373 | quadsort_int64(array, nmemb, NULL); 374 | return; 375 | case 9: 376 | quadsort_uint64(array, nmemb, NULL); 377 | return; 378 | default: 379 | assert(size == sizeof(int) || size == sizeof(int) + 1 || size == sizeof(long long) || size == sizeof(long long) + 1); 380 | return; 381 | } 382 | } 
383 | 384 | // Sort arrays of structures, the comparison function must be by reference. 385 | 386 | void quadsort_size(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 387 | { 388 | char **pti, *pta, *pts; 389 | size_t index, offset; 390 | 391 | if (nmemb < 2) 392 | { 393 | return; 394 | } 395 | pta = (char *) array; 396 | pti = (char **) malloc(nmemb * sizeof(char *)); 397 | 398 | assert(pti != NULL); 399 | 400 | for (index = offset = 0 ; index < nmemb ; index++) 401 | { 402 | pti[index] = pta + offset; 403 | 404 | offset += size; 405 | } 406 | 407 | switch (sizeof(size_t)) 408 | { 409 | case 4: quadsort32(pti, nmemb, cmp); break; 410 | case 8: quadsort64(pti, nmemb, cmp); break; 411 | } 412 | 413 | pts = (char *) malloc(nmemb * size); 414 | 415 | assert(pts != NULL); 416 | 417 | for (index = 0 ; index < nmemb ; index++) 418 | { 419 | memcpy(pts, pti[index], size); 420 | 421 | pts += size; 422 | } 423 | pts -= nmemb * size; 424 | 425 | memcpy(array, pts, nmemb * size); 426 | 427 | free(pti); 428 | free(pts); 429 | } 430 | 431 | #undef QUAD_CACHE 432 | 433 | #endif 434 | -------------------------------------------------------------------------------- /src/skipsort.c: -------------------------------------------------------------------------------- 1 | // skipsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | void FUNC(skip_partition)(VAR *array, VAR *swap, VAR *ptx, VAR *ptp, size_t nmemb, CMPFUNC *cmp); 4 | 5 | // Similar to quadsort, but detect both random and reverse order runs 6 | 7 | int FUNC(skip_analyze)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 8 | { 9 | size_t count, span; 10 | VAR *pta, *pts; 11 | unsigned char v1, v2, v3, v4, x; 12 | pta = array; 13 | 14 | count = nmemb / 8; 15 | 16 | while (count--) 17 | { 18 | // granular 19 | 20 | v1 = cmp(pta + 0, pta + 1) > 0; 21 | v2 = cmp(pta + 2, pta + 3) > 0; 22 | v3 = cmp(pta + 4, pta + 5) > 0; 23 | v4 = cmp(pta + 6, pta + 7) > 0; 24 | 25 | switch (v1 + v2 * 2 + v3 * 4 + 
v4 * 8) 26 | { 27 | case 0: 28 | if (cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 5, pta + 6) <= 0) 29 | { 30 | goto ordered; 31 | } 32 | pts = pta; 33 | goto random; 34 | 35 | case 15: 36 | if (cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 37 | { 38 | pts = pta; 39 | goto reversed; 40 | } 41 | 42 | default: 43 | pts = pta; 44 | goto random; 45 | } 46 | 47 | random: // random 48 | 49 | pta += 8; 50 | 51 | if (count--) 52 | { 53 | v1 = cmp(pta + 0, pta + 1) > 0; 54 | v2 = cmp(pta + 2, pta + 3) > 0; 55 | v3 = cmp(pta + 4, pta + 5) > 0; 56 | v4 = cmp(pta + 6, pta + 7) > 0; 57 | 58 | switch (v1 + v2 * 2 + v3 * 4 + v4 * 8) 59 | { 60 | case 0: 61 | if (cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 5, pta + 6) <= 0) 62 | { 63 | if (count) 64 | { 65 | pta += 8; 66 | if (cmp(pta + 0, pta + 1) <= 0 && cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 2, pta + 3) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 4, pta + 5) <= 0 && cmp(pta + 5, pta + 6) <= 0 && cmp(pta + 6, pta + 7) <= 0) 67 | { 68 | pta -= 8; 69 | break; 70 | } 71 | count--; 72 | } 73 | } 74 | goto randomc; 75 | 76 | case 15: 77 | if (cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 78 | { 79 | break; 80 | } 81 | 82 | default: 83 | randomc: 84 | if (count >= 6) 85 | { 86 | count -= 6; 87 | pta += 48; 88 | } 89 | goto random; 90 | } 91 | span = (pta - pts); 92 | 93 | if (span <= 96) 94 | { 95 | FUNC(tail_swap)(pts, swap, span, cmp); 96 | } 97 | else 98 | { 99 | FUNC(flux_partition)(pts, swap, pts, swap + span, span, cmp); 100 | } 101 | 102 | if (v1 | v2 | v3 | v4) 103 | { 104 | pts = pta; 105 | goto reversed; 106 | } 107 | pta += 8; 108 | count--; 109 | goto ordered; 110 | } 111 | span = (pta - pts); 112 | 113 | if (span <= 96) 114 | { 115 | FUNC(tail_swap)(pts, swap, span, cmp); 116 | break; 117 | } 118 | if (pts == array) 119 | { 120 | FUNC(flux_partition)(array, swap, pts, swap + nmemb, nmemb, 
cmp); 121 | return 1; 122 | } 123 | FUNC(flux_partition)(pts, swap, pts, swap + span, span, cmp); 124 | break; 125 | 126 | ordered: // ordered 127 | 128 | pta += 8; 129 | 130 | if (count--) 131 | { 132 | if ((v1 = cmp(pta + 0, pta + 1) > 0) | (v2 = cmp(pta + 2, pta + 3) > 0) | (v3 = cmp(pta + 4, pta + 5) > 0) | (v4 = cmp(pta + 6, pta + 7) > 0)) 133 | { 134 | pts = pta; 135 | goto random; 136 | } 137 | if (cmp(pta + 1, pta + 2) <= 0 && cmp(pta + 3, pta + 4) <= 0 && cmp(pta + 5, pta + 6) <= 0) 138 | { 139 | goto ordered; 140 | } 141 | FUNC(quad_swap_merge)(pta, swap, cmp); 142 | pta += 8; 143 | continue; 144 | } 145 | break; 146 | 147 | reversed: // reversed 148 | 149 | pta += 8; 150 | 151 | if (count--) 152 | { 153 | if ((v1 = cmp(pta + 0, pta + 1) <= 0) | (v2 = cmp(pta + 2, pta + 3) <= 0) | (v3 = cmp(pta + 4, pta + 5) <= 0) | (v4 = cmp(pta + 6, pta + 7) <= 0)) 154 | { 155 | not_reversed: 156 | 157 | x = !v1; swap[0] = pta[v1]; pta[0] = pta[x]; pta[1] = swap[0]; pta += 2; 158 | x = !v2; swap[0] = pta[v2]; pta[0] = pta[x]; pta[1] = swap[0]; pta += 2; 159 | x = !v3; swap[0] = pta[v3]; pta[0] = pta[x]; pta[1] = swap[0]; pta += 2; 160 | x = !v4; swap[0] = pta[v4]; pta[0] = pta[x]; pta[1] = swap[0]; pta -= 6; 161 | 162 | if (cmp(pta + 1, pta + 2) > 0 || cmp(pta + 3, pta + 4) > 0 || cmp(pta + 5, pta + 6) > 0) 163 | { 164 | FUNC(quad_swap_merge)(pta, swap, cmp); 165 | } 166 | } 167 | else 168 | { 169 | if (cmp(pta - 1, pta) > 0 && cmp(pta + 1, pta + 2) > 0 && cmp(pta + 3, pta + 4) > 0 && cmp(pta + 5, pta + 6) > 0) 170 | { 171 | goto reversed; 172 | } 173 | goto not_reversed; 174 | } 175 | FUNC(quad_reversal)(pts, pta - 1); 176 | pta += 8; 177 | continue; 178 | } 179 | 180 | switch (nmemb % 8) 181 | { 182 | case 7: if (cmp(pta + 5, pta + 6) <= 0) break; 183 | case 6: if (cmp(pta + 4, pta + 5) <= 0) break; 184 | case 5: if (cmp(pta + 3, pta + 4) <= 0) break; 185 | case 4: if (cmp(pta + 2, pta + 3) <= 0) break; 186 | case 3: if (cmp(pta + 1, pta + 2) <= 0) break; 187 | case 
2: if (cmp(pta + 0, pta + 1) <= 0) break; 188 | case 1: if (cmp(pta - 1, pta + 0) <= 0) break; 189 | case 0: 190 | FUNC(quad_reversal)(pts, pta + nmemb % 8 - 1); 191 | 192 | if (pts == array) 193 | { 194 | return 1; 195 | } 196 | goto reverse_end; 197 | } 198 | FUNC(quad_reversal)(pts, pta - 1); 199 | break; 200 | } 201 | FUNC(tail_swap)(pta, swap, nmemb % 8, cmp); 202 | 203 | reverse_end: 204 | 205 | pta = array; 206 | 207 | for (count = nmemb / 32 ; count-- ; pta += 32) 208 | { 209 | if (cmp(pta + 7, pta + 8) <= 0 && cmp(pta + 15, pta + 16) <= 0 && cmp(pta + 23, pta + 24) <= 0) 210 | { 211 | continue; 212 | } 213 | FUNC(parity_merge)(swap, pta, 8, 8, cmp); 214 | FUNC(parity_merge)(swap + 16, pta + 16, 8, 8, cmp); 215 | FUNC(parity_merge)(pta, swap, 16, 16, cmp); 216 | } 217 | 218 | if (nmemb % 32 > 8) 219 | { 220 | FUNC(tail_merge)(pta, swap, 32, nmemb % 32, 8, cmp); 221 | } 222 | return 0; 223 | } 224 | 225 | void FUNC(skipsort)(void *array, size_t nmemb, CMPFUNC *cmp) 226 | { 227 | VAR *pta = (VAR *) array; 228 | 229 | if (nmemb <= 96) 230 | { 231 | VAR swap[nmemb]; 232 | 233 | FUNC(tail_swap)(pta, swap, nmemb, cmp); 234 | } 235 | else 236 | { 237 | VAR *swap = (VAR *) malloc(nmemb * sizeof(VAR)); 238 | 239 | if (swap == NULL) 240 | { 241 | FUNC(quadsort)(pta, nmemb, cmp); 242 | return; 243 | } 244 | if (FUNC(skip_analyze)(pta, swap, nmemb, nmemb, cmp) == 0) 245 | { 246 | FUNC(quad_merge)(pta, swap, nmemb, nmemb, 32, cmp); 247 | } 248 | free(swap); 249 | } 250 | } 251 | 252 | void FUNC(skipsort_swap)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 253 | { 254 | if (nmemb <= 96) 255 | { 256 | FUNC(tail_swap)(array, swap, nmemb, cmp); 257 | } 258 | else if (swap_size < nmemb) 259 | { 260 | FUNC(quadsort_swap)(array, swap, swap_size, nmemb, cmp); 261 | } 262 | else 263 | { 264 | FUNC(skip_analyze)(array, swap, swap_size, nmemb, cmp); 265 | } 266 | } 267 | -------------------------------------------------------------------------------- 
/src/skipsort.h: -------------------------------------------------------------------------------- 1 | // skipsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #ifndef SKIPSORT_H 4 | #define SKIPSORT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | typedef int CMPFUNC (const void *a, const void *b); 12 | 13 | //#define cmp(a,b) (*(a) > *(b)) 14 | 15 | #ifndef QUADSORT_H 16 | #include "quadsort.h" 17 | #endif 18 | #ifndef FLUXSORT_H 19 | #include "fluxsort.h" 20 | #endif 21 | 22 | // When sorting an array of pointers, like a string array, QUAD_CACHE needs to 23 | // be adjusted in quadsort.h for proper performance when sorting large arrays. 24 | 25 | 26 | ////////////////////////////////////////////////////////// 27 | //┌────────────────────────────────────────────────────┐// 28 | //│ █████┐ ██████┐ ██████┐████████┐ │// 29 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 30 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 31 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 32 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 33 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 34 | //└────────────────────────────────────────────────────┘// 35 | ////////////////////////////////////////////////////////// 36 | 37 | #define VAR char 38 | #define FUNC(NAME) NAME##8 39 | 40 | #include "skipsort.c" 41 | 42 | #undef VAR 43 | #undef FUNC 44 | 45 | ////////////////////////////////////////////////////////// 46 | //┌────────────────────────────────────────────────────┐// 47 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 48 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 49 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 50 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 51 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 52 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 53 | //└────────────────────────────────────────────────────┘// 54 | ////////////////////////////////////////////////////////// 55 | 56 | #define VAR short 57 | #define FUNC(NAME) NAME##16 58 | 59 | #include "skipsort.c" 60 | 61 | #undef VAR 62 | #undef FUNC 
63 | 64 | ////////////////////////////////////////////////////////// 65 | // ┌───────────────────────────────────────────────────┐// 66 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 67 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 68 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 69 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 70 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 71 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 72 | // └───────────────────────────────────────────────────┘// 73 | ////////////////////////////////////////////////////////// 74 | 75 | #define VAR int 76 | #define FUNC(NAME) NAME##32 77 | 78 | #include "skipsort.c" 79 | 80 | #undef VAR 81 | #undef FUNC 82 | 83 | #ifndef cmp 84 | #define cmp(a,b) (*(a) > *(b)) 85 | 86 | #define VAR int 87 | #define FUNC(NAME) NAME##_int32 88 | 89 | #include "skipsort.c" 90 | 91 | #undef VAR 92 | #undef FUNC 93 | 94 | #undef cmp 95 | #endif 96 | 97 | ////////////////////////////////////////////////////////// 98 | // ┌───────────────────────────────────────────────────┐// 99 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 100 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 101 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 102 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 103 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 104 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 105 | // └───────────────────────────────────────────────────┘// 106 | ////////////////////////////////////////////////////////// 107 | 108 | #define VAR long long 109 | #define FUNC(NAME) NAME##64 110 | 111 | #include "skipsort.c" 112 | 113 | #undef VAR 114 | #undef FUNC 115 | 116 | #ifndef cmp 117 | #define cmp(a,b) (*(a) > *(b)) 118 | 119 | #define VAR long long 120 | #define FUNC(NAME) NAME##_int64 121 | 122 | #include "skipsort.c" 123 | 124 | #undef VAR 125 | #undef FUNC 126 | 127 | #undef cmp 128 | #endif 129 | 130 | ////////////////////////////////////////////////////////// 131 | 
//┌────────────────────────────────────────────────────┐// 132 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 133 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 134 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │// 135 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 136 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 137 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 138 | //└────────────────────────────────────────────────────┘// 139 | ////////////////////////////////////////////////////////// 140 | 141 | #define VAR long double 142 | #define FUNC(NAME) NAME##128 143 | 144 | #include "skipsort.c" 145 | 146 | #undef VAR 147 | #undef FUNC 148 | 149 | //////////////////////////////////////////////////////////////////////// 150 | //┌──────────────────────────────────────────────────────────────────┐// 151 | //│███████┐██┐ ██┐██████┐██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │// 152 | //│██┌────┘██│ ██┌┘└─██┌─┘██┌──██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 153 | //│███████┐█████┌┘ ██│ ██████┌┘███████┐██│ ██│██████┌┘ ██│ │// 154 | //│└────██│██┌─██┐ ██│ ██┌───┘ └────██│██│ ██│██┌──██┐ ██│ │// 155 | //│███████│██│ ██┐██████┐██│ ███████│└██████┌┘██│ ██│ ██│ │// 156 | //│└──────┘└─┘ └─┘└─────┘└─┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 157 | //└──────────────────────────────────────────────────────────────────┘// 158 | //////////////////////////////////////////////////////////////////////// 159 | 160 | void skipsort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 161 | { 162 | if (nmemb < 2) 163 | { 164 | return; 165 | } 166 | #ifndef cmp 167 | if (cmp == NULL) 168 | { 169 | switch (size) 170 | { 171 | case sizeof(int): 172 | return skipsort_int32(array, nmemb, cmp); 173 | case sizeof(long long): 174 | return skipsort_int64(array, nmemb, cmp); 175 | } 176 | return assert(size == sizeof(int)); 177 | } 178 | #endif 179 | 180 | switch (size) 181 | { 182 | case sizeof(char): 183 | return skipsort8(array, nmemb, cmp); 184 | 185 | case sizeof(short): 
186 | return skipsort16(array, nmemb, cmp); 187 | 188 | case sizeof(int): 189 | return skipsort32(array, nmemb, cmp); 190 | 191 | case sizeof(long long): 192 | return skipsort64(array, nmemb, cmp); 193 | 194 | case sizeof(long double): 195 | return skipsort128(array, nmemb, cmp); 196 | 197 | default: 198 | return assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 199 | } 200 | } 201 | 202 | #endif 203 | -------------------------------------------------------------------------------- /src/wolfsort.c: -------------------------------------------------------------------------------- 1 | // wolfsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | //#define GODMODE 4 | 5 | #ifdef GODMODE // inspired by rhsort, technically unstable. 6 | 7 | void FUNC(unstable_count)(VAR *array, size_t nmemb, size_t buckets, VAR min, CMPFUNC *cmp) 8 | { 9 | VAR *pta; 10 | size_t index; 11 | size_t *count = (size_t *) calloc(sizeof(size_t), buckets), loop; 12 | 13 | pta = array; 14 | 15 | for (index = nmemb / 16 ; index ; index--) 16 | { 17 | for (loop = 16 ; loop ; loop--) 18 | { 19 | count[*pta++ - min]++; 20 | } 21 | } 22 | 23 | for (index = nmemb % 16 ; index ; index--) 24 | { 25 | count[*pta++ - min]++; 26 | } 27 | 28 | pta = array; 29 | 30 | for (index = 0 ; index < buckets ; index++) 31 | { 32 | for (loop = count[index] ; loop ; loop--) 33 | { 34 | *pta++ = index + min; 35 | } 36 | } 37 | 38 | free(count); 39 | 40 | return; 41 | } 42 | #endif 43 | 44 | inline void FUNC(wolf_unguarded_insert)(VAR *array, size_t offset, size_t nmemb, CMPFUNC *cmp) 45 | { 46 | VAR key, *pta, *end; 47 | size_t i, top, x, y; 48 | 49 | for (i = offset ; i < nmemb ; i++) 50 | { 51 | pta = end = array + i; 52 | 53 | if (cmp(--pta, end) <= 0) 54 | { 55 | continue; 56 | } 57 | 58 | key = *end; 59 | 60 | if (cmp(array + 1, &key) > 0) 61 | { 62 | top = i - 1; 63 | 64 | do 65 | { 66 | *end-- = *pta--; 67 | } 68 | while 
(--top); 69 | 70 | *end-- = key; 71 | } 72 | else 73 | { 74 | do 75 | { 76 | *end-- = *pta--; 77 | *end-- = *pta--; 78 | } 79 | while (cmp(pta, &key) > 0); 80 | 81 | end[0] = end[1]; 82 | end[1] = key; 83 | } 84 | x = cmp(end, end + 1) > 0; y = !x; key = end[y]; end[0] = end[x]; end[1] = key; 85 | } 86 | } 87 | 88 | void FUNC(wolfsort_swap)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp); 89 | 90 | void FUNC(wolf_partition)(VAR *array, VAR *aux, size_t aux_size, size_t nmemb, VAR min, VAR max, CMPFUNC *cmp) 91 | { 92 | VAR *swap, *pta, *pts, *ptd, range, moduler; 93 | size_t index, cnt, loop, dmemb, buckets; 94 | unsigned int *count, limit; 95 | 96 | if (nmemb < 32) 97 | { 98 | return FUNC(quadsort)(array, nmemb, cmp); 99 | } 100 | 101 | range = max - min; 102 | 103 | if (range >> 16 == 0 || (size_t) range <= nmemb / 4) 104 | { 105 | buckets = range + 1; 106 | moduler = 1; 107 | } 108 | else 109 | { 110 | buckets = nmemb <= 4 * 65536 ? nmemb / 4 : 1024; 111 | 112 | for (moduler = 4 ; (size_t) moduler <= range / buckets ; moduler *= 2) {} 113 | 114 | buckets = range / moduler + 1; 115 | } 116 | 117 | limit = (nmemb / buckets) * 4; 118 | 119 | count = (unsigned int *) calloc(sizeof(int), buckets); 120 | 121 | swap = aux; 122 | 123 | if (limit * buckets > aux_size) 124 | { 125 | swap = (VAR *) malloc(limit * buckets * sizeof(VAR)); 126 | } 127 | 128 | if (count == NULL || swap == NULL) 129 | { 130 | if (count) 131 | { 132 | free(count); 133 | } 134 | FUNC(fluxsort_swap)(array, aux, aux_size, nmemb, cmp); 135 | return; 136 | } 137 | 138 | ptd = pta = array; 139 | 140 | for (loop = nmemb ; loop ; loop--) 141 | { 142 | max = *pta++; 143 | 144 | index = (unsigned int) (max - min) / moduler; 145 | 146 | if (count[index] < limit) 147 | { 148 | swap[index * limit + count[index]++] = max; 149 | continue; 150 | } 151 | // The element doesn't fit, so we drop it to the main array. Inspired by rhsort. 
152 | *ptd++ = max; 153 | } 154 | 155 | dmemb = ptd - array; 156 | 157 | if (dmemb) 158 | { 159 | ptd = array + nmemb - dmemb; 160 | 161 | memmove(ptd, array, dmemb * sizeof(VAR)); 162 | } 163 | pta = array; 164 | pts = swap; 165 | 166 | for (index = 0 ; index < buckets ; index++) 167 | { 168 | cnt = count[index]; 169 | 170 | if (cnt) 171 | { 172 | memcpy(pta, pts, cnt * sizeof(VAR)); 173 | 174 | if (moduler > 1) 175 | { 176 | FUNC(wolfsort_swap)(pta, swap, limit + pts - swap, cnt, cmp); 177 | } 178 | pta += cnt; 179 | } 180 | pts += limit; 181 | } 182 | 183 | if (dmemb) 184 | { 185 | FUNC(fluxsort_swap)(ptd, swap, dmemb, dmemb, cmp); 186 | 187 | FUNC(partial_backward_merge)(array, swap, nmemb, nmemb, nmemb - dmemb, cmp); 188 | } 189 | if (limit * buckets > aux_size) 190 | { 191 | free(swap); 192 | } 193 | free(count); 194 | } 195 | 196 | void FUNC(wolf_minmax)(VAR *min, VAR *max, VAR *pta, VAR *ptb, VAR *ptc, VAR *ptd, CMPFUNC *cmp) 197 | { 198 | if (cmp(min, pta) > 0) *min = *pta; 199 | if (cmp(pta, max) > 0) *max = *pta; 200 | if (cmp(min, ptb) > 0) *min = *ptb; 201 | if (cmp(ptb, max) > 0) *max = *ptb; 202 | if (cmp(min, ptc) > 0) *min = *ptc; 203 | if (cmp(ptc, max) > 0) *max = *ptc; 204 | if (cmp(min, ptd) > 0) *min = *ptd; 205 | if (cmp(ptd, max) > 0) *max = *ptd; 206 | } 207 | 208 | void FUNC(wolf_analyze)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 209 | { 210 | unsigned char loop, asum, bsum, csum, dsum; 211 | unsigned int astreaks, bstreaks, cstreaks, dstreaks; 212 | size_t quad1, quad2, quad3, quad4, half1, half2; 213 | size_t cnt, abalance, bbalance, cbalance, dbalance; 214 | VAR min, max, *pta, *ptb, *ptc, *ptd; 215 | 216 | half1 = nmemb / 2; 217 | quad1 = half1 / 2; 218 | quad2 = half1 - quad1; 219 | half2 = nmemb - half1; 220 | quad3 = half2 / 2; 221 | quad4 = half2 - quad3; 222 | 223 | min = max = array[nmemb - 1]; 224 | 225 | pta = array; 226 | ptb = array + quad1; 227 | ptc = array + half1; 228 | ptd = array + half1 + 
quad3; 229 | 230 | astreaks = bstreaks = cstreaks = dstreaks = 0; 231 | abalance = bbalance = cbalance = dbalance = 0; 232 | 233 | for (cnt = nmemb ; cnt > 132 ; cnt -= 128) 234 | { 235 | for (asum = bsum = csum = dsum = 0, loop = 32 ; loop ; loop--) 236 | { 237 | FUNC(wolf_minmax)(&min, &max, pta, ptb, ptc, ptd, cmp); 238 | 239 | asum += cmp(pta, pta + 1) > 0; pta++; 240 | bsum += cmp(ptb, ptb + 1) > 0; ptb++; 241 | csum += cmp(ptc, ptc + 1) > 0; ptc++; 242 | dsum += cmp(ptd, ptd + 1) > 0; ptd++; 243 | } 244 | abalance += asum; astreaks += (asum == 0) | (asum == 32); 245 | bbalance += bsum; bstreaks += (bsum == 0) | (bsum == 32); 246 | cbalance += csum; cstreaks += (csum == 0) | (csum == 32); 247 | dbalance += dsum; dstreaks += (dsum == 0) | (dsum == 32); 248 | } 249 | 250 | for ( ; cnt > 7 ; cnt -= 4) 251 | { 252 | FUNC(wolf_minmax)(&min, &max, pta, ptb, ptc, ptd, cmp); 253 | 254 | abalance += cmp(pta, pta + 1) > 0; pta++; 255 | bbalance += cmp(ptb, ptb + 1) > 0; ptb++; 256 | cbalance += cmp(ptc, ptc + 1) > 0; ptc++; 257 | dbalance += cmp(ptd, ptd + 1) > 0; ptd++; 258 | } 259 | 260 | if (quad1 < quad2) 261 | { 262 | if (cmp(&min, ptb) > 0) min = *ptb; else if (cmp(ptb, &max) > 0) max = *ptb; 263 | bbalance += cmp(ptb, ptb + 1) > 0; ptb++; 264 | } 265 | if (quad1 < quad3) 266 | { 267 | if (cmp(&min, ptc) > 0) min = *ptc; else if (cmp(ptc, &max) > 0) max = *ptc; 268 | cbalance += cmp(ptc, ptc + 1) > 0; ptc++; 269 | } 270 | if (quad1 < quad4) 271 | { 272 | if (cmp(&min, ptd) > 0) min = *ptd; else if (cmp(ptd, &max) > 0) max = *ptd; 273 | dbalance += cmp(ptd, ptd + 1) > 0; ptd++; 274 | } 275 | FUNC(wolf_minmax)(&min, &max, pta, ptb, ptc, ptd, cmp); 276 | 277 | cnt = abalance + bbalance + cbalance + dbalance; 278 | 279 | if (cnt == 0) 280 | { 281 | if (cmp(pta, pta + 1) <= 0 && cmp(ptb, ptb + 1) <= 0 && cmp(ptc, ptc + 1) <= 0) 282 | { 283 | return; 284 | } 285 | } 286 | 287 | #ifdef GODMODE 288 | { 289 | VAR range = max - min; 290 | 291 | if (range < 65536 || range <= 
nmemb / 4) 292 | { 293 | FUNC(unstable_count)(array, nmemb, range + 1, min, cmp); 294 | return; 295 | } 296 | } 297 | #endif 298 | 299 | asum = quad1 - abalance == 1; 300 | bsum = quad2 - bbalance == 1; 301 | csum = quad3 - cbalance == 1; 302 | dsum = quad4 - dbalance == 1; 303 | 304 | if (asum | bsum | csum | dsum) 305 | { 306 | unsigned char span1 = (asum && bsum) * (cmp(pta, pta + 1) > 0); 307 | unsigned char span2 = (bsum && csum) * (cmp(ptb, ptb + 1) > 0); 308 | unsigned char span3 = (csum && dsum) * (cmp(ptc, ptc + 1) > 0); 309 | 310 | switch (span1 | span2 * 2 | span3 * 4) 311 | { 312 | case 0: break; 313 | case 1: FUNC(quad_reversal)(array, ptb); abalance = bbalance = 0; break; 314 | case 2: FUNC(quad_reversal)(pta + 1, ptc); bbalance = cbalance = 0; break; 315 | case 3: FUNC(quad_reversal)(array, ptc); abalance = bbalance = cbalance = 0; break; 316 | case 4: FUNC(quad_reversal)(ptb + 1, ptd); cbalance = dbalance = 0; break; 317 | case 5: FUNC(quad_reversal)(array, ptb); 318 | FUNC(quad_reversal)(ptb + 1, ptd); abalance = bbalance = cbalance = dbalance = 0; break; 319 | case 6: FUNC(quad_reversal)(pta + 1, ptd); bbalance = cbalance = dbalance = 0; break; 320 | case 7: FUNC(quad_reversal)(array, ptd); return; 321 | } 322 | 323 | if (asum && abalance) {FUNC(quad_reversal)(array, pta); abalance = 0;} 324 | if (bsum && bbalance) {FUNC(quad_reversal)(pta + 1, ptb); bbalance = 0;} 325 | if (csum && cbalance) {FUNC(quad_reversal)(ptb + 1, ptc); cbalance = 0;} 326 | if (dsum && dbalance) {FUNC(quad_reversal)(ptc + 1, ptd); dbalance = 0;} 327 | } 328 | 329 | #ifdef cmp 330 | cnt = nmemb / 256; // switch to quadsort if more than 50% ordered 331 | #else 332 | cnt = nmemb / 512; // switch to quadsort if more than 25% ordered 333 | #endif 334 | asum = astreaks > cnt; 335 | bsum = bstreaks > cnt; 336 | csum = cstreaks > cnt; 337 | dsum = dstreaks > cnt; 338 | 339 | #ifndef cmp 340 | if (quad1 > QUAD_CACHE) 341 | { 342 | asum = bsum = csum = dsum = 1; 343 | } 344 | #endif 
345 | switch (asum + bsum * 2 + csum * 4 + dsum * 8) 346 | { 347 | case 0: 348 | FUNC(wolf_partition)(array, swap, swap_size, nmemb, min, max, cmp); 349 | return; 350 | case 1: 351 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 352 | FUNC(wolf_partition)(pta + 1, swap, swap_size, quad2 + half2, min, max, cmp); 353 | break; 354 | case 2: 355 | FUNC(wolf_partition)(array, swap, swap_size, quad1, min, max, cmp); 356 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 357 | FUNC(wolf_partition)(ptb + 1, swap, swap_size, half2, min, max, cmp); 358 | break; 359 | case 3: 360 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 361 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 362 | FUNC(wolf_partition)(ptb + 1, swap, swap_size, half2, min, max, cmp); 363 | break; 364 | case 4: 365 | FUNC(wolf_partition)(array, swap, swap_size, half1, min, max, cmp); 366 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 367 | FUNC(wolf_partition)(ptc + 1, swap, swap_size, quad4, min, max, cmp); 368 | break; 369 | case 8: 370 | FUNC(wolf_partition)(array, swap, swap_size, half1 + quad3, min, max, cmp); 371 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 372 | break; 373 | case 9: 374 | if (abalance) FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 375 | FUNC(wolf_partition)(pta + 1, swap, swap_size, quad2 + quad3, min, max, cmp); 376 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 377 | break; 378 | case 12: 379 | FUNC(wolf_partition)(array, swap, swap_size, half1, min, max, cmp); 380 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 381 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 382 | break; 383 | case 5: 384 | case 6: 385 | case 7: 386 | case 10: 387 | case 11: 388 | case 13: 389 | case 14: 390 | case 15: 391 | if (asum) 392 | { 393 | if (abalance) 
FUNC(quadsort_swap)(array, swap, swap_size, quad1, cmp); 394 | } 395 | else FUNC(wolf_partition)(array, swap, swap_size, quad1, min, max, cmp); 396 | if (bsum) 397 | { 398 | if (bbalance) FUNC(quadsort_swap)(pta + 1, swap, swap_size, quad2, cmp); 399 | } 400 | else FUNC(wolf_partition)(pta + 1, swap, swap_size, quad2, min, max, cmp); 401 | if (csum) 402 | { 403 | if (cbalance) FUNC(quadsort_swap)(ptb + 1, swap, swap_size, quad3, cmp); 404 | } 405 | else FUNC(wolf_partition)(ptb + 1, swap, swap_size, quad3, min, max, cmp); 406 | if (dsum) 407 | { 408 | if (dbalance) FUNC(quadsort_swap)(ptc + 1, swap, swap_size, quad4, cmp); 409 | } 410 | else FUNC(wolf_partition)(ptc + 1, swap, swap_size, quad4, min, max, cmp); 411 | break; 412 | } 413 | 414 | if (cmp(pta, pta + 1) <= 0) 415 | { 416 | memcpy(swap, array, half1 * sizeof(VAR)); 417 | 418 | if (cmp(ptc, ptc + 1) <= 0) 419 | { 420 | if (cmp(ptb, ptb + 1) <= 0) 421 | { 422 | return; 423 | } 424 | memcpy(swap + half1, array + half1, half2 * sizeof(VAR)); 425 | } 426 | else 427 | { 428 | FUNC(cross_merge)(swap + half1, array + half1, quad3, quad4, cmp); 429 | } 430 | } 431 | else 432 | { 433 | FUNC(cross_merge)(swap, array, quad1, quad2, cmp); 434 | 435 | if (cmp(ptc, ptc + 1) <= 0) 436 | { 437 | memcpy(swap + half1, array + half1, half2 * sizeof(VAR)); 438 | } 439 | else 440 | { 441 | FUNC(cross_merge)(swap + half1, ptb + 1, quad3, quad4, cmp); 442 | } 443 | } 444 | FUNC(cross_merge)(array, swap, half1, half2, cmp); 445 | } 446 | 447 | void FUNC(wolfsort)(void *array, size_t nmemb, CMPFUNC *cmp) 448 | { 449 | VAR *pta = (VAR *) array; 450 | 451 | if (nmemb <= 132) 452 | { 453 | FUNC(quadsort)(pta, nmemb, cmp); 454 | } 455 | else 456 | { 457 | VAR *swap = (VAR *) malloc(nmemb * sizeof(VAR)); 458 | 459 | if (swap == NULL) 460 | { 461 | FUNC(quadsort)(pta, nmemb, cmp); 462 | return; 463 | } 464 | 465 | FUNC(wolf_analyze)(pta, swap, nmemb, nmemb, cmp); 466 | 467 | free(swap); 468 | } 469 | } 470 | 471 | void 
FUNC(wolfsort_swap)(VAR *array, VAR *swap, size_t swap_size, size_t nmemb, CMPFUNC *cmp) 472 | { 473 | if (nmemb <= 132) 474 | { 475 | FUNC(quadsort_swap)(array, swap, nmemb, nmemb, cmp); 476 | } 477 | else 478 | { 479 | FUNC(wolf_analyze)(array, swap, swap_size, nmemb, cmp); 480 | } 481 | } 482 | -------------------------------------------------------------------------------- /src/wolfsort.h: -------------------------------------------------------------------------------- 1 | // wolfsort 1.2.1.3 - Igor van den Hoven ivdhoven@gmail.com 2 | 3 | #ifndef WOLFSORT_H 4 | #define WOLFSORT_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | typedef int CMPFUNC (const void *a, const void *b); 13 | 14 | //#define cmp(a,b) (*(a) > *(b)) 15 | 16 | // When sorting an array of pointers, like a string array, the QUAD_CACHE needs 17 | // to be set for proper performance when sorting large arrays. 18 | // wolfsort_prim() can be used to sort 32 and 64 bit primitives. 19 | 20 | // With a 6 MB L3 cache a value of 262144 works well. 
21 | 22 | #ifdef cmp 23 | #define QUAD_CACHE 4294967295 24 | #else 25 | //#define QUAD_CACHE 131072 26 | #define QUAD_CACHE 262144 27 | //#define QUAD_CACHE 524288 28 | //#define QUAD_CACHE 4294967295 29 | #endif 30 | 31 | #ifndef FLUXSORT_H 32 | #include "fluxsort.h" 33 | #endif 34 | 35 | ////////////////////////////////////////////////////////// 36 | // ┌───────────────────────────────────────────────────┐// 37 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 38 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 39 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 40 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 41 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 42 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 43 | // └───────────────────────────────────────────────────┘// 44 | ////////////////////////////////////////////////////////// 45 | /* 46 | #define VAR int 47 | #define FUNC(NAME) NAME##32 48 | 49 | #include "wolfsort.c" 50 | 51 | #undef VAR 52 | #undef FUNC 53 | */ 54 | // wolfsort_prim 55 | 56 | #define VAR int 57 | #define FUNC(NAME) NAME##_int32 58 | #ifndef cmp 59 | #define cmp(a,b) (*(a) > *(b)) 60 | #include "wolfsort.c" 61 | #undef cmp 62 | #else 63 | #include "wolfsort.c" 64 | #endif 65 | #undef VAR 66 | #undef FUNC 67 | 68 | #define VAR unsigned int 69 | #define FUNC(NAME) NAME##_uint32 70 | #ifndef cmp 71 | #define cmp(a,b) (*(a) > *(b)) 72 | #include "wolfsort.c" 73 | #undef cmp 74 | #else 75 | #include "wolfsort.c" 76 | #endif 77 | #undef VAR 78 | #undef FUNC 79 | 80 | ////////////////////////////////////////////////////////// 81 | // ┌───────────────────────────────────────────────────┐// 82 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 83 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 84 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 85 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 86 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 87 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 88 | // └───────────────────────────────────────────────────┘// 
89 | ////////////////////////////////////////////////////////// 90 | /* 91 | #define VAR long long 92 | #define FUNC(NAME) NAME##64 93 | 94 | #include "wolfsort.c" 95 | 96 | #undef VAR 97 | #undef FUNC 98 | */ 99 | // wolfsort_prim 100 | 101 | #define VAR long long 102 | #define FUNC(NAME) NAME##_int64 103 | #ifndef cmp 104 | #define cmp(a,b) (*(a) > *(b)) 105 | #include "wolfsort.c" 106 | #undef cmp 107 | #else 108 | #include "wolfsort.c" 109 | #endif 110 | #undef VAR 111 | #undef FUNC 112 | 113 | #define VAR unsigned long long 114 | #define FUNC(NAME) NAME##_uint64 115 | #ifndef cmp 116 | #define cmp(a,b) (*(a) > *(b)) 117 | #include "wolfsort.c" 118 | #undef cmp 119 | #else 120 | #include "wolfsort.c" 121 | #endif 122 | #undef VAR 123 | #undef FUNC 124 | 125 | // This section is outside of 32/64 bit pointer territory, so no cache checks 126 | // necessary, unless sorting 32+ byte structures. 127 | 128 | #undef QUAD_CACHE 129 | #define QUAD_CACHE 4294967295 130 | 131 | ////////////////////////////////////////////////////////// 132 | //┌────────────────────────────────────────────────────┐// 133 | //│ █████┐ ██████┐ ██████┐████████┐ │// 134 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 135 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 136 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 137 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 138 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 139 | //└────────────────────────────────────────────────────┘// 140 | ////////////////////////////////////////////////////////// 141 | 142 | #define VAR char 143 | #define FUNC(NAME) NAME##8 144 | 145 | #include "wolfsort.c" 146 | 147 | #undef VAR 148 | #undef FUNC 149 | 150 | ////////////////////////////////////////////////////////// 151 | //┌────────────────────────────────────────────────────┐// 152 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 153 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 154 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 155 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 156 | //│ ██████┐└█████┌┘ 
██████┌┘██████┐ ██│ │// 157 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 158 | //└────────────────────────────────────────────────────┘// 159 | ////////////////////////////////////////////////////////// 160 | 161 | #define VAR short 162 | #define FUNC(NAME) NAME##16 163 | 164 | #include "wolfsort.c" 165 | 166 | #undef VAR 167 | #undef FUNC 168 | 169 | /////////////////////////////////////////////////////////// 170 | //┌─────────────────────────────────────────────────────┐// 171 | //│ ██████┐██┐ ██┐███████┐████████┐ ██████┐ ███┐ ███┐│// 172 | //│██┌────┘██│ ██│██┌────┘└──██┌──┘██┌───██┐████┐████││// 173 | //│██│ ██│ ██│███████┐ ██│ ██│ ██│██┌███┌██││// 174 | //│██│ ██│ ██│└────██│ ██│ ██│ ██│██│└█┌┘██││// 175 | //│└██████┐└██████┌┘███████│ ██│ └██████┌┘██│ └┘ ██││// 176 | //│ └─────┘ └─────┘ └──────┘ └─┘ └─────┘ └─┘ └─┘│// 177 | //└─────────────────────────────────────────────────────┘// 178 | /////////////////////////////////////////////////////////// 179 | 180 | /* 181 | typedef struct {char bytes[32];} struct256; 182 | #define VAR struct256 183 | #define FUNC(NAME) NAME##256 184 | 185 | #include "wolfsort.c" 186 | 187 | #undef VAR 188 | #undef FUNC 189 | */ 190 | 191 | ////////////////////////////////////////////////////////////////////////// 192 | //┌─────────────────────────────────────────────────────────────────────┐// 193 | //│██┐ ██┐ ██████┐ ██┐ ███████┐███████┐ ██████┐ ██████┐ ████████┐│// 194 | //│██│ ██│██┌───██┐██│ ██┌────┘██┌────┘██┌───██┐██┌──██┐└──██┌──┘│// 195 | //│██│ █┐ ██│██│ ██│██│ █████┐ ███████┐██│ ██│██████┌┘ ██│ │// 196 | //│██│███┐██│██│ ██│██│ ██┌──┘ └────██│██│ ██│██┌──██┐ ██│ │// 197 | //│└███┌███┌┘└██████┌┘███████┐██│ ███████│└██████┌┘██│ ██│ ██│ │// 198 | //│ └──┘└──┘ └─────┘ └──────┘└─┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 199 | //└─────────────────────────────────────────────────────────────────────┘// 200 | ////////////////////////////////////////////////////////////////////////// 201 | 202 | void wolfsort(void *array, size_t nmemb, 
size_t size, CMPFUNC *cmp) 203 | { 204 | if (nmemb < 2) 205 | { 206 | return; 207 | } 208 | 209 | switch (size) 210 | { 211 | case sizeof(char): 212 | wolfsort8(array, nmemb, cmp); 213 | return; 214 | 215 | case sizeof(short): 216 | wolfsort16(array, nmemb, cmp); 217 | return; 218 | 219 | case sizeof(int): 220 | wolfsort_uint32(array, nmemb, cmp); 221 | return; 222 | 223 | case sizeof(long long): 224 | wolfsort_uint64(array, nmemb, cmp); 225 | // fluxsort64(array, nmemb, cmp); // fluxsort generally beats wolfsort for 64+ bit types 226 | return; 227 | 228 | case sizeof(long double): 229 | fluxsort128(array, nmemb, cmp); 230 | return; 231 | 232 | // case sizeof(struct256): 233 | // wolfsort256(array, nmemb, cmp); 234 | return; 235 | 236 | default: 237 | assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 238 | // qsort(array, nmemb, size, cmp); 239 | } 240 | } 241 | 242 | // suggested size values for primitives: 243 | 244 | // case 0: unsigned char 245 | // case 1: signed char 246 | // case 2: signed short 247 | // case 3: unsigned short 248 | // case 4: signed int 249 | // case 5: unsigned int 250 | // case 6: float 251 | // case 7: double 252 | // case 8: signed long long 253 | // case 9: unsigned long long 254 | // case 16: long double 255 | 256 | void wolfsort_prim(void *array, size_t nmemb, size_t size) 257 | { 258 | if (nmemb < 2) 259 | { 260 | return; 261 | } 262 | 263 | switch (size) 264 | { 265 | case 4: 266 | fluxsort_int32(array, nmemb, NULL); 267 | return; 268 | case 8: 269 | fluxsort_int64(array, nmemb, NULL); 270 | return; 271 | case 5: 272 | wolfsort_uint32(array, nmemb, NULL); 273 | return; 274 | case 9: 275 | wolfsort_uint64(array, nmemb, NULL); 276 | return; 277 | default: 278 | assert(size == sizeof(int) || size == sizeof(long long) || size == sizeof(int) + 1 || size == sizeof(long long) + 1); 279 | return; 280 | } 281 | } 282 | 283 | #undef QUAD_CACHE 284 | 285 | 
#endif 286 | --------------------------------------------------------------------------------