├── README.md ├── bench.c ├── images ├── graph1.png └── graph2.png ├── octosort.c └── octosort.h /README.md: -------------------------------------------------------------------------------- 1 | Origin 2 | ------ 3 | Octosort is a block merge sort based on [WikiSort](https://github.com/BonzaiThePenguin/WikiSort) and [quadsort](https://github.com/scandum/quadsort). This document primarily lists notable differences and some benchmarks. 4 | 5 | Octo swap 6 | --------- 7 | Just as quadsort has the quad swap, octosort has the octo swap. The swap sorts between 4 and 8 elements at a time and detects runs in reverse-ordered data. 8 | 9 | Monobound binary search 10 | ----------------------- 11 | WikiSort's binary search has been replaced with a [monobound binary search](https://github.com/scandum/binary_search), which is up to two times faster. 12 | 13 | Gries-Mills rotation 14 | -------------------- 15 | WikiSort's triple reversal rotation has been replaced with a Gries-Mills rotation, which is up to two times faster. 16 | 17 | Quad merge 18 | ---------- 19 | WikiSort already implemented a quad merge, which has been updated to no longer detect reverse order runs, since that's taken care of by the octo swap. 20 | 21 | Tail merge 22 | ---------- 23 | Quadsort's tail merge routine was added to perform partially in-place merges. 24 | 25 | Data Types 26 | ---------- 27 | Support was added for long doubles and 8, 16, 32, and 64 bit data types. By using 32 or 64 bit pointers it's possible to sort any other data type. 28 | 29 | Interface 30 | --------- 31 | The interface was changed to use the same one as qsort, which is described in [man qsort](https://man7.org/linux/man-pages/man3/qsort.3p.html). 32 | 33 | Memory 34 | ------ 35 | By default octosort uses 512 elements worth of stack memory. 36 | 37 | The minimum memory requirement for octosort is 1 element of stack memory; it can be configured to use n / 2 memory. 
38 | 39 | Big O 40 | ----- 41 | ```cobol 42 | ┌───────────────────────┐┌───────────────────────┐ 43 | │comparisons ││swap memory │ 44 | ┌───────────────┐├───────┬───────┬───────┤├───────┬───────┬───────┤┌──────┐┌─────────┐┌─────────┐ 45 | │name ││min │avg │max ││min │avg │max ││stable││partition││adaptive │ 46 | ├───────────────┤├───────┼───────┼───────┤├───────┼───────┼───────┤├──────┤├─────────┤├─────────┤ 47 | │mergesort ││n log n│n log n│n log n││n │n │n ││yes ││no ││no │ 48 | ├───────────────┤├───────┼───────┼───────┤├───────┼───────┼───────┤├──────┤├─────────┤├─────────┤ 49 | │octosort ││n │n log n│n log n││1 │1 │1 ││yes ││no ││yes │ 50 | ├───────────────┤├───────┼───────┼───────┤├───────┼───────┼───────┤├──────┤├─────────┤├─────────┤ 51 | │quicksort ││n │n log n│n² ││1 │1 │1 ││no ││yes ││no │ 52 | └───────────────┘└───────┴───────┴───────┘└───────┴───────┴───────┘└──────┘└─────────┘└─────────┘ 53 | ``` 54 | 55 | Benchmarks 56 | ---------- 57 | The following benchmark was run on WSL 2 using gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04). 58 | The source code was compiled using gcc -O3 bench.c. Each test was run 100 times 59 | and only the best run is reported. The results were generated by running the benchmark with 60 | 100000 100 as the arguments. 61 | 62 | ![Graph](/images/graph1.png) 63 | 64 |
data table 65 | 66 | | Name | Items | Type | Best | Average | Compares | Samples | Distribution | 67 | | --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- | 68 | | qsort | 100000 | 32 | 0.008508 | 0.008779 | 1536367 | 100 | random order | 69 | | octosort | 100000 | 32 | 0.008792 | 0.008889 | 1800800 | 100 | random order | 70 | | | | | | | | | | 71 | | qsort | 100000 | 32 | 0.002024 | 0.002225 | 815024 | 100 | ascending order | 72 | | octosort | 100000 | 32 | 0.000328 | 0.000345 | 116524 | 100 | ascending order | 73 | | | | | | | | | | 74 | | qsort | 100000 | 32 | 0.002831 | 0.003088 | 915020 | 100 | ascending saw | 75 | | octosort | 100000 | 32 | 0.001537 | 0.001565 | 370372 | 100 | ascending saw | 76 | | | | | | | | | | 77 | | qsort | 100000 | 32 | 0.006426 | 0.006722 | 1531997 | 100 | generic order | 78 | | octosort | 100000 | 32 | 0.006437 | 0.006515 | 1633855 | 100 | generic order | 79 | | | | | | | | | | 80 | | qsort | 100000 | 32 | 0.002456 | 0.002657 | 853904 | 100 | descending order | 81 | | octosort | 100000 | 32 | 0.000221 | 0.000227 | 99999 | 100 | descending order | 82 | | | | | | | | | | 83 | | qsort | 100000 | 32 | 0.002832 | 0.003001 | 1063907 | 100 | descending saw | 84 | | octosort | 100000 | 32 | 0.001738 | 0.001849 | 693171 | 100 | descending saw | 85 | | | | | | | | | | 86 | | qsort | 100000 | 32 | 0.003744 | 0.003939 | 1012256 | 100 | random tail | 87 | | octosort | 100000 | 32 | 0.002684 | 0.002740 | 630603 | 100 | random tail | 88 | | | | | | | | | | 89 | | qsort | 100000 | 32 | 0.005464 | 0.005732 | 1200738 | 100 | random half | 90 | | octosort | 100000 | 32 | 0.004859 | 0.004911 | 1022394 | 100 | random half | 91 | | | | | | | | | | 92 | | qsort | 100000 | 32 | 0.004147 | 0.004685 | 1209200 | 100 | ascending tiles | 93 | | octosort | 100000 | 32 | 0.003146 | 0.003437 | 790377 | 100 | ascending tiles | 94 | 95 |
96 | 97 | 98 | The following benchmark was generated using 1000000 0 0 as the argument. 99 | 100 | ![Graph](/images/graph2.png) 101 | 102 |
data table 103 | 104 | | Name | Items | Type | Best | Average | Compares | Samples | Distribution | 105 | | --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- | 106 | | qsort | 4 | 32 | 0.001369 | 0.001439 | 5 | 100 | random 4 | 107 | | octosort | 4 | 32 | 0.000765 | 0.000776 | 6 | 100 | random 4 | 108 | | | | | | | | | | 109 | | qsort | 8 | 32 | 0.001511 | 0.001555 | 17 | 100 | random 8 | 110 | | octosort | 8 | 32 | 0.000893 | 0.000939 | 19 | 100 | random 8 | 111 | | | | | | | | | | 112 | | qsort | 16 | 32 | 0.001587 | 0.001952 | 46 | 100 | random 16 | 113 | | octosort | 16 | 32 | 0.001221 | 0.001281 | 55 | 100 | random 16 | 114 | | | | | | | | | | 115 | | qsort | 32 | 32 | 0.001795 | 0.002612 | 121 | 100 | random 32 | 116 | | octosort | 32 | 32 | 0.001319 | 0.001602 | 124 | 100 | random 32 | 117 | | | | | | | | | | 118 | | qsort | 64 | 32 | 0.002037 | 0.003018 | 309 | 100 | random 64 | 119 | | octosort | 64 | 32 | 0.001492 | 0.002195 | 319 | 100 | random 64 | 120 | | | | | | | | | | 121 | | qsort | 128 | 32 | 0.002304 | 0.003754 | 745 | 100 | random 128 | 122 | | octosort | 128 | 32 | 0.001674 | 0.003189 | 775 | 100 | random 128 | 123 | | | | | | | | | | 124 | | qsort | 256 | 32 | 0.003293 | 0.005024 | 1738 | 100 | random 256 | 125 | | octosort | 256 | 32 | 0.001909 | 0.003613 | 1806 | 100 | random 256 | 126 | | | | | | | | | | 127 | | qsort | 512 | 32 | 0.005293 | 0.006220 | 3968 | 100 | random 512 | 128 | | octosort | 512 | 32 | 0.003113 | 0.005086 | 4112 | 100 | random 512 | 129 | | | | | | | | | | 130 | | qsort | 1024 | 32 | 0.006530 | 0.007128 | 8962 | 100 | random 1024 | 131 | | octosort | 1024 | 32 | 0.005290 | 0.006494 | 10031 | 100 | random 1024 | 132 | | | | | | | | | | 133 | | qsort | 2048 | 32 | 0.007341 | 0.007810 | 19962 | 100 | random 2048 | 134 | | octosort | 2048 | 32 | 0.006943 | 0.007444 | 22885 | 100 | random 2048 | 135 | | | | | | | | | | 136 | | qsort | 4096 | 32 | 0.008086 | 0.008499 | 43966 | 100 | 
random 4096 | 137 | | octosort | 4096 | 32 | 0.008295 | 0.008441 | 51035 | 100 | random 4096 | 138 | | | | | | | | | | 139 | | qsort | 8192 | 32 | 0.008740 | 0.009142 | 96149 | 100 | random 8192 | 140 | | octosort | 8192 | 32 | 0.009122 | 0.009198 | 112238 | 100 | random 8192 | 141 | | | | | | | | | | 142 | | qsort | 16384 | 32 | 0.009405 | 0.009830 | 208702 | 100 | random 16384 | 143 | | octosort | 16384 | 32 | 0.009827 | 0.009949 | 244511 | 100 | random 16384 | 144 | | | | | | | | | | 145 | | qsort | 32768 | 32 | 0.010039 | 0.010421 | 450105 | 100 | random 32768 | 146 | | octosort | 32768 | 32 | 0.010525 | 0.010680 | 529041 | 100 | random 32768 | 147 | | | | | | | | | | 148 | | qsort | 65536 | 32 | 0.010708 | 0.011123 | 965773 | 100 | random 65536 | 149 | | octosort | 65536 | 32 | 0.011250 | 0.011431 | 1138363 | 100 | random 65536 | 150 | | | | | | | | | | 151 | | qsort | 131072 | 32 | 0.011316 | 0.011698 | 2062601 | 100 | random 131072 | 152 | | octosort | 131072 | 32 | 0.011982 | 0.012159 | 2437514 | 100 | random 131072 | 153 | 154 |
155 | -------------------------------------------------------------------------------- /bench.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2014-2021 Igor van den Hoven ivdhoven@gmail.com 3 | */ 4 | 5 | /* 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | */ 25 | 26 | /* 27 | To compile use: 28 | 29 | gcc -O3 bench.c 30 | 31 | or 32 | 33 | g++ -O3 -w -fpermissive bench.c 34 | */ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "octosort.h" 45 | 46 | //#define cmp(a,b) (*(a) > *(b)) 47 | 48 | //typedef int CMPFUNC (const void *a, const void *b); 49 | 50 | typedef void SRTFUNC(void *array, size_t nmemb, size_t size, CMPFUNC *cmpf); 51 | 52 | 53 | // Must prevent inlining so the benchmark is fair against qsort. 54 | 55 | // Remove __attribute__ ((noinline)) and comparisons++ for full throttle. 
// Number of comparator calls made since the counter was last cleared.
// Only the noinline comparators below increment it.
size_t comparisons;

// Three-way comparison of two ints, usable as a qsort-style callback.
// Returns a negative, zero or positive value and counts the call.
__attribute__ ((noinline)) int cmp_int(const void * a, const void * b)
{
	const int fa = *(const int *) a;
	const int fb = *(const int *) b;

	comparisons++;

	// Subtracting (fa - fb) overflows, which is undefined behavior and
	// yields the wrong sign, when the operands are far apart (for example
	// INT_MIN versus 1), so derive the sign from two comparisons instead.
	return (fa > fb) - (fa < fb);
}

// Compares two ints by value / 100000 only, so every value that shares a
// quotient compares equal. The quotients are small enough that their
// difference cannot overflow. Counts the call.
__attribute__ ((noinline)) int cmp_stable(const void * a, const void * b)
{
	const int qa = *(const int *) a / 100000;
	const int qb = *(const int *) b / 100000;

	comparisons++;

	return qa - qb;
}

// Three-way comparison of two long longs. Counts the call.
__attribute__ ((noinline)) int cmp_long(const void * a, const void * b)
{
	const long long fa = *(const long long *) a;
	const long long fb = *(const long long *) b;

	comparisons++;

	return (fa > fb) - (fa < fb);
}

// Three-way comparison of two long doubles. A NaN orders after every
// non-NaN value and two NaNs compare equal. Counts the call.
__attribute__ ((noinline)) int cmp_long_double(const void * a, const void * b)
{
	const long double fa = *(const long double *) a;
	const long double fb = *(const long double *) b;
	// isnan() is only specified to return nonzero for NaN, not exactly 1,
	// so normalise before subtracting.
	const int nan_a = isnan(fa) != 0;
	const int nan_b = isnan(fb) != 0;

	comparisons++;

	if (nan_a || nan_b)
	{
		return nan_a - nan_b;
	}
	return ((fa > fb) - (fa < fb));
}

// Compares two C strings through pointers to their char pointers, using
// strcmp ordering. Does not count the call.
int cmp_str(const void * a, const void * b)
{
	return strcmp(*(const char **) a, *(const char **) b);
}

// Three-way comparison of two floats. Does not count the call.
int cmp_float(const void * a, const void * b)
{
	const float fa = *(const float *) a;
	const float fb = *(const float *) b;

	// Converting the float difference to int truncated differences smaller
	// than 1 to "equal" and was undefined for differences outside int range.
	return (fa > fb) - (fa < fb);
}

// Returns the current wall-clock time from gettimeofday() in microseconds.
long long utime(void)
{
	struct timeval now_time;

	gettimeofday(&now_time, NULL);

	return now_time.tv_sec * 1000000LL + now_time.tv_usec;
}

// Seeds rand(). srand() takes an unsigned int, so only the low bits of the
// seed are used; the cast makes that narrowing explicit.
void seed_rand(unsigned long long seed)
{
	srand((unsigned int) seed);
}
131 | int *pta = (int *) array, *ptv = (int *) valid, cnt; 132 | 133 | if (*name == '*') 134 | { 135 | if (!strcmp(desc, "random order") || !strcmp(desc, "random 1-4") || !strcmp(desc, "random 4")) 136 | { 137 | if (comparisons) 138 | { 139 | printf("%s\n", "| Name | Items | Type | Best | Average | Compares | Samples | Distribution |"); 140 | printf("%s\n", "| --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |"); 141 | } 142 | else 143 | { 144 | printf("%s\n", "| Name | Items | Type | Best | Average | Loops | Samples | Distribution |"); 145 | printf("%s\n", "| --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |"); 146 | } 147 | } 148 | else 149 | { 150 | printf("%s\n", "| | | | | | | | |"); 151 | } 152 | return; 153 | } 154 | 155 | best = average = 0; 156 | 157 | if (minimum == 7 && maximum == 7) 158 | { 159 | printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d\e[0m\n", pta[0], pta[1], pta[2], pta[3], pta[4], pta[5], pta[6]); 160 | } 161 | 162 | for (sam = 0 ; sam < samples ; sam++) 163 | { 164 | total = 0; 165 | 166 | max = minimum; 167 | 168 | if (repetitions > 1) 169 | { 170 | start = utime(); 171 | 172 | for (rep = 0 ; rep < repetitions ; rep++) 173 | { 174 | memcpy(array, unsorted, max * size); 175 | 176 | comparisons = 0; 177 | 178 | switch (*name) 179 | { 180 | case 'q': 181 | qsort(array, max, size, cmpf); 182 | break; 183 | 184 | case 'o': 185 | octosort(array, max, size, cmpf); 186 | break; 187 | } 188 | 189 | if (minimum < maximum && ++max > maximum) 190 | { 191 | max = minimum; 192 | } 193 | } 194 | end = utime(); 195 | } 196 | else 197 | { 198 | memcpy(array, unsorted, max * size); 199 | 200 | comparisons = 0; 201 | 202 | start = utime(); 203 | 204 | switch (*name) 205 | { 206 | case 'q': 207 | qsort(array, max, size, cmpf); 208 | break; 209 | case 'o': 210 | octosort(array, max, size, cmpf); 211 | break; 212 | } 213 | end = utime(); 214 | } 215 | 216 | total = end - 
start; 217 | 218 | if (!best || total < best) 219 | { 220 | best = total; 221 | } 222 | average += total; 223 | } 224 | 225 | if (minimum == 7 && maximum == 7) 226 | { 227 | printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d\e[0m\n", pta[0], pta[1], pta[2], pta[3], pta[4], pta[5], pta[6]); 228 | } 229 | 230 | if (repetitions == 0) 231 | { 232 | return; 233 | } 234 | 235 | average /= samples; 236 | 237 | if (cmpf == cmp_stable) 238 | { 239 | for (cnt = 1 ; cnt < maximum ; cnt++) 240 | { 241 | if (pta[cnt - 1] > pta[cnt]) 242 | { 243 | sprintf(desc, "\e[1;31m%16s\e[0m", "unstable"); 244 | 245 | break; 246 | } 247 | } 248 | } 249 | 250 | if (comparisons) 251 | { 252 | printf("|%10s | %8d | %4d | %f | %f | %9d | %7d | %16s |\n", name, maximum, (int) size * 8, best / 1000000.0, average / 1000000.0, (int) comparisons, samples, desc); 253 | } 254 | else 255 | { 256 | printf("|%10s | %8d | %4d | %f | %f | %9d | %7d | %16s |\n", name, maximum, (int) size * 8, best / 1000000.0, average / 1000000.0, repetitions, samples, desc); 257 | } 258 | 259 | if (minimum != maximum || cmpf == cmp_stable) 260 | { 261 | return; 262 | } 263 | 264 | for (cnt = 1 ; cnt < maximum ; cnt++) 265 | { 266 | if (size == sizeof(int)) 267 | { 268 | if (pta[cnt - 1] > pta[cnt]) 269 | { 270 | printf("%17s: not properly sorted at index %d. (%d vs %d\n", name, cnt, pta[cnt - 1], pta[cnt]); 271 | break; 272 | } 273 | if (pta[cnt - 1] == pta[cnt]) 274 | { 275 | // printf("%17s: Found a repeat value at index %d. (%d)\n", name, cnt, pta[cnt]); 276 | } 277 | } 278 | else if (size == sizeof(long long)) 279 | { 280 | if (ptla[cnt - 1] > ptla[cnt]) 281 | { 282 | printf("%17s: not properly sorted at index %d. (%lld vs %lld\n", name, cnt, ptla[cnt - 1], ptla[cnt]); 283 | break; 284 | } 285 | } 286 | else if (size == sizeof(long double)) 287 | { 288 | if (cmp_long_double(&ptda[cnt - 1], &ptda[cnt]) > 0) 289 | { 290 | printf("%17s: not properly sorted at index %d. 
(%Lf vs %Lf\n", name, cnt, ptda[cnt - 1], ptda[cnt]); 291 | break; 292 | } 293 | } 294 | } 295 | 296 | for (cnt = 1 ; cnt < maximum ; cnt++) 297 | { 298 | if (size == sizeof(int)) 299 | { 300 | if (pta[cnt] != ptv[cnt]) 301 | { 302 | printf(" validate: array[%d] != valid[%d]. (%d vs %d\n", cnt, cnt, pta[cnt], ptv[cnt]); 303 | break; 304 | } 305 | } 306 | else if (size == sizeof(long long)) 307 | { 308 | if (ptla[cnt] != ptlv[cnt]) 309 | { 310 | printf(" validate: array[%d] != valid[%d]. (%lld vs %lld\n", cnt, cnt, ptla[cnt], ptlv[cnt]); 311 | break; 312 | } 313 | } 314 | else if (size == sizeof(long double)) 315 | { 316 | if (ptda[cnt] != ptdv[cnt]) 317 | { 318 | printf(" validate: array[%d] != valid[%d]. (%Lf vs %Lf\n", cnt, cnt, ptda[cnt], ptdv[cnt]); 319 | break; 320 | } 321 | } 322 | } 323 | } 324 | 325 | void validate() 326 | { 327 | int seed = time(NULL); 328 | int cnt, val, max = 2000000; 329 | 330 | int *a_array, *r_array, *v_array; 331 | 332 | seed_rand(seed); 333 | 334 | a_array = (int *) malloc(max * sizeof(int)); 335 | r_array = (int *) malloc(max * sizeof(int)); 336 | v_array = (int *) malloc(max * sizeof(int)); 337 | 338 | for (cnt = 0 ; cnt < max ; cnt++) 339 | { 340 | r_array[cnt] = rand(); 341 | } 342 | 343 | 344 | for (cnt = 1 ; cnt < 100 ; cnt++) 345 | { 346 | memcpy(a_array, r_array, max * sizeof(int)); 347 | memcpy(v_array, r_array, max * sizeof(int)); 348 | 349 | octosort(a_array, cnt, sizeof(int), cmp_int); 350 | qsort(v_array, cnt, sizeof(int), cmp_int); 351 | 352 | for (val = 0 ; val < cnt ; val++) 353 | { 354 | if (val && v_array[val - 1] > v_array[val]) 355 | { 356 | printf("\e[1;31mvalidate rand: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); 357 | return; 358 | } 359 | 360 | if (a_array[val] != v_array[val]) 361 | { 362 | printf("\e[1;31mvalidate rand: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); 363 | return; 364 | } 365 | } 366 | } 367 | 368 | // ascending saw 369 | 370 | for (cnt = 0 ; cnt 
< 1000 ; cnt++) 371 | { 372 | r_array[cnt] = rand(); 373 | } 374 | 375 | octosort(r_array + max / 4 * 0, max / 4, sizeof(int), cmp_int); 376 | octosort(r_array + max / 4 * 1, max / 4, sizeof(int), cmp_int); 377 | octosort(r_array + max / 4 * 2, max / 4, sizeof(int), cmp_int); 378 | octosort(r_array + max / 4 * 3, max / 4, sizeof(int), cmp_int); 379 | 380 | for (cnt = 1 ; cnt < 1000 ; cnt += 7) 381 | { 382 | memcpy(a_array, r_array, max * sizeof(int)); 383 | memcpy(v_array, r_array, max * sizeof(int)); 384 | 385 | octosort(a_array, cnt, sizeof(int), cmp_int); 386 | qsort(v_array, cnt, sizeof(int), cmp_int); 387 | 388 | for (val = 0 ; val < cnt ; val++) 389 | { 390 | if (val && v_array[val - 1] > v_array[val]) 391 | { 392 | printf("\e[1;31mvalidate ascending saw: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); 393 | return; 394 | } 395 | 396 | if (a_array[val] != v_array[val]) 397 | { 398 | printf("\e[1;31mvalidate ascending saw: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); 399 | return; 400 | } 401 | } 402 | } 403 | 404 | // descending saw 405 | 406 | for (cnt = 0 ; cnt < 1000 ; cnt++) 407 | { 408 | r_array[cnt] = (max - cnt - 1) % 100000; 409 | } 410 | 411 | for (cnt = 1 ; cnt < 1000 ; cnt += 7) 412 | { 413 | memcpy(a_array, r_array, max * sizeof(int)); 414 | memcpy(v_array, r_array, max * sizeof(int)); 415 | 416 | octosort(a_array, cnt, sizeof(int), cmp_int); 417 | qsort(v_array, cnt, sizeof(int), cmp_int); 418 | 419 | for (val = 0 ; val < cnt ; val++) 420 | { 421 | if (val && v_array[val - 1] > v_array[val]) 422 | { 423 | printf("\e[1;31mvalidate descending saw: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); 424 | return; 425 | } 426 | 427 | if (a_array[val] != v_array[val]) 428 | { 429 | printf("\e[1;31mvalidate descending saw: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); 430 | return; 431 | } 432 | } 433 | } 434 | 435 | // random tail 436 | 437 | for (cnt = 0 ; cnt < max * 
3 / 4 ; cnt++) 438 | { 439 | r_array[cnt] = cnt; 440 | } 441 | 442 | for (cnt = max * 3 / 4 ; cnt < max ; cnt++) 443 | { 444 | r_array[cnt] = rand(); 445 | } 446 | 447 | for (cnt = 1 ; cnt < 1000 ; cnt += 7) 448 | { 449 | memcpy(a_array, r_array, max * sizeof(int)); 450 | memcpy(v_array, r_array, max * sizeof(int)); 451 | 452 | octosort(a_array, cnt, sizeof(int), cmp_int); 453 | qsort(v_array, cnt, sizeof(int), cmp_int); 454 | 455 | for (val = 0 ; val < cnt ; val++) 456 | { 457 | if (val && v_array[val - 1] > v_array[val]) 458 | { 459 | printf("\e[1;31mvalidate rand tail: seed %d: size: %d Not properly sorted at index %d.\n", seed, cnt, val); 460 | return; 461 | } 462 | 463 | if (a_array[val] != v_array[val]) 464 | { 465 | printf("\e[1;31mvalidate rand tail: seed %d: size: %d Not verified at index %d.\n", seed, cnt, val); 466 | return; 467 | } 468 | } 469 | } 470 | 471 | free(a_array); 472 | free(r_array); 473 | free(v_array); 474 | } 475 | 476 | 477 | int main(int argc, char **argv) 478 | { 479 | int max = 100000; 480 | int samples = 10; 481 | int repetitions = 1; 482 | int seed = 0; 483 | int cnt, rnd, lst; 484 | int *a_array, *r_array, *v_array; 485 | long long *la_array, *lr_array, *lv_array; 486 | long double *da_array, *dr_array, *dv_array; 487 | 488 | char dist[40], *sorts[] = { "*", "qsort", "octosort" }; 489 | 490 | if (argc >= 1 && argv[1] && *argv[1]) 491 | { 492 | max = atoi(argv[1]); 493 | } 494 | 495 | if (argc >= 2 && argv[2] && *argv[2]) 496 | { 497 | samples = atoi(argv[2]); 498 | } 499 | 500 | if (argc >= 3 && argv[3] && *argv[3]) 501 | { 502 | repetitions = atoi(argv[3]); 503 | } 504 | 505 | if (argc >= 4 && argv[4] && *argv[4]) 506 | { 507 | seed = atoi(argv[4]); 508 | } 509 | 510 | validate(); 511 | 512 | rnd = seed ? 
seed : time(NULL); 513 | 514 | a_array = (int *) malloc(max * sizeof(int)); 515 | r_array = (int *) malloc(max * sizeof(int)); 516 | v_array = (int *) malloc(max * sizeof(int)); 517 | 518 | printf("Info: int = %lu, long long = %lu, long double = %lu\n\n", sizeof(int) * 8, sizeof(long long) * 8, sizeof(long double) * 8); 519 | 520 | printf("Benchmark: array size: %d, samples: %d, repetitions: %d, seed: %d\n\n", max, samples, repetitions, rnd); 521 | 522 | if (samples == 0 && repetitions == 0) 523 | { 524 | goto small_range_test; 525 | } 526 | 527 | // 128 bit 528 | 529 | da_array = (long double *) malloc(max * sizeof(long double)); 530 | dr_array = (long double *) malloc(max * sizeof(long double)); 531 | dv_array = (long double *) malloc(max * sizeof(long double)); 532 | 533 | if (da_array == NULL || dr_array == NULL || dv_array == NULL) 534 | { 535 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno)); 536 | 537 | return 0; 538 | } 539 | 540 | seed_rand(rnd); 541 | 542 | for (cnt = 0 ; cnt < max ; cnt++) 543 | { 544 | dr_array[cnt] = rand() + 1.0 / (long double) (rand() + (rand() << 30LL)); 545 | } 546 | 547 | memcpy(dv_array, dr_array, max * sizeof(long double)); 548 | qsort(dv_array, max, sizeof(long double), cmp_long_double); 549 | 550 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 551 | { 552 | test_sort(da_array, dr_array, dv_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(long double), cmp_long_double); 553 | } 554 | 555 | free(da_array); 556 | free(dr_array); 557 | free(dv_array); 558 | 559 | printf("\n"); 560 | 561 | // 64 bit 562 | 563 | la_array = (long long *) malloc(max * sizeof(long long)); 564 | lr_array = (long long *) malloc(max * sizeof(long long)); 565 | lv_array = (long long *) malloc(max * sizeof(long long)); 566 | 567 | if (la_array == NULL || lr_array == NULL || lv_array == NULL) 568 | { 569 | printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, 
strerror(errno)); 570 | 571 | return 0; 572 | } 573 | 574 | seed_rand(rnd); 575 | 576 | for (cnt = 0 ; cnt < max ; cnt++) 577 | { 578 | lr_array[cnt] = rand(); 579 | lr_array[cnt] += (unsigned long long) rand() << 32ULL; 580 | } 581 | 582 | memcpy(lv_array, lr_array, max * sizeof(long long)); 583 | qsort(lv_array, max, sizeof(long long), cmp_long); 584 | 585 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 586 | { 587 | test_sort(la_array, lr_array, lv_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(long long), cmp_long); 588 | } 589 | 590 | printf("\n"); 591 | 592 | free(la_array); 593 | free(lr_array); 594 | free(lv_array); 595 | 596 | // 32 bit 597 | 598 | // random 599 | 600 | seed_rand(rnd); 601 | 602 | for (cnt = 0 ; cnt < max ; cnt++) 603 | { 604 | r_array[cnt] = rand(); 605 | } 606 | 607 | memcpy(v_array, r_array, max * sizeof(int)); 608 | qsort(v_array, max, sizeof(int), cmp_int); 609 | 610 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 611 | { 612 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(int), cmp_int); 613 | } 614 | 615 | // ascending 616 | 617 | for (cnt = 0 ; cnt < max ; cnt++) 618 | { 619 | r_array[cnt] = cnt; 620 | } 621 | 622 | memcpy(v_array, r_array, max * sizeof(int)); 623 | memcpy(r_array, v_array, max * sizeof(int)); 624 | 625 | qsort(v_array, max, sizeof(int), cmp_int); 626 | 627 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 628 | { 629 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "ascending order", sizeof(int), cmp_int); 630 | } 631 | 632 | // ascending saw 633 | 634 | for (cnt = 0 ; cnt < max ; cnt++) 635 | { 636 | r_array[cnt] = rand(); 637 | } 638 | 639 | memcpy(v_array, r_array, max * sizeof(int)); 640 | qsort(v_array + max / 4 * 0, max / 4, sizeof(int), cmp_int); 641 | qsort(v_array + max / 4 * 1, max / 4, sizeof(int), cmp_int); 642 | 
qsort(v_array + max / 4 * 2, max / 4, sizeof(int), cmp_int); 643 | qsort(v_array + max / 4 * 3, max / 4, sizeof(int), cmp_int); 644 | memcpy(r_array, v_array, max * sizeof(int)); 645 | 646 | qsort(v_array, max, sizeof(int), cmp_int); 647 | 648 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 649 | { 650 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "ascending saw", sizeof(int), cmp_int); 651 | } 652 | 653 | // generic 654 | 655 | for (cnt = 0 ; cnt < max ; cnt++) 656 | { 657 | r_array[cnt] = rand() % 100; 658 | } 659 | 660 | memcpy(v_array, r_array, max * sizeof(int)); 661 | qsort(v_array, max, sizeof(int), cmp_int); 662 | 663 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 664 | { 665 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "generic order", sizeof(int), cmp_int); 666 | } 667 | 668 | // descending 669 | 670 | for (cnt = 0 ; cnt < max ; cnt++) 671 | { 672 | r_array[cnt] = (max - cnt); 673 | } 674 | 675 | memcpy(v_array, r_array, max * sizeof(int)); 676 | qsort(v_array, max, sizeof(int), cmp_int); 677 | 678 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 679 | { 680 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "descending order", sizeof(int), cmp_int); 681 | } 682 | 683 | // descending saw 684 | 685 | for (cnt = 0 ; cnt < max ; cnt++) 686 | { 687 | r_array[cnt] = (max - cnt - 1) % 10000; 688 | } 689 | 690 | memcpy(v_array, r_array, max * sizeof(int)); 691 | qsort(v_array, max, sizeof(int), cmp_int); 692 | 693 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 694 | { 695 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "descending saw", sizeof(int), cmp_int); 696 | } 697 | 698 | // random tail 699 | 700 | seed_rand(rnd); 701 | 702 | for (cnt = 0 ; cnt < max ; cnt++) 703 | { 704 | r_array[cnt] = rand(); 705 | } 706 | 707 | 
memcpy(v_array, r_array, max * sizeof(int)); 708 | qsort(v_array, max * 3 / 4, sizeof(int), cmp_int); 709 | memcpy(r_array, v_array, max * sizeof(int)); 710 | qsort(v_array, max, sizeof(int), cmp_int); 711 | 712 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 713 | { 714 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "random tail", sizeof(int), cmp_int); 715 | } 716 | 717 | seed_rand(rnd); 718 | 719 | for (cnt = 0 ; cnt < max ; cnt++) 720 | { 721 | r_array[cnt] = rand(); 722 | } 723 | 724 | memcpy(v_array, r_array, max * sizeof(int)); 725 | qsort(v_array, max / 2, sizeof(int), cmp_int); 726 | 727 | memcpy(r_array, v_array, max * sizeof(int)); 728 | qsort(v_array, max, sizeof(int), cmp_int); 729 | 730 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 731 | { 732 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "random half", sizeof(int), cmp_int); 733 | } 734 | 735 | // tiles 736 | 737 | for (cnt = 0 ; cnt < max ; cnt++) 738 | { 739 | if (cnt % 2 == 0) 740 | { 741 | r_array[cnt] = 16777216 + cnt; 742 | } 743 | else 744 | { 745 | r_array[cnt] = 33554432 + cnt; 746 | } 747 | } 748 | 749 | memcpy(v_array, r_array, max * sizeof(int)); 750 | qsort(v_array, max, sizeof(int), cmp_int); 751 | 752 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 753 | { 754 | strcpy(dist, "ascending tiles"); 755 | 756 | test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], dist, sizeof(int), cmp_stable); 757 | } 758 | 759 | if (repetitions > 0) 760 | { 761 | goto end; 762 | } 763 | 764 | small_range_test: 765 | 766 | if (max >= 8192) 767 | { 768 | goto large_range_test; 769 | } 770 | 771 | for (lst = 1, samples = 32768, repetitions = 4 ; repetitions <= 4096 ; repetitions *= 2, samples /= 2) 772 | { 773 | if (max >= repetitions) 774 | { 775 | sprintf(dist, "random %d-%d", lst, repetitions); 776 | 777 | srand(rnd); 778 | 779 | for (cnt 
= 0 ; cnt < repetitions ; cnt++) 780 | { 781 | r_array[cnt] = rand(); 782 | } 783 | 784 | memcpy(v_array, r_array, repetitions * sizeof(int)); 785 | qsort(v_array, repetitions, sizeof(int), cmp_int); 786 | 787 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 788 | { 789 | test_sort(a_array, r_array, v_array, lst, repetitions, 100, samples, qsort, sorts[cnt], dist, sizeof(int), cmp_int); 790 | } 791 | lst = repetitions + 1; 792 | } 793 | } 794 | 795 | goto end; 796 | 797 | large_range_test: 798 | 799 | for (samples = 32768, repetitions = 4 ; samples > 0 ; repetitions *= 2, samples /= 2) 800 | { 801 | if (max >= repetitions) 802 | { 803 | srand(rnd); 804 | 805 | for (cnt = 0 ; cnt < repetitions ; cnt++) 806 | { 807 | r_array[cnt] = rand(); 808 | } 809 | 810 | memcpy(v_array, r_array, repetitions * sizeof(int)); 811 | qsort(v_array, repetitions, sizeof(int), cmp_int); 812 | 813 | sprintf(dist, "random %d", repetitions); 814 | 815 | for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++) 816 | { 817 | test_sort(a_array, r_array, v_array, repetitions, repetitions, 100, samples, qsort, sorts[cnt], dist, sizeof(int), cmp_int); 818 | } 819 | } 820 | } 821 | 822 | end: 823 | 824 | 825 | free(a_array); 826 | free(r_array); 827 | free(v_array); 828 | 829 | return 0; 830 | } 831 | -------------------------------------------------------------------------------- /images/graph1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/octosort/73605cdbdfec66e7112c6a3a3830748fcd3bb665/images/graph1.png -------------------------------------------------------------------------------- /images/graph2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scandum/octosort/73605cdbdfec66e7112c6a3a3830748fcd3bb665/images/graph2.png -------------------------------------------------------------------------------- /octosort.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2014-2021 Igor van den Hoven ivdhoven@gmail.com 3 | */ 4 | 5 | /* 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 | */ 25 | 26 | /* 27 | octosort 1.0 28 | */ 29 | 30 | /* 31 | octosort is based on WikiSort and quadsort 32 | 33 | WikiSort: https://github.com/BonzaiThePenguin/WikiSort 34 | quadsort: https://github.com/scandum/quadsort 35 | searches: https://github.com/scandum/binary_search 36 | */ 37 | 38 | // binary insertion sort for up to 8 elements 39 | 40 | void FUNC(octo_tail_insert)(VAR *array, VAR *pta, CMPFUNC *cmp) 41 | { 42 | VAR *pte, key; 43 | 44 | pte = pta--; 45 | 46 | if (cmp(pta, pte) > 0) 47 | { 48 | key = *pte; 49 | 50 | if (cmp(pta - 3, &key) > 0) 51 | { 52 | *pte-- = *pta--; *pte-- = *pta--; *pte-- = *pta--; *pte-- = *pta--; 53 | } 54 | 55 | if (pta >= array + 1 && cmp(pta - 1, &key) > 0) 56 | { 57 | *pte-- = *pta--; *pte-- = *pta--; 58 | } 59 | 60 | if (pta >= array && cmp(pta, &key) > 0) 61 | { 62 | *pte-- = *pta; 63 | } 64 | *pte = key; 65 | } 66 | } 67 | 68 | // sort arrays of length 4 to 8 with reverse order run detection 69 | 70 | VAR *FUNC(octo_swap)(VAR array[], VAR *ptz, size_t start, size_t nmemb, CMPFUNC *cmp) 71 | { 72 | VAR *pta, swap; 73 | size_t i; 74 | 75 | pta = array + start; 76 | 77 | if (cmp(&pta[0], &pta[1]) > 0) 78 | { 79 | if (cmp(&pta[2], &pta[3]) > 0) 80 | { 81 | if (cmp(&pta[1], &pta[2]) > 0) 82 | { 83 | goto Swapper; 84 | } 85 | swap = pta[2]; pta[2] = pta[3]; pta[3] = swap; 86 | } 87 | swap = pta[0]; pta[0] = pta[1]; pta[1] = swap; 88 | } 89 | else if (cmp(&pta[2], &pta[3]) > 0) 90 | { 91 | swap = pta[2]; pta[2] = pta[3]; pta[3] = swap; 92 | } 93 | 94 | if (cmp(&pta[1], &pta[2]) > 0) 95 | { 96 | if (cmp(&pta[0], &pta[2]) <= 0) 97 | { 98 | if (cmp(&pta[1], &pta[3]) <= 0) 99 | { 100 | swap = pta[1]; pta[1] = pta[2]; pta[2] = swap; 101 | } 102 | else 103 | { 104 | swap = pta[1]; pta[1] = pta[2]; pta[2] = pta[3]; pta[3] = swap; 105 | } 106 | } 107 | else if (cmp(&pta[0], &pta[3]) > 0) 108 | { 109 | swap = pta[1]; pta[1] = pta[3]; pta[3] = swap; 110 | swap = pta[0]; pta[0] = pta[2]; pta[2] = swap; 111 | } 112 | else if (cmp(&pta[1], 
&pta[3]) <= 0) 113 | { 114 | swap = pta[1]; pta[1] = pta[0]; pta[0] = pta[2]; pta[2] = swap; 115 | } 116 | else 117 | { 118 | swap = pta[1]; pta[1] = pta[0]; pta[0] = pta[2]; pta[2] = pta[3]; pta[3] = swap; 119 | } 120 | } 121 | 122 | for (i = 4 ; i < nmemb ; i++) 123 | { 124 | FUNC(octo_tail_insert)(pta, &pta[i], cmp); 125 | } 126 | 127 | if (ptz) 128 | { 129 | do 130 | { 131 | swap = *ptz; 132 | *ptz++ = *--pta; 133 | *pta = swap; 134 | } 135 | while (ptz < pta); 136 | } 137 | return NULL; 138 | 139 | Swapper: 140 | 141 | if (ptz == NULL || cmp(&pta[-1], &pta[0]) > 0) 142 | { 143 | switch (nmemb) 144 | { 145 | case 8: 146 | if (cmp(&pta[6], &pta[7]) <= 0) 147 | { 148 | break; 149 | } 150 | case 7: 151 | if (cmp(&pta[5], &pta[6]) <= 0) 152 | { 153 | break; 154 | } 155 | case 6: 156 | if (cmp(&pta[4], &pta[5]) <= 0) 157 | { 158 | break; 159 | } 160 | case 5: 161 | if (cmp(&pta[3], &pta[4]) <= 0) 162 | { 163 | break; 164 | } 165 | case 4: 166 | return ptz ? ptz : pta; 167 | } 168 | } 169 | 170 | swap = pta[0]; pta[0] = pta[3]; pta[3] = swap; 171 | swap = pta[1]; pta[1] = pta[2]; pta[2] = swap; 172 | 173 | for (i = 4 ; i < nmemb ; i++) 174 | { 175 | FUNC(octo_tail_insert)(pta, &pta[i], cmp); 176 | } 177 | 178 | if (ptz) 179 | { 180 | do 181 | { 182 | swap = *ptz; 183 | *ptz++ = *--pta; 184 | *pta = swap; 185 | } 186 | while (ptz < pta); 187 | } 188 | return NULL; 189 | } 190 | 191 | // find the index of the first value within the range that is equal to array[index] 192 | 193 | size_t FUNC(monobound_binary_first)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp) 194 | { 195 | size_t top, mid, end = range.end; 196 | 197 | if (range.start >= end) 198 | { 199 | return range.start; 200 | } 201 | 202 | top = end - range.start; 203 | 204 | while (top > 1) 205 | { 206 | mid = top / 2; 207 | 208 | if (cmp(&value, &array[end - mid]) <= 0) 209 | { 210 | end -= mid; 211 | } 212 | top -= mid; 213 | } 214 | 215 | if (cmp(&value, &array[end-1]) <= 0) 216 | { 217 
| return --end; 218 | } 219 | return end; 220 | } 221 | 222 | // find the index of the last value within the range that is equal to array[index], plus 1 223 | 224 | size_t FUNC(monobound_binary_last)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp) 225 | { 226 | size_t top, mid, start = range.start; 227 | 228 | if (start >= range.end) 229 | { 230 | return start; 231 | } 232 | 233 | top = range.end - start; 234 | 235 | while (top > 1) 236 | { 237 | mid = top / 2; 238 | 239 | if (cmp(&array[start + mid], &value) <= 0) 240 | { 241 | start += mid; 242 | } 243 | top -= mid; 244 | } 245 | 246 | if (cmp(&array[start], &value) <= 0) 247 | { 248 | return ++start; 249 | } 250 | return start; 251 | } 252 | 253 | // combine a linear search with a binary search to reduce the number of comparisons in situations 254 | // where have some idea as to how many unique values there are and where the next value might be 255 | 256 | size_t FUNC(FindFirstForward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique) 257 | { 258 | size_t skip, index; 259 | 260 | skip = Max(range_length(range) / unique, 1); 261 | 262 | for (index = range.start + skip ; cmp(&value, &array[index - 1]) > 0 ; index += skip) 263 | { 264 | if (index >= range.end - skip) 265 | { 266 | return FUNC(monobound_binary_first)(array, value, new_range(index, range.end), cmp); 267 | } 268 | } 269 | return FUNC(monobound_binary_first)(array, value, new_range(index - skip, index), cmp); 270 | } 271 | 272 | size_t FUNC(FindLastForward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique) 273 | { 274 | size_t skip, index; 275 | 276 | if (range_length(range) == 0) 277 | return range.start; 278 | 279 | skip = Max(range_length(range)/unique, 1); 280 | 281 | for (index = range.start + skip; cmp(&array[index - 1], &value) <= 0 ; index += skip) 282 | { 283 | if (index >= range.end - skip) 284 | { 285 | return 
FUNC(monobound_binary_last)(array, value, new_range(index, range.end), cmp); 286 | } 287 | } 288 | return FUNC(monobound_binary_last)(array, value, new_range(index - skip, index), cmp); 289 | } 290 | 291 | size_t FUNC(FindFirstBackward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique) 292 | { 293 | size_t skip, index; 294 | 295 | if (range_length(range) == 0) 296 | return range.start; 297 | 298 | skip = Max(range_length(range)/unique, 1); 299 | 300 | for (index = range.end - skip; index > range.start && cmp(&value, &array[index - 1]) <= 0 ; index -= skip) 301 | { 302 | if (index < range.start + skip) 303 | { 304 | return FUNC(monobound_binary_first)(array, value, new_range(range.start, index), cmp); 305 | } 306 | } 307 | return FUNC(monobound_binary_first)(array, value, new_range(index, index + skip), cmp); 308 | } 309 | 310 | size_t FUNC(FindLastBackward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique) 311 | { 312 | size_t skip, index; 313 | 314 | if (range_length(range) == 0) 315 | return range.start; 316 | 317 | skip = Max(range_length(range)/unique, 1); 318 | 319 | for (index = range.end - skip; index > range.start && cmp(&array[index - 1], &value) > 0 ; index -= skip) 320 | { 321 | if (index < range.start + skip) 322 | { 323 | return FUNC(monobound_binary_last)(array, value, new_range(range.start, index), cmp); 324 | } 325 | } 326 | return FUNC(monobound_binary_last)(array, value, new_range(index, index + skip), cmp); 327 | } 328 | 329 | // monobound binary insertion sort 330 | 331 | void FUNC(monobound_sort)(VAR array[], const Range range, CMPFUNC *cmp) 332 | { 333 | VAR *start, *pta, *end, key; 334 | size_t i, mid, top, nmemb; 335 | 336 | start = array + range.start; 337 | nmemb = range.end - range.start; 338 | 339 | for (i = 1 ; i < nmemb ; i++) 340 | { 341 | pta = end = start + i; 342 | 343 | if (cmp(--pta, end) <= 0) 344 | { 345 | continue; 346 | } 347 | top = i; 348 | 349 
| while (top > 1) 350 | { 351 | mid = top / 2; 352 | 353 | if (cmp(pta - mid, end) > 0) 354 | { 355 | pta -= mid; 356 | } 357 | top -= mid; 358 | } 359 | 360 | key = *end; 361 | 362 | memmove(pta + 1, pta, (end - pta) * sizeof(VAR)); 363 | 364 | *pta = key; 365 | } 366 | } 367 | 368 | // swap a series of values in the array 369 | 370 | void FUNC(forward_block_swap)(VAR array[], const size_t start1, const size_t start2, size_t block_size) 371 | { 372 | VAR *pta, *ptb, swap; 373 | 374 | pta = array + start1; 375 | ptb = array + start2; 376 | 377 | while (block_size--) 378 | { 379 | swap = *pta; *pta++ = *ptb; *ptb++ = swap; 380 | } 381 | } 382 | 383 | void FUNC(backward_block_swap)(VAR array[], const size_t start1, const size_t start2, size_t block_size) 384 | { 385 | VAR *pta, *ptb, swap; 386 | 387 | pta = array + start1 + block_size; 388 | ptb = array + start2 + block_size; 389 | 390 | while (block_size--) 391 | { 392 | swap = *--pta; *pta = *--ptb; *ptb = swap; 393 | } 394 | } 395 | 396 | // rotate the values in an array ([0 1 2 3] becomes [1 2 3 0] if we rotate by 1) 397 | // this assumes that 0 <= amount <= range.length() 398 | 399 | void FUNC(Rotate)(VAR array[], const size_t amount, const Range range) 400 | { 401 | size_t start = range.start; 402 | size_t left = amount; 403 | size_t right = range.end - range.start - amount; 404 | size_t min = left <= right ? 
left : right; 405 | 406 | // Gries-Mills rotation 407 | 408 | while (min > 1) 409 | { 410 | if (left <= right) 411 | { 412 | do 413 | { 414 | FUNC(forward_block_swap)(array, start, start + left, left); 415 | 416 | start += left; 417 | right -= left; 418 | } 419 | while (left <= right); 420 | 421 | min = right; 422 | } 423 | else 424 | { 425 | do 426 | { 427 | FUNC(backward_block_swap)(array, start + left - right, start + left, right); 428 | 429 | left -= right; 430 | } 431 | while (right <= left); 432 | 433 | min = left; 434 | } 435 | } 436 | 437 | if (min) 438 | { 439 | if (left <= right) 440 | { 441 | VAR swap = array[start]; 442 | memmove(&array[start], &array[start + 1], (right) * sizeof(VAR)); 443 | array[start + right] = swap; 444 | } 445 | else 446 | { 447 | VAR swap = array[start + left]; 448 | memmove(&array[start + 1], &array[start], (left) * sizeof(VAR)); 449 | array[start] = swap; 450 | } 451 | } 452 | } 453 | 454 | // merge two ranges from one array into another array 455 | 456 | void FUNC(forward_merge_into)(VAR *dest, VAR *from, size_t nmemb, size_t block, CMPFUNC *cmp) 457 | { 458 | VAR *l, *r, *m, *e; // left, right, middle, end 459 | 460 | l = from; 461 | r = from + block; 462 | m = r; 463 | e = l + nmemb; 464 | 465 | while (1) 466 | { 467 | if (cmp(l, r) <= 0) 468 | { 469 | *dest++ = *l++; 470 | 471 | if (l == m) 472 | { 473 | do *dest++ = *r++; while (r < e); 474 | 475 | return; 476 | } 477 | } 478 | else 479 | { 480 | *dest++ = *r++; 481 | 482 | if (r == e) 483 | { 484 | do *dest++ = *l++; while (l < m); 485 | 486 | return; 487 | } 488 | } 489 | } 490 | } 491 | 492 | void FUNC(external_backward_merge)(VAR *array, VAR *swap, size_t nmemb, size_t block, CMPFUNC *cmp) 493 | { 494 | VAR *r, *m, *e, *s; // right, middle, end, swap 495 | 496 | m = array + block; 497 | e = array + nmemb - 1; 498 | r = m--; 499 | 500 | if (cmp(m, r) <= 0) 501 | { 502 | return; 503 | } 504 | 505 | while (cmp(m, e) <= 0) 506 | { 507 | e--; 508 | } 509 | 510 | s = swap; 
511 | 512 | do *s++ = *r++; while (r <= e); 513 | 514 | s--; 515 | 516 | *e-- = *m--; 517 | 518 | if (cmp(array, swap) <= 0) 519 | { 520 | while (1) 521 | { 522 | if (cmp(m, s) > 0) 523 | { 524 | *e-- = *m--; 525 | } 526 | else 527 | { 528 | *e-- = *s--; 529 | 530 | if (s < swap) 531 | { 532 | return; 533 | } 534 | } 535 | } 536 | } 537 | else 538 | { 539 | while (1) 540 | { 541 | if (cmp(m, s) > 0) 542 | { 543 | *e-- = *m--; 544 | 545 | if (m < array) 546 | { 547 | do *e-- = *s--; while (s >= swap); 548 | 549 | return; 550 | } 551 | } 552 | else 553 | { 554 | *e-- = *s--; 555 | } 556 | } 557 | } 558 | } 559 | 560 | // merge operation using an external buffer 561 | 562 | void FUNC(MergeExternal)(VAR array[], const Range A, const Range B, CMPFUNC *cmp, VAR *cache) 563 | { 564 | VAR *A_index = &cache[0]; 565 | VAR *B_index = &array[B.start]; 566 | VAR *insert_index = &array[A.start]; 567 | VAR *A_last = &cache[range_length(A)]; 568 | VAR *B_last = &array[B.end]; 569 | 570 | if (range_length(B) > 0 && range_length(A) > 0) 571 | { 572 | while (1) 573 | { 574 | if (cmp(A_index, B_index) <= 0) 575 | { 576 | *insert_index++ = *A_index++; 577 | 578 | if (A_index == A_last) 579 | break; 580 | } 581 | else 582 | { 583 | *insert_index++ = *B_index++; 584 | 585 | if (B_index == B_last) 586 | break; 587 | } 588 | } 589 | } 590 | // copy the remainder of A into the final array 591 | 592 | memcpy(insert_index, A_index, (A_last - A_index) * sizeof(VAR)); 593 | } 594 | 595 | // merge operation using an internal buffer 596 | 597 | // whenever we find a value to add to the final array, swap it with the value that's 598 | // already in that spot when this algorithm is finished, the 'I' range will contain 599 | // its original contents, but in a different order 600 | 601 | void FUNC(MergeInternal)(VAR array[], const Range A, const Range B, CMPFUNC *cmp, const Range I) 602 | { 603 | VAR swap, *pta, *ptb, *pti; 604 | size_t a = 0, b = 0, i = 0; 605 | size_t length_A = range_length(A); 
606 | size_t length_B = range_length(B); 607 | 608 | if (length_A > 0 && length_B > 0) 609 | { 610 | pta = array + A.start; 611 | ptb = array + B.start; 612 | pti = array + I.start; 613 | 614 | while (1) 615 | { 616 | if (cmp(&pti[a], &ptb[b]) <= 0) 617 | { 618 | swap = pta[i]; pta[i++] = pti[a]; pti[a] = swap; 619 | 620 | if (++a >= length_A) 621 | break; 622 | } 623 | else 624 | { 625 | swap = pta[i]; pta[i++] = ptb[b]; ptb[b] = swap; 626 | 627 | if (++b >= length_B) 628 | break; 629 | } 630 | } 631 | } 632 | FUNC(backward_block_swap)(array, I.start + a, A.start + i, length_A - a); 633 | } 634 | 635 | // merge operation without a buffer 636 | 637 | // this just repeatedly binary searches into B and rotates A into position. 638 | // the paper suggests using the 'rotation-based Hwang and Lin algorithm' here, 639 | // but I decided to stick with this because it had better situational performance 640 | 641 | // (Hwang and Lin is designed for merging subarrays of very different sizes, 642 | // but WikiSort almost always uses subarrays that are roughly the same size) 643 | 644 | // normally this is incredibly suboptimal, but this function is only called 645 | // when none of the A or B blocks in any subarray contained 2√A unique values, 646 | // which places a hard limit on the number of times this will ACTUALLY need 647 | // to binary search and rotate. 648 | 649 | // according to my analysis the worst case is √A rotations performed on √A items 650 | // once the constant factors are removed, which ends up being O(n) 651 | 652 | // again, this is NOT a general-purpose solution – it only works well in this case! 
653 | // kind of like how the O(n^2) insertion sort is used in some places 654 | 655 | void FUNC(MergeInPlace)(VAR array[], Range A, Range B, CMPFUNC *cmp, VAR *cache, const size_t cache_size) 656 | { 657 | if (range_length(A) == 0 || range_length(B) == 0) 658 | { 659 | return; 660 | } 661 | 662 | while (1) 663 | { 664 | // find the first place in B where the first item in A needs to be inserted 665 | size_t mid = FUNC(monobound_binary_first)(array, array[A.start], B, cmp); 666 | 667 | // rotate A into place 668 | size_t amount = mid - A.end; 669 | 670 | FUNC(Rotate)(array, range_length(A), new_range(A.start, mid)); 671 | 672 | if (B.end == mid) 673 | { 674 | break; 675 | } 676 | 677 | // calculate the new A and B ranges 678 | 679 | B.start = mid; 680 | A = new_range(A.start + amount, B.start); 681 | A.start = FUNC(monobound_binary_last)(array, array[A.start], A, cmp); 682 | 683 | if (range_length(A) == 0) 684 | { 685 | break; 686 | } 687 | } 688 | } 689 | 690 | // bottom-up merge sort combined with an in-place merge algorithm for O(1) memory use 691 | 692 | void FUNC(octosort)(VAR array[], size_t size, VAR *external_cache, size_t cache_size, CMPFUNC *cmp) 693 | { 694 | VAR swap, stack_cache[512], *cache = external_cache; 695 | 696 | #if DYNAMIC_CACHE 697 | // turns into a full-throttle merge sort since everything fits into the cache 698 | 699 | if (cache == NULL) 700 | { 701 | cache_size = 1 + size / 2; 702 | 703 | cache = (VAR *) malloc(cache_size * sizeof(VAR)); 704 | 705 | if (cache == NULL) 706 | { 707 | external_cache = cache = stack_cache; 708 | 709 | cache_size = 512; 710 | } 711 | } 712 | #else 713 | // since the cache size is fixed, it's still O(1) memory 714 | // the minimum stack size is typically 8192 KB, so 512 elements should fit comfortably 715 | // removing the cache entirely gives 60% of the performance of qsort() 716 | 717 | if (cache == NULL) 718 | { 719 | cache = stack_cache; 720 | 721 | cache_size = 512; 722 | } 723 | #endif 724 | 725 | // if 
the array is of size 1, 2, 3 .. 8 sort them like so: 726 | 727 | if (size <= 8) 728 | { 729 | FUNC(monobound_sort)(array, new_range(0, size), cmp); 730 | 731 | goto End; 732 | } 733 | 734 | WikiIterator iterator = WikiIterator_new(size, 4); 735 | 736 | VAR *pto = NULL; 737 | 738 | // sort groups of 4-8 items at a time 739 | 740 | while (!WikiIterator_finished(&iterator)) 741 | { 742 | Range range = WikiIterator_nextRange(&iterator); 743 | 744 | pto = FUNC(octo_swap)(array, pto, range.start, range.end - range.start, cmp); 745 | } 746 | 747 | if (pto) 748 | { 749 | VAR *pta = array + size - 1; 750 | VAR *ptz = pto; 751 | 752 | do 753 | { 754 | swap = *ptz; 755 | *ptz = *pta; 756 | *pta = swap; 757 | } 758 | while (++ptz < --pta); 759 | 760 | if (pto == array) 761 | { 762 | goto End; 763 | } 764 | } 765 | 766 | // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc. 767 | 768 | while (1) 769 | { 770 | // if every A and B block will fit into the cache, use a special branch specifically for merging with the cache 771 | // (we use < rather than <= since the block size might be one more than iterator.length()) 772 | if (WikiIterator_length(&iterator) < cache_size) 773 | { 774 | // perform a quad merge if the four subarrays fit into the cache 775 | 776 | // array: [A][B][C][D] 777 | // cache: [A B] Step 1 778 | // cache: [C D] Step 2 779 | // array: [A B C D] Step 3 780 | 781 | if ((WikiIterator_length(&iterator) + 1) * 4 <= cache_size && (WikiIterator_length(&iterator) + 1) * 4 <= size) 782 | { 783 | WikiIterator_begin(&iterator); 784 | 785 | while (!WikiIterator_finished(&iterator)) 786 | { 787 | Range A = WikiIterator_nextRange(&iterator); 788 | Range B = WikiIterator_nextRange(&iterator); 789 | Range C = WikiIterator_nextRange(&iterator); 790 | Range D = WikiIterator_nextRange(&iterator); 791 | 792 | if (cmp(&array[A.end - 1], &array[B.start]) <= 0) 793 | { 794 | if (cmp(&array[C.end - 1], &array[D.start]) <= 0) 795 | { 796 | if 
(cmp(&array[B.end - 1], &array[C.start]) <= 0) 797 | { 798 | continue; // A through D are in order, skip doing anything else 799 | } 800 | // A and B are in order and C and D are in order, copy to cache 801 | memcpy(&cache[0], &array[A.start], range_length(A) * sizeof(VAR)); 802 | memcpy(&cache[A.end - A.start], &array[B.start], range_length(B) * sizeof(VAR)); 803 | memcpy(&cache[B.end - A.start], &array[C.start], range_length(C) * sizeof(VAR)); 804 | memcpy(&cache[C.end - A.start], &array[D.start], range_length(D) * sizeof(VAR)); 805 | 806 | goto Step3; 807 | } 808 | // A and B are in order, copy to cache 809 | memcpy(&cache[0], &array[A.start], range_length(A) * sizeof(VAR)); 810 | memcpy(&cache[A.end - A.start], &array[B.start], range_length(B) * sizeof(VAR)); 811 | 812 | goto Step2; 813 | } 814 | // Step1: 815 | 816 | // A and B are not in order, merge to cache 817 | FUNC(forward_merge_into)(cache, array + A.start, B.end - A.start, A.end - A.start, cmp); 818 | 819 | if (cmp(&array[C.end - 1], &array[D.start]) <= 0) // C and D are in order, copy to cache 820 | { 821 | memcpy(&cache[B.end - A.start], &array[C.start], range_length(C) * sizeof(VAR)); 822 | memcpy(&cache[C.end - A.start], &array[D.start], range_length(D) * sizeof(VAR)); 823 | } 824 | else 825 | { 826 | Step2: 827 | 828 | // C and D are not in order, merge to cache 829 | FUNC(forward_merge_into)(&cache[B.end - A.start], &array[C.start], D.end - C.start, C.end - C.start, cmp); 830 | } 831 | Step3: 832 | 833 | // merge A through D from the cache back into the array 834 | FUNC(forward_merge_into)(&array[A.start], &cache[0], D.end - A.start, B.end - A.start, cmp); 835 | } 836 | 837 | // we merged two levels at the same time, so we're done with this level already 838 | // iterator.nextLevel() is called again at the bottom of this outer merge loop 839 | 840 | WikiIterator_nextLevel(&iterator); 841 | } 842 | else 843 | { 844 | WikiIterator_begin(&iterator); 845 | 846 | while 
(!WikiIterator_finished(&iterator)) 847 | { 848 | Range A = WikiIterator_nextRange(&iterator); 849 | Range B = WikiIterator_nextRange(&iterator); 850 | 851 | if (cmp(&array[A.end - 1], &array[B.start]) <= 0) 852 | { 853 | continue; // A and B are in order, skip doing anything else 854 | } 855 | // A and B are not in order, merge through the cache 856 | FUNC(external_backward_merge)(array + A.start, cache, B.end - A.start, range_length(A), cmp); 857 | } 858 | } 859 | } 860 | else 861 | { 862 | // this is where the in-place merge logic starts! 863 | // 1. pull out two internal buffers each containing √A unique values 864 | // 1a. adjust block_size and buffer_size if we couldn't find enough unique values 865 | // 2. loop over the A and B subarrays within this level of the merge sort 866 | // 3. break A and B into blocks of size 'block_size' 867 | // 4. "tag" each of the A blocks with values from the first internal buffer 868 | // 5. roll the A blocks through the B blocks and drop/rotate them where they belong 869 | // 6. merge each A block with any B values that follow, using the cache or the second internal buffer 870 | // 7. sort the second internal buffer if it exists 871 | // 8. 
redistribute the two internal buffers back into the array 872 | 873 | size_t block_size = monobound_sqrt(WikiIterator_length(&iterator)); 874 | 875 | size_t buffer_size = WikiIterator_length(&iterator) / block_size + 1; 876 | 877 | // as an optimization, we really only need to pull out the internal buffers once for each level of merges 878 | // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level 879 | 880 | Range buffer1, buffer2, A, B; 881 | size_t find_separately = 0; 882 | size_t index, last, count, find, start, pull_index = 0; 883 | 884 | struct 885 | { 886 | size_t from; 887 | size_t to; 888 | size_t count; 889 | Range range; 890 | } 891 | pull[2]; 892 | 893 | pull[0].from = pull[0].to = pull[0].count = 0; pull[0].range = new_range(0, 0); 894 | pull[1].from = pull[1].to = pull[1].count = 0; pull[1].range = new_range(0, 0); 895 | 896 | buffer1 = new_range(0, 0); 897 | buffer2 = new_range(0, 0); 898 | 899 | find = buffer_size * 2; // find two internal buffers of size 'buffer_size' each 900 | 901 | if (block_size <= cache_size) 902 | { 903 | find = buffer_size; // if every A block fits into the cache then we won't need the second internal buffer 904 | } 905 | else if (find > WikiIterator_length(&iterator)) 906 | { 907 | find_separately = 1; // we can't fit both buffers into the same A or B subarray, so find two buffers separately 908 | find = buffer_size; 909 | } 910 | 911 | // we need to find either a single contiguous space containing 2√A unique values, which will be split up into two buffers of size √A each, 912 | // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values, 913 | // OR if we couldn't find that many unique values, we need the largest possible buffer we can get 914 | 915 | // in the case where it couldn't find a single buffer of at least √A unique values, 916 | // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace) 
917 | 918 | WikiIterator_begin(&iterator); 919 | 920 | while (!WikiIterator_finished(&iterator)) 921 | { 922 | A = WikiIterator_nextRange(&iterator); 923 | B = WikiIterator_nextRange(&iterator); 924 | 925 | // just store information about where the values will be pulled from and to, 926 | // as well as how many values there are, to create the two internal buffers 927 | 928 | // check A for the number of unique values we need to fill an internal buffer 929 | // these values will be pulled out to the start of A 930 | for (last = A.start, count = 1; count < find; last = index, count++) 931 | { 932 | index = FUNC(FindLastForward)(array, array[last], new_range(last + 1, A.end), cmp, find - count); 933 | 934 | if (index == A.end) 935 | break; 936 | } 937 | index = last; 938 | 939 | if (count >= buffer_size) 940 | { 941 | // keep track of the range within the array where we'll need to "pull out" these values to create the internal buffer 942 | PULL(A.start); 943 | pull_index = 1; 944 | 945 | if (count == buffer_size + buffer_size) 946 | { 947 | // we were able to find a single contiguous section containing 2√A unique values, 948 | // so this section can be used to contain both of the internal buffers we'll need 949 | buffer1 = new_range(A.start, A.start + buffer_size); 950 | buffer2 = new_range(A.start + buffer_size, A.start + count); 951 | break; 952 | } 953 | else if (find == buffer_size + buffer_size) 954 | { 955 | // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, 956 | // so we still need to find a second separate buffer of at least √A unique values 957 | buffer1 = new_range(A.start, A.start + count); 958 | find = buffer_size; 959 | } 960 | else if (block_size <= cache_size) 961 | { 962 | // we found the first and only internal buffer that we need, so we're done! 
963 | buffer1 = new_range(A.start, A.start + count); 964 | break; 965 | } 966 | else if (find_separately) 967 | { 968 | // found one buffer, but now find the other one 969 | buffer1 = new_range(A.start, A.start + count); 970 | find_separately = 0; 971 | } 972 | else 973 | { 974 | // we found a second buffer in an 'A' subarray containing √A unique values, so we're done! 975 | buffer2 = new_range(A.start, A.start + count); 976 | break; 977 | } 978 | } 979 | else if (pull_index == 0 && count > range_length(buffer1)) 980 | { 981 | // keep track of the largest buffer we were able to find 982 | buffer1 = new_range(A.start, A.start + count); 983 | PULL(A.start); 984 | } 985 | 986 | // check B for the number of unique values we need to fill an internal buffer 987 | // these values will be pulled out to the end of B 988 | for (last = B.end - 1, count = 1; count < find; last = index - 1, count++) 989 | { 990 | index = FUNC(FindFirstBackward)(array, array[last], new_range(B.start, last), cmp, find - count); 991 | if (index == B.start) 992 | break; 993 | } 994 | index = last; 995 | 996 | if (count >= buffer_size) 997 | { 998 | // keep track of the range within the array where we'll need to "pull out" these values to create the internal buffer 999 | PULL(B.end); 1000 | pull_index = 1; 1001 | 1002 | if (count == buffer_size + buffer_size) 1003 | { 1004 | // we were able to find a single contiguous section containing 2√A unique values, 1005 | // so this section can be used to contain both of the internal buffers we'll need 1006 | buffer1 = new_range(B.end - count, B.end - buffer_size); 1007 | buffer2 = new_range(B.end - buffer_size, B.end); 1008 | break; 1009 | } 1010 | else if (find == buffer_size + buffer_size) 1011 | { 1012 | // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, 1013 | // so we still need to find a second separate buffer of at least √A unique values 1014 | buffer1 = new_range(B.end - count, B.end); 1015 | 
find = buffer_size; 1016 | } 1017 | else if (block_size <= cache_size) 1018 | { 1019 | // we found the first and only internal buffer that we need, so we're done! 1020 | buffer1 = new_range(B.end - count, B.end); 1021 | break; 1022 | } 1023 | else if (find_separately) 1024 | { 1025 | // found one buffer, but now find the other one 1026 | buffer1 = new_range(B.end - count, B.end); 1027 | find_separately = 0; 1028 | } 1029 | else 1030 | { 1031 | // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray, 1032 | // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2 1033 | if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count; 1034 | 1035 | // we found a second buffer in an 'B' subarray containing √A unique values, so we're done! 1036 | buffer2 = new_range(B.end - count, B.end); 1037 | break; 1038 | } 1039 | } 1040 | else if (pull_index == 0 && count > range_length(buffer1)) 1041 | { 1042 | // keep track of the largest buffer we were able to find 1043 | buffer1 = new_range(B.end - count, B.end); 1044 | PULL(B.end); 1045 | } 1046 | } 1047 | 1048 | // pull out the two ranges so we can use them as internal buffers 1049 | for (pull_index = 0; pull_index < 2; pull_index++) 1050 | { 1051 | Range range; 1052 | size_t length = pull[pull_index].count; 1053 | 1054 | if (pull[pull_index].to < pull[pull_index].from) 1055 | { 1056 | // we're pulling the values out to the left, which means the start of an A subarray 1057 | index = pull[pull_index].from; 1058 | for (count = 1; count < length; count++) 1059 | { 1060 | index = FUNC(FindFirstBackward)(array, array[index - 1], new_range(pull[pull_index].to, pull[pull_index].from - (count - 1)), cmp, length - count); 1061 | range = new_range(index + 1, pull[pull_index].from + 1); 1062 | FUNC(Rotate)(array, range_length(range) - count, range); 1063 | pull[pull_index].from = index + count; 
1064 | } 1065 | } 1066 | else if (pull[pull_index].to > pull[pull_index].from) 1067 | { 1068 | // we're pulling values out to the right, which means the end of a B subarray 1069 | index = pull[pull_index].from + 1; 1070 | for (count = 1; count < length; count++) 1071 | { 1072 | index = FUNC(FindLastForward)(array, array[index], new_range(index, pull[pull_index].to), cmp, length - count); 1073 | range = new_range(pull[pull_index].from, index - 1); 1074 | FUNC(Rotate)(array, count, range); 1075 | pull[pull_index].from = index - 1 - count; 1076 | } 1077 | } 1078 | } 1079 | 1080 | // adjust block_size and buffer_size based on the values we were able to pull out 1081 | buffer_size = range_length(buffer1); 1082 | block_size = WikiIterator_length(&iterator)/buffer_size + 1; 1083 | 1084 | // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks, 1085 | // so this was originally here to test the math for adjusting block_size above 1086 | // assert((WikiIterator_length(&iterator) + 1)/block_size <= buffer_size); 1087 | 1088 | // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort! 
1089 | WikiIterator_begin(&iterator); 1090 | 1091 | while (!WikiIterator_finished(&iterator)) 1092 | { 1093 | A = WikiIterator_nextRange(&iterator); 1094 | B = WikiIterator_nextRange(&iterator); 1095 | 1096 | // remove any parts of A or B that are being used by the internal buffers 1097 | start = A.start; 1098 | 1099 | if (start == pull[0].range.start) 1100 | { 1101 | if (pull[0].from > pull[0].to) 1102 | { 1103 | A.start += pull[0].count; 1104 | 1105 | // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge 1106 | // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4, 1107 | // which also only happens when cache_size is small or 1 since it'd otherwise use MergeExternal 1108 | if (range_length(A) == 0) 1109 | continue; 1110 | } 1111 | else if (pull[0].from < pull[0].to) 1112 | { 1113 | B.end -= pull[0].count; 1114 | if (range_length(B) == 0) 1115 | continue; 1116 | } 1117 | } 1118 | 1119 | if (start == pull[1].range.start) 1120 | { 1121 | if (pull[1].from > pull[1].to) 1122 | { 1123 | A.start += pull[1].count; 1124 | if (range_length(A) == 0) 1125 | continue; 1126 | } 1127 | else if (pull[1].from < pull[1].to) 1128 | { 1129 | B.end -= pull[1].count; 1130 | if (range_length(B) == 0) 1131 | continue; 1132 | } 1133 | } 1134 | 1135 | if (cmp(&array[A.end - 1], &array[B.start]) > 0) // A and B are not in order, so merge them 1136 | { 1137 | Range blockA, firstA, lastA, lastB, blockB; 1138 | size_t indexA, findA; 1139 | 1140 | // break the remainder of A into blocks. 
firstA is the uneven-sized first A block 1141 | blockA = new_range(A.start, A.end); 1142 | firstA = new_range(A.start, A.start + range_length(blockA) % block_size); 1143 | 1144 | // swap the first value of each A block with the value in buffer1 1145 | for (indexA = buffer1.start, index = firstA.end; index < blockA.end; indexA++, index += block_size) 1146 | { 1147 | SWAP(array[indexA], array[index]); 1148 | } 1149 | 1150 | // start rolling the A blocks through the B blocks! 1151 | // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well 1152 | lastA = firstA; 1153 | lastB = new_range(0, 0); 1154 | blockB = new_range(B.start, B.start + Min(block_size, range_length(B))); 1155 | blockA.start += range_length(firstA); 1156 | indexA = buffer1.start; 1157 | 1158 | // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it 1159 | // otherwise, if the second buffer is available, block swap the contents into that 1160 | if (range_length(lastA) <= cache_size) 1161 | { 1162 | memcpy(&cache[0], &array[lastA.start], range_length(lastA) * sizeof(VAR)); 1163 | } 1164 | else if (range_length(buffer2) > 0) 1165 | { 1166 | FUNC(forward_block_swap)(array, lastA.start, buffer2.start, range_length(lastA)); 1167 | } 1168 | 1169 | if (range_length(blockA) > 0) 1170 | { 1171 | while (1) 1172 | { 1173 | // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, 1174 | // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. 
1175 | 1176 | if (range_length(blockB) == 0 || (range_length(lastB) > 0 && cmp(&array[indexA], &array[lastB.end - 1]) <= 0)) 1177 | { 1178 | // figure out where to split the previous B block, and rotate it at the split 1179 | size_t B_split = FUNC(monobound_binary_first)(array, array[indexA], lastB, cmp); 1180 | size_t B_remaining = lastB.end - B_split; 1181 | 1182 | // swap the minimum A block to the beginning of the rolling A blocks 1183 | size_t minA = blockA.start; 1184 | for (findA = minA + block_size; findA < blockA.end; findA += block_size) 1185 | { 1186 | if (cmp(&array[minA], &array[findA]) > 0) 1187 | { 1188 | minA = findA; 1189 | } 1190 | } 1191 | FUNC(forward_block_swap)(array, blockA.start, minA, block_size); 1192 | 1193 | // swap the first item of the previous A block back with its original value, which is stored in buffer1 1194 | SWAP(array[blockA.start], array[indexA]); 1195 | indexA++; 1196 | 1197 | // locally merge the previous A block with the B values that follow it if lastA fits into the external cache 1198 | // we'll use that (with MergeExternal), or if the second internal buffer exists we'll use that (with MergeInternal), 1199 | // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) 1200 | 1201 | if (range_length(lastA) <= cache_size) 1202 | { 1203 | FUNC(MergeExternal)(array, lastA, new_range(lastA.end, B_split), cmp, cache); 1204 | } 1205 | else if (range_length(buffer2) > 0) 1206 | { 1207 | FUNC(MergeInternal)(array, lastA, new_range(lastA.end, B_split), cmp, buffer2); 1208 | } 1209 | else 1210 | { 1211 | FUNC(MergeInPlace)(array, lastA, new_range(lastA.end, B_split), cmp, cache, cache_size); 1212 | } 1213 | 1214 | if (range_length(buffer2) > 0 || block_size <= cache_size) 1215 | { 1216 | // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway 1217 | 1218 | if (block_size <= cache_size) 1219 | { 1220 | memcpy(&cache[0], &array[blockA.start], 
block_size * sizeof(VAR)); 1221 | } 1222 | else 1223 | { 1224 | FUNC(forward_block_swap)(array, blockA.start, buffer2.start, block_size); 1225 | } 1226 | 1227 | // this is equivalent to rotating, but faster 1228 | // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it 1229 | // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs 1230 | FUNC(forward_block_swap)(array, B_split, blockA.start + block_size - B_remaining, B_remaining); 1231 | } 1232 | else 1233 | { 1234 | // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation 1235 | FUNC(Rotate)(array, blockA.start - B_split, new_range(B_split, blockA.start + block_size)); 1236 | } 1237 | 1238 | // update the range for the remaining A blocks, and the range remaining from the B block after it was split 1239 | lastA = new_range(blockA.start - B_remaining, blockA.start - B_remaining + block_size); 1240 | lastB = new_range(lastA.end, lastA.end + B_remaining); 1241 | 1242 | // if there are no more A blocks remaining, this step is finished! 
1243 | blockA.start += block_size; 1244 | if (range_length(blockA) == 0) 1245 | { 1246 | break; 1247 | } 1248 | 1249 | } 1250 | else if (range_length(blockB) < block_size) 1251 | { 1252 | // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation 1253 | // the cache is disabled here since it might contain the contents of the previous A block 1254 | FUNC(Rotate)(array, blockB.start - blockA.start, new_range(blockA.start, blockB.end)); 1255 | 1256 | lastB = new_range(blockA.start, blockA.start + range_length(blockB)); 1257 | blockA.start += range_length(blockB); 1258 | blockA.end += range_length(blockB); 1259 | blockB.end = blockB.start; 1260 | } 1261 | else 1262 | { 1263 | // roll the leftmost A block to the end by swapping it with the next B block 1264 | FUNC(forward_block_swap)(array, blockA.start, blockB.start, block_size); 1265 | lastB = new_range(blockA.start, blockA.start + block_size); 1266 | 1267 | blockA.start += block_size; 1268 | blockA.end += block_size; 1269 | blockB.start += block_size; 1270 | 1271 | if (blockB.end > B.end - block_size) 1272 | { 1273 | blockB.end = B.end; 1274 | } 1275 | else 1276 | { 1277 | blockB.end += block_size; 1278 | } 1279 | } 1280 | } 1281 | } 1282 | 1283 | // merge the last A block with the remaining B values 1284 | if (range_length(lastA) <= cache_size) 1285 | { 1286 | FUNC(MergeExternal)(array, lastA, new_range(lastA.end, B.end), cmp, cache); 1287 | } 1288 | else if (range_length(buffer2) > 0) 1289 | { 1290 | FUNC(MergeInternal)(array, lastA, new_range(lastA.end, B.end), cmp, buffer2); 1291 | } 1292 | else 1293 | { 1294 | FUNC(MergeInPlace)(array, lastA, new_range(lastA.end, B.end), cmp, cache, cache_size); 1295 | } 1296 | } 1297 | } 1298 | 1299 | // when we're finished with this merge step we should have the one or two internal buffers left over, where the second buffer is all jumbled up 1300 | // insertion sort the second buffer, then redistribute the buffers back into the 
array using the opposite process used for creating the buffer 1301 | 1302 | // While an unstable sort like quicksort could be applied here, in benchmarks it was consistently slightly slower than a simple insertion sort, 1303 | // even for tens of millions of items. this may be because insertion sort is quite fast when the data is already somewhat sorted, like it is here 1304 | 1305 | FUNC(monobound_sort)(array, buffer2, cmp); 1306 | 1307 | for (pull_index = 0; pull_index < 2; pull_index++) 1308 | { 1309 | size_t amount, unique = pull[pull_index].count * 2; 1310 | if (pull[pull_index].from > pull[pull_index].to) 1311 | { 1312 | // the values were pulled out to the left, so redistribute them back to the right 1313 | Range buffer = new_range(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); 1314 | while (range_length(buffer) > 0) 1315 | { 1316 | index = FUNC(FindFirstForward)(array, array[buffer.start], new_range(buffer.end, pull[pull_index].range.end), cmp, unique); 1317 | amount = index - buffer.end; 1318 | FUNC(Rotate)(array, range_length(buffer), new_range(buffer.start, index)); 1319 | buffer.start += (amount + 1); 1320 | buffer.end += amount; 1321 | unique -= 2; 1322 | } 1323 | } 1324 | else if (pull[pull_index].from < pull[pull_index].to) 1325 | { 1326 | // the values were pulled out to the right, so redistribute them back to the left 1327 | Range buffer = new_range(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); 1328 | while (range_length(buffer) > 0) 1329 | { 1330 | index = FUNC(FindLastBackward)(array, array[buffer.end - 1], new_range(pull[pull_index].range.start, buffer.start), cmp, unique); 1331 | amount = buffer.start - index; 1332 | FUNC(Rotate)(array, amount, new_range(index, buffer.end)); 1333 | buffer.start -= amount; 1334 | buffer.end -= (amount + 1); 1335 | unique -= 2; 1336 | } 1337 | } 1338 | } 1339 | } 1340 | 1341 | // double the size of each A and B subarray that will be merged 
in the next level 1342 | 1343 | if (!WikiIterator_nextLevel(&iterator)) 1344 | { 1345 | break; 1346 | } 1347 | } 1348 | 1349 | End: 1350 | 1351 | #if DYNAMIC_CACHE 1352 | 1353 | if (cache != external_cache) 1354 | { 1355 | free(cache); 1356 | } 1357 | 1358 | #endif 1359 | 1360 | return; 1361 | } 1362 | -------------------------------------------------------------------------------- /octosort.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2014-2021 Igor van den Hoven ivdhoven@gmail.com 3 | */ 4 | 5 | /* 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24 | */ 25 | 26 | /* 27 | octosort 1.0 28 | */ 29 | 30 | /* 31 | octosort is based on WikiSort and quadsort 32 | 33 | WikiSort: https://github.com/BonzaiThePenguin/WikiSort 34 | quadsort: https://github.com/scandum/quadsort 35 | searches: https://github.com/scandum/binary_search 36 | */ 37 | 38 | #ifndef OCTOSORT_H 39 | #define OCTOSORT_H 40 | 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | //#define cmp(a,b) (*(a) > *(b)) 47 | 48 | typedef int CMPFUNC (const void *a, const void *b); 49 | 50 | // Set to 1 to see how it performs when given more memory 51 | 52 | #define DYNAMIC_CACHE 0 53 | 54 | // utilities 55 | 56 | #define SWAP(value1, value2) {swap = value1;value1 = value2;value2 = swap;} 57 | 58 | #define PULL(_to) \ 59 | pull[pull_index].range = new_range(A.start, B.end); \ 60 | pull[pull_index].count = count; \ 61 | pull[pull_index].from = index; \ 62 | pull[pull_index].to = _to 63 | 64 | // not as fast as math.h's sqrt() but it's portable 65 | 66 | size_t monobound_sqrt(const size_t size) 67 | { 68 | size_t bot, mid, top, sum; 69 | 70 | bot = 0; 71 | top = 65536; 72 | 73 | while (top > 1) 74 | { 75 | mid = top / 2; 76 | sum = bot + mid; 77 | 78 | if (sum * sum <= size) 79 | { 80 | bot += mid; 81 | } 82 | top -= mid; 83 | } 84 | return bot; 85 | } 86 | 87 | size_t Min(const size_t a, const size_t b) 88 | { 89 | return a < b ? a : b; 90 | } 91 | 92 | size_t Max(const size_t a, const size_t b) 93 | { 94 | return a > b ? a : b; 95 | } 96 | 97 | // 63 -> 32, 64 -> 64, etc. 
this comes from Hacker's Delight 98 | 99 | size_t FloorPowerOfTwo (const size_t value) 100 | { 101 | size_t x = value; 102 | x = x | (x >> 1); 103 | x = x | (x >> 2); 104 | x = x | (x >> 4); 105 | x = x | (x >> 8); 106 | x = x | (x >> 16); 107 | #if __LP64__ 108 | x = x | (x >> 32); 109 | #endif 110 | return x - (x >> 1); 111 | } 112 | 113 | // structure to represent ranges within the array 114 | 115 | typedef struct 116 | { 117 | size_t start; 118 | size_t end; 119 | } 120 | Range; 121 | 122 | size_t range_length(Range range) 123 | { 124 | return range.end - range.start; 125 | } 126 | 127 | Range new_range(const size_t start, const size_t end) 128 | { 129 | return (Range) {start, end}; 130 | } 131 | 132 | 133 | // calculate how to scale the index value to the range within the array 134 | // the bottom-up merge sort only operates on values that are powers of two, 135 | // so scale down to that power of two, then use a fraction to scale back again 136 | 137 | typedef struct 138 | { 139 | size_t size; 140 | size_t power_of_two; 141 | size_t numerator; 142 | size_t decimal; 143 | size_t denominator; 144 | size_t decimal_step; 145 | size_t numerator_step; 146 | } 147 | WikiIterator; 148 | 149 | void WikiIterator_begin(WikiIterator *me) 150 | { 151 | me->numerator = me->decimal = 0; 152 | } 153 | 154 | Range WikiIterator_nextRange(WikiIterator *me) 155 | { 156 | size_t start = me->decimal; 157 | 158 | me->decimal += me->decimal_step; 159 | me->numerator += me->numerator_step; 160 | 161 | if (me->numerator >= me->denominator) 162 | { 163 | me->numerator -= me->denominator; 164 | me->decimal++; 165 | } 166 | 167 | return new_range(start, me->decimal); 168 | } 169 | 170 | size_t WikiIterator_finished(WikiIterator *me) 171 | { 172 | return (me->decimal >= me->size); 173 | } 174 | 175 | size_t WikiIterator_nextLevel(WikiIterator *me) 176 | { 177 | me->decimal_step += me->decimal_step; 178 | me->numerator_step += me->numerator_step; 179 | 180 | if (me->numerator_step >= 
me->denominator) 181 | { 182 | me->numerator_step -= me->denominator; 183 | me->decimal_step++; 184 | } 185 | 186 | return (me->decimal_step < me->size); 187 | } 188 | 189 | size_t WikiIterator_length(WikiIterator *me) 190 | { 191 | return me->decimal_step; 192 | } 193 | 194 | WikiIterator WikiIterator_new(size_t size2, size_t min_level) 195 | { 196 | WikiIterator me; 197 | 198 | me.size = size2; 199 | me.power_of_two = FloorPowerOfTwo(me.size); 200 | me.denominator = me.power_of_two/min_level; 201 | me.numerator_step = me.size % me.denominator; 202 | me.decimal_step = me.size/me.denominator; 203 | 204 | WikiIterator_begin(&me); 205 | 206 | return me; 207 | } 208 | 209 | ////////////////////////////////////////////////////////// 210 | //┌────────────────────────────────────────────────────┐// 211 | //│ █████┐ ██████┐ ██████┐████████┐ │// 212 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 213 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │// 214 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 215 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │// 216 | //│ └────┘ └─────┘ └─────┘ └─┘ │// 217 | //└────────────────────────────────────────────────────┘// 218 | ////////////////////////////////////////////////////////// 219 | 220 | #undef VAR 221 | #undef FUNC 222 | #undef STRUCT 223 | 224 | #define VAR char 225 | #define FUNC(NAME) NAME##8 226 | #define STRUCT(NAME) struct NAME##8 227 | 228 | #include "octosort.c" 229 | 230 | ////////////////////////////////////////////////////////// 231 | //┌────────────────────────────────────────────────────┐// 232 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│// 233 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│// 234 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │// 235 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 236 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 237 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │// 238 | //└────────────────────────────────────────────────────┘// 239 | ////////////////////////////////////////////////////////// 240 | 241 | #undef VAR 242 | #undef FUNC 243 | 
#undef STRUCT 244 | 245 | #define VAR short 246 | #define FUNC(NAME) NAME##16 247 | #define STRUCT(NAME) struct NAME##16 248 | 249 | #include "octosort.c" 250 | 251 | ////////////////////////////////////////////////////////// 252 | // ┌───────────────────────────────────────────────────┐// 253 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │// 254 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 255 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │// 256 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │// 257 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │// 258 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │// 259 | // └───────────────────────────────────────────────────┘// 260 | ////////////////////////////////////////////////////////// 261 | 262 | #undef VAR 263 | #undef FUNC 264 | #undef STRUCT 265 | 266 | #define VAR int 267 | #define FUNC(NAME) NAME##32 268 | #define STRUCT(NAME) struct NAME##32 269 | 270 | #include "octosort.c" 271 | 272 | ////////////////////////////////////////////////////////// 273 | // ┌───────────────────────────────────────────────────┐// 274 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │// 275 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │// 276 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │// 277 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │// 278 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │// 279 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │// 280 | // └───────────────────────────────────────────────────┘// 281 | ////////////////////////////////////////////////////////// 282 | 283 | #undef VAR 284 | #undef FUNC 285 | #undef STRUCT 286 | 287 | #define VAR long long 288 | #define FUNC(NAME) NAME##64 289 | #define STRUCT(NAME) struct NAME##64 290 | 291 | #include "octosort.c" 292 | 293 | ////////////////////////////////////////////////////////// 294 | //┌────────────────────────────────────────────────────┐// 295 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │// 296 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │// 297 | //│ └─██│ █████┌┘└█████┌┘ 
██████┌┘ ██│ ██│ │// 298 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │// 299 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │// 300 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │// 301 | //└────────────────────────────────────────────────────┘// 302 | ////////////////////////////////////////////////////////// 303 | 304 | #undef VAR 305 | #undef FUNC 306 | #undef STRUCT 307 | 308 | #define VAR long double 309 | #define FUNC(NAME) NAME##128 310 | #define STRUCT(NAME) struct NAME##128 311 | 312 | #include "octosort.c" 313 | 314 | 315 | //////////////////////////////////////////////////////////////////////////////// 316 | //┌──────────────────────────────────────────────────────────────────────────┐// 317 | //│ ██████┐ ██████┐████████┐ ██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │// 318 | //│ ██┌───██┐██┌────┘└──██┌──┘██┌───██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │// 319 | //│ ██│ ██│██│ ██│ ██│ ██│███████┐██│ ██│██████┌┘ ██│ │// 320 | //│ ██│ ██│██│ ██│ ██│ ██│└────██│██│ ██│██┌──██┐ ██│ │// 321 | //│ └██████┌┘└██████┐ ██│ └██████┌┘███████│└██████┌┘██│ ██│ ██│ │// 322 | //│ └─────┘ └─────┘ └─┘ └─────┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │// 323 | //└──────────────────────────────────────────────────────────────────────────┘// 324 | //////////////////////////////////////////////////////////////////////////////// 325 | 326 | void octosort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp) 327 | { 328 | if (nmemb < 2) 329 | { 330 | return; 331 | } 332 | 333 | switch (size) 334 | { 335 | case sizeof(char): 336 | return octosort8(array, nmemb, NULL, 0, cmp); 337 | 338 | case sizeof(short): 339 | return octosort16(array, nmemb, NULL, 0, cmp); 340 | 341 | case sizeof(int): 342 | return octosort32(array, nmemb, NULL, 0, cmp); 343 | 344 | case sizeof(long long): 345 | return octosort64(array, nmemb, NULL, 0, cmp); 346 | 347 | case sizeof(long double): 348 | return octosort128(array, nmemb, NULL, 0, cmp); 349 | 350 | default: 351 | return assert(size == sizeof(char) || size == 
sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double)); 352 | } 353 | } 354 | 355 | #undef DYNAMIC_CACHE 356 | #undef PULL 357 | #undef SWAP 358 | 359 | #undef VAR 360 | #undef FUNC 361 | #undef STRUCT 362 | 363 | #endif 364 | --------------------------------------------------------------------------------