├── README.md
├── bench.c
├── images
│   ├── graph1.png
│   └── graph2.png
├── octosort.c
└── octosort.h
/README.md:
--------------------------------------------------------------------------------
1 | Origin
2 | ------
3 | Octosort is a block merge sort based on [WikiSort](https://github.com/BonzaiThePenguin/WikiSort) and [quadsort](https://github.com/scandum/quadsort). This document primarily lists notable differences and some benchmarks.
4 |
5 | Octo swap
6 | ---------
7 | Just as quadsort has the quad swap, octosort has the octo swap. The octo swap sorts between 4 and 8 elements at a time and handles runs of reverse-ordered data.
8 |
9 | Monobound binary search
10 | -----------------------
11 | WikiSort's binary search has been replaced with a [monobound binary search](https://github.com/scandum/binary_search), which is up to two times faster.
12 |
13 | Gries-Mills rotation
14 | --------------------
15 | WikiSort's triple reversal rotation has been replaced with a Gries-Mills rotation, which is up to two times faster.
16 |
17 | Quad merge
18 | ----------
19 | WikiSort already implemented a quad merge, which has been updated to no longer detect reverse order runs, since that's taken care of by the octo swap.
20 |
21 | Tail merge
22 | ----------
23 | Quadsort's tail merge routine was added to perform partially in-place merges.
24 |
25 | Data Types
26 | ----------
27 | Support was added for long doubles and 8, 16, 32, and 64 bit data types. By using 32 or 64 bit pointers it's possible to sort any other data type.
28 |
29 | Interface
30 | ---------
31 | The interface was changed to use the same one as qsort, which is described in [man qsort](https://man7.org/linux/man-pages/man3/qsort.3p.html).
32 |
33 | Memory
34 | ------
35 | By default octosort uses 512 elements worth of stack memory.
36 |
37 | The minimum memory requirement for octosort is 1 element of stack memory. It can also be configured to use n / 2 memory.
38 |
39 | Big O
40 | -----
41 | ```cobol
42 | ┌───────────────────────┐┌───────────────────────┐
43 | │comparisons ││swap memory │
44 | ┌───────────────┐├───────┬───────┬───────┤├───────┬───────┬───────┤┌──────┐┌─────────┐┌─────────┐
45 | │name ││min │avg │max ││min │avg │max ││stable││partition││adaptive │
46 | ├───────────────┤├───────┼───────┼───────┤├───────┼───────┼───────┤├──────┤├─────────┤├─────────┤
47 | │mergesort ││n log n│n log n│n log n││n │n │n ││yes ││no ││no │
48 | ├───────────────┤├───────┼───────┼───────┤├───────┼───────┼───────┤├──────┤├─────────┤├─────────┤
49 | │octosort ││n │n log n│n log n││1 │1 │1 ││yes ││no ││yes │
50 | ├───────────────┤├───────┼───────┼───────┤├───────┼───────┼───────┤├──────┤├─────────┤├─────────┤
51 | │quicksort ││n │n log n│n² ││1 │1 │1 ││no ││yes ││no │
52 | └───────────────┘└───────┴───────┴───────┘└───────┴───────┴───────┘└──────┘└─────────┘└─────────┘
53 | ```
54 |
55 | Benchmarks
56 | ----------
57 | The following benchmark was run on WSL 2 using gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04).
58 | The source code was compiled using gcc -O3 bench.c. Each test was run 100 times
59 | and only the best run is reported. It's generated by running the benchmark using
60 | 100000 100 as the arguments.
61 |
62 | 
63 |
64 | data table
65 |
66 | | Name | Items | Type | Best | Average | Compares | Samples | Distribution |
67 | | --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |
68 | | qsort | 100000 | 32 | 0.008508 | 0.008779 | 1536367 | 100 | random order |
69 | | octosort | 100000 | 32 | 0.008792 | 0.008889 | 1800800 | 100 | random order |
70 | | | | | | | | | |
71 | | qsort | 100000 | 32 | 0.002024 | 0.002225 | 815024 | 100 | ascending order |
72 | | octosort | 100000 | 32 | 0.000328 | 0.000345 | 116524 | 100 | ascending order |
73 | | | | | | | | | |
74 | | qsort | 100000 | 32 | 0.002831 | 0.003088 | 915020 | 100 | ascending saw |
75 | | octosort | 100000 | 32 | 0.001537 | 0.001565 | 370372 | 100 | ascending saw |
76 | | | | | | | | | |
77 | | qsort | 100000 | 32 | 0.006426 | 0.006722 | 1531997 | 100 | generic order |
78 | | octosort | 100000 | 32 | 0.006437 | 0.006515 | 1633855 | 100 | generic order |
79 | | | | | | | | | |
80 | | qsort | 100000 | 32 | 0.002456 | 0.002657 | 853904 | 100 | descending order |
81 | | octosort | 100000 | 32 | 0.000221 | 0.000227 | 99999 | 100 | descending order |
82 | | | | | | | | | |
83 | | qsort | 100000 | 32 | 0.002832 | 0.003001 | 1063907 | 100 | descending saw |
84 | | octosort | 100000 | 32 | 0.001738 | 0.001849 | 693171 | 100 | descending saw |
85 | | | | | | | | | |
86 | | qsort | 100000 | 32 | 0.003744 | 0.003939 | 1012256 | 100 | random tail |
87 | | octosort | 100000 | 32 | 0.002684 | 0.002740 | 630603 | 100 | random tail |
88 | | | | | | | | | |
89 | | qsort | 100000 | 32 | 0.005464 | 0.005732 | 1200738 | 100 | random half |
90 | | octosort | 100000 | 32 | 0.004859 | 0.004911 | 1022394 | 100 | random half |
91 | | | | | | | | | |
92 | | qsort | 100000 | 32 | 0.004147 | 0.004685 | 1209200 | 100 | ascending tiles |
93 | | octosort | 100000 | 32 | 0.003146 | 0.003437 | 790377 | 100 | ascending tiles |
94 |
95 |
96 |
97 |
98 | The following benchmark was generated using 1000000 0 0 as the argument.
99 |
100 | 
101 |
102 | data table
103 |
104 | | Name | Items | Type | Best | Average | Compares | Samples | Distribution |
105 | | --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |
106 | | qsort | 4 | 32 | 0.001369 | 0.001439 | 5 | 100 | random 4 |
107 | | octosort | 4 | 32 | 0.000765 | 0.000776 | 6 | 100 | random 4 |
108 | | | | | | | | | |
109 | | qsort | 8 | 32 | 0.001511 | 0.001555 | 17 | 100 | random 8 |
110 | | octosort | 8 | 32 | 0.000893 | 0.000939 | 19 | 100 | random 8 |
111 | | | | | | | | | |
112 | | qsort | 16 | 32 | 0.001587 | 0.001952 | 46 | 100 | random 16 |
113 | | octosort | 16 | 32 | 0.001221 | 0.001281 | 55 | 100 | random 16 |
114 | | | | | | | | | |
115 | | qsort | 32 | 32 | 0.001795 | 0.002612 | 121 | 100 | random 32 |
116 | | octosort | 32 | 32 | 0.001319 | 0.001602 | 124 | 100 | random 32 |
117 | | | | | | | | | |
118 | | qsort | 64 | 32 | 0.002037 | 0.003018 | 309 | 100 | random 64 |
119 | | octosort | 64 | 32 | 0.001492 | 0.002195 | 319 | 100 | random 64 |
120 | | | | | | | | | |
121 | | qsort | 128 | 32 | 0.002304 | 0.003754 | 745 | 100 | random 128 |
122 | | octosort | 128 | 32 | 0.001674 | 0.003189 | 775 | 100 | random 128 |
123 | | | | | | | | | |
124 | | qsort | 256 | 32 | 0.003293 | 0.005024 | 1738 | 100 | random 256 |
125 | | octosort | 256 | 32 | 0.001909 | 0.003613 | 1806 | 100 | random 256 |
126 | | | | | | | | | |
127 | | qsort | 512 | 32 | 0.005293 | 0.006220 | 3968 | 100 | random 512 |
128 | | octosort | 512 | 32 | 0.003113 | 0.005086 | 4112 | 100 | random 512 |
129 | | | | | | | | | |
130 | | qsort | 1024 | 32 | 0.006530 | 0.007128 | 8962 | 100 | random 1024 |
131 | | octosort | 1024 | 32 | 0.005290 | 0.006494 | 10031 | 100 | random 1024 |
132 | | | | | | | | | |
133 | | qsort | 2048 | 32 | 0.007341 | 0.007810 | 19962 | 100 | random 2048 |
134 | | octosort | 2048 | 32 | 0.006943 | 0.007444 | 22885 | 100 | random 2048 |
135 | | | | | | | | | |
136 | | qsort | 4096 | 32 | 0.008086 | 0.008499 | 43966 | 100 | random 4096 |
137 | | octosort | 4096 | 32 | 0.008295 | 0.008441 | 51035 | 100 | random 4096 |
138 | | | | | | | | | |
139 | | qsort | 8192 | 32 | 0.008740 | 0.009142 | 96149 | 100 | random 8192 |
140 | | octosort | 8192 | 32 | 0.009122 | 0.009198 | 112238 | 100 | random 8192 |
141 | | | | | | | | | |
142 | | qsort | 16384 | 32 | 0.009405 | 0.009830 | 208702 | 100 | random 16384 |
143 | | octosort | 16384 | 32 | 0.009827 | 0.009949 | 244511 | 100 | random 16384 |
144 | | | | | | | | | |
145 | | qsort | 32768 | 32 | 0.010039 | 0.010421 | 450105 | 100 | random 32768 |
146 | | octosort | 32768 | 32 | 0.010525 | 0.010680 | 529041 | 100 | random 32768 |
147 | | | | | | | | | |
148 | | qsort | 65536 | 32 | 0.010708 | 0.011123 | 965773 | 100 | random 65536 |
149 | | octosort | 65536 | 32 | 0.011250 | 0.011431 | 1138363 | 100 | random 65536 |
150 | | | | | | | | | |
151 | | qsort | 131072 | 32 | 0.011316 | 0.011698 | 2062601 | 100 | random 131072 |
152 | | octosort | 131072 | 32 | 0.011982 | 0.012159 | 2437514 | 100 | random 131072 |
153 |
154 |
155 |
--------------------------------------------------------------------------------
/bench.c:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2014-2021 Igor van den Hoven ivdhoven@gmail.com
3 | */
4 |
5 | /*
6 | Permission is hereby granted, free of charge, to any person obtaining
7 | a copy of this software and associated documentation files (the
8 | "Software"), to deal in the Software without restriction, including
9 | without limitation the rights to use, copy, modify, merge, publish,
10 | distribute, sublicense, and/or sell copies of the Software, and to
11 | permit persons to whom the Software is furnished to do so, subject to
12 | the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 | */
25 |
26 | /*
27 | To compile use:
28 |
29 | gcc -O3 bench.c
30 |
31 | or
32 |
33 | g++ -O3 -w -fpermissive bench.c
34 | */
35 |
36 | #include <stdlib.h>
37 | #include <stdio.h>
38 | #include <string.h>
39 | #include <sys/time.h>
40 | #include <time.h>
41 | #include <errno.h>
42 | #include <math.h>
43 |
44 | #include "octosort.h"
45 |
46 | //#define cmp(a,b) (*(a) > *(b))
47 |
48 | //typedef int CMPFUNC (const void *a, const void *b);
49 |
50 | typedef void SRTFUNC(void *array, size_t nmemb, size_t size, CMPFUNC *cmpf); // function type for a sort taking (array, element count, element size, comparator); used for test_sort()'s srt parameter
51 |
52 |
53 | // Must prevent inlining so the benchmark is fair against qsort.
54 |
55 | // Remove __attribute__ ((noinline)) and comparisons++ for full throttle.
56 |
57 | size_t comparisons; // incremented by the counting comparators in this file; test_sort() zeroes it before each sort and prints it in the "Compares" column
58 |
59 | __attribute__ ((noinline)) int cmp_int(const void * a, const void * b)
60 | { // qsort-style comparator for int elements: returns <0, 0 or >0 and bumps the global comparison counter
61 |     const int fa = *(const int *) a;
62 |     const int fb = *(const int *) b;
63 | 
64 |     comparisons++;
65 |     // Three-way compare without subtraction: fa - fb is signed overflow (undefined behaviour) when the operands have opposite signs and large magnitude.
66 |     return (fa > fb) - (fa < fb);
67 | }
68 |
69 | __attribute__ ((noinline)) int cmp_stable(const void * a, const void * b)
70 | { // compares ints by their 100000-wide bucket only, so values inside one bucket are "equal" and expose unstable sorts
71 |     const int key_a = *(int *) a / 100000, key_b = *(int *) b / 100000;
72 |     comparisons++;
73 |     return key_a - key_b;
74 | }
75 |
76 | __attribute__ ((noinline)) int cmp_long(const void * a, const void * b)
77 | { // qsort-style comparator for long long elements; counts every call in the global comparison counter
78 |     const long long lhs = *(long long *) a, rhs = *(long long *) b;
79 | 
80 |     comparisons++;
81 |     // (lhs > rhs) - (lhs < rhs) yields -1, 0 or 1 without any risk of overflow
82 |     return (lhs > rhs) - (lhs < rhs);
83 | }
84 |
85 | __attribute__ ((noinline)) int cmp_long_double(const void * a, const void * b)
86 | { // qsort-style comparator for long double elements; NaNs order after every number and compare equal to each other
87 |     const long double fa = *(const long double *) a;
88 |     const long double fb = *(const long double *) b;
89 |     const int nan_a = isnan(fa) != 0, nan_b = isnan(fb) != 0; // isnan() only promises "nonzero" for NaN, so normalise to 0/1 before subtracting
90 | 
91 |     comparisons++;
92 |     if (nan_a || nan_b)
93 |     {
94 |         return nan_a - nan_b;
95 |     }
96 |     return ((fa > fb) - (fa < fb));
97 | }
98 |
99 |
100 | int cmp_str(const void * a, const void * b)
101 | { // a and b each point at a `char *` array element; order is whatever strcmp() reports for the two strings
102 |     return strcmp(((const char *const *) a)[0], ((const char *const *) b)[0]);
103 | }
104 |
105 | int cmp_float(const void * a, const void * b)
106 | { // three-way float comparator; returning the difference converted to int made values less than 1.0 apart compare equal and overflowed on large gaps
107 |     return (*(const float *) a > *(const float *) b) - (*(const float *) a < *(const float *) b);
108 | }
109 |
110 |
111 | long long utime()
112 | { // current gettimeofday() reading expressed as a single microsecond count
113 |     struct timeval tv;
114 |     long long usec;
115 |     gettimeofday(&tv, NULL);
116 |     usec = tv.tv_sec * 1000000LL; // whole seconds scaled to microseconds
117 |     return usec + tv.tv_usec;
118 | }
119 |
120 | void seed_rand(unsigned long long seed)
121 | { // seeds rand(); srand() takes unsigned int, so the seed is narrowed exactly as the implicit conversion would
122 |     srand((unsigned int) seed);
123 | }
124 |
125 | void test_sort(void *array, void *unsorted, void *valid, int minimum, int maximum, int samples, int repetitions, SRTFUNC *srt, const char *name, char *desc, size_t size, CMPFUNC *cmpf)
126 | { // Times the sort selected by name[0] ('q' = qsort, 'o' = octosort; '*' only prints a table header or separator row), prints one table row, then checks array against valid. srt is not used.
127 |     long long start = 0, end = 0, total, best, average;
128 |     size_t rep, sam, max;
129 |     long long *ptla = (long long *) array, *ptlv = (long long *) valid;
130 |     long double *ptda = (long double *) array, *ptdv = (long double *) valid;
131 |     int *pta = (int *) array, *ptv = (int *) valid, cnt;
132 | 
133 |     if (*name == '*')
134 |     {
135 |         if (!strcmp(desc, "random order") || !strcmp(desc, "random 1-4") || !strcmp(desc, "random 4"))
136 |         {
137 |             if (comparisons)
138 |             {
139 |                 printf("%s\n", "| Name | Items | Type | Best | Average | Compares | Samples | Distribution |");
140 |                 printf("%s\n", "| --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |");
141 |             }
142 |             else
143 |             {
144 |                 printf("%s\n", "| Name | Items | Type | Best | Average | Loops | Samples | Distribution |");
145 |                 printf("%s\n", "| --------- | -------- | ---- | -------- | -------- | --------- | ------- | ---------------- |");
146 |             }
147 |         }
148 |         else
149 |         {
150 |             printf("%s\n", "| | | | | | | | |");
151 |         }
152 |         return;
153 |     }
154 | 
155 |     best = average = 0;
156 | 
157 |     if (minimum == 7 && maximum == 7)
158 |     {
159 |         printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d\e[0m\n", pta[0], pta[1], pta[2], pta[3], pta[4], pta[5], pta[6]);
160 |     }
161 |     // The counters are size_t; compare as signed so a negative samples/repetitions runs zero iterations instead of wrapping.
162 |     for (sam = 0 ; (long long) sam < samples ; sam++)
163 |     {
164 |         total = 0;
165 | 
166 |         max = minimum;
167 | 
168 |         if (repetitions > 1)
169 |         {
170 |             start = utime();
171 |             // One timed batch: the element count walks from minimum up to maximum and wraps around.
172 |             for (rep = 0 ; (long long) rep < repetitions ; rep++)
173 |             {
174 |                 memcpy(array, unsorted, max * size);
175 | 
176 |                 comparisons = 0;
177 | 
178 |                 switch (*name)
179 |                 {
180 |                     case 'q':
181 |                         qsort(array, max, size, cmpf);
182 |                         break;
183 | 
184 |                     case 'o':
185 |                         octosort(array, max, size, cmpf);
186 |                         break;
187 |                 }
188 | 
189 |                 if (minimum < maximum && ++max > maximum)
190 |                 {
191 |                     max = minimum;
192 |                 }
193 |             }
194 |             end = utime();
195 |         }
196 |         else
197 |         {
198 |             memcpy(array, unsorted, max * size);
199 | 
200 |             comparisons = 0;
201 |             // Single run: the copy above is kept outside the timed region.
202 |             start = utime();
203 | 
204 |             switch (*name)
205 |             {
206 |                 case 'q':
207 |                     qsort(array, max, size, cmpf);
208 |                     break;
209 |                 case 'o':
210 |                     octosort(array, max, size, cmpf);
211 |                     break;
212 |             }
213 |             end = utime();
214 |         }
215 | 
216 |         total = end - start;
217 | 
218 |         if (!best || total < best)
219 |         {
220 |             best = total;
221 |         }
222 |         average += total;
223 |     }
224 | 
225 |     if (minimum == 7 && maximum == 7)
226 |     {
227 |         printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d\e[0m\n", pta[0], pta[1], pta[2], pta[3], pta[4], pta[5], pta[6]);
228 |     }
229 |     // Nothing to report without samples; this also guards the division below against samples == 0.
230 |     if (repetitions == 0 || samples <= 0)
231 |     {
232 |         return;
233 |     }
234 | 
235 |     average /= samples;
236 |     // cmp_stable only orders by bucket, so any descending neighbours mean equal keys were reordered.
237 |     if (cmpf == cmp_stable)
238 |     {
239 |         for (cnt = 1 ; cnt < maximum ; cnt++)
240 |         {
241 |             if (pta[cnt - 1] > pta[cnt])
242 |             {
243 |                 sprintf(desc, "\e[1;31m%16s\e[0m", "unstable");
244 | 
245 |                 break;
246 |             }
247 |         }
248 |     }
249 | 
250 |     if (comparisons)
251 |     {
252 |         printf("|%10s | %8d | %4d | %f | %f | %9d | %7d | %16s |\n", name, maximum, (int) size * 8, best / 1000000.0, average / 1000000.0, (int) comparisons, samples, desc);
253 |     }
254 |     else
255 |     {
256 |         printf("|%10s | %8d | %4d | %f | %f | %9d | %7d | %16s |\n", name, maximum, (int) size * 8, best / 1000000.0, average / 1000000.0, repetitions, samples, desc);
257 |     }
258 |     // Verification needs one fixed size and a total order, so range runs and cmp_stable runs stop here.
259 |     if (minimum != maximum || cmpf == cmp_stable)
260 |     {
261 |         return;
262 |     }
263 | 
264 |     for (cnt = 1 ; cnt < maximum ; cnt++)
265 |     {
266 |         if (size == sizeof(int))
267 |         {
268 |             if (pta[cnt - 1] > pta[cnt])
269 |             {
270 |                 printf("%17s: not properly sorted at index %d. (%d vs %d)\n", name, cnt, pta[cnt - 1], pta[cnt]);
271 |                 break;
272 |             }
273 |             if (pta[cnt - 1] == pta[cnt])
274 |             {
275 |                 // printf("%17s: Found a repeat value at index %d. (%d)\n", name, cnt, pta[cnt]);
276 |             }
277 |         }
278 |         else if (size == sizeof(long long))
279 |         {
280 |             if (ptla[cnt - 1] > ptla[cnt])
281 |             {
282 |                 printf("%17s: not properly sorted at index %d. (%lld vs %lld)\n", name, cnt, ptla[cnt - 1], ptla[cnt]);
283 |                 break;
284 |             }
285 |         }
286 |         else if (size == sizeof(long double))
287 |         {
288 |             if (cmp_long_double(&ptda[cnt - 1], &ptda[cnt]) > 0)
289 |             {
290 |                 printf("%17s: not properly sorted at index %d. (%Lf vs %Lf)\n", name, cnt, ptda[cnt - 1], ptda[cnt]);
291 |                 break;
292 |             }
293 |         }
294 |     }
295 |     // Compare against the reference result from index 0; starting at 1 left the first element unchecked.
296 |     for (cnt = 0 ; cnt < maximum ; cnt++)
297 |     {
298 |         if (size == sizeof(int))
299 |         {
300 |             if (pta[cnt] != ptv[cnt])
301 |             {
302 |                 printf(" validate: array[%d] != valid[%d]. (%d vs %d)\n", cnt, cnt, pta[cnt], ptv[cnt]);
303 |                 break;
304 |             }
305 |         }
306 |         else if (size == sizeof(long long))
307 |         {
308 |             if (ptla[cnt] != ptlv[cnt])
309 |             {
310 |                 printf(" validate: array[%d] != valid[%d]. (%lld vs %lld)\n", cnt, cnt, ptla[cnt], ptlv[cnt]);
311 |                 break;
312 |             }
313 |         }
314 |         else if (size == sizeof(long double))
315 |         {
316 |             if (ptda[cnt] != ptdv[cnt])
317 |             {
318 |                 printf(" validate: array[%d] != valid[%d]. (%Lf vs %Lf)\n", cnt, cnt, ptda[cnt], ptdv[cnt]);
319 |                 break;
320 |             }
321 |         }
322 |     }
323 | }
324 |
325 | void validate()
326 | { // Sorts prefixes of several input patterns with octosort and qsort and reports the first mismatch; every exit path releases the three work buffers.
327 |     int seed = time(NULL);
328 |     int cnt, val, max = 2000000;
329 |     int *a_array = (int *) malloc(max * sizeof(int));
330 |     int *r_array = (int *) malloc(max * sizeof(int));
331 |     int *v_array = (int *) malloc(max * sizeof(int));
332 |     if (a_array == NULL || r_array == NULL || v_array == NULL)
333 |     {
334 |         printf("validate: malloc: %s\n", strerror(errno));
335 |         goto cleanup;
336 |     }
337 |     seed_rand(seed);
338 |     for (cnt = 0 ; cnt < max ; cnt++)
339 |     {
340 |         r_array[cnt] = rand();
341 |     }
342 | 
343 |     // random order; only the first cnt elements are sorted and compared, so only those are copied
344 |     for (cnt = 1 ; cnt < 100 ; cnt++)
345 |     {
346 |         memcpy(a_array, r_array, cnt * sizeof(int));
347 |         memcpy(v_array, r_array, cnt * sizeof(int));
348 | 
349 |         octosort(a_array, cnt, sizeof(int), cmp_int);
350 |         qsort(v_array, cnt, sizeof(int), cmp_int);
351 | 
352 |         for (val = 0 ; val < cnt ; val++)
353 |         {
354 |             if (val && v_array[val - 1] > v_array[val])
355 |             {
356 |                 printf("\e[1;31mvalidate rand: seed %d: size: %d Not properly sorted at index %d.\e[0m\n", seed, cnt, val);
357 |                 goto cleanup;
358 |             }
359 | 
360 |             if (a_array[val] != v_array[val])
361 |             {
362 |                 printf("\e[1;31mvalidate rand: seed %d: size: %d Not verified at index %d.\e[0m\n", seed, cnt, val);
363 |                 goto cleanup;
364 |             }
365 |         }
366 |     }
367 | 
368 |     // ascending saw
369 | 
370 |     for (cnt = 0 ; cnt < 1000 ; cnt++)
371 |     {
372 |         r_array[cnt] = rand();
373 |     }
374 | 
375 |     octosort(r_array + max / 4 * 0, max / 4, sizeof(int), cmp_int);
376 |     octosort(r_array + max / 4 * 1, max / 4, sizeof(int), cmp_int);
377 |     octosort(r_array + max / 4 * 2, max / 4, sizeof(int), cmp_int);
378 |     octosort(r_array + max / 4 * 3, max / 4, sizeof(int), cmp_int);
379 | 
380 |     for (cnt = 1 ; cnt < 1000 ; cnt += 7)
381 |     {
382 |         memcpy(a_array, r_array, cnt * sizeof(int));
383 |         memcpy(v_array, r_array, cnt * sizeof(int));
384 | 
385 |         octosort(a_array, cnt, sizeof(int), cmp_int);
386 |         qsort(v_array, cnt, sizeof(int), cmp_int);
387 | 
388 |         for (val = 0 ; val < cnt ; val++)
389 |         {
390 |             if (val && v_array[val - 1] > v_array[val])
391 |             {
392 |                 printf("\e[1;31mvalidate ascending saw: seed %d: size: %d Not properly sorted at index %d.\e[0m\n", seed, cnt, val);
393 |                 goto cleanup;
394 |             }
395 | 
396 |             if (a_array[val] != v_array[val])
397 |             {
398 |                 printf("\e[1;31mvalidate ascending saw: seed %d: size: %d Not verified at index %d.\e[0m\n", seed, cnt, val);
399 |                 goto cleanup;
400 |             }
401 |         }
402 |     }
403 | 
404 |     // descending saw
405 | 
406 |     for (cnt = 0 ; cnt < 1000 ; cnt++)
407 |     {
408 |         r_array[cnt] = (max - cnt - 1) % 100000;
409 |     }
410 | 
411 |     for (cnt = 1 ; cnt < 1000 ; cnt += 7)
412 |     {
413 |         memcpy(a_array, r_array, cnt * sizeof(int));
414 |         memcpy(v_array, r_array, cnt * sizeof(int));
415 | 
416 |         octosort(a_array, cnt, sizeof(int), cmp_int);
417 |         qsort(v_array, cnt, sizeof(int), cmp_int);
418 | 
419 |         for (val = 0 ; val < cnt ; val++)
420 |         {
421 |             if (val && v_array[val - 1] > v_array[val])
422 |             {
423 |                 printf("\e[1;31mvalidate descending saw: seed %d: size: %d Not properly sorted at index %d.\e[0m\n", seed, cnt, val);
424 |                 goto cleanup;
425 |             }
426 | 
427 |             if (a_array[val] != v_array[val])
428 |             {
429 |                 printf("\e[1;31mvalidate descending saw: seed %d: size: %d Not verified at index %d.\e[0m\n", seed, cnt, val);
430 |                 goto cleanup;
431 |             }
432 |         }
433 |     }
434 | 
435 |     // random tail
436 | 
437 |     for (cnt = 0 ; cnt < max * 3 / 4 ; cnt++)
438 |     {
439 |         r_array[cnt] = cnt;
440 |     }
441 | 
442 |     for (cnt = max * 3 / 4 ; cnt < max ; cnt++)
443 |     {
444 |         r_array[cnt] = rand();
445 |     }
446 | 
447 |     for (cnt = 1 ; cnt < 1000 ; cnt += 7)
448 |     {
449 |         memcpy(a_array, r_array, cnt * sizeof(int));
450 |         memcpy(v_array, r_array, cnt * sizeof(int));
451 | 
452 |         octosort(a_array, cnt, sizeof(int), cmp_int);
453 |         qsort(v_array, cnt, sizeof(int), cmp_int);
454 | 
455 |         for (val = 0 ; val < cnt ; val++)
456 |         {
457 |             if (val && v_array[val - 1] > v_array[val])
458 |             {
459 |                 printf("\e[1;31mvalidate rand tail: seed %d: size: %d Not properly sorted at index %d.\e[0m\n", seed, cnt, val);
460 |                 goto cleanup;
461 |             }
462 | 
463 |             if (a_array[val] != v_array[val])
464 |             {
465 |                 printf("\e[1;31mvalidate rand tail: seed %d: size: %d Not verified at index %d.\e[0m\n", seed, cnt, val);
466 |                 goto cleanup;
467 |             }
468 |         }
469 |     }
470 | cleanup: // single exit so the buffers are released on success and on every failure; free(NULL) is harmless
471 |     free(a_array);
472 |     free(r_array);
473 |     free(v_array);
474 | }
475 |
476 |
477 | int main(int argc, char **argv)
478 | { // Usage: bench [max [samples [repetitions [seed]]]]. Runs validate(), then the distribution benchmarks; samples == 0 and repetitions == 0 selects the size-range benchmarks. Returns 1 if an allocation fails.
479 |     int max = 100000;
480 |     int samples = 10;
481 |     int repetitions = 1;
482 |     int seed = 0;
483 |     int cnt, rnd, lst, status = 0;
484 |     int *a_array, *r_array, *v_array;
485 |     long long *la_array, *lr_array, *lv_array;
486 |     long double *da_array, *dr_array, *dv_array;
487 | 
488 |     char dist[40], *sorts[] = { "*", "qsort", "octosort" };
489 | 
490 |     if (argc > 1 && *argv[1])
491 |     {
492 |         max = atoi(argv[1]);
493 |     }
494 |     if (argc > 2 && *argv[2])
495 |     {
496 |         samples = atoi(argv[2]);
497 |     }
498 |     if (argc > 3 && *argv[3])
499 |     {
500 |         repetitions = atoi(argv[3]);
501 |     }
502 |     if (argc > 4 && *argv[4])
503 |     {
504 |         seed = atoi(argv[4]);
505 |     }
506 | 
507 |     validate();
508 |     rnd = seed ? seed : time(NULL);
509 | 
510 |     a_array = (int *) malloc(max * sizeof(int));
511 |     r_array = (int *) malloc(max * sizeof(int));
512 |     v_array = (int *) malloc(max * sizeof(int));
513 | 
514 |     if (a_array == NULL || r_array == NULL || v_array == NULL)
515 |     {
516 |         printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno));
517 |         status = 1; goto end;
518 |     }
519 |     printf("Info: int = %zu, long long = %zu, long double = %zu\n\n", sizeof(int) * 8, sizeof(long long) * 8, sizeof(long double) * 8);
520 |     printf("Benchmark: array size: %d, samples: %d, repetitions: %d, seed: %d\n\n", max, samples, repetitions, rnd);
521 | 
522 |     if (samples == 0 && repetitions == 0)
523 |     {
524 |         goto small_range_test;
525 |     }
526 | 
527 |     // 128 bit
528 | 
529 |     da_array = (long double *) malloc(max * sizeof(long double));
530 |     dr_array = (long double *) malloc(max * sizeof(long double));
531 |     dv_array = (long double *) malloc(max * sizeof(long double));
532 | 
533 |     if (da_array == NULL || dr_array == NULL || dv_array == NULL)
534 |     {
535 |         printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno));
536 |         free(da_array); free(dr_array); free(dv_array);
537 |         status = 1; goto end;
538 |     }
539 | 
540 |     seed_rand(rnd);
541 |     // Widen before shifting: rand() << 30 overflows int, which is undefined behaviour.
542 |     for (cnt = 0 ; cnt < max ; cnt++)
543 |     {
544 |         dr_array[cnt] = rand() + 1.0 / (long double) (rand() + ((long long) rand() << 30));
545 |     }
546 | 
547 |     memcpy(dv_array, dr_array, max * sizeof(long double));
548 |     qsort(dv_array, max, sizeof(long double), cmp_long_double);
549 | 
550 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
551 |     {
552 |         test_sort(da_array, dr_array, dv_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(long double), cmp_long_double);
553 |     }
554 | 
555 |     free(da_array);
556 |     free(dr_array);
557 |     free(dv_array);
558 | 
559 |     printf("\n");
560 | 
561 |     // 64 bit
562 | 
563 |     la_array = (long long *) malloc(max * sizeof(long long));
564 |     lr_array = (long long *) malloc(max * sizeof(long long));
565 |     lv_array = (long long *) malloc(max * sizeof(long long));
566 | 
567 |     if (la_array == NULL || lr_array == NULL || lv_array == NULL)
568 |     {
569 |         printf("main(%d,%d,%d): malloc: %s\n", max, samples, repetitions, strerror(errno));
570 |         free(la_array); free(lr_array); free(lv_array);
571 |         status = 1; goto end;
572 |     }
573 | 
574 |     seed_rand(rnd);
575 | 
576 |     for (cnt = 0 ; cnt < max ; cnt++)
577 |     {
578 |         lr_array[cnt] = rand();
579 |         lr_array[cnt] += (unsigned long long) rand() << 32ULL;
580 |     }
581 | 
582 |     memcpy(lv_array, lr_array, max * sizeof(long long));
583 |     qsort(lv_array, max, sizeof(long long), cmp_long);
584 | 
585 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
586 |     {
587 |         test_sort(la_array, lr_array, lv_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(long long), cmp_long);
588 |     }
589 | 
590 |     printf("\n");
591 | 
592 |     free(la_array);
593 |     free(lr_array);
594 |     free(lv_array);
595 | 
596 |     // 32 bit
597 | 
598 |     // random
599 | 
600 |     seed_rand(rnd);
601 | 
602 |     for (cnt = 0 ; cnt < max ; cnt++)
603 |     {
604 |         r_array[cnt] = rand();
605 |     }
606 | 
607 |     memcpy(v_array, r_array, max * sizeof(int));
608 |     qsort(v_array, max, sizeof(int), cmp_int);
609 | 
610 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
611 |     {
612 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "random order", sizeof(int), cmp_int);
613 |     }
614 | 
615 |     // ascending
616 | 
617 |     for (cnt = 0 ; cnt < max ; cnt++)
618 |     {
619 |         r_array[cnt] = cnt;
620 |     }
621 | 
622 |     memcpy(v_array, r_array, max * sizeof(int));
623 |     memcpy(r_array, v_array, max * sizeof(int));
624 | 
625 |     qsort(v_array, max, sizeof(int), cmp_int);
626 | 
627 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
628 |     {
629 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "ascending order", sizeof(int), cmp_int);
630 |     }
631 | 
632 |     // ascending saw
633 | 
634 |     for (cnt = 0 ; cnt < max ; cnt++)
635 |     {
636 |         r_array[cnt] = rand();
637 |     }
638 | 
639 |     memcpy(v_array, r_array, max * sizeof(int));
640 |     qsort(v_array + max / 4 * 0, max / 4, sizeof(int), cmp_int);
641 |     qsort(v_array + max / 4 * 1, max / 4, sizeof(int), cmp_int);
642 |     qsort(v_array + max / 4 * 2, max / 4, sizeof(int), cmp_int);
643 |     qsort(v_array + max / 4 * 3, max / 4, sizeof(int), cmp_int);
644 |     memcpy(r_array, v_array, max * sizeof(int));
645 | 
646 |     qsort(v_array, max, sizeof(int), cmp_int);
647 | 
648 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
649 |     {
650 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "ascending saw", sizeof(int), cmp_int);
651 |     }
652 | 
653 |     // generic
654 | 
655 |     for (cnt = 0 ; cnt < max ; cnt++)
656 |     {
657 |         r_array[cnt] = rand() % 100;
658 |     }
659 | 
660 |     memcpy(v_array, r_array, max * sizeof(int));
661 |     qsort(v_array, max, sizeof(int), cmp_int);
662 | 
663 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
664 |     {
665 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "generic order", sizeof(int), cmp_int);
666 |     }
667 | 
668 |     // descending
669 | 
670 |     for (cnt = 0 ; cnt < max ; cnt++)
671 |     {
672 |         r_array[cnt] = (max - cnt);
673 |     }
674 | 
675 |     memcpy(v_array, r_array, max * sizeof(int));
676 |     qsort(v_array, max, sizeof(int), cmp_int);
677 | 
678 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
679 |     {
680 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "descending order", sizeof(int), cmp_int);
681 |     }
682 | 
683 |     // descending saw
684 | 
685 |     for (cnt = 0 ; cnt < max ; cnt++)
686 |     {
687 |         r_array[cnt] = (max - cnt - 1) % 10000;
688 |     }
689 | 
690 |     memcpy(v_array, r_array, max * sizeof(int));
691 |     qsort(v_array, max, sizeof(int), cmp_int);
692 | 
693 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
694 |     {
695 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "descending saw", sizeof(int), cmp_int);
696 |     }
697 | 
698 |     // random tail
699 | 
700 |     seed_rand(rnd);
701 | 
702 |     for (cnt = 0 ; cnt < max ; cnt++)
703 |     {
704 |         r_array[cnt] = rand();
705 |     }
706 | 
707 |     memcpy(v_array, r_array, max * sizeof(int));
708 |     qsort(v_array, max * 3 / 4, sizeof(int), cmp_int);
709 |     memcpy(r_array, v_array, max * sizeof(int));
710 |     qsort(v_array, max, sizeof(int), cmp_int);
711 | 
712 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
713 |     {
714 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "random tail", sizeof(int), cmp_int);
715 |     }
716 |     // random half
717 |     seed_rand(rnd);
718 | 
719 |     for (cnt = 0 ; cnt < max ; cnt++)
720 |     {
721 |         r_array[cnt] = rand();
722 |     }
723 | 
724 |     memcpy(v_array, r_array, max * sizeof(int));
725 |     qsort(v_array, max / 2, sizeof(int), cmp_int);
726 | 
727 |     memcpy(r_array, v_array, max * sizeof(int));
728 |     qsort(v_array, max, sizeof(int), cmp_int);
729 | 
730 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
731 |     {
732 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], "random half", sizeof(int), cmp_int);
733 |     }
734 | 
735 |     // tiles
736 | 
737 |     for (cnt = 0 ; cnt < max ; cnt++)
738 |     {
739 |         if (cnt % 2 == 0)
740 |         {
741 |             r_array[cnt] = 16777216 + cnt;
742 |         }
743 |         else
744 |         {
745 |             r_array[cnt] = 33554432 + cnt;
746 |         }
747 |     }
748 | 
749 |     memcpy(v_array, r_array, max * sizeof(int));
750 |     qsort(v_array, max, sizeof(int), cmp_int);
751 | 
752 |     for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
753 |     {
754 |         strcpy(dist, "ascending tiles"); // reset each pass: test_sort() may overwrite the label with "unstable"
755 | 
756 |         test_sort(a_array, r_array, v_array, max, max, samples, repetitions, qsort, sorts[cnt], dist, sizeof(int), cmp_stable);
757 |     }
758 | 
759 |     if (repetitions > 0)
760 |     {
761 |         goto end;
762 |     }
763 | 
764 |     small_range_test:
765 | 
766 |     if (max >= 8192)
767 |     {
768 |         goto large_range_test;
769 |     }
770 |     // From here on "repetitions" is the upper array size and "samples" is the repetition count handed to test_sort().
771 |     for (lst = 1, samples = 32768, repetitions = 4 ; repetitions <= 4096 ; repetitions *= 2, samples /= 2)
772 |     {
773 |         if (max >= repetitions)
774 |         {
775 |             sprintf(dist, "random %d-%d", lst, repetitions);
776 | 
777 |             srand(rnd);
778 | 
779 |             for (cnt = 0 ; cnt < repetitions ; cnt++)
780 |             {
781 |                 r_array[cnt] = rand();
782 |             }
783 | 
784 |             memcpy(v_array, r_array, repetitions * sizeof(int));
785 |             qsort(v_array, repetitions, sizeof(int), cmp_int);
786 | 
787 |             for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
788 |             {
789 |                 test_sort(a_array, r_array, v_array, lst, repetitions, 100, samples, qsort, sorts[cnt], dist, sizeof(int), cmp_int);
790 |             }
791 |             lst = repetitions + 1;
792 |         }
793 |     }
794 | 
795 |     goto end;
796 | 
797 |     large_range_test:
798 |     // Array size doubles from 4 while the per-sample repetition count halves from 32768.
799 |     for (samples = 32768, repetitions = 4 ; samples > 0 ; repetitions *= 2, samples /= 2)
800 |     {
801 |         if (max >= repetitions)
802 |         {
803 |             srand(rnd);
804 | 
805 |             for (cnt = 0 ; cnt < repetitions ; cnt++)
806 |             {
807 |                 r_array[cnt] = rand();
808 |             }
809 | 
810 |             memcpy(v_array, r_array, repetitions * sizeof(int));
811 |             qsort(v_array, repetitions, sizeof(int), cmp_int);
812 | 
813 |             sprintf(dist, "random %d", repetitions);
814 | 
815 |             for (cnt = 0 ; cnt < sizeof(sorts) / sizeof(char *) ; cnt++)
816 |             {
817 |                 test_sort(a_array, r_array, v_array, repetitions, repetitions, 100, samples, qsort, sorts[cnt], dist, sizeof(int), cmp_int);
818 |             }
819 |         }
820 |     }
821 | 
822 |     end:
823 | 
824 |     // free(NULL) is a no-op, so this is also safe when an allocation above failed
825 |     free(a_array);
826 |     free(r_array);
827 |     free(v_array);
828 | 
829 |     return status;
830 | }
831 |
--------------------------------------------------------------------------------
/images/graph1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scandum/octosort/73605cdbdfec66e7112c6a3a3830748fcd3bb665/images/graph1.png
--------------------------------------------------------------------------------
/images/graph2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scandum/octosort/73605cdbdfec66e7112c6a3a3830748fcd3bb665/images/graph2.png
--------------------------------------------------------------------------------
/octosort.c:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2014-2021 Igor van den Hoven ivdhoven@gmail.com
3 | */
4 |
5 | /*
6 | Permission is hereby granted, free of charge, to any person obtaining
7 | a copy of this software and associated documentation files (the
8 | "Software"), to deal in the Software without restriction, including
9 | without limitation the rights to use, copy, modify, merge, publish,
10 | distribute, sublicense, and/or sell copies of the Software, and to
11 | permit persons to whom the Software is furnished to do so, subject to
12 | the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 | */
25 |
26 | /*
27 | octosort 1.0
28 | */
29 |
30 | /*
31 | octosort is based on WikiSort and quadsort
32 |
33 | WikiSort: https://github.com/BonzaiThePenguin/WikiSort
34 | quadsort: https://github.com/scandum/quadsort
35 | searches: https://github.com/scandum/binary_search
36 | */
37 |
38 | // binary insertion sort for up to 8 elements
39 |
40 | void FUNC(octo_tail_insert)(VAR *array, VAR *pta, CMPFUNC *cmp)
41 | {
42 | VAR *pte, key;
43 |
44 | pte = pta--;
45 |
46 | if (cmp(pta, pte) > 0)
47 | {
48 | key = *pte;
49 |
50 | if (cmp(pta - 3, &key) > 0)
51 | {
52 | *pte-- = *pta--; *pte-- = *pta--; *pte-- = *pta--; *pte-- = *pta--;
53 | }
54 |
55 | if (pta >= array + 1 && cmp(pta - 1, &key) > 0)
56 | {
57 | *pte-- = *pta--; *pte-- = *pta--;
58 | }
59 |
60 | if (pta >= array && cmp(pta, &key) > 0)
61 | {
62 | *pte-- = *pta;
63 | }
64 | *pte = key;
65 | }
66 | }
67 |
68 | // sort arrays of length 4 to 8 with reverse order run detection
69 |
70 | VAR *FUNC(octo_swap)(VAR array[], VAR *ptz, size_t start, size_t nmemb, CMPFUNC *cmp)
71 | {
72 | VAR *pta, swap;
73 | size_t i;
74 |
75 | pta = array + start;
76 |
77 | if (cmp(&pta[0], &pta[1]) > 0)
78 | {
79 | if (cmp(&pta[2], &pta[3]) > 0)
80 | {
81 | if (cmp(&pta[1], &pta[2]) > 0)
82 | {
83 | goto Swapper;
84 | }
85 | swap = pta[2]; pta[2] = pta[3]; pta[3] = swap;
86 | }
87 | swap = pta[0]; pta[0] = pta[1]; pta[1] = swap;
88 | }
89 | else if (cmp(&pta[2], &pta[3]) > 0)
90 | {
91 | swap = pta[2]; pta[2] = pta[3]; pta[3] = swap;
92 | }
93 |
94 | if (cmp(&pta[1], &pta[2]) > 0)
95 | {
96 | if (cmp(&pta[0], &pta[2]) <= 0)
97 | {
98 | if (cmp(&pta[1], &pta[3]) <= 0)
99 | {
100 | swap = pta[1]; pta[1] = pta[2]; pta[2] = swap;
101 | }
102 | else
103 | {
104 | swap = pta[1]; pta[1] = pta[2]; pta[2] = pta[3]; pta[3] = swap;
105 | }
106 | }
107 | else if (cmp(&pta[0], &pta[3]) > 0)
108 | {
109 | swap = pta[1]; pta[1] = pta[3]; pta[3] = swap;
110 | swap = pta[0]; pta[0] = pta[2]; pta[2] = swap;
111 | }
112 | else if (cmp(&pta[1], &pta[3]) <= 0)
113 | {
114 | swap = pta[1]; pta[1] = pta[0]; pta[0] = pta[2]; pta[2] = swap;
115 | }
116 | else
117 | {
118 | swap = pta[1]; pta[1] = pta[0]; pta[0] = pta[2]; pta[2] = pta[3]; pta[3] = swap;
119 | }
120 | }
121 |
122 | for (i = 4 ; i < nmemb ; i++)
123 | {
124 | FUNC(octo_tail_insert)(pta, &pta[i], cmp);
125 | }
126 |
127 | if (ptz)
128 | {
129 | do
130 | {
131 | swap = *ptz;
132 | *ptz++ = *--pta;
133 | *pta = swap;
134 | }
135 | while (ptz < pta);
136 | }
137 | return NULL;
138 |
139 | Swapper:
140 |
141 | if (ptz == NULL || cmp(&pta[-1], &pta[0]) > 0)
142 | {
143 | switch (nmemb)
144 | {
145 | case 8:
146 | if (cmp(&pta[6], &pta[7]) <= 0)
147 | {
148 | break;
149 | }
150 | case 7:
151 | if (cmp(&pta[5], &pta[6]) <= 0)
152 | {
153 | break;
154 | }
155 | case 6:
156 | if (cmp(&pta[4], &pta[5]) <= 0)
157 | {
158 | break;
159 | }
160 | case 5:
161 | if (cmp(&pta[3], &pta[4]) <= 0)
162 | {
163 | break;
164 | }
165 | case 4:
166 | return ptz ? ptz : pta;
167 | }
168 | }
169 |
170 | swap = pta[0]; pta[0] = pta[3]; pta[3] = swap;
171 | swap = pta[1]; pta[1] = pta[2]; pta[2] = swap;
172 |
173 | for (i = 4 ; i < nmemb ; i++)
174 | {
175 | FUNC(octo_tail_insert)(pta, &pta[i], cmp);
176 | }
177 |
178 | if (ptz)
179 | {
180 | do
181 | {
182 | swap = *ptz;
183 | *ptz++ = *--pta;
184 | *pta = swap;
185 | }
186 | while (ptz < pta);
187 | }
188 | return NULL;
189 | }
190 |
191 | // find the index of the first value within the range that is equal to array[index]
192 |
193 | size_t FUNC(monobound_binary_first)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp)
194 | {
195 | size_t top, mid, end = range.end;
196 |
197 | if (range.start >= end)
198 | {
199 | return range.start;
200 | }
201 |
202 | top = end - range.start;
203 |
204 | while (top > 1)
205 | {
206 | mid = top / 2;
207 |
208 | if (cmp(&value, &array[end - mid]) <= 0)
209 | {
210 | end -= mid;
211 | }
212 | top -= mid;
213 | }
214 |
215 | if (cmp(&value, &array[end-1]) <= 0)
216 | {
217 | return --end;
218 | }
219 | return end;
220 | }
221 |
222 | // find the index of the last value within the range that is equal to array[index], plus 1
223 |
224 | size_t FUNC(monobound_binary_last)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp)
225 | {
226 | size_t top, mid, start = range.start;
227 |
228 | if (start >= range.end)
229 | {
230 | return start;
231 | }
232 |
233 | top = range.end - start;
234 |
235 | while (top > 1)
236 | {
237 | mid = top / 2;
238 |
239 | if (cmp(&array[start + mid], &value) <= 0)
240 | {
241 | start += mid;
242 | }
243 | top -= mid;
244 | }
245 |
246 | if (cmp(&array[start], &value) <= 0)
247 | {
248 | return ++start;
249 | }
250 | return start;
251 | }
252 |
253 | // combine a linear search with a binary search to reduce the number of comparisons in situations
254 | // where we have some idea as to how many unique values there are and where the next value might be
255 |
256 | size_t FUNC(FindFirstForward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique)
257 | {
258 | size_t skip, index;
259 |
260 | skip = Max(range_length(range) / unique, 1);
261 |
262 | for (index = range.start + skip ; cmp(&value, &array[index - 1]) > 0 ; index += skip)
263 | {
264 | if (index >= range.end - skip)
265 | {
266 | return FUNC(monobound_binary_first)(array, value, new_range(index, range.end), cmp);
267 | }
268 | }
269 | return FUNC(monobound_binary_first)(array, value, new_range(index - skip, index), cmp);
270 | }
271 |
272 | size_t FUNC(FindLastForward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique)
273 | {
274 | size_t skip, index;
275 |
276 | if (range_length(range) == 0)
277 | return range.start;
278 |
279 | skip = Max(range_length(range)/unique, 1);
280 |
281 | for (index = range.start + skip; cmp(&array[index - 1], &value) <= 0 ; index += skip)
282 | {
283 | if (index >= range.end - skip)
284 | {
285 | return FUNC(monobound_binary_last)(array, value, new_range(index, range.end), cmp);
286 | }
287 | }
288 | return FUNC(monobound_binary_last)(array, value, new_range(index - skip, index), cmp);
289 | }
290 |
291 | size_t FUNC(FindFirstBackward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique)
292 | {
293 | size_t skip, index;
294 |
295 | if (range_length(range) == 0)
296 | return range.start;
297 |
298 | skip = Max(range_length(range)/unique, 1);
299 |
300 | for (index = range.end - skip; index > range.start && cmp(&value, &array[index - 1]) <= 0 ; index -= skip)
301 | {
302 | if (index < range.start + skip)
303 | {
304 | return FUNC(monobound_binary_first)(array, value, new_range(range.start, index), cmp);
305 | }
306 | }
307 | return FUNC(monobound_binary_first)(array, value, new_range(index, index + skip), cmp);
308 | }
309 |
310 | size_t FUNC(FindLastBackward)(const VAR array[], const VAR value, const Range range, CMPFUNC *cmp, const size_t unique)
311 | {
312 | size_t skip, index;
313 |
314 | if (range_length(range) == 0)
315 | return range.start;
316 |
317 | skip = Max(range_length(range)/unique, 1);
318 |
319 | for (index = range.end - skip; index > range.start && cmp(&array[index - 1], &value) > 0 ; index -= skip)
320 | {
321 | if (index < range.start + skip)
322 | {
323 | return FUNC(monobound_binary_last)(array, value, new_range(range.start, index), cmp);
324 | }
325 | }
326 | return FUNC(monobound_binary_last)(array, value, new_range(index, index + skip), cmp);
327 | }
328 |
329 | // monobound binary insertion sort
330 |
331 | void FUNC(monobound_sort)(VAR array[], const Range range, CMPFUNC *cmp)
332 | {
333 | VAR *start, *pta, *end, key;
334 | size_t i, mid, top, nmemb;
335 |
336 | start = array + range.start;
337 | nmemb = range.end - range.start;
338 |
339 | for (i = 1 ; i < nmemb ; i++)
340 | {
341 | pta = end = start + i;
342 |
343 | if (cmp(--pta, end) <= 0)
344 | {
345 | continue;
346 | }
347 | top = i;
348 |
349 | while (top > 1)
350 | {
351 | mid = top / 2;
352 |
353 | if (cmp(pta - mid, end) > 0)
354 | {
355 | pta -= mid;
356 | }
357 | top -= mid;
358 | }
359 |
360 | key = *end;
361 |
362 | memmove(pta + 1, pta, (end - pta) * sizeof(VAR));
363 |
364 | *pta = key;
365 | }
366 | }
367 |
368 | // swap a series of values in the array
369 |
370 | void FUNC(forward_block_swap)(VAR array[], const size_t start1, const size_t start2, size_t block_size)
371 | {
372 | VAR *pta, *ptb, swap;
373 |
374 | pta = array + start1;
375 | ptb = array + start2;
376 |
377 | while (block_size--)
378 | {
379 | swap = *pta; *pta++ = *ptb; *ptb++ = swap;
380 | }
381 | }
382 |
383 | void FUNC(backward_block_swap)(VAR array[], const size_t start1, const size_t start2, size_t block_size)
384 | {
385 | VAR *pta, *ptb, swap;
386 |
387 | pta = array + start1 + block_size;
388 | ptb = array + start2 + block_size;
389 |
390 | while (block_size--)
391 | {
392 | swap = *--pta; *pta = *--ptb; *ptb = swap;
393 | }
394 | }
395 |
396 | // rotate the values in an array ([0 1 2 3] becomes [1 2 3 0] if we rotate by 1)
397 | // this assumes that 0 <= amount <= range.length()
398 |
399 | void FUNC(Rotate)(VAR array[], const size_t amount, const Range range)
400 | {
401 | size_t start = range.start;
402 | size_t left = amount;
403 | size_t right = range.end - range.start - amount;
404 | size_t min = left <= right ? left : right;
405 |
406 | // Gries-Mills rotation
407 |
408 | while (min > 1)
409 | {
410 | if (left <= right)
411 | {
412 | do
413 | {
414 | FUNC(forward_block_swap)(array, start, start + left, left);
415 |
416 | start += left;
417 | right -= left;
418 | }
419 | while (left <= right);
420 |
421 | min = right;
422 | }
423 | else
424 | {
425 | do
426 | {
427 | FUNC(backward_block_swap)(array, start + left - right, start + left, right);
428 |
429 | left -= right;
430 | }
431 | while (right <= left);
432 |
433 | min = left;
434 | }
435 | }
436 |
437 | if (min)
438 | {
439 | if (left <= right)
440 | {
441 | VAR swap = array[start];
442 | memmove(&array[start], &array[start + 1], (right) * sizeof(VAR));
443 | array[start + right] = swap;
444 | }
445 | else
446 | {
447 | VAR swap = array[start + left];
448 | memmove(&array[start + 1], &array[start], (left) * sizeof(VAR));
449 | array[start] = swap;
450 | }
451 | }
452 | }
453 |
454 | // merge two ranges from one array into another array
455 |
456 | void FUNC(forward_merge_into)(VAR *dest, VAR *from, size_t nmemb, size_t block, CMPFUNC *cmp)
457 | {
458 | VAR *l, *r, *m, *e; // left, right, middle, end
459 |
460 | l = from;
461 | r = from + block;
462 | m = r;
463 | e = l + nmemb;
464 |
465 | while (1)
466 | {
467 | if (cmp(l, r) <= 0)
468 | {
469 | *dest++ = *l++;
470 |
471 | if (l == m)
472 | {
473 | do *dest++ = *r++; while (r < e);
474 |
475 | return;
476 | }
477 | }
478 | else
479 | {
480 | *dest++ = *r++;
481 |
482 | if (r == e)
483 | {
484 | do *dest++ = *l++; while (l < m);
485 |
486 | return;
487 | }
488 | }
489 | }
490 | }
491 |
492 | void FUNC(external_backward_merge)(VAR *array, VAR *swap, size_t nmemb, size_t block, CMPFUNC *cmp)
493 | {
494 | VAR *r, *m, *e, *s; // right, middle, end, swap
495 |
496 | m = array + block;
497 | e = array + nmemb - 1;
498 | r = m--;
499 |
500 | if (cmp(m, r) <= 0)
501 | {
502 | return;
503 | }
504 |
505 | while (cmp(m, e) <= 0)
506 | {
507 | e--;
508 | }
509 |
510 | s = swap;
511 |
512 | do *s++ = *r++; while (r <= e);
513 |
514 | s--;
515 |
516 | *e-- = *m--;
517 |
518 | if (cmp(array, swap) <= 0)
519 | {
520 | while (1)
521 | {
522 | if (cmp(m, s) > 0)
523 | {
524 | *e-- = *m--;
525 | }
526 | else
527 | {
528 | *e-- = *s--;
529 |
530 | if (s < swap)
531 | {
532 | return;
533 | }
534 | }
535 | }
536 | }
537 | else
538 | {
539 | while (1)
540 | {
541 | if (cmp(m, s) > 0)
542 | {
543 | *e-- = *m--;
544 |
545 | if (m < array)
546 | {
547 | do *e-- = *s--; while (s >= swap);
548 |
549 | return;
550 | }
551 | }
552 | else
553 | {
554 | *e-- = *s--;
555 | }
556 | }
557 | }
558 | }
559 |
560 | // merge operation using an external buffer
561 |
562 | void FUNC(MergeExternal)(VAR array[], const Range A, const Range B, CMPFUNC *cmp, VAR *cache)
563 | {
564 | VAR *A_index = &cache[0];
565 | VAR *B_index = &array[B.start];
566 | VAR *insert_index = &array[A.start];
567 | VAR *A_last = &cache[range_length(A)];
568 | VAR *B_last = &array[B.end];
569 |
570 | if (range_length(B) > 0 && range_length(A) > 0)
571 | {
572 | while (1)
573 | {
574 | if (cmp(A_index, B_index) <= 0)
575 | {
576 | *insert_index++ = *A_index++;
577 |
578 | if (A_index == A_last)
579 | break;
580 | }
581 | else
582 | {
583 | *insert_index++ = *B_index++;
584 |
585 | if (B_index == B_last)
586 | break;
587 | }
588 | }
589 | }
590 | // copy the remainder of A into the final array
591 |
592 | memcpy(insert_index, A_index, (A_last - A_index) * sizeof(VAR));
593 | }
594 |
595 | // merge operation using an internal buffer
596 |
597 | // whenever we find a value to add to the final array, swap it with the value that's
598 | // already in that spot. when this algorithm is finished, the 'I' range will contain
599 | // its original contents, but in a different order
600 |
601 | void FUNC(MergeInternal)(VAR array[], const Range A, const Range B, CMPFUNC *cmp, const Range I)
602 | {
603 | VAR swap, *pta, *ptb, *pti;
604 | size_t a = 0, b = 0, i = 0;
605 | size_t length_A = range_length(A);
606 | size_t length_B = range_length(B);
607 |
608 | if (length_A > 0 && length_B > 0)
609 | {
610 | pta = array + A.start;
611 | ptb = array + B.start;
612 | pti = array + I.start;
613 |
614 | while (1)
615 | {
616 | if (cmp(&pti[a], &ptb[b]) <= 0)
617 | {
618 | swap = pta[i]; pta[i++] = pti[a]; pti[a] = swap;
619 |
620 | if (++a >= length_A)
621 | break;
622 | }
623 | else
624 | {
625 | swap = pta[i]; pta[i++] = ptb[b]; ptb[b] = swap;
626 |
627 | if (++b >= length_B)
628 | break;
629 | }
630 | }
631 | }
632 | FUNC(backward_block_swap)(array, I.start + a, A.start + i, length_A - a);
633 | }
634 |
635 | // merge operation without a buffer
636 |
637 | // this just repeatedly binary searches into B and rotates A into position.
638 | // the paper suggests using the 'rotation-based Hwang and Lin algorithm' here,
639 | // but I decided to stick with this because it had better situational performance
640 |
641 | // (Hwang and Lin is designed for merging subarrays of very different sizes,
642 | // but WikiSort almost always uses subarrays that are roughly the same size)
643 |
644 | // normally this is incredibly suboptimal, but this function is only called
645 | // when none of the A or B blocks in any subarray contained 2√A unique values,
646 | // which places a hard limit on the number of times this will ACTUALLY need
647 | // to binary search and rotate.
648 |
649 | // according to my analysis the worst case is √A rotations performed on √A items
650 | // once the constant factors are removed, which ends up being O(n)
651 |
652 | // again, this is NOT a general-purpose solution – it only works well in this case!
653 | // kind of like how the O(n^2) insertion sort is used in some places
654 |
655 | void FUNC(MergeInPlace)(VAR array[], Range A, Range B, CMPFUNC *cmp, VAR *cache, const size_t cache_size)
656 | {
657 | if (range_length(A) == 0 || range_length(B) == 0)
658 | {
659 | return;
660 | }
661 |
662 | while (1)
663 | {
664 | // find the first place in B where the first item in A needs to be inserted
665 | size_t mid = FUNC(monobound_binary_first)(array, array[A.start], B, cmp);
666 |
667 | // rotate A into place
668 | size_t amount = mid - A.end;
669 |
670 | FUNC(Rotate)(array, range_length(A), new_range(A.start, mid));
671 |
672 | if (B.end == mid)
673 | {
674 | break;
675 | }
676 |
677 | // calculate the new A and B ranges
678 |
679 | B.start = mid;
680 | A = new_range(A.start + amount, B.start);
681 | A.start = FUNC(monobound_binary_last)(array, array[A.start], A, cmp);
682 |
683 | if (range_length(A) == 0)
684 | {
685 | break;
686 | }
687 | }
688 | }
689 |
690 | // bottom-up merge sort combined with an in-place merge algorithm for O(1) memory use
691 |
692 | void FUNC(octosort)(VAR array[], size_t size, VAR *external_cache, size_t cache_size, CMPFUNC *cmp)
693 | {
694 | VAR swap, stack_cache[512], *cache = external_cache;
695 |
696 | #if DYNAMIC_CACHE
697 | // turns into a full-throttle merge sort since everything fits into the cache
698 |
699 | if (cache == NULL)
700 | {
701 | cache_size = 1 + size / 2;
702 |
703 | cache = (VAR *) malloc(cache_size * sizeof(VAR));
704 |
705 | if (cache == NULL)
706 | {
707 | external_cache = cache = stack_cache;
708 |
709 | cache_size = 512;
710 | }
711 | }
712 | #else
713 | // since the cache size is fixed, it's still O(1) memory
714 | // the minimum stack size is typically 8192 KB, so 512 elements should fit comfortably
715 | // removing the cache entirely gives 60% of the performance of qsort()
716 |
717 | if (cache == NULL)
718 | {
719 | cache = stack_cache;
720 |
721 | cache_size = 512;
722 | }
723 | #endif
724 |
725 | // if the array is of size 1, 2, 3 .. 8 sort them like so:
726 |
727 | if (size <= 8)
728 | {
729 | FUNC(monobound_sort)(array, new_range(0, size), cmp);
730 |
731 | goto End;
732 | }
733 |
734 | WikiIterator iterator = WikiIterator_new(size, 4);
735 |
736 | VAR *pto = NULL;
737 |
738 | // sort groups of 4-8 items at a time
739 |
740 | while (!WikiIterator_finished(&iterator))
741 | {
742 | Range range = WikiIterator_nextRange(&iterator);
743 |
744 | pto = FUNC(octo_swap)(array, pto, range.start, range.end - range.start, cmp);
745 | }
746 |
747 | if (pto)
748 | {
749 | VAR *pta = array + size - 1;
750 | VAR *ptz = pto;
751 |
752 | do
753 | {
754 | swap = *ptz;
755 | *ptz = *pta;
756 | *pta = swap;
757 | }
758 | while (++ptz < --pta);
759 |
760 | if (pto == array)
761 | {
762 | goto End;
763 | }
764 | }
765 |
766 | // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc.
767 |
768 | while (1)
769 | {
770 | // if every A and B block will fit into the cache, use a special branch specifically for merging with the cache
771 | // (we use < rather than <= since the block size might be one more than iterator.length())
772 | if (WikiIterator_length(&iterator) < cache_size)
773 | {
774 | // perform a quad merge if the four subarrays fit into the cache
775 |
776 | // array: [A][B][C][D]
777 | // cache: [A B] Step 1
778 | // cache: [C D] Step 2
779 | // array: [A B C D] Step 3
780 |
781 | if ((WikiIterator_length(&iterator) + 1) * 4 <= cache_size && (WikiIterator_length(&iterator) + 1) * 4 <= size)
782 | {
783 | WikiIterator_begin(&iterator);
784 |
785 | while (!WikiIterator_finished(&iterator))
786 | {
787 | Range A = WikiIterator_nextRange(&iterator);
788 | Range B = WikiIterator_nextRange(&iterator);
789 | Range C = WikiIterator_nextRange(&iterator);
790 | Range D = WikiIterator_nextRange(&iterator);
791 |
792 | if (cmp(&array[A.end - 1], &array[B.start]) <= 0)
793 | {
794 | if (cmp(&array[C.end - 1], &array[D.start]) <= 0)
795 | {
796 | if (cmp(&array[B.end - 1], &array[C.start]) <= 0)
797 | {
798 | continue; // A through D are in order, skip doing anything else
799 | }
800 | // A and B are in order and C and D are in order, copy to cache
801 | memcpy(&cache[0], &array[A.start], range_length(A) * sizeof(VAR));
802 | memcpy(&cache[A.end - A.start], &array[B.start], range_length(B) * sizeof(VAR));
803 | memcpy(&cache[B.end - A.start], &array[C.start], range_length(C) * sizeof(VAR));
804 | memcpy(&cache[C.end - A.start], &array[D.start], range_length(D) * sizeof(VAR));
805 |
806 | goto Step3;
807 | }
808 | // A and B are in order, copy to cache
809 | memcpy(&cache[0], &array[A.start], range_length(A) * sizeof(VAR));
810 | memcpy(&cache[A.end - A.start], &array[B.start], range_length(B) * sizeof(VAR));
811 |
812 | goto Step2;
813 | }
814 | // Step1:
815 |
816 | // A and B are not in order, merge to cache
817 | FUNC(forward_merge_into)(cache, array + A.start, B.end - A.start, A.end - A.start, cmp);
818 |
819 | if (cmp(&array[C.end - 1], &array[D.start]) <= 0) // C and D are in order, copy to cache
820 | {
821 | memcpy(&cache[B.end - A.start], &array[C.start], range_length(C) * sizeof(VAR));
822 | memcpy(&cache[C.end - A.start], &array[D.start], range_length(D) * sizeof(VAR));
823 | }
824 | else
825 | {
826 | Step2:
827 |
828 | // C and D are not in order, merge to cache
829 | FUNC(forward_merge_into)(&cache[B.end - A.start], &array[C.start], D.end - C.start, C.end - C.start, cmp);
830 | }
831 | Step3:
832 |
833 | // merge A through D from the cache back into the array
834 | FUNC(forward_merge_into)(&array[A.start], &cache[0], D.end - A.start, B.end - A.start, cmp);
835 | }
836 |
837 | // we merged two levels at the same time, so we're done with this level already
838 | // iterator.nextLevel() is called again at the bottom of this outer merge loop
839 |
840 | WikiIterator_nextLevel(&iterator);
841 | }
842 | else
843 | {
844 | WikiIterator_begin(&iterator);
845 |
846 | while (!WikiIterator_finished(&iterator))
847 | {
848 | Range A = WikiIterator_nextRange(&iterator);
849 | Range B = WikiIterator_nextRange(&iterator);
850 |
851 | if (cmp(&array[A.end - 1], &array[B.start]) <= 0)
852 | {
853 | continue; // A and B are in order, skip doing anything else
854 | }
855 | // A and B are not in order, merge through the cache
856 | FUNC(external_backward_merge)(array + A.start, cache, B.end - A.start, range_length(A), cmp);
857 | }
858 | }
859 | }
860 | else
861 | {
862 | // this is where the in-place merge logic starts!
863 | // 1. pull out two internal buffers each containing √A unique values
864 | // 1a. adjust block_size and buffer_size if we couldn't find enough unique values
865 | // 2. loop over the A and B subarrays within this level of the merge sort
866 | // 3. break A and B into blocks of size 'block_size'
867 | // 4. "tag" each of the A blocks with values from the first internal buffer
868 | // 5. roll the A blocks through the B blocks and drop/rotate them where they belong
869 | // 6. merge each A block with any B values that follow, using the cache or the second internal buffer
870 | // 7. sort the second internal buffer if it exists
871 | // 8. redistribute the two internal buffers back into the array
872 |
873 | size_t block_size = monobound_sqrt(WikiIterator_length(&iterator));
874 |
875 | size_t buffer_size = WikiIterator_length(&iterator) / block_size + 1;
876 |
877 | // as an optimization, we really only need to pull out the internal buffers once for each level of merges
878 | // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level
879 |
880 | Range buffer1, buffer2, A, B;
881 | size_t find_separately = 0;
882 | size_t index, last, count, find, start, pull_index = 0;
883 |
884 | struct
885 | {
886 | size_t from;
887 | size_t to;
888 | size_t count;
889 | Range range;
890 | }
891 | pull[2];
892 |
893 | pull[0].from = pull[0].to = pull[0].count = 0; pull[0].range = new_range(0, 0);
894 | pull[1].from = pull[1].to = pull[1].count = 0; pull[1].range = new_range(0, 0);
895 |
896 | buffer1 = new_range(0, 0);
897 | buffer2 = new_range(0, 0);
898 |
899 | find = buffer_size * 2; // find two internal buffers of size 'buffer_size' each
900 |
901 | if (block_size <= cache_size)
902 | {
903 | find = buffer_size; // if every A block fits into the cache then we won't need the second internal buffer
904 | }
905 | else if (find > WikiIterator_length(&iterator))
906 | {
907 | find_separately = 1; // we can't fit both buffers into the same A or B subarray, so find two buffers separately
908 | find = buffer_size;
909 | }
910 |
911 | // we need to find either a single contiguous space containing 2√A unique values, which will be split up into two buffers of size √A each,
912 | // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values,
913 | // OR if we couldn't find that many unique values, we need the largest possible buffer we can get
914 |
915 | // in the case where it couldn't find a single buffer of at least √A unique values,
916 | // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace)
917 |
918 | WikiIterator_begin(&iterator);
919 |
920 | while (!WikiIterator_finished(&iterator))
921 | {
922 | A = WikiIterator_nextRange(&iterator);
923 | B = WikiIterator_nextRange(&iterator);
924 |
925 | // just store information about where the values will be pulled from and to,
926 | // as well as how many values there are, to create the two internal buffers
927 |
928 | // check A for the number of unique values we need to fill an internal buffer
929 | // these values will be pulled out to the start of A
930 | for (last = A.start, count = 1; count < find; last = index, count++)
931 | {
932 | index = FUNC(FindLastForward)(array, array[last], new_range(last + 1, A.end), cmp, find - count);
933 |
934 | if (index == A.end)
935 | break;
936 | }
937 | index = last;
938 |
939 | if (count >= buffer_size)
940 | {
941 | // keep track of the range within the array where we'll need to "pull out" these values to create the internal buffer
942 | PULL(A.start);
943 | pull_index = 1;
944 |
945 | if (count == buffer_size + buffer_size)
946 | {
947 | // we were able to find a single contiguous section containing 2√A unique values,
948 | // so this section can be used to contain both of the internal buffers we'll need
949 | buffer1 = new_range(A.start, A.start + buffer_size);
950 | buffer2 = new_range(A.start + buffer_size, A.start + count);
951 | break;
952 | }
953 | else if (find == buffer_size + buffer_size)
954 | {
955 | // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values,
956 | // so we still need to find a second separate buffer of at least √A unique values
957 | buffer1 = new_range(A.start, A.start + count);
958 | find = buffer_size;
959 | }
960 | else if (block_size <= cache_size)
961 | {
962 | // we found the first and only internal buffer that we need, so we're done!
963 | buffer1 = new_range(A.start, A.start + count);
964 | break;
965 | }
966 | else if (find_separately)
967 | {
968 | // found one buffer, but now find the other one
969 | buffer1 = new_range(A.start, A.start + count);
970 | find_separately = 0;
971 | }
972 | else
973 | {
974 | // we found a second buffer in an 'A' subarray containing √A unique values, so we're done!
975 | buffer2 = new_range(A.start, A.start + count);
976 | break;
977 | }
978 | }
979 | else if (pull_index == 0 && count > range_length(buffer1))
980 | {
981 | // keep track of the largest buffer we were able to find
982 | buffer1 = new_range(A.start, A.start + count);
983 | PULL(A.start);
984 | }
985 |
986 | // check B for the number of unique values we need to fill an internal buffer
987 | // these values will be pulled out to the end of B
988 | for (last = B.end - 1, count = 1; count < find; last = index - 1, count++)
989 | {
990 | index = FUNC(FindFirstBackward)(array, array[last], new_range(B.start, last), cmp, find - count);
991 | if (index == B.start)
992 | break;
993 | }
994 | index = last;
995 |
996 | if (count >= buffer_size)
997 | {
998 | // keep track of the range within the array where we'll need to "pull out" these values to create the internal buffer
999 | PULL(B.end);
1000 | pull_index = 1;
1001 |
1002 | if (count == buffer_size + buffer_size)
1003 | {
1004 | // we were able to find a single contiguous section containing 2√A unique values,
1005 | // so this section can be used to contain both of the internal buffers we'll need
1006 | buffer1 = new_range(B.end - count, B.end - buffer_size);
1007 | buffer2 = new_range(B.end - buffer_size, B.end);
1008 | break;
1009 | }
1010 | else if (find == buffer_size + buffer_size)
1011 | {
1012 | // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values,
1013 | // so we still need to find a second separate buffer of at least √A unique values
1014 | buffer1 = new_range(B.end - count, B.end);
1015 | find = buffer_size;
1016 | }
1017 | else if (block_size <= cache_size)
1018 | {
1019 | // we found the first and only internal buffer that we need, so we're done!
1020 | buffer1 = new_range(B.end - count, B.end);
1021 | break;
1022 | }
1023 | else if (find_separately)
1024 | {
1025 | // found one buffer, but now find the other one
1026 | buffer1 = new_range(B.end - count, B.end);
1027 | find_separately = 0;
1028 | }
1029 | else
1030 | {
1031 | // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray,
1032 | // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2
1033 | if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count;
1034 |
1035 | // we found a second buffer in an 'B' subarray containing √A unique values, so we're done!
1036 | buffer2 = new_range(B.end - count, B.end);
1037 | break;
1038 | }
1039 | }
1040 | else if (pull_index == 0 && count > range_length(buffer1))
1041 | {
1042 | // keep track of the largest buffer we were able to find
1043 | buffer1 = new_range(B.end - count, B.end);
1044 | PULL(B.end);
1045 | }
1046 | }
1047 |
1048 | // pull out the two ranges so we can use them as internal buffers
1049 | for (pull_index = 0; pull_index < 2; pull_index++)
1050 | {
1051 | Range range;
1052 | size_t length = pull[pull_index].count;
1053 |
1054 | if (pull[pull_index].to < pull[pull_index].from)
1055 | {
1056 | // we're pulling the values out to the left, which means the start of an A subarray
1057 | index = pull[pull_index].from;
1058 | for (count = 1; count < length; count++)
1059 | {
1060 | index = FUNC(FindFirstBackward)(array, array[index - 1], new_range(pull[pull_index].to, pull[pull_index].from - (count - 1)), cmp, length - count);
1061 | range = new_range(index + 1, pull[pull_index].from + 1);
1062 | FUNC(Rotate)(array, range_length(range) - count, range);
1063 | pull[pull_index].from = index + count;
1064 | }
1065 | }
1066 | else if (pull[pull_index].to > pull[pull_index].from)
1067 | {
1068 | // we're pulling values out to the right, which means the end of a B subarray
1069 | index = pull[pull_index].from + 1;
1070 | for (count = 1; count < length; count++)
1071 | {
1072 | index = FUNC(FindLastForward)(array, array[index], new_range(index, pull[pull_index].to), cmp, length - count);
1073 | range = new_range(pull[pull_index].from, index - 1);
1074 | FUNC(Rotate)(array, count, range);
1075 | pull[pull_index].from = index - 1 - count;
1076 | }
1077 | }
1078 | }
1079 |
1080 | // adjust block_size and buffer_size based on the values we were able to pull out
1081 | buffer_size = range_length(buffer1);
1082 | block_size = WikiIterator_length(&iterator)/buffer_size + 1;
1083 |
1084 | // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks,
1085 | // so this was originally here to test the math for adjusting block_size above
1086 | // assert((WikiIterator_length(&iterator) + 1)/block_size <= buffer_size);
1087 |
1088 | // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort!
1089 | WikiIterator_begin(&iterator);
1090 |
1091 | while (!WikiIterator_finished(&iterator))
1092 | {
1093 | A = WikiIterator_nextRange(&iterator);
1094 | B = WikiIterator_nextRange(&iterator);
1095 |
1096 | // remove any parts of A or B that are being used by the internal buffers
1097 | start = A.start;
1098 |
1099 | if (start == pull[0].range.start)
1100 | {
1101 | if (pull[0].from > pull[0].to)
1102 | {
1103 | A.start += pull[0].count;
1104 |
1105 | // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge
1106 | // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4,
1107 | // which also only happens when cache_size is small or 1 since it'd otherwise use MergeExternal
1108 | if (range_length(A) == 0)
1109 | continue;
1110 | }
1111 | else if (pull[0].from < pull[0].to)
1112 | {
1113 | B.end -= pull[0].count;
1114 | if (range_length(B) == 0)
1115 | continue;
1116 | }
1117 | }
1118 |
1119 | if (start == pull[1].range.start)
1120 | {
1121 | if (pull[1].from > pull[1].to)
1122 | {
1123 | A.start += pull[1].count;
1124 | if (range_length(A) == 0)
1125 | continue;
1126 | }
1127 | else if (pull[1].from < pull[1].to)
1128 | {
1129 | B.end -= pull[1].count;
1130 | if (range_length(B) == 0)
1131 | continue;
1132 | }
1133 | }
1134 |
1135 | if (cmp(&array[A.end - 1], &array[B.start]) > 0) // A and B are not in order, so merge them
1136 | {
1137 | Range blockA, firstA, lastA, lastB, blockB;
1138 | size_t indexA, findA;
1139 |
1140 | // break the remainder of A into blocks. firstA is the uneven-sized first A block
1141 | blockA = new_range(A.start, A.end);
1142 | firstA = new_range(A.start, A.start + range_length(blockA) % block_size);
1143 |
1144 | // swap the first value of each A block with the value in buffer1
1145 | for (indexA = buffer1.start, index = firstA.end; index < blockA.end; indexA++, index += block_size)
1146 | {
1147 | SWAP(array[indexA], array[index]);
1148 | }
1149 |
1150 | // start rolling the A blocks through the B blocks!
1151 | // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well
1152 | lastA = firstA;
1153 | lastB = new_range(0, 0);
1154 | blockB = new_range(B.start, B.start + Min(block_size, range_length(B)));
1155 | blockA.start += range_length(firstA);
1156 | indexA = buffer1.start;
1157 |
1158 | // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it
1159 | // otherwise, if the second buffer is available, block swap the contents into that
1160 | if (range_length(lastA) <= cache_size)
1161 | {
1162 | memcpy(&cache[0], &array[lastA.start], range_length(lastA) * sizeof(VAR));
1163 | }
1164 | else if (range_length(buffer2) > 0)
1165 | {
1166 | FUNC(forward_block_swap)(array, lastA.start, buffer2.start, range_length(lastA));
1167 | }
1168 |
1169 | if (range_length(blockA) > 0)
1170 | {
1171 | while (1)
1172 | {
1173 | // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block,
1174 | // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks.
1175 |
1176 | if (range_length(blockB) == 0 || (range_length(lastB) > 0 && cmp(&array[indexA], &array[lastB.end - 1]) <= 0))
1177 | {
1178 | // figure out where to split the previous B block, and rotate it at the split
1179 | size_t B_split = FUNC(monobound_binary_first)(array, array[indexA], lastB, cmp);
1180 | size_t B_remaining = lastB.end - B_split;
1181 |
1182 | // swap the minimum A block to the beginning of the rolling A blocks
1183 | size_t minA = blockA.start;
1184 | for (findA = minA + block_size; findA < blockA.end; findA += block_size)
1185 | {
1186 | if (cmp(&array[minA], &array[findA]) > 0)
1187 | {
1188 | minA = findA;
1189 | }
1190 | }
1191 | FUNC(forward_block_swap)(array, blockA.start, minA, block_size);
1192 |
1193 | // swap the first item of the previous A block back with its original value, which is stored in buffer1
1194 | SWAP(array[blockA.start], array[indexA]);
1195 | indexA++;
1196 |
1197 | // locally merge the previous A block with the B values that follow it if lastA fits into the external cache
1198 | // we'll use that (with MergeExternal), or if the second internal buffer exists we'll use that (with MergeInternal),
1199 | // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace)
1200 |
1201 | if (range_length(lastA) <= cache_size)
1202 | {
1203 | FUNC(MergeExternal)(array, lastA, new_range(lastA.end, B_split), cmp, cache);
1204 | }
1205 | else if (range_length(buffer2) > 0)
1206 | {
1207 | FUNC(MergeInternal)(array, lastA, new_range(lastA.end, B_split), cmp, buffer2);
1208 | }
1209 | else
1210 | {
1211 | FUNC(MergeInPlace)(array, lastA, new_range(lastA.end, B_split), cmp, cache, cache_size);
1212 | }
1213 |
1214 | if (range_length(buffer2) > 0 || block_size <= cache_size)
1215 | {
1216 | // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway
1217 |
1218 | if (block_size <= cache_size)
1219 | {
1220 | memcpy(&cache[0], &array[blockA.start], block_size * sizeof(VAR));
1221 | }
1222 | else
1223 | {
1224 | FUNC(forward_block_swap)(array, blockA.start, buffer2.start, block_size);
1225 | }
1226 |
1227 | // this is equivalent to rotating, but faster
1228 | // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it
1229 | // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs
1230 | FUNC(forward_block_swap)(array, B_split, blockA.start + block_size - B_remaining, B_remaining);
1231 | }
1232 | else
1233 | {
1234 | // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation
1235 | FUNC(Rotate)(array, blockA.start - B_split, new_range(B_split, blockA.start + block_size));
1236 | }
1237 |
1238 | // update the range for the remaining A blocks, and the range remaining from the B block after it was split
1239 | lastA = new_range(blockA.start - B_remaining, blockA.start - B_remaining + block_size);
1240 | lastB = new_range(lastA.end, lastA.end + B_remaining);
1241 |
1242 | // if there are no more A blocks remaining, this step is finished!
1243 | blockA.start += block_size;
1244 | if (range_length(blockA) == 0)
1245 | {
1246 | break;
1247 | }
1248 |
1249 | }
1250 | else if (range_length(blockB) < block_size)
1251 | {
1252 | // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation
1253 | // the cache is disabled here since it might contain the contents of the previous A block
1254 | FUNC(Rotate)(array, blockB.start - blockA.start, new_range(blockA.start, blockB.end));
1255 |
1256 | lastB = new_range(blockA.start, blockA.start + range_length(blockB));
1257 | blockA.start += range_length(blockB);
1258 | blockA.end += range_length(blockB);
1259 | blockB.end = blockB.start;
1260 | }
1261 | else
1262 | {
1263 | // roll the leftmost A block to the end by swapping it with the next B block
1264 | FUNC(forward_block_swap)(array, blockA.start, blockB.start, block_size);
1265 | lastB = new_range(blockA.start, blockA.start + block_size);
1266 |
1267 | blockA.start += block_size;
1268 | blockA.end += block_size;
1269 | blockB.start += block_size;
1270 |
1271 | if (blockB.end > B.end - block_size)
1272 | {
1273 | blockB.end = B.end;
1274 | }
1275 | else
1276 | {
1277 | blockB.end += block_size;
1278 | }
1279 | }
1280 | }
1281 | }
1282 |
1283 | // merge the last A block with the remaining B values
1284 | if (range_length(lastA) <= cache_size)
1285 | {
1286 | FUNC(MergeExternal)(array, lastA, new_range(lastA.end, B.end), cmp, cache);
1287 | }
1288 | else if (range_length(buffer2) > 0)
1289 | {
1290 | FUNC(MergeInternal)(array, lastA, new_range(lastA.end, B.end), cmp, buffer2);
1291 | }
1292 | else
1293 | {
1294 | FUNC(MergeInPlace)(array, lastA, new_range(lastA.end, B.end), cmp, cache, cache_size);
1295 | }
1296 | }
1297 | }
1298 |
1299 | // when we're finished with this merge step we should have the one or two internal buffers left over, where the second buffer is all jumbled up
1300 | // insertion sort the second buffer, then redistribute the buffers back into the array using the opposite process used for creating the buffer
1301 |
1302 | // While an unstable sort like quicksort could be applied here, in benchmarks it was consistently slightly slower than a simple insertion sort,
1303 | // even for tens of millions of items. this may be because insertion sort is quite fast when the data is already somewhat sorted, like it is here
1304 |
1305 | FUNC(monobound_sort)(array, buffer2, cmp);
1306 |
1307 | for (pull_index = 0; pull_index < 2; pull_index++)
1308 | {
1309 | size_t amount, unique = pull[pull_index].count * 2;
1310 | if (pull[pull_index].from > pull[pull_index].to)
1311 | {
1312 | // the values were pulled out to the left, so redistribute them back to the right
1313 | Range buffer = new_range(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count);
1314 | while (range_length(buffer) > 0)
1315 | {
1316 | index = FUNC(FindFirstForward)(array, array[buffer.start], new_range(buffer.end, pull[pull_index].range.end), cmp, unique);
1317 | amount = index - buffer.end;
1318 | FUNC(Rotate)(array, range_length(buffer), new_range(buffer.start, index));
1319 | buffer.start += (amount + 1);
1320 | buffer.end += amount;
1321 | unique -= 2;
1322 | }
1323 | }
1324 | else if (pull[pull_index].from < pull[pull_index].to)
1325 | {
1326 | // the values were pulled out to the right, so redistribute them back to the left
1327 | Range buffer = new_range(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end);
1328 | while (range_length(buffer) > 0)
1329 | {
1330 | index = FUNC(FindLastBackward)(array, array[buffer.end - 1], new_range(pull[pull_index].range.start, buffer.start), cmp, unique);
1331 | amount = buffer.start - index;
1332 | FUNC(Rotate)(array, amount, new_range(index, buffer.end));
1333 | buffer.start -= amount;
1334 | buffer.end -= (amount + 1);
1335 | unique -= 2;
1336 | }
1337 | }
1338 | }
1339 | }
1340 |
1341 | // double the size of each A and B subarray that will be merged in the next level
1342 |
1343 | if (!WikiIterator_nextLevel(&iterator))
1344 | {
1345 | break;
1346 | }
1347 | }
1348 |
1349 | End:
1350 |
1351 | #if DYNAMIC_CACHE
1352 |
1353 | if (cache != external_cache)
1354 | {
1355 | free(cache);
1356 | }
1357 |
1358 | #endif
1359 |
1360 | return;
1361 | }
1362 |
--------------------------------------------------------------------------------
/octosort.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2014-2021 Igor van den Hoven ivdhoven@gmail.com
3 | */
4 |
5 | /*
6 | Permission is hereby granted, free of charge, to any person obtaining
7 | a copy of this software and associated documentation files (the
8 | "Software"), to deal in the Software without restriction, including
9 | without limitation the rights to use, copy, modify, merge, publish,
10 | distribute, sublicense, and/or sell copies of the Software, and to
11 | permit persons to whom the Software is furnished to do so, subject to
12 | the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 | */
25 |
26 | /*
27 | octosort 1.0
28 | */
29 |
30 | /*
31 | octosort is based on WikiSort and quadsort
32 |
33 | WikiSort: https://github.com/BonzaiThePenguin/WikiSort
34 | quadsort: https://github.com/scandum/quadsort
35 | searches: https://github.com/scandum/binary_search
36 | */
37 |
38 | #ifndef OCTOSORT_H
39 | #define OCTOSORT_H
40 |
41 | #include
42 | #include
43 | #include
44 | #include
45 |
//#define cmp(a,b) (*(a) > *(b))

// Comparison callback with the same shape as the one qsort() takes.
typedef int CMPFUNC (const void *a, const void *b);

// Set to 1 to see how it performs when given more memory

#define DYNAMIC_CACHE 0

// utilities

// Exchange two lvalues through a temporary. The expansion assigns to a
// variable named `swap`, which must already be declared at the call site with
// a type compatible with both arguments. It expands to a brace block, so do
// not use it as the unbraced body of an `if` that is followed by `else`.
#define SWAP(value1, value2) {swap = value1;value1 = value2;value2 = swap;}

// Record where an internal buffer is being pulled from. The expansion reads
// `A`, `B`, `count`, `index` and `pull_index` from the enclosing scope and
// fills in pull[pull_index]; `_to` is the destination index.
// Wrapped in do { } while (0) so the four assignments always act as a single
// statement, even as the unbraced body of an if/else.
#define PULL(_to) \
	do \
	{ \
		pull[pull_index].range = new_range(A.start, B.end); \
		pull[pull_index].count = count; \
		pull[pull_index].from = index; \
		pull[pull_index].to = (_to); \
	} \
	while (0)
63 |
// not as fast as math.h's sqrt() but it's portable
//
// Returns the integer square root of `size`, i.e. the largest value r such
// that r * r <= size, found with a branch-light binary search.

size_t monobound_sqrt(const size_t size)
{
	size_t bot, mid, top, sum;

	bot = 0;

	// The root of any size_t fits in half of its bits, so search [0, 2^(bits/2)).
	// This is 65536 for a 32 bit size_t and 2^32 for a 64 bit one, and keeps
	// sum * sum below from ever overflowing.
	top = (size_t) 1 << (sizeof(size_t) * 4);

	while (top > 1)
	{
		mid = top / 2;
		sum = bot + mid;

		if (sum * sum <= size)
		{
			bot += mid;
		}
		top -= mid;
	}
	return bot;
}
86 |
// Returns the smaller of the two values (b when they are equal).
size_t Min(const size_t a, const size_t b)
{
	if (a < b)
	{
		return a;
	}
	return b;
}
91 |
// Returns the larger of the two values (b when they are equal).
size_t Max(const size_t a, const size_t b)
{
	if (a > b)
	{
		return a;
	}
	return b;
}
96 |
// 63 -> 32, 64 -> 64, etc. this comes from Hacker's Delight
//
// Returns the largest power of two that is <= value, or 0 when value is 0.

size_t FloorPowerOfTwo (const size_t value)
{
	size_t x = value;
	size_t shift;

	// Smear the highest set bit into every lower bit position. The shift
	// doubles each pass and stops at the width of size_t, so this covers a
	// 64 bit size_t on every data model instead of only where __LP64__ is
	// defined, and never shifts by the full width of the type.
	for (shift = 1; shift < sizeof(size_t) * 8; shift *= 2)
	{
		x |= x >> shift;
	}

	// x is now all ones from the top set bit down; dropping the lower half
	// leaves just that top bit.
	return x - (x >> 1);
}
112 |
// A half-open span [start, end) of indices within the array being sorted.

typedef struct
{
	size_t start; // index of the first element in the span
	size_t end;   // index one past the last element in the span
}
Range;

// Number of elements covered by the span.
size_t range_length(Range range)
{
	const size_t length = range.end - range.start;

	return length;
}

// Build a Range from its two bounds.
Range new_range(const size_t start, const size_t end)
{
	Range range;

	range.start = start;
	range.end = end;

	return range;
}
131 |
132 |
// Iteration state for the bottom-up merge sort.
// The sort only operates on power-of-two sized levels, so the array length is
// scaled down to a power of two and a fraction (numerator / denominator) is
// carried along to scale each range back up to the real length.

typedef struct
{
	size_t size;           // total number of elements (size2 passed to WikiIterator_new)
	size_t power_of_two;   // FloorPowerOfTwo(size)
	size_t numerator;      // fractional part accumulated so far by WikiIterator_nextRange
	size_t decimal;        // index at which the next range starts
	size_t denominator;    // power_of_two / min_level
	size_t decimal_step;   // whole part of the current range length
	size_t numerator_step; // fractional part of the current range length
}
WikiIterator;
148 |
149 | void WikiIterator_begin(WikiIterator *me)
150 | {
151 | me->numerator = me->decimal = 0;
152 | }
153 |
154 | Range WikiIterator_nextRange(WikiIterator *me)
155 | {
156 | size_t start = me->decimal;
157 |
158 | me->decimal += me->decimal_step;
159 | me->numerator += me->numerator_step;
160 |
161 | if (me->numerator >= me->denominator)
162 | {
163 | me->numerator -= me->denominator;
164 | me->decimal++;
165 | }
166 |
167 | return new_range(start, me->decimal);
168 | }
169 |
170 | size_t WikiIterator_finished(WikiIterator *me)
171 | {
172 | return (me->decimal >= me->size);
173 | }
174 |
175 | size_t WikiIterator_nextLevel(WikiIterator *me)
176 | {
177 | me->decimal_step += me->decimal_step;
178 | me->numerator_step += me->numerator_step;
179 |
180 | if (me->numerator_step >= me->denominator)
181 | {
182 | me->numerator_step -= me->denominator;
183 | me->decimal_step++;
184 | }
185 |
186 | return (me->decimal_step < me->size);
187 | }
188 |
189 | size_t WikiIterator_length(WikiIterator *me)
190 | {
191 | return me->decimal_step;
192 | }
193 |
194 | WikiIterator WikiIterator_new(size_t size2, size_t min_level)
195 | {
196 | WikiIterator me;
197 |
198 | me.size = size2;
199 | me.power_of_two = FloorPowerOfTwo(me.size);
200 | me.denominator = me.power_of_two/min_level;
201 | me.numerator_step = me.size % me.denominator;
202 | me.decimal_step = me.size/me.denominator;
203 |
204 | WikiIterator_begin(&me);
205 |
206 | return me;
207 | }
208 |
209 | //////////////////////////////////////////////////////////
210 | //┌────────────────────────────────────────────────────┐//
211 | //│ █████┐ ██████┐ ██████┐████████┐ │//
212 | //│ ██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │//
213 | //│ └█████┌┘ ██████┌┘ ██│ ██│ │//
214 | //│ ██┌──██┐ ██┌──██┐ ██│ ██│ │//
215 | //│ └█████┌┘ ██████┌┘██████┐ ██│ │//
216 | //│ └────┘ └─────┘ └─────┘ └─┘ │//
217 | //└────────────────────────────────────────────────────┘//
218 | //////////////////////////////////////////////////////////
219 |
220 | #undef VAR
221 | #undef FUNC
222 | #undef STRUCT
223 |
224 | #define VAR char
225 | #define FUNC(NAME) NAME##8
226 | #define STRUCT(NAME) struct NAME##8
227 |
228 | #include "octosort.c"
229 |
230 | //////////////////////////////////////////////////////////
231 | //┌────────────────────────────────────────────────────┐//
232 | //│ ▄██┐ █████┐ ██████┐ ██████┐████████┐│//
233 | //│ ████│ ██┌───┘ ██┌──██┐└─██┌─┘└──██┌──┘│//
234 | //│ └─██│ ██████┐ ██████┌┘ ██│ ██│ │//
235 | //│ ██│ ██┌──██┐ ██┌──██┐ ██│ ██│ │//
236 | //│ ██████┐└█████┌┘ ██████┌┘██████┐ ██│ │//
237 | //│ └─────┘ └────┘ └─────┘ └─────┘ └─┘ │//
238 | //└────────────────────────────────────────────────────┘//
239 | //////////////////////////////////////////////////////////
240 |
241 | #undef VAR
242 | #undef FUNC
243 | #undef STRUCT
244 |
245 | #define VAR short
246 | #define FUNC(NAME) NAME##16
247 | #define STRUCT(NAME) struct NAME##16
248 |
249 | #include "octosort.c"
250 |
251 | //////////////////////////////////////////////////////////
252 | // ┌───────────────────────────────────────────────────┐//
253 | // │ ██████┐ ██████┐ ██████┐ ██████┐████████┐ │//
254 | // │ └────██┐└────██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │//
255 | // │ █████┌┘ █████┌┘ ██████┌┘ ██│ ██│ │//
256 | // │ └───██┐██┌───┘ ██┌──██┐ ██│ ██│ │//
257 | // │ ██████┌┘███████┐ ██████┌┘██████┐ ██│ │//
258 | // │ └─────┘ └──────┘ └─────┘ └─────┘ └─┘ │//
259 | // └───────────────────────────────────────────────────┘//
260 | //////////////////////////////////////////////////////////
261 |
262 | #undef VAR
263 | #undef FUNC
264 | #undef STRUCT
265 |
266 | #define VAR int
267 | #define FUNC(NAME) NAME##32
268 | #define STRUCT(NAME) struct NAME##32
269 |
270 | #include "octosort.c"
271 |
272 | //////////////////////////////////////////////////////////
273 | // ┌───────────────────────────────────────────────────┐//
274 | // │ █████┐ ██┐ ██┐ ██████┐ ██████┐████████┐ │//
275 | // │ ██┌───┘ ██│ ██│ ██┌──██┐└─██┌─┘└──██┌──┘ │//
276 | // │ ██████┐ ███████│ ██████┌┘ ██│ ██│ │//
277 | // │ ██┌──██┐└────██│ ██┌──██┐ ██│ ██│ │//
278 | // │ └█████┌┘ ██│ ██████┌┘██████┐ ██│ │//
279 | // │ └────┘ └─┘ └─────┘ └─────┘ └─┘ │//
280 | // └───────────────────────────────────────────────────┘//
281 | //////////////////////////////////////////////////////////
282 |
283 | #undef VAR
284 | #undef FUNC
285 | #undef STRUCT
286 |
287 | #define VAR long long
288 | #define FUNC(NAME) NAME##64
289 | #define STRUCT(NAME) struct NAME##64
290 |
291 | #include "octosort.c"
292 |
293 | //////////////////////////////////////////////////////////
294 | //┌────────────────────────────────────────────────────┐//
295 | //│ ▄██┐ ██████┐ █████┐ ██████┐ ██████┐████████┐ │//
296 | //│ ████│ └────██┐██┌──██┐ ██┌──██┐└─██┌─┘└──██┌──┘ │//
297 | //│ └─██│ █████┌┘└█████┌┘ ██████┌┘ ██│ ██│ │//
298 | //│ ██│ ██┌───┘ ██┌──██┐ ██┌──██┐ ██│ ██│ │//
299 | //│ ██████┐███████┐└█████┌┘ ██████┌┘██████┐ ██│ │//
300 | //│ └─────┘└──────┘ └────┘ └─────┘ └─────┘ └─┘ │//
301 | //└────────────────────────────────────────────────────┘//
302 | //////////////////////////////////////////////////////////
303 |
304 | #undef VAR
305 | #undef FUNC
306 | #undef STRUCT
307 |
308 | #define VAR long double
309 | #define FUNC(NAME) NAME##128
310 | #define STRUCT(NAME) struct NAME##128
311 |
312 | #include "octosort.c"
313 |
314 |
315 | ////////////////////////////////////////////////////////////////////////////////
316 | //┌──────────────────────────────────────────────────────────────────────────┐//
317 | //│ ██████┐ ██████┐████████┐ ██████┐ ███████┐ ██████┐ ██████┐ ████████┐ │//
318 | //│ ██┌───██┐██┌────┘└──██┌──┘██┌───██┐██┌────┘██┌───██┐██┌──██┐└──██┌──┘ │//
319 | //│ ██│ ██│██│ ██│ ██│ ██│███████┐██│ ██│██████┌┘ ██│ │//
320 | //│ ██│ ██│██│ ██│ ██│ ██│└────██│██│ ██│██┌──██┐ ██│ │//
321 | //│ └██████┌┘└██████┐ ██│ └██████┌┘███████│└██████┌┘██│ ██│ ██│ │//
322 | //│ └─────┘ └─────┘ └─┘ └─────┘ └──────┘ └─────┘ └─┘ └─┘ └─┘ │//
323 | //└──────────────────────────────────────────────────────────────────────────┘//
324 | ////////////////////////////////////////////////////////////////////////////////
325 |
326 | void octosort(void *array, size_t nmemb, size_t size, CMPFUNC *cmp)
327 | {
328 | if (nmemb < 2)
329 | {
330 | return;
331 | }
332 |
333 | switch (size)
334 | {
335 | case sizeof(char):
336 | return octosort8(array, nmemb, NULL, 0, cmp);
337 |
338 | case sizeof(short):
339 | return octosort16(array, nmemb, NULL, 0, cmp);
340 |
341 | case sizeof(int):
342 | return octosort32(array, nmemb, NULL, 0, cmp);
343 |
344 | case sizeof(long long):
345 | return octosort64(array, nmemb, NULL, 0, cmp);
346 |
347 | case sizeof(long double):
348 | return octosort128(array, nmemb, NULL, 0, cmp);
349 |
350 | default:
351 | return assert(size == sizeof(char) || size == sizeof(short) || size == sizeof(int) || size == sizeof(long long) || size == sizeof(long double));
352 | }
353 | }
354 |
355 | #undef DYNAMIC_CACHE
356 | #undef PULL
357 | #undef SWAP
358 |
359 | #undef VAR
360 | #undef FUNC
361 | #undef STRUCT
362 |
363 | #endif
364 |
--------------------------------------------------------------------------------