├── .gitignore
├── Makefile
├── README.md
├── UNLICENSE
├── genetic.c
├── hillclimb.c
├── hp16.c
├── prospector.c
└── tests
    ├── degski64.c
    ├── h2hash32.c
    ├── hash32shift.c
    ├── murmurhash3_finalizer32.c
    └── splitmix64.c


/.gitignore:
--------------------------------------------------------------------------------
1 | prospector
2 | *.so
3 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Toolchain settings; override on the command line, e.g. `make CC=clang`.
 2 | CC      = cc
 3 | CFLAGS  = -std=c99 -Wall -Wextra -march=native -O3 -ggdb3 -fopenmp
 4 | LDFLAGS =
 5 | LDLIBS  = -lm -ldl
 6 | 
 7 | # Default goal: build all four tools.
 8 | compile: prospector genetic hillclimb hp16
 9 | 
10 | prospector: prospector.c
11 | 	$(CC) $(LDFLAGS) $(CFLAGS) -o $@ prospector.c $(LDLIBS)
12 | 
13 | genetic: genetic.c
14 | 	$(CC) $(LDFLAGS) $(CFLAGS) -o $@ genetic.c $(LDLIBS)
15 | 
16 | hillclimb: hillclimb.c
17 | 	$(CC) $(LDFLAGS) $(CFLAGS) -o $@ hillclimb.c $(LDLIBS)
18 | 
19 | hp16: hp16.c
20 | 	$(CC) $(LDFLAGS) $(CFLAGS) -o $@ hp16.c $(LDLIBS)
21 | 
22 | # Prerequisites of the test plugins; the .c.so suffix rule below builds them.
23 | tests/degski64.so: tests/degski64.c
24 | tests/h2hash32.so: tests/h2hash32.c
25 | tests/hash32shift.so: tests/hash32shift.c
26 | tests/murmurhash3_finalizer32.so: tests/murmurhash3_finalizer32.c
27 | tests/splitmix64.so: tests/splitmix64.c
28 | 
29 | hashes = \
30 |     tests/degski64.so \
31 |     tests/h2hash32.so \
32 |     tests/hash32shift.so \
33 |     tests/murmurhash3_finalizer32.so \
34 |     tests/splitmix64.so
35 | 
36 | # Run the prospector's bias evaluation (-E) on every test plugin.
37 | check: prospector $(hashes)
38 | 	./prospector -E -8 -l tests/degski64.so
39 | 	./prospector -E -4 -l tests/h2hash32.so
40 | 	./prospector -E -4 -l tests/hash32shift.so
41 | 	./prospector -E -4 -l tests/murmurhash3_finalizer32.so
42 | 	./prospector -E -8 -l tests/splitmix64.so
43 | 
44 | clean:
45 | 	rm -f prospector genetic hillclimb hp16 $(hashes)
46 | 
47 | # These goals name actions, not files; a stray file called "check" or
48 | # "clean" must never make them look up to date.
49 | .PHONY: compile check clean
50 | 
51 | .SUFFIXES: .so .c
52 | .c.so:
53 | 	$(CC) -shared $(LDFLAGS) -fPIC $(CFLAGS) -o $@ $<
54 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Hash Function Prospector
  2 | 
  3 | This is a little tool for automated [integer hash function][wang]
  4 | discovery. It generates billions of [integer hash functions][jenkins] at
  5 | random from a selection of [nine reversible operations][rev] ([also][]).
  6 | The generated functions are JIT compiled and their avalanche behavior is
  7 | evaluated. The current best function is printed out in C syntax.
  8 | 
  9 | The *avalanche score* is the number of output bits that remain "fixed"
 10 | on average when a single input bit is flipped. Lower scores are better.
 11 | Ideally the score is 0 — i.e. every output bit flips with a 50% chance
 12 | when a single input bit is flipped.
 13 | 
 14 | Prospector can generate both 32-bit and 64-bit integer hash functions.
 15 | Check the usage (`-h`) for the full selection of options. Due to the JIT
 16 | compiler, only x86-64 is supported, though the functions it discovers
 17 | can, of course, be used anywhere.
 18 | 
 19 | Article: [Prospecting for Hash Functions][article]
 20 | 
 21 | ## Discovered Hash Functions
 22 | 
 23 | There are two useful classes of hash functions discovered by the
 24 | prospector and the other helper utilities here. Both use an
 25 | *xorshift-multiply-xorshift* construction, but with a different number
 26 | of rounds.
 27 | 
 28 | ### Two round functions
 29 | 
 30 | **Update**: [TheIronBorn has used combinatorial optimization][best] to
 31 | discover the best known parameters for this construction:
 32 | 
 33 |     [16 21f0aaad 15 d35a2d97 15] = 0.10760229515479501
 34 | 
 35 | * * *
 36 | 
 37 | This 32-bit, two-round permutation has a particularly low bias and even
 38 | beats the venerable MurmurHash3 32-bit finalizer by a tiny margin. The
 39 | hash function construction was discovered by the prospector, then the
 40 | parameters were tuned using hill climbing and a genetic algorithm.
 41 | 
 42 | ```c
 43 | // exact bias: 0.17353355999581582
 44 | uint32_t
 45 | lowbias32(uint32_t x)
 46 | {
 47 |     x ^= x >> 16;
 48 |     x *= 0x7feb352d;
 49 |     x ^= x >> 15;
 50 |     x *= 0x846ca68b;
 51 |     x ^= x >> 16;
 52 |     return x;
 53 | }
 54 | 
 55 | // inverse
 56 | uint32_t
 57 | lowbias32_r(uint32_t x)
 58 | {
 59 |     x ^= x >> 16;
 60 |     x *= 0x43021123;
 61 |     x ^= x >> 15 ^ x >> 30;
 62 |     x *= 0x1d69e2a5;
 63 |     x ^= x >> 16;
 64 |     return x;
 65 | }
 66 | ```
 67 | 
 68 | More 2-round constants with low bias, some even better than `lowbias32`:
 69 | 
 70 |     [15 d168aaad 15 af723597 15] = 0.15983776156606694
 71 |     [17 9e485565 16 ef1d6b47 16] = 0.16143129787074881
 72 |     [16 604baa5d 15 43d6ce97 15] = 0.16491052655811722
 73 |     [16 a812d533 15 b278e4ad 17] = 0.16540778981744320
 74 |     [16 9c8f2d35 15 5d1346b5 17] = 0.16835348823718840
 75 |     [16 88c0a94b 14 9d06da59 17] = 0.16898511658356749
 76 |     [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322
 77 |     [16 b237694b 15 eb5b4593 15] = 0.17274184020173433
 78 |     [16 7feb352d 15 846ca68b 16] = 0.17353355999581582
 79 |     [16 4bdc9aa5 15 2729b469 16] = 0.17355424787865850
 80 |     [16 dc63b4d3 15 2c32b9a9 15] = 0.17368589564800074
 81 |     [16 e02bd533 15 0364c8ad 17] = 0.17447893149410759
 82 |     [16 603a32a7 15 5a522677 15] = 0.17514135907753242
 83 |     [16 ac10d4eb 15 9d51b169 16] = 0.17676510450127819
 84 |     [15 f15f5959 14 7db29359 16] = 0.18103205436627479
 85 |     [16 83747333 14 aa256573 16] = 0.18105722344231542
 86 |     [16 be8b6ca7 14 6dd624b5 16] = 0.18223928664971270
 87 |     [17 7186cd35 15 fe6bba73 15] = 0.18312741727971640
 88 |     [16 93f2552b 15 959b4a4d 15] = 0.18360629205797341
 89 |     [16 df892d4b 15 3c2da6b3 16] = 0.18368195486921446
 90 |     [15 49c34cd3 13 e7418ca7 16] = 0.18400092964673831
 91 |     [15 4811acab 15 5591acd7 16] = 0.18522661033580071
 92 |     [16 dc85aaa7 15 6658a5cb 15] = 0.18577280285788791
 93 |     [16 1ec9b4db 15 3224d38d 17] = 0.18631684392389897
 94 |     [16 8ee0d535 15 5dc6b5af 15] = 0.18664478683752250
 95 |     [16 462daaad 15 0a36c95d 16] = 0.18674876992866513
 96 |     [16 17cdd657 15 a426cb25 15] = 0.18995262675473334
 97 |     [16 ab39aacb 15 a1b5d19b 15] = 0.19045785238099658
 98 |     [17 cd8512ad 15 b95c5a73 15] = 0.19050717016846502
 99 |     [16 aecc96b5 15 f64dcd47 15] = 0.19077817816874504
100 |     [15 2548acd5 15 0b39d397 16] = 0.19121161052714156
101 |     [15 7f19c559 15 b356358d 16] = 0.19198007174447981
102 |     [16 4ffcab35 15 e98db28b 16] = 0.19423994132339928
103 |     [15 1216ccb5 15 3abcdca9 15] = 0.19426091938816648
104 |     [16 97219aad 15 ab46b735 15] = 0.19536391240344408
105 |     [16 c845a997 15 f214db9b 17] = 0.19553179377831409
106 |     [15 3a7ba96b 13 5e919299 16] = 0.19563436462680908
107 |     [16 c3d9a965 16 362e4b47 15] = 0.19575424692659107
108 |     [17 179cd515 15 4c495d47 15] = 0.19608530402798924
109 |     [16 5dce3553 15 a655d8e9 15] = 0.19621753012889542
110 |     [17 88a5ad35 16 96338b27 16] = 0.19653922266398804
111 |     [17 0364d657 15 ac2a34c5 15] = 0.19665754791333651
112 |     [16 3c9aa9ab 16 051369d7 16] = 0.19687211117412906
113 |     [17 0ee6d967 15 9c8a4a33 16] = 0.19722490309575344
114 |     [16 b921a6cb 14 30b5a6d1 16] = 0.19745192295417058
115 |     [18 a136aaad 16 9f6d62d7 17] = 0.19768193144773874
116 |     [16 0ae84d3b 15 3b9d4e5b 17] = 0.19776257374279985
117 |     [17 24f4d2cd 15 1ba3b969 16] = 0.19789489706453650
118 |     [16 418fb5b3 15 8cf3539b 16] = 0.19817117175199098
119 |     [16 f0ae2ad7 15 8965d939 16] = 0.19881758420284917
120 |     [17 9bde596b 16 1c9e9647 16] = 0.19882570872036193
121 |     [16 bd10754b 14 35a29b0d 16] = 0.19885203058591913
122 |     [17 78d31553 15 c547ac65 15] = 0.19918133404528665
123 |     [15 81aab34d 15 18e746a3 15] = 0.19938572052445763
124 |     [16 054335ab 15 146da68b 16] = 0.19943843016872725
125 |     [17 a1c76a55 16 5ca46b97 16] = 0.19959562213253398
126 |     [15 c62f4d53 14 62b8a46b 16] = 0.19973996656987172
127 |     [16 6872cd2d 15 f4a0d975 17] = 0.19992260539370590
128 | 
129 | This next function was discovered using only the prospector. It has a bit more
130 | bias than the previous function.
131 | 
132 | ```c
133 | // exact bias: 0.34968228323361017
134 | uint32_t
135 | prospector32(uint32_t x)
136 | {
137 |     x ^= x >> 15;
138 |     x *= 0x2c1b3c6d;
139 |     x ^= x >> 12;
140 |     x *= 0x297a2d39;
141 |     x ^= x >> 15;
142 |     return x;
143 | }
144 | ```
145 | 
146 | To use the prospector to search randomly for alternative multiplication constants,
147 | run it like so:
148 | 
149 |     $ ./prospector -p xorr:15,mul,xorr:12,mul,xorr:15
150 | 
151 | ### Three round functions
152 | 
153 | Another round of multiply-xorshift in this construction allows functions
154 | with carefully chosen parameters to reach the theoretical bias limit
155 | (bias = ~0.021). For example, this hash function is indistinguishable
156 | from a perfect PRF (e.g. a random permutation of all 32-bit integers):
157 | 
158 | ```c
159 | // exact bias: 0.020888578919738908
160 | uint32_t
161 | triple32(uint32_t x)
162 | {
163 |     x ^= x >> 17;
164 |     x *= 0xed5ad4bb;
165 |     x ^= x >> 11;
166 |     x *= 0xac4c1b51;
167 |     x ^= x >> 15;
168 |     x *= 0x31848bab;
169 |     x ^= x >> 14;
170 |     return x;
171 | }
172 | 
173 | // inverse
174 | uint32_t
175 | triple32_r(uint32_t x)
176 | {
177 |     x ^= x >> 14 ^ x >> 28;
178 |     x *= 0x32b21703;
179 |     x ^= x >> 15 ^ x >> 30;
180 |     x *= 0x469e0db1;
181 |     x ^= x >> 11 ^ x >> 22;
182 |     x *= 0x79a85073;
183 |     x ^= x >> 17;
184 |     return x;
185 | }
186 | ```
187 | 
188 | More 3-round constants with low bias:
189 | 
190 |     [17 ed5ad4bb 11 ac4c1b51 15 31848bab 14] = 0.020888578919738908
191 |     [16 aeccedab 14 ac613e37 16 19c89935 17] = 0.021246568167078764
192 |     [16 236f7153 12 33cd8663 15 3e06b66b 16] = 0.021280991798512679
193 |     [18 4260bb47 13 27e8e1ed 15 9d48a33b 15] = 0.021576730651802156
194 |     [17 3f6cde45 12 51d608ef 16 6e93639d 17] = 0.021772288363808408
195 |     [15 5dfa224b 14 4bee7e4b 17 930ee371 15] = 0.02184521628884813
196 |     [17 3964f363 14 9ac3751d 16 4e8772cb 17] = 0.021883292578109576
197 |     [16 66046c65 14 d3f0865b 16 f9999193 16] = 0.0219446068365007
198 |     [16 b1a89b33 14 09136aaf 16 5f2a44a7 15] = 0.021998624107282542
199 |     [16 24767aad 12 daa18229 16 e9e53beb 16] = 0.022043911220395354
200 |     [15 42f91d8d 14 61355a85 15 dcf2a949 14] = 0.022052539152635078
201 |     [15 4df8395b 15 466b428b 16 b4b2868b 16] = 0.022140187420461286
202 |     [16 2bbed51b 14 cd09896b 16 38d4c587 15] = 0.022159936298777144
203 |     [16 0ab694cd 14 4c139e47 16 11a42c3b 16] = 0.02220928191220355
204 |     [17 7f1e072b 12 8750a507 16 ecbb5b5f 16] = 0.022283743052847804
205 |     [16 f1be7bad 14 73a54099 15 3b85b963 15] = 0.022316544125749647
206 |     [16 66e756d5 14 b5f5a9cd 16 84e56b11 16] = 0.022372957847491555
207 |     [15 233354bb 15 ce1247bd 16 855089bb 17] = 0.022406591070966285
208 |     [16 eb6805ab 15 d2c7b7a7 16 7645a32b 16] = 0.022427060650927547
209 |     [16 8288ab57 14 0d1bfe57 16 131631e5 16] = 0.022431656871313443
210 |     [16 45109e55 14 3b94759d 16 adf31ea5 17] = 0.022436433678417977
211 |     [15 26cd1933 14 e3da1d59 16 5a17445d 16] = 0.022460520416491526
212 |     [16 7001e6eb 14 bb8e7313 16 3aa8c523 15] = 0.022491767264054854
213 |     [16 49ed0a13 14 83588f29 15 658f258d 15] = 0.022500668856510898
214 |     [16 6cdb9705 14 4d58d2ed 14 c8642b37 16] = 0.022504626537729222
215 |     [16 a986846b 14 bdd5372d 15 ad44de6b 17] = 0.022528238323120016
216 |     [16 c9575725 15 9448f4c5 16 3b7a5443 16] = 0.022586511310042686
217 |     [15 fc54c453 13 08213789 15 669f96eb 16] = 0.022591114646032095
218 |     [16 d47ef17b 14 642fa58f 16 a8b65b9b 16] = 0.022600633971701509
219 |     [15 00bfaa73 14 8799c69b 16 731985b1 16] = 0.022645866629596379
220 |     [16 953a55e9 15 8523822b 17 56e7aa63 15] = 0.022667180032713324
221 |     [16 a3d7345b 15 7f41c9c7 16 308bd62d 17] = 0.022688845770122031
222 |     [16 195565c7 14 16064d6f 16 0f9ec575 15] = 0.022697810688752193
223 |     [16 13566dbb 14 59369a03 15 990f9d1b 16] = 0.022712430070797596
224 |     [16 8430cc4b 15 a7831cbd 15 c6ccbd33 15] = 0.022734765033419774
225 |     [16 699f272b 14 09c01023 16 39bd48c3 15] = 0.022854175321846512
226 |     [15 336536c3 13 4f0e38b1 16 15d229f7 16] = 0.022884125170795171
227 |     [16 221f686d 12 d8948a07 16 ed8a8345 16] = 0.022902500408830236
228 |     [16 d7ca8cbb 13 eb4e259f 15 34ab1143 16] = 0.022905955538176669
229 |     [16 7cb04f65 14 9b96da73 16 83625687 15] = 0.022906573700088178
230 |     [15 5156196b 14 940d8869 15 0086f473 17] = 0.022984943828687553
231 | 
232 | Prepending an increment to `triple32` breaks the `hash(0) = 0` issue while
233 | also lowering the bias a tiny bit further:
234 | 
235 | ```c
236 | // exact bias: 0.020829410544597495
237 | uint32_t
238 | triple32inc(uint32_t x)
239 | {
240 |     x++;
241 |     x ^= x >> 17;
242 |     x *= 0xed5ad4bb;
243 |     x ^= x >> 11;
244 |     x *= 0xac4c1b51;
245 |     x ^= x >> 15;
246 |     x *= 0x31848bab;
247 |     x ^= x >> 14;
248 |     return x;
249 | }
250 | 
251 | // inverse
252 | uint32_t
253 | triple32inc_r(uint32_t x)
254 | {
255 |     x ^= x >> 14 ^ x >> 28;
256 |     x *= 0x32b21703;
257 |     x ^= x >> 15 ^ x >> 30;
258 |     x *= 0x469e0db1;
259 |     x ^= x >> 11 ^ x >> 22;
260 |     x *= 0x79a85073;
261 |     x ^= x >> 17;
262 |     x--;
263 |     return x;
264 | }
265 | ```
266 | 
267 | ## Measuring exact bias
268 | 
269 | The `-E` mode evaluates the bias of a given hash function (`-p` or `-l`). By
270 | default the prospector uses an estimate to quickly evaluate a function's bias,
271 | but it's non-deterministic and there's a lot of noise in the result. To
272 | exhaustively measure the exact bias, use the `-e` option.
273 | 
274 | The function to be checked can be defined using `-p` and a pattern or
275 | `-l` and a shared library containing a function named `hash()`. For
276 | example, to measure the exact bias of the best hash function above:
277 | 
278 |     $ ./prospector -Eep xorr:16,mul:e2d0d4cb,xorr:15,mul:3c6ad939,xorr:15
279 | 
280 | Or drop the function in a C file named hash.c, and name the function
281 | `hash()`. This lets you test hash functions that can't be represented
282 | using the prospector's limited notion of hash functions.
283 | 
284 |     $ cc -O3 -shared -fPIC -o hash.so hash.c
285 |     $ ./prospector -Eel ./hash.so
286 | 
287 | By default it treats its input as a 32-bit hash function. Use the `-8`
288 | switch to test (by estimation) 64-bit functions. There is no exact,
289 | exhaustive test for 64-bit hash functions since that would take far too
290 | long.
291 | 
292 | ## Reversible operation selection
293 | 
294 | ```c
295 | x  = ~x;
296 | x ^= constant;
297 | x *= constant | 1; // i.e. only odd constants
298 | x += constant;
299 | x ^= x >> constant;
300 | x ^= x << constant;
301 | x += x << constant;
302 | x -= x << constant;
303 | x <<<= constant; // left rotation
304 | x = bswap(x); // swap high and low bytes.
305 | ```
306 | 
307 | Technically `x = ~x` is covered by `x ^= constant`. However, `~x` is
308 | uniquely special and particularly useful. The generator is very unlikely
309 | to generate the one correct constant for the XOR operator that achieves
310 | the same effect.
311 | 
312 | ## 16-bit hashes
313 | 
314 | Because the constraints are different for 16-bit hashes there's a separate
315 | tool for generating these hashes: `hp16`. Unlike the 32-bit / 64-bit
316 | prospector, this implementation is fully portable and will run on just
317 | about any system. It's also capable of generating and evaluating 128KiB
318 | s-boxes.
319 | 
320 | Since 16-bit hashes are more likely to be needed on machines that, say,
321 | lack fast multiplication instructions, certain operations can be omitted
322 | during exploration (`-m`, `-r`).
323 | 
324 | Some interesting results so far:
325 | 
326 | ```c
327 | // 2-round xorshift-multiply (-Xn2)
328 | // bias = 0.0085905051336723701
329 | uint16_t hash16_xm2(uint16_t x)
330 | {
331 |     x ^= x >> 8; x *= 0x88b5U;
332 |     x ^= x >> 7; x *= 0xdb2dU;
333 |     x ^= x >> 9;
334 |     return x;
335 | }
336 | 
337 | // 3-round xorshift-multiply (-Xn3)
338 | // bias = 0.0045976709018820602
339 | uint16_t hash16_xm3(uint16_t x)
340 | {
341 |     x ^= x >>  7; x *= 0x2993U;
342 |     x ^= x >>  5; x *= 0xe877U;
343 |     x ^= x >>  9; x *= 0x0235U;
344 |     x ^= x >> 10;
345 |     return x;
346 | }
347 | 
348 | // No multiplication (-Imn6)
349 | // bias = 0.023840118344741465
350 | uint16_t hash16_s6(uint16_t x)
351 | {
352 |     x += x << 7; x ^= x >> 8;
353 |     x += x << 3; x ^= x >> 2;
354 |     x += x << 4; x ^= x >> 8;
355 |     return x;
356 | }
357 | 
358 | // Which is identical to this xorshift-multiply
359 | uint16_t hash16_s6(uint16_t x)
360 | {
361 |     x *= 0x0081U; x ^= x >> 8;
362 |     x *= 0x0009U; x ^= x >> 2;
363 |     x *= 0x0011U; x ^= x >> 8;
364 |     return x;
365 | }
366 | ```
367 | 
368 | A good 3-round xorshift hash (a short search via `hp16 -Xn3`) is a close
369 | approximation of a good s-box (i.e. `hp16 -S`).
370 | 
371 | Be mindful of C integer promotion rules when doing 16-bit operations. For
372 | instance, on 32-bit implementations unsigned 16-bit operands will be
373 | promoted to signed 32-bit integers, leading to incorrect results in
374 | certain cases. The C programs printed by this program are careful to
375 | promote 16-bit operations to "unsigned int" where needed.
376 | 
377 | 
378 | [also]: https://marc-b-reynolds.github.io/math/2017/10/13/IntegerBijections.html
379 | [article]: https://nullprogram.com/blog/2018/07/31/
380 | [best]: https://github.com/skeeto/hash-prospector/issues/19
381 | [jenkins]: http://burtleburtle.net/bob/hash/integer.html
382 | [rev]: http://papa.bretmulvey.com/post/124027987928/hash-functions
383 | [wang]: https://gist.github.com/badboy/6267743
384 | 


--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <http://unlicense.org/>
25 | 


--------------------------------------------------------------------------------
/genetic.c:
--------------------------------------------------------------------------------
  1 | /* Genetic algorithm to explore xorshift-multiply-xorshift hashes.
  2 |  */
  3 | #include <math.h>
  4 | #include <time.h>
  5 | #include <stdio.h>
  6 | #include <stdint.h>
  7 | #include <stdlib.h>
  8 | 
  9 | #define POOL      40
 10 | #define THRESHOLD 2.0  // Use exact when estimate is below this
 11 | #define DONTCARE  0.3  // Only print tuples with bias below this threshold
 12 | #define QUALITY   18   // 2^N iterations of estimate samples
 13 | #define RESETMINS 90   // Reset pool after this many minutes of no progress
 14 | /* xoshiro256**: return 64 pseudo-random bits and advance the state s[0..3]. */
 15 | static uint64_t
 16 | rand64(uint64_t s[4])
 17 | {
 18 |     const uint64_t mixed = s[1] * 5;
 19 |     const uint64_t out   = ((mixed << 7) | (mixed >> 57)) * 9;  // rotate left 7
 20 |     const uint64_t carry = s[1] << 17;
 21 |     s[2] ^= s[0];
 22 |     s[3] ^= s[1];
 23 |     s[1] ^= s[2];
 24 |     s[0] ^= s[3];
 25 |     s[2] ^= carry;
 26 |     s[3] = (s[3] << 45) | (s[3] >> 19);  // rotate left 45
 27 |     return out;
 28 | }
 29 | 
 30 | 
 31 | #define FLAG_SCORED  (1u << 0)  // score has been filled in by an estimate
 32 | #define FLAG_EXACT   (1u << 1)  // score was replaced by the exhaustive result
 33 | #define FLAG_PRINTED (1u << 2)  // gene has already been reported on stdout
 34 | /* One candidate hash: the shifts and multipliers consumed by hash(). */
 35 | struct gene {
 36 |     double score;    // measured bias; cmp() relies on this being the first member
 37 |     short s[3];      // right-shift amounts of the three xorshift steps
 38 |     uint32_t c[2];   // multipliers of the two multiply steps
 39 |     unsigned flags;  // combination of the FLAG_* bits above
 40 | };
 41 | /* Hash x with the xorshift-multiply-xorshift parameters stored in g. */
 42 | static uint32_t
 43 | hash(const struct gene *g, uint32_t x)
 44 | {
 45 |     const short *shift = g->s;
 46 |     const uint32_t *mul = g->c;
 47 |     x = (x ^ (x >> shift[0])) * mul[0];
 48 |     x = (x ^ (x >> shift[1])) * mul[1];
 49 |     x ^= x >> shift[2];
 50 |     return x;
 51 | }
 52 | /* Estimate the avalanche bias of g from 2^QUALITY random inputs drawn from rng. */
 53 | static double
 54 | estimate_bias32(const struct gene *g, uint64_t rng[4])
 55 | {
 56 |     long n = 1L << QUALITY;
 57 |     long bins[32][32] = {{0}};  // bins[j][k]: flips of output bit k caused by input bit j
 58 |     for (long i = 0; i < n; i++) {
 59 |         uint32_t x = rand64(rng);  // keeps the low 32 bits of the draw
 60 |         uint32_t h0 = hash(g, x);
 61 |         for (int j = 0; j < 32; j++) {
 62 |             uint32_t bit = UINT32_C(1) << j;
 63 |             uint32_t h1 = hash(g, x ^ bit);
 64 |             uint32_t set = h0 ^ h1;  // output bits that changed
 65 |             for (int k = 0; k < 32; k++)
 66 |                 bins[j][k] += (set >> k) & 1;
 67 |         }
 68 |     }
 69 |     double mean = 0;
 70 |     for (int j = 0; j < 32; j++) {
 71 |         for (int k = 0; k < 32; k++) {
 72 |             double diff = (bins[j][k] - n / 2) / (n / 2.0);  // 0 when the bit flipped half the time
 73 |             mean += (diff * diff) / (32 * 32);
 74 |         }
 75 |     }
 76 |     return sqrt(mean) * 1000.0;  // root mean square over the 32x32 bins, scaled by 1000
 77 | }
 78 | /* Exhaustive bias of g over all 2^32 inputs, computed in EXACT_SPLIT chunks. */
 79 | #define EXACT_SPLIT 32  // must be power of two
 80 | static double
 81 | exact_bias32(const struct gene *g)
 82 | {
 83 |     long long bins[32][32] = {{0}};
 84 |     static const uint64_t range = (UINT64_C(1) << 32) / EXACT_SPLIT;  // inputs per chunk
 85 |     #pragma omp parallel for
 86 |     for (int i = 0; i < EXACT_SPLIT; i++) {
 87 |         long long b[32][32] = {{0}};  // tally for this chunk only
 88 |         for (uint64_t x = i * range; x < (i + 1) * range; x++) {
 89 |             uint32_t h0 = hash(g, x);
 90 |             for (int j = 0; j < 32; j++) {
 91 |                 uint32_t bit = UINT32_C(1) << j;
 92 |                 uint32_t h1 = hash(g, x ^ bit);
 93 |                 uint32_t set = h0 ^ h1;  // output bits that changed
 94 |                 for (int k = 0; k < 32; k++)
 95 |                     b[j][k] += (set >> k) & 1;
 96 |             }
 97 |         }
 98 |         #pragma omp critical
 99 |         for (int j = 0; j < 32; j++)  // merge the chunk tally into the shared bins
100 |             for (int k = 0; k < 32; k++)
101 |                 bins[j][k] += b[j][k];
102 |     }
103 |     double mean = 0.0;
104 |     for (int j = 0; j < 32; j++) {
105 |         for (int k = 0; k < 32; k++) {
106 |             double diff = (bins[j][k] - 2147483648L) / 2147483648.0;  // 2^31 is half of the 2^32 inputs
107 |             mean += (diff * diff) / (32 * 32);
108 |         }
109 |     }
110 |     return sqrt(mean) * 1000.0;  // same scale as estimate_bias32()
111 | }
112 | /* Fill g with random shifts in 10..19 and odd multipliers; clears its flags. */
113 | static void
114 | gene_gen(struct gene *g, uint64_t rng[4])
115 | {
116 |     uint64_t s = rand64(rng);  // source of the three shifts
117 |     uint64_t c = rand64(rng);  // source of the two multipliers
118 |     g->s[0] = 10 + (s >>  0) % 10;
119 |     g->s[1] = 10 + (s >> 24) % 10;
120 |     g->s[2] = 10 + (s >> 48) % 10;
121 |     g->c[0] = c | 1u;          // low half, forced odd
122 |     g->c[1] = (c >> 32) | 1u;  // high half, forced odd
123 |     g->flags = 0;
124 | }
125 | /* Write g to f as "[s0 c0 s1 c1 s2]": shifts in decimal, multipliers in hex. */
126 | static void
127 | gene_print(const struct gene *g, FILE *f)
128 | {
129 |     fprintf(f, "[%2d %08lx %2d %08lx %2d]",
130 |             g->s[0], (unsigned long)g->c[0],  // cast so the value matches %lx
131 |             g->s[1], (unsigned long)g->c[1], g->s[2]);
132 | }
133 | /* Map r to one of the six nonzero steps -3, -2, -1, +1, +2, +3. */
134 | static int
135 | small(uint64_t r)
136 | {
137 |     int k = (int)(r % 6);          // 0..5
138 |     return k < 3 ? k - 3 : k - 2;  // skip over 0
139 | }
140 | /* Randomly nudge one of the five parameters of g and clear its flags. */
141 | static void
142 | gene_mutate(struct gene *g, uint64_t rng[4])
143 | {
144 |     uint64_t r = rand64(rng);
145 |     int s = r % 5;  // selects which parameter changes
146 |     r >>= 3;
147 |     switch (s) {
148 |         case 0:  // a shift moves by -3..+3, never by 0 (see small())
149 |         case 1:
150 |         case 2:
151 |             g->s[s] += small(r);
152 |             // Keep the shift within 1..31: hash() shifts a uint32_t by
153 |             // it, and a negative or >= 32 shift count is undefined.
154 |             if (g->s[s] < 1)
155 |                 g->s[s] = 1;
156 |             else if (g->s[s] > 31)
157 |                 g->s[s] = 31;
158 |             break;
159 |         case 3:  // a multiplier moves by -32768..+32767
160 |         case 4:
161 |             g->c[s - 3] += (int)(r & 0xffff) - 32768;
162 |             break;
163 |     }
164 |     g->flags = 0;  // parameters changed, so any stored score is stale
165 | }
166 | /* Store in g a child of a and b: a copy of a whose tail is taken from b. */
167 | static void
168 | gene_cross(struct gene *g,
169 |            const struct gene *a,
170 |            const struct gene *b,
171 |            uint64_t rng[4])
172 | {
173 |     uint64_t r = rand64(rng);
174 |     *g = *a;
175 |     switch (r & 3) {  // two random bits pick one of the four crossover points
176 |         case 0: g->c[0] = b->c[0]; /* FALLTHROUGH */
177 |         case 1: g->s[1] = b->s[1]; /* FALLTHROUGH */
178 |         case 2: g->c[1] = b->c[1]; /* FALLTHROUGH */
179 |         case 3: g->s[2] = b->s[2];
180 |     }
181 |     g->flags = 0;  // the child has not been scored or printed yet
182 | }
183 | /* Return 1 when a and b have identical shifts and multipliers, else 0. */
184 | static int
185 | gene_same(const struct gene *a, const struct gene *b)
186 | {
187 |     for (int i = 0; i < 3; i++)
188 |         if (a->s[i] != b->s[i])
189 |             return 0;
190 |     // score and flags take no part in the comparison
191 |     return a->c[0] == b->c[0] && a->c[1] == b->c[1];
192 | }
193 | /* Fill the len bytes at p from /dev/urandom; abort() on any failure. */
194 | static void
195 | rng_init(void *p, size_t len)
196 | {
197 |     FILE *f = fopen("/dev/urandom", "rb");
198 |     if (!f)
199 |         abort();
200 |     if (fread(p, 1, len, f) != len)  // a short read would leave part of p unseeded
201 |         abort();
202 |     fclose(f);
203 | }
204 | /* qsort() comparator: orders struct gene elements by ascending score. */
205 | static int
206 | cmp(const void *pa, const void *pb)
207 | {
208 |     // score is the first member of struct gene, so a pointer to the
209 |     // element can be read as a pointer to that double.
210 |     const double *lhs = pa;
211 |     const double *rhs = pb;
212 |     int above = *lhs > *rhs;
213 |     int below = *lhs < *rhs;
214 |     return above - below;
215 | }
216 | /* Mutate the later gene of every identical pair found in the pool. */
217 | static void
218 | undup(struct gene *pool, uint64_t rng[4])
219 | {
220 |     for (int i = 0; i < POOL; i++)
221 |         for (int j = i + 1; j < POOL; j++)
222 |             if (gene_same(pool + i, pool + j))
223 |                 gene_mutate(pool + j, rng);  // one pass: not re-compared with genes before i
224 | }
225 | /* Run the genetic search forever, printing tuples whose bias is below DONTCARE. */
226 | int
227 | main(void)
228 | {
229 |     int verbose = 1;
230 |     double best = 1000.0;         // best score seen since the last reset
231 |     time_t best_time = time(0);   // when that best score was reached
232 |     uint64_t rng[POOL][4];        // one generator state per pool slot
233 |     struct gene pool[POOL];
234 | 
235 |     rng_init(rng, sizeof(rng));
236 |     for (int i = 0; i < POOL; i++)
237 |         gene_gen(pool + i, rng[0]);
238 | 
239 |     for (;;) {
240 |         #pragma omp parallel for schedule(dynamic)
241 |         for (int i = 0; i < POOL; i++) {
242 |             if (!(pool[i].flags & FLAG_SCORED)) {
243 |                 pool[i].score = estimate_bias32(pool + i, rng[i]);  // slot i uses only rng[i]
244 |                 pool[i].flags |= FLAG_SCORED;
245 |             }
246 |         }
247 |         for (int i = 0; i < POOL; i++) {
248 |             if (!(pool[i].flags & FLAG_EXACT) && pool[i].score < THRESHOLD) {
249 |                 pool[i].score = exact_bias32(pool + i);  // replace the estimate with the exact value
250 |                 pool[i].flags |= FLAG_EXACT;
251 |             }
252 |         }
253 | 
254 |         qsort(pool, POOL, sizeof(*pool), cmp);  // ascending score: best gene first
255 |         if (verbose) {
256 |             for (int i = 0; i < POOL; i++) {
257 |                 if (!(pool[i].flags & FLAG_PRINTED) &&
258 |                       pool[i].score < DONTCARE) {
259 |                     gene_print(pool + i, stdout);
260 |                     printf(" = %.17g\n", pool[i].score);
261 |                     pool[i].flags |= FLAG_PRINTED;  // report each gene only once
262 |                 }
263 |             }
264 |         }
265 | 
266 |         time_t now = time(0);
267 |         if (pool[0].score < best) {
268 |             best = pool[0].score;
269 |             best_time = now;
270 |         } else if (now - best_time > RESETMINS * 60) {  // no progress for RESETMINS minutes
271 |             best = 1000.0;
272 |             best_time = now;
273 |             for (int i = 0; i < POOL; i++)
274 |                 gene_gen(pool + i, rng[0]);
275 |         }
276 | 
277 |         int c = POOL / 4;  // the first quarter is kept; slots from c on are overwritten
278 |         for (int a = 0; c < POOL && a < POOL / 4; a++)
279 |             for (int b = a + 1; c < POOL && b < POOL / 4; b++)
280 |                 gene_cross(pool + c++, pool + a, pool + b, rng[0]);
281 |         undup(pool, rng[0]);
282 |     }
283 | }
284 | 


--------------------------------------------------------------------------------
/hillclimb.c:
--------------------------------------------------------------------------------
  1 | #define _POSIX_C_SOURCE 200112L
  2 | #define WIN32_LEAN_AND_MEAN
  3 | #include <math.h>
  4 | #include <ctype.h>
  5 | #include <stdio.h>
  6 | #include <stdint.h>
  7 | #include <stdlib.h>
  8 | #include <string.h>
  9 | 
 10 | #define HASHN       3    // number of multiplies in hash
 11 | #define SHIFT_RANGE 1    // radius of shift search
 12 | #define CONST_RANGE 2    // radius of const search
 13 | #define QUALITY     18   // 2^N iterations of estimate samples
 14 | #define THRESHOLD   1.95 // regenerate anything lower than this estimate
 15 | /* Self-contained getopt() work-alike for single-letter options. */
 16 | static int optind = 1;   // index of the next argv element to examine
 17 | static int opterr = 1;   // nonzero: print diagnostics to stderr
 18 | static int optopt;       // option character examined most recently
 19 | static char *optarg;     // argument of the last option that takes one
 20 | static int
 21 | getopt(int argc, char * const argv[], const char *optstring)
 22 | {
 23 |     static int optpos = 1;  // position inside a group such as "-abc"
 24 |     const char *arg;
 25 |     (void)argc;
 26 |     /* Reset? */
 27 |     if (optind == 0) {
 28 |         optind = 1;
 29 |         optpos = 1;
 30 |     }
 31 |     arg = argv[optind];
 32 |     if (arg && strcmp(arg, "--") == 0) {
 33 |         optind++;
 34 |         return -1;
 35 |     } else if (!arg || arg[0] != '-' || !isalnum((unsigned char)arg[1])) {  // <ctype.h> needs an unsigned char value
 36 |         return -1;
 37 |     } else {
 38 |         const char *opt = strchr(optstring, arg[optpos]);
 39 |         optopt = arg[optpos];
 40 |         if (!opt) {
 41 |             if (opterr && *optstring != ':')
 42 |                 fprintf(stderr, "%s: illegal option: %c\n", argv[0], optopt);
 43 |             return '?';
 44 |         } else if (opt[1] == ':') {
 45 |             if (arg[optpos + 1]) {  // argument attached: "-ovalue"
 46 |                 optarg = (char *)arg + optpos + 1;
 47 |                 optind++;
 48 |                 optpos = 1;
 49 |                 return optopt;
 50 |             } else if (argv[optind + 1]) {  // argument is the next element: "-o value"
 51 |                 optarg = (char *)argv[optind + 1];
 52 |                 optind += 2;
 53 |                 optpos = 1;
 54 |                 return optopt;
 55 |             } else {
 56 |                 if (opterr && *optstring != ':')
 57 |                     fprintf(stderr,
 58 |                             "%s: option requires an argument: %c\n",
 59 |                             argv[0], optopt);
 60 |                 return *optstring == ':' ? ':' : '?';
 61 |             }
 62 |         } else {
 63 |             if (!arg[++optpos]) {  // end of this option group: move to the next element
 64 |                 optind++;
 65 |                 optpos = 1;
 66 |             }
 67 |             return optopt;
 68 |         }
 69 |     }
 70 | }
 71 | 
 72 | #if defined(__unix__)
 73 | #include <sys/time.h>
 74 | uint64_t
 75 | uepoch(void)
 76 | {
 77 |     struct timeval tv;
 78 |     gettimeofday(&tv, NULL);
 79 |     return 1000000LL * tv.tv_sec + tv.tv_usec;
 80 | }
 81 | #elif defined(_WIN32)
 82 | #include <windows.h>
 83 | uint64_t
 84 | uepoch(void)
 85 | {
 86 |     FILETIME ft;
 87 |     GetSystemTimeAsFileTime(&ft);
 88 |     uint64_t tt = ft.dwHighDateTime;
 89 |     tt <<= 32;
 90 |     tt |= ft.dwLowDateTime;
 91 |     tt /=10;
 92 |     tt -= UINT64_C(11644473600000000);
 93 |     return tt;
 94 | }
 95 | #endif
 96 | /* xoshiro256**: return 64 pseudo-random bits and advance the state s[0..3]. */
 97 | static uint64_t
 98 | rand64(uint64_t s[4])
 99 | {
100 |     const uint64_t mixed = s[1] * 5;
101 |     const uint64_t out   = ((mixed << 7) | (mixed >> 57)) * 9;  // rotate left 7
102 |     const uint64_t carry = s[1] << 17;
103 |     s[2] ^= s[0];
104 |     s[3] ^= s[1];
105 |     s[1] ^= s[2];
106 |     s[0] ^= s[3];
107 |     s[2] ^= carry;
108 |     s[3] = (s[3] << 45) | (s[3] >> 19);  // rotate left 45
109 |     return out;
110 | }
111 | 
/* Parameters of one candidate hash function, as applied by hash():
 * HASHN rounds of "xorshift right, then multiply", then one last xorshift.
 */
struct hash {
    uint32_t c[HASHN];   /* multiplier used in each round */
    char s[HASHN + 1];   /* shift per round; s[HASHN] is the final xorshift */
};
116 | 
117 | static void
118 | hash_gen(struct hash *h, uint64_t rng[4])
119 | {
120 |     for (int i = 0; i < HASHN; i++)
121 |         h->c[i] = (rand64(rng) >> 32) | 1u;
122 |     for (int i = 0; i <= HASHN; i++)
123 |         h->s[i] = 16;
124 | }
125 | 
126 | static int
127 | hash_equal(const struct hash *a, const struct hash *b)
128 | {
129 |     for (int i = 0; i < HASHN; i++) {
130 |         if (a->c[i] != b->c[i])
131 |             return 0;
132 |         if (a->s[i] != b->s[i])
133 |             return 0;
134 |     }
135 |     return a->s[HASHN] == b->s[HASHN];
136 | }
137 | 
138 | static void
139 | hash_print(const struct hash *h)
140 | {
141 |     putchar('[');
142 |     for (int i = 0; i < HASHN; i++)
143 |         printf("%2d %08lx ", h->s[i], (unsigned long)h->c[i]);
144 |     printf("%2d]", h->s[HASHN]);
145 |     fflush(stdout);
146 | }
147 | 
148 | static int
149 | hash_parse(struct hash *h, char *str)
150 | {
151 |     long s;
152 |     unsigned long c;
153 |     char *end, *tok;
154 |     if (*str != '[')
155 |         return 0;
156 |     str++;
157 |     for (int i = 0; i < HASHN; i++) {
158 |         tok = strtok(i ? 0 : str, " ");
159 |         s = strtol(tok, &end, 10);
160 |         if (s < 1 || s > 31 || !(*end == 0 || *end == ' '))
161 |             return 0;
162 |         h->s[i] = s;
163 |         tok = strtok(0, " ");
164 |         c = strtoul(tok, &end, 16);
165 |         if (c > 0xffffffffUL || !(*end == 0 || *end == ' '))
166 |             return 0;
167 |         h->c[i] = c;
168 |     }
169 |     tok = strtok(0, "]");
170 |     s = strtol(tok, &end, 10);
171 |     if (s < 1 || s > 31 || *end)
172 |         return 0;
173 |     h->s[HASHN] = s;
174 |     return 1;
175 | }
176 | 
177 | static uint32_t
178 | hash(const struct hash *h, uint32_t x)
179 | {
180 |     for (int i = 0; i < HASHN; i++) {
181 |         x ^= x >> h->s[i];
182 |         x *= h->c[i];
183 |     }
184 |     x ^= x >> h->s[HASHN];
185 |     return x;
186 | }
187 | 
188 | static double
189 | estimate_bias32(const struct hash *f, uint64_t rng[4])
190 | {
191 |     long n = 1L << QUALITY;
192 |     long bins[32][32] = {{0}};
193 |     for (long i = 0; i < n; i++) {
194 |         uint32_t x = rand64(rng);
195 |         uint32_t h0 = hash(f, x);
196 |         for (int j = 0; j < 32; j++) {
197 |             uint32_t bit = UINT32_C(1) << j;
198 |             uint32_t h1 = hash(f, x ^ bit);
199 |             uint32_t set = h0 ^ h1;
200 |             for (int k = 0; k < 32; k++)
201 |                 bins[j][k] += (set >> k) & 1;
202 |         }
203 |     }
204 |     double mean = 0;
205 |     for (int j = 0; j < 32; j++) {
206 |         for (int k = 0; k < 32; k++) {
207 |             double diff = (bins[j][k] - n / 2) / (n / 2.0);
208 |             mean += (diff * diff) / (32 * 32);
209 |         }
210 |     }
211 |     return sqrt(mean) * 1000.0;
212 | }
213 | 
#define EXACT_SPLIT 32  // must be power of two
/* Exact avalanche bias of f over all 2^32 inputs. The input space is cut
 * into EXACT_SPLIT equal slices; each slice tallies into a private table
 * that is merged into the shared one inside a critical section. Returns the
 * root mean square deviation of the flip counts from 2^31, scaled by 1000.
 */
static double
exact_bias32(const struct hash *f)
{
    int slice; // declared outside the loop to work around Visual Studio issue
    long long total[32][32] = {{0}};
    static const uint64_t span = (UINT64_C(1) << 32) / EXACT_SPLIT;
    #pragma omp parallel for
    for (slice = 0; slice < EXACT_SPLIT; slice++) {
        long long local[32][32] = {{0}};
        uint64_t first = slice * span;
        uint64_t stop = (slice + 1) * span;
        for (uint64_t x = first; x < stop; x++) {
            uint32_t base = hash(f, x);
            for (int in = 0; in < 32; in++) {
                uint32_t delta = base ^ hash(f, x ^ (UINT32_C(1) << in));
                for (int out = 0; out < 32; out++)
                    local[in][out] += (delta >> out) & 1;
            }
        }
        #pragma omp critical
        for (int in = 0; in < 32; in++)
            for (int out = 0; out < 32; out++)
                total[in][out] += local[in][out];
    }

    double sumsq = 0.0;
    for (int in = 0; in < 32; in++) {
        for (int out = 0; out < 32; out++) {
            double dev = (total[in][out] - 2147483648LL) / 2147483648.0;
            sumsq += (dev * dev) / (32 * 32);
        }
    }
    return sqrt(sumsq) * 1000.0;
}
248 | 
249 | static void
250 | hash_gen_strict(struct hash *h, uint64_t rng[4])
251 | {
252 |     do
253 |         hash_gen(h, rng);
254 |     while (estimate_bias32(h, rng) > THRESHOLD);
255 | }
256 | 
/* Read 8 bytes from buf as a little-endian 64-bit integer (buf[0] is the
 * least significant byte). Works for any alignment.
 */
static uint64_t
load64(const void *buf)
{
    const unsigned char *bytes = buf;
    uint64_t value = 0;
    for (int i = 7; i >= 0; i--)
        value = (value << 8) | bytes[i];
    return value;
}
270 | 
/* Combine two 64-bit words into one: an odd-multiplier linear combination
 * of x and y followed by folding the high half into the low half.
 */
static uint64_t
mix64(uint64_t x, uint64_t y)
{
    uint64_t r = 0x2b8a130976726633 * x - 0xb28cbd28446adb17 * y;
    r ^= r >> 32;
    return r;
}

/* Multiply x by m, then fold the high half into the low half. */
static uint64_t
hash64(uint64_t x, uint64_t m)
{
    x *= m;
    x ^= x >> 32;
    return x;
}

/* Scramble a four-word seed in place so that every output word depends on
 * every input word. Each word is first hashed on its own with a distinct
 * offset and multiplier (m advances by the even step i, so it stays odd).
 * Then four rounds follow; in round a, word a is mixed into each of the
 * other three words.
 *
 * The final round used to be ROUND64(3, 0, 1, 3), which mixed x[3] into
 * itself and left x[2] independent of the input x[3]. It now targets
 * x[0], x[1] and x[2] like the other rounds target "the other three".
 * Note that this changes the state produced for a given seed.
 */
static void
mix64x4(uint64_t x[4])
{
    uint64_t i = 0xf81db9ba6dabee4e;
    uint64_t m = 0xb1d9e3fbc08321db;
    x[0] = hash64(x[0] + 0x347534cdcf0982b6, m);
    x[1] = hash64(x[1] + 0x975e2ee8f0f23aa8, m += i);
    x[2] = hash64(x[2] + 0x7baf736c6c769a0b, m += i);
    x[3] = hash64(x[3] + 0x884afc96accb90d9, m += i);
    #define ROUND64(a, b, c, d) \
        x[b] = mix64(hash64(x[a], m += i), x[b]); \
        x[c] = mix64(hash64(x[a], m += i), x[c]); \
        x[d] = mix64(hash64(x[a], m += i), x[d])
    ROUND64(0, 1, 2, 3);
    ROUND64(1, 0, 2, 3);
    ROUND64(2, 0, 1, 3);
    ROUND64(3, 0, 1, 2);
    #undef ROUND64
}
306 | 
/* Seed rng from ambient values: the current time, the address of this
 * function, the address of the caller's state array, and the address of a
 * short-lived heap allocation. The four words are then scrambled with
 * mix64x4().
 */
static void
rng_init(uint64_t rng[4])
{
    void *probe = malloc(1024L * 1024);
    uint64_t heap_addr = (uint64_t)probe;   /* only the address is used */
    free(probe);

    rng[3] = heap_addr;
    rng[2] = (uint64_t)rng;
    rng[1] = (uint64_t)rng_init;
    rng[0] = uepoch();
    mix64x4(rng);
}
318 | 
/* Modular multiplicative inverse (32-bit).
 *
 * Starts from x itself and applies five refinement steps of the form
 * inv = inv * (2 - x * inv), all in wrapping 32-bit arithmetic. For odd x
 * the result satisfies x * result == 1 (mod 2^32).
 */
static uint32_t
modinv32(uint32_t x)
{
    uint32_t inv = x;
    for (int step = 0; step < 5; step++)
        inv *= 2 - x * inv;
    return inv;
}
331 | 
/* Print the command-line synopsis and option summary to stream f. */
static void
usage(FILE *f)
{
    fprintf(f, "usage: hillclimb [-EhIqs] [-p INIT] [-x SEED]\n");
    fprintf(f, "  -E       Evaluate given pattern (-p)\n");
    fprintf(f, "  -h       Print this message and exit\n");
    fprintf(f, "  -I       Invert given pattern (-p) and quit\n");
    fprintf(f, "  -p INIT  Provide an initial hash function\n");
    fprintf(f, "  -q       Print less information (quiet)\n");
    fprintf(f, "  -s       Quit after finding a local minimum\n");
    fprintf(f, "  -x SEED  Seed PRNG from a string (up to 32 bytes)\n");
}
344 | 
345 | int
346 | main(int argc, char **argv)
347 | {
348 |     int seeded = 0;
349 |     uint64_t rng[4];
350 |     struct hash cur, last = {0};
351 |     int generate = 1;
352 |     int one_shot = 0;
353 |     int quiet = 0;
354 |     int invert = 0;
355 |     int evaluate = 0;
356 |     double cur_score = -1;
357 | 
358 |     int option;
359 |     while ((option = getopt(argc, argv, "EhIp:qsx:")) != -1) {
360 |         switch (option) {
361 |             case 'E': {
362 |                 evaluate = 1;
363 |             } break;
364 |             case 'h': {
365 |                 usage(stdout);
366 |                 exit(EXIT_SUCCESS);
367 |             } break;
368 |             case 'I': {
369 |                 invert = 1;
370 |             } break;
371 |             case 'p': {
372 |                 if (!hash_parse(&cur, optarg)) {
373 |                     fprintf(stderr, "hillclimb: invalid pattern: %s\n", optarg);
374 |                     exit(EXIT_FAILURE);
375 |                 }
376 |                 generate = 0;
377 |             } break;
378 |             case 'q': {
379 |                 quiet++;
380 |             } break;
381 |             case 's': {
382 |                 one_shot = 1;
383 |             } break;
384 |             case 'x': {
385 |                 unsigned char buf[32] = {0};
386 |                 size_t len = strlen(optarg);
387 |                 if (len > sizeof(buf)) {
388 |                     fprintf(stderr, "hillclimb: seed too long (> 32 bytes)\n");
389 |                     exit(EXIT_FAILURE);
390 |                 }
391 |                 memcpy(buf, optarg, len);
392 |                 rng[0] = load64(buf +  0);
393 |                 rng[1] = load64(buf +  8);
394 |                 rng[2] = load64(buf + 16);
395 |                 rng[3] = load64(buf + 24);
396 |                 mix64x4(rng);
397 |                 seeded = 1;
398 |             } break;
399 |             default:
400 |                 usage(stderr);
401 |                 exit(EXIT_FAILURE);
402 |         }
403 |     }
404 | 
405 |     if (invert) {
406 |         if (generate) {
407 |             fprintf(stderr, "hillclimb: -I requires -p\n");
408 |             exit(EXIT_FAILURE);
409 |         }
410 |         printf("uint32_t\nhash_r(uint32_t x)\n{\n");
411 |         for (int i = 0; i < HASHN * 2 + 1; i++) {
412 |             switch (i & 1) {
413 |                 case 0: {
414 |                     int s = HASHN - i / 2;
415 |                     printf("    x ^=");
416 |                     for (int i = cur.s[s]; i < 32; i += cur.s[s])
417 |                         printf(" %sx >> %d", i == cur.s[s] ? "" : "^ ", i);
418 |                     printf(";\n");
419 |                 } break;
420 |                 case 1: {
421 |                     int c = HASHN - (i + 1) / 2;
422 |                     unsigned long inv = modinv32(cur.c[c]);
423 |                     printf("    x *= 0x%08lx;\n", inv);
424 |                 } break;
425 |             }
426 |         }
427 |         printf("    return x;\n}\n");
428 |         exit(EXIT_SUCCESS);
429 |     }
430 | 
431 |     if (evaluate) {
432 |         if (generate) {
433 |             fprintf(stderr, "hillclimb: -E requires -p\n");
434 |             exit(EXIT_FAILURE);
435 |         }
436 |         hash_print(&cur);
437 |         printf(" = %.17g\n", exact_bias32(&cur));
438 |         exit(EXIT_SUCCESS);
439 |     }
440 | 
441 |     if (!seeded)
442 |         rng_init(rng);
443 | 
444 |     if (generate)
445 |         hash_gen_strict(&cur, rng);
446 | 
447 |     for (;;) {
448 |         int found = 0;
449 |         struct hash best;
450 |         double best_score;
451 | 
452 |         if (quiet < 2)
453 |             hash_print(&cur);
454 |         if (cur_score < 0)
455 |             cur_score = exact_bias32(&cur);
456 |         if (quiet < 2)
457 |             printf(" = %.17g\n", cur_score);
458 | 
459 |         best = cur;
460 |         best_score = cur_score;
461 | 
462 |         /* Explore around shifts */
463 |         for (int i = 0; i <= HASHN; i++) {
464 |             /* In theory the shift could drift above 31 or below 1, but
465 |              * in practice it would never get this far since these would
466 |              * be terrible hashes.
467 |              */
468 |             for (int d = -SHIFT_RANGE; d <= +SHIFT_RANGE; d++) {
469 |                 if (d == 0) continue;
470 |                 struct hash tmp = cur;
471 |                 tmp.s[i] += d;
472 |                 if (hash_equal(&tmp, &last)) continue;
473 |                 if (quiet <= 0) {
474 |                     printf("  ");
475 |                     hash_print(&tmp);
476 |                 }
477 |                 double score = exact_bias32(&tmp);
478 |                 if (quiet <= 0)
479 |                     printf(" = %.17g\n", score);
480 |                 if (score < best_score) {
481 |                     best_score = score;
482 |                     best = tmp;
483 |                     found = 1;
484 |                 }
485 |             }
486 |         }
487 | 
488 |         /* Explore around constants */
489 |         for (int i = 0; i < HASHN; i++) {
490 |             for (int d = -CONST_RANGE; d <= +CONST_RANGE; d += 2) {
491 |                 if (d == 0) continue;
492 |                 struct hash tmp = cur;
493 |                 tmp.c[i] += d;
494 |                 if (hash_equal(&tmp, &last)) continue;
495 |                 if (quiet <= 0) {
496 |                     printf("  ");
497 |                     hash_print(&tmp);
498 |                 }
499 |                 double score = exact_bias32(&tmp);
500 |                 if (quiet <= 0)
501 |                     printf(" = %.17g\n", score);
502 |                 if (score < best_score) {
503 |                     best_score = score;
504 |                     best = tmp;
505 |                     found = 1;
506 |                 }
507 |             }
508 |         }
509 | 
510 |         if (found) {
511 |             /* Move to the lowest item found */
512 |             if (quiet < 1)
513 |                 puts("CLIMB");
514 |             last = cur;
515 |             cur = best;
516 |             cur_score = best_score;
517 |         } else if (one_shot) {
518 |             /* Hit local minima, exit */
519 |             if (quiet < 1)
520 |                 puts("DONE");
521 |             hash_print(&cur);
522 |             printf(" = %.17g\n", cur_score);
523 |             break;
524 |         } else {
525 |             /* Hit local minima, reset */
526 |             if (quiet < 1)
527 |                 puts("RESET");
528 |             hash_print(&cur);
529 |             printf(" = %.17g\n", cur_score);
530 |             last.s[0] = 0; // set to invalid
531 |             hash_gen_strict(&cur, rng);
532 |             cur_score = -1;
533 |         }
534 |     }
535 | }
536 | 


--------------------------------------------------------------------------------
/hp16.c:
--------------------------------------------------------------------------------
  1 | /* 16-bit hash prospector
  2 |  *
  3 |  * Unlike the 32-bit / 64-bit prospector, this implementation is fully
  4 |  * portable and will run on just about any system. It's also capable of
  5 |  * generating and evaluating 128kB s-boxes.
  6 |  *
  7 |  * Be mindful of C integer promotion rules when doing 16-bit operations.
  8 |  * For instance, on 32-bit implementations unsigned 16-bit operands will
  9 |  * be promoted to signed 32-bit integers, leading to incorrect results in
 10 |  * certain cases. The C programs printed by this program are careful to
 11 |  * promote 16-bit operations to "unsigned int" where needed.
 12 |  *
 13 |  * Since 16-bit hashes are likely to be needed on machines that do not
 14 |  * have efficient hardware multiplication or whose ISAs lack rotation
 15 |  * instructions, these operations may be optionally omitted during
 16 |  * exploration (-m, -r).
 17 |  *
 18 |  * This is free and unencumbered software released into the public domain.
 19 |  */
 20 | #include <ctype.h>
 21 | #include <math.h>
 22 | #include <stdio.h>
 23 | #include <stdlib.h>
 24 | #include <string.h>
 25 | #include <time.h>
 26 | 
/* Largest value accepted for -n; main() sizes its operation array from it. */
#define OPS_MAX 32

/* Kinds of operation a generated 16-bit hash may contain. HF16_SBOX is kept
 * last: the random generators pick a type with "u32(s) % HF16_SBOX", which
 * excludes it.
 */
enum hf_type {
    HF16_XOR,   // x ^= imm
    HF16_MUL,   // x *= imm (odd)
    HF16_ADD,   // x += imm
    HF16_ROT,   // x  = (x << imm) | (x >> (16 - imm))
    HF16_NOT,   // x  = ~x
    HF16_XORL,  // x ^= x << imm
    HF16_XORR,  // x ^= x >> imm
    HF16_ADDL,  // x += x << imm
    HF16_SUBL,  // x -= x << imm
    HF16_SBOX,  // x  = sbox[x]
};

/* One operation: its kind plus the immediate operand (constant or shift
 * count; set to 0 by hf_gen() for kinds that take none).
 */
struct hf_op {
    enum hf_type type;
    unsigned imm;
};

/* Substitution table used by HF16_SBOX; one entry per 16-bit input. */
static unsigned short sbox[1L<<16];
 48 | 
 49 | static unsigned long long
 50 | hash64(unsigned long long x)
 51 | {
 52 |     x ^= x >> 32;
 53 |     x *= 0x25b751109e05be63;
 54 |     x &= 0xffffffffffffffff;
 55 |     x ^= x >> 32;
 56 |     x *= 0x2330e1453ed4b9b9;
 57 |     x &= 0xffffffffffffffff;
 58 |     x ^= x >> 32;
 59 |     return x;
 60 | }
 61 | 
 62 | static unsigned long
 63 | u32(unsigned long long *s)
 64 | {
 65 |     unsigned long r = *s >> 32;
 66 |     *s = *s*0x7c3c3267d015ceb5 + 1;
 67 |     r &= 0xffffffff;
 68 |     r ^= r >> 16;
 69 |     r *= 0x60857ba9;
 70 |     return r & 0xffffffff;
 71 | }
 72 | 
 73 | static unsigned long
 74 | randint(unsigned long r, unsigned long long s[1])
 75 | {
 76 |     unsigned long long x = u32(s);
 77 |     unsigned long long m = x * r;
 78 |     unsigned long y = m & 0xffffffff;
 79 |     if (y < r) {
 80 |         unsigned long t = -r % r;
 81 |         while (y < t) {
 82 |             x = u32(s);
 83 |             m = x * r;
 84 |             y = m & 0xffffffff;
 85 |         }
 86 |     }
 87 |     return m >> 32;
 88 | }
 89 | 
 90 | static struct hf_op
 91 | hf_gen(enum hf_type type, unsigned long long s[1])
 92 | {
 93 |     struct hf_op op;
 94 |     op.type = type;
 95 |     switch (op.type) {
 96 |     case HF16_NOT:
 97 |     case HF16_SBOX: op.imm = 0; break;
 98 |     case HF16_XOR:
 99 |     case HF16_ADD:  op.imm = u32(s)>>16; break;
100 |     case HF16_MUL:  op.imm = u32(s)>>16 | 1; break;
101 |     case HF16_ROT:
102 |     case HF16_XORL:
103 |     case HF16_XORR:
104 |     case HF16_ADDL:
105 |     case HF16_SUBL: op.imm = 1 + u32(s)%15; break;
106 |     }
107 |     return op;
108 | }
109 | 
110 | /* May these operations be adjacent? */
111 | static int
112 | hf_type_valid(enum hf_type a, enum hf_type b)
113 | {
114 |     switch (a) {
115 |     case HF16_NOT:
116 |     case HF16_XOR:
117 |     case HF16_MUL:
118 |     case HF16_ADD:
119 |     case HF16_ROT:
120 |     case HF16_SBOX: return a != b;
121 |     case HF16_XORL:
122 |     case HF16_XORR:
123 |     case HF16_ADDL:
124 |     case HF16_SUBL: return 1;
125 |     }
126 |     return 0;
127 | }
128 | 
129 | static void
130 | hf_genfunc(struct hf_op *ops, int n, unsigned long long s[1])
131 | {
132 |     for (int i = 0; i < n; i++) {
133 |         do {
134 |             enum hf_type type = u32(s) % HF16_SBOX;  // (exclude sbox)
135 |             ops[i] = hf_gen(type, s);
136 |         } while (i > 0 && !hf_type_valid(ops[i-1].type, ops[i].type));
137 |     }
138 | }
139 | 
140 | /* Indicate operation diffusion direction (+1 left, 0 none, -1 right). */
141 | static int
142 | opdir(struct hf_op op)
143 | {
144 |     switch (op.type) {
145 |     case HF16_NOT:
146 |     case HF16_XOR:
147 |     case HF16_ADD:
148 |     case HF16_SBOX: return 0;
149 |     case HF16_MUL:
150 |     case HF16_XORL:
151 |     case HF16_ADDL:
152 |     case HF16_SUBL: return +1;
153 |     case HF16_XORR: return -1;
154 |     case HF16_ROT:  if (op.imm < 8) return +1;
155 |                     if (op.imm > 8) return -1;
156 |                     return 0;
157 |     }
158 |     abort();
159 | }
160 | 
161 | /* Prefer to alternate bit diffusion directions. */
162 | static void
163 | hf_gensmart(struct hf_op *ops, int n, unsigned long long s[1])
164 | {
165 |     int dir = 0;
166 |     for (int i = 0; i < n; i++) {
167 |         int newdir;
168 |         do {
169 |             ops[i] = hf_gen(u32(s)%HF16_SBOX, s);
170 |             newdir = opdir(ops[i]);
171 |         } while (dir && newdir == dir);
172 |         dir = newdir ? newdir : dir;
173 |     }
174 | }
175 | 
/* Count the set bits in v. Each pass clears the lowest set bit, so the loop
 * runs once per set bit.
 */
static int
popcount(int v)
{
    int bits = 0;
    while (v) {
        v &= v - 1;
        bits++;
    }
    return bits;
}
184 | 
185 | static void
186 | hf_genxormul(struct hf_op *ops, int n, unsigned long long s[1])
187 | {
188 |     ops[0].type = HF16_XORR;
189 |     ops[0].imm = 1 + popcount(u32(s) >> 18);
190 |     for (int i = 0; i < n; i++) {
191 |         ops[2*i+1].type = HF16_MUL;
192 |         ops[2*i+1].imm = u32(s)>>16 | 1;
193 |         ops[2*i+2].type = HF16_XORR;
194 |         ops[2*i+2].imm = 1 + popcount(u32(s) >> 18);
195 |     }
196 | }
197 | 
198 | /* An Add-Xor-Shift (AXS) hash alternates between diffusion leftward and
199 |  * rightward where one direction is always xorshift and the other direction is
200 |  * always add/sub-shift.
201 |  *
202 |  *   x ^= x >> A; x += x << B;
203 |  *   x ^= x >> C; x -= x << D;
204 |  *   x ^= x >> E; x += x << F;
205 |  *
206 |  * This function generates all permutations of this construction in order.
207 |  */
208 | #define AXS_COUNT 182250000
209 | #define AXS_SIZE  6
210 | static void
211 | hf_genaxs(struct hf_op *ops, long i)
212 | {
213 |     int shifts[] = {
214 |         1 + (i /      1) % 15,
215 |         1 + (i /     15) % 15,
216 |         1 + (i /    225) % 15,
217 |         1 + (i /   3375) % 15,
218 |         1 + (i /  50625) % 15,
219 |         1 + (i / 759375) % 15,
220 |     };
221 |     int types[] = {
222 |         (i / 11390625) % 2,
223 |         (i / 22781250) % 2,
224 |         (i / 45562500) % 2,
225 |     };
226 |     int swap = (i / 91125000) % 2;
227 |     for (int j = 0; j < 6; j += 2) {
228 |         ops[j+ swap].type = types[j/2] ? HF16_ADDL : HF16_SUBL;
229 |         ops[j+ swap].imm = shifts[j+0];
230 |         ops[j+!swap].type = HF16_XORR;
231 |         ops[j+!swap].imm = shifts[j+1];
232 |     }
233 | }
234 | 
235 | static unsigned
236 | hf_apply(const struct hf_op *ops, int n, unsigned x)
237 | {
238 |     for (int i = 0; i < n; i++) {
239 |         switch (ops[i].type) {
240 |         case HF16_XOR:  x ^= ops[i].imm; break;
241 |         case HF16_MUL:  x *= ops[i].imm; break;
242 |         case HF16_ADD:  x += ops[i].imm; break;
243 |         case HF16_ROT:  x  = x<<ops[i].imm | x>>(16 - ops[i].imm); break;
244 |         case HF16_NOT:  x  = ~x; break;
245 |         case HF16_XORL: x ^= x << ops[i].imm; break;
246 |         case HF16_XORR: x ^= x >> ops[i].imm; break;
247 |         case HF16_ADDL: x += x << ops[i].imm; break;
248 |         case HF16_SUBL: x -= x << ops[i].imm; break;
249 |         case HF16_SBOX: x  = sbox[x]; break;
250 |         }
251 |         x &= 0xffff;
252 |     }
253 |     return x;
254 | }
255 | 
256 | static void
257 | hf_print(const struct hf_op *ops, int n, FILE *f)
258 | {
259 |     fprintf(f, "uint16_t hash(uint16_t x)\n");
260 |     fprintf(f, "{\n");
261 |     for (int i = 0; i < n; i++) {
262 |         fputs("    ", f);
263 |         switch (ops[i].type) {
264 |         case HF16_XOR:
265 |             fprintf(f, "x ^= 0x%04x;\n", ops[i].imm);
266 |             break;
267 |         case HF16_MUL:
268 |             fprintf(f, "x *= 0x%04xU;\n", ops[i].imm);
269 |             break;
270 |         case HF16_ADD:
271 |             fprintf(f, "x += 0x%04xU;\n", ops[i].imm);
272 |             break;
273 |         case HF16_ROT:
274 |             fprintf(f, "x  = (unsigned)x<<%d | x >>%d;\n",
275 |                     ops[i].imm, 16-ops[i].imm);
276 |             break;
277 |         case HF16_NOT:
278 |             fprintf(f, "x  = ~x;\n");
279 |             break;
280 |         case HF16_XORL:
281 |             fprintf(f, "x ^= (unsigned)x << %d;\n", ops[i].imm);
282 |             break;
283 |         case HF16_XORR:
284 |             fprintf(f, "x ^= x >> %d;\n", ops[i].imm);
285 |             break;
286 |         case HF16_ADDL:
287 |             fprintf(f, "x += (unsigned)x << %d;\n", ops[i].imm);
288 |             break;
289 |         case HF16_SUBL:
290 |             fprintf(f, "x -= (unsigned)x << %d;\n", ops[i].imm);
291 |             break;
292 |         case HF16_SBOX:
293 |             fprintf(f, "x  = sbox[x];\n");
294 |             break;
295 |         }
296 |     }
297 |     fprintf(f, "    return x;\n");
298 |     fprintf(f, "}\n");
299 | }
300 | 
301 | static void
302 | sbox_init(void)
303 | {
304 |     for (long i = 0; i < 1L<<16; i++) {
305 |         sbox[i] = i;
306 |     }
307 | }
308 | 
309 | static void
310 | sbox_shuffle(unsigned long long s[1])
311 | {
312 |     for (long i = 0xffff; i > 0; i--) {
313 |         long j = randint(i + 1, s);
314 |         unsigned swap = sbox[i];
315 |         sbox[i] = sbox[j];
316 |         sbox[j] = swap;
317 |     }
318 | }
319 | 
320 | static void
321 | sbox_print(FILE *f)
322 | {
323 |     for (long i = 0; i < 1L<<16; i++) {
324 |         fprintf(f, "%04x%c", sbox[i], i % 16 == 15 ? '\n' : ' ');
325 |     }
326 | }
327 | 
/* Exact avalanche bias of the n-operation hash in ops, over all 2^16
 * inputs. For every input and every one of the 16 input bits, the bit is
 * flipped and the output bits that change are counted. The result is the
 * root mean square deviation of those counts from 2^15, as a fraction of
 * 2^15 (0 is ideal).
 */
static double
score(const struct hf_op *ops, int n)
{
    /* long constant: 1<<15 would overflow where int is only 16 bits */
    const long half = 1L << 15;
    long bins[16][16] = {{0}};
    for (long x = 0; x < 1L<<16; x++) {
        unsigned h0 = hf_apply(ops, n, x);
        for (int j = 0; j < 16; j++) {
            unsigned bit = 1U << j;
            unsigned h1 = hf_apply(ops, n, x^bit);
            unsigned set = h0 ^ h1;
            for (int k = 0; k < 16; k++)
                bins[j][k] += (set >> k) & 1;
        }
    }

    double mean = 0.0;
    for (int j = 0; j < 16; j++) {
        for (int k = 0; k < 16; k++) {
            double diff = (bins[j][k] - half) / (double)half;
            mean += (diff * diff) / (16 * 16);
        }
    }
    return sqrt(mean);
}
352 | 
353 | static int
354 | match(const struct hf_op *ops, int n, int types)
355 | {
356 |     for (int i = 0; i < n; i++) {
357 |         if (1<<ops[i].type & types) {
358 |             return 1;
359 |         }
360 |     }
361 |     return 0;
362 | }
363 | 
/* State of the option parser below; the counterparts of getopt()'s
 * optind, opterr, optopt and optarg.
 */
static int xoptind = 1;     /* index of the next argv element to examine */
static int xopterr = 1;     /* non-zero: print diagnostics to stderr */
static int xoptopt;         /* option character examined most recently */
static char *xoptarg;       /* argument of the most recent option, if any */

/* Minimal getopt()-style option parser.
 *
 * Returns the next option character from argv, '?' for an unknown option,
 * ':' or '?' for a missing argument (':' only when optstring starts with
 * ':'), and -1 when the options are exhausted. Setting xoptind to 0 before
 * a call restarts parsing. argv must be NULL-terminated; argc is unused.
 */
static int
xgetopt(int argc, char **argv, const char *optstring)
{
    static int optpos = 1;  /* position inside a grouped argument ("-abc") */
    const char *arg;
    (void)argc;

    /* Reset? */
    if (xoptind == 0) {
        xoptind = 1;
        optpos = 1;
    }

    arg = argv[xoptind];
    if (arg && strcmp(arg, "--") == 0) {
        xoptind++;
        return -1;
    } else if (!arg || arg[0] != '-' || !isalnum((unsigned char)arg[1])) {
        /* The cast matters: passing a negative char to isalnum() is
         * undefined behaviour. */
        return -1;
    } else {
        const char *opt = strchr(optstring, arg[optpos]);
        xoptopt = arg[optpos];
        if (!opt) {
            if (xopterr && *optstring != ':')
                fprintf(stderr, "%s: illegal option: %c\n", argv[0], xoptopt);
            return '?';
        } else if (opt[1] == ':') {
            if (arg[optpos + 1]) {
                /* argument attached to the option: "-n5" */
                xoptarg = (char *)arg + optpos + 1;
                xoptind++;
                optpos = 1;
                return xoptopt;
            } else if (argv[xoptind + 1]) {
                /* argument in the following element: "-n 5" */
                xoptarg = (char *)argv[xoptind + 1];
                xoptind += 2;
                optpos = 1;
                return xoptopt;
            } else {
                if (xopterr && *optstring != ':')
                    fprintf(stderr,
                            "%s: option requires an argument: %c\n",
                            argv[0], xoptopt);
                return *optstring == ':' ? ':' : '?';
            }
        } else {
            if (!arg[++optpos]) {
                xoptind++;
                optpos = 1;
            }
            return xoptopt;
        }
    }
}
422 | 
/* Print the command-line synopsis and option summary to stream f. */
static void
usage(FILE *f)
{
    fprintf(f, "usage: hp16 [-AHISX] [-hmr] [-n INT]\n");
    fprintf(f, "  -A     mode: evaluate AXS hashes\n");
    fprintf(f, "  -H     mode: random hash prospector (default)\n");
    fprintf(f, "  -I     mode: smarter (?) hash prospector\n");
    fprintf(f, "  -S     mode: s-box prospector\n");
    fprintf(f, "  -X     mode: xorshift-multiply prospector\n");
    fprintf(f, "  -h     print this message and exit\n");
    fprintf(f, "  -m     exclude multiplication\n");
    fprintf(f, "  -n INT number of operations\n");
    fprintf(f, "  -r     exclude rotation\n");
}
437 | 
/* 16-bit hash prospector entry point.
 *
 * Parses the options, then either enumerates every AXS hash (-A) or loops
 * forever generating random candidates for the selected mode. Whenever a
 * candidate scores a lower bias than any seen so far, it is printed.
 */
int
main(int argc, char **argv)
{
    char *ptr;
    int n = 0;          /* operation count; 0 means "use the mode default" */
    int exclude = 0;    /* bit mask of (1 << hf_type) kinds to reject */
    enum {
        MODE_HASH, MODE_SMART, MODE_XORMUL, MODE_SBOX, MODE_AXS
    } mode = MODE_HASH;
    unsigned long tmp;
    /* ops[0] starts as an s-box lookup, which is what MODE_SBOX scores. */
    struct hf_op ops[1+2*OPS_MAX] = {{HF16_SBOX, 0}};

    int option;
    while ((option = xgetopt(argc, argv, "AHhImn:rSX")) != -1) {
        switch (option) {
        case 'A':
            mode = MODE_AXS;
            break;
        case 'H':
            mode = MODE_HASH;
            break;
        case 'h':
            usage(stdout);
            return 0;
        case 'I':
            mode = MODE_SMART;
            break;
        case 'm':
            exclude |= 1<<HF16_MUL;
            break;
        case 'n':
            /* accept 1..OPS_MAX with no trailing characters */
            tmp = strtoul(xoptarg, &ptr, 10);
            if (!tmp || *ptr || tmp > OPS_MAX) {
                fprintf(stderr, "fatal: invalid n, %s\n", xoptarg);
                usage(stderr);
                return 1;
            }
            n = tmp;
            break;
        case 'r':
            exclude |= 1<<HF16_ROT;
            break;
        case 'S':
            mode = MODE_SBOX;
            break;
        case 'X':
            mode = MODE_XORMUL;
            break;
        case '?':
            usage(stderr);
            return 1;
        }
    }

    /* Turn n into the number of entries of ops that will be scored. In
     * xorshift-multiply mode -n counts multiply rounds, so the operation
     * count is 1 + 2*n. */
    switch (mode) {
    case MODE_HASH:
    case MODE_SMART:  n = n ? n : 7; break;
    case MODE_XORMUL: n = n ? 1 + 2*n : 5; break;
    case MODE_SBOX:   sbox_init(); n = 1; break;
    case MODE_AXS:    break;
    }

    double best = 1;    /* lowest bias seen so far */
    unsigned long long s[1] = {hash64(time(0))};

    if (mode == MODE_AXS) {
        /* Score every AXS hash; the comparison with best and the printing
         * happen inside a critical section. */
        #pragma omp parallel for
        for (long i = 0; i < AXS_COUNT; i++) {
            struct hf_op hf[AXS_SIZE];
            hf_genaxs(hf, i);
            double r = score(hf, AXS_SIZE);
            #pragma omp critical
            if (r < best) {
                best = r;
                printf("// bias = %.17g\n", r);
                hf_print(hf, AXS_SIZE, stdout);
                fputc('\n', stdout);
                fflush(stdout);
            }
        }
        return 0;
    }

    for (;;) {
        /* Stir the wall-clock time into the PRNG state before generating
         * and the processor time after. */
        *s += hash64(time(0));
        switch (mode) {
        case MODE_HASH:
            /* regenerate until no excluded operation type is present */
            do {
                hf_genfunc(ops, n, s);
            } while (match(ops, n, exclude));
            break;
        case MODE_SMART:
            do {
                hf_gensmart(ops, n, s);
            } while (match(ops, n, exclude));
            break;
        case MODE_XORMUL:
            hf_genxormul(ops, (n-1)/2, s);
            break;
        case MODE_SBOX:
            sbox_shuffle(s);
            break;
        case MODE_AXS:
            abort();
        }
        *s -= hash64(clock());

        double r = score(ops, n);
        if (r < best) {
            switch (mode) {
            case MODE_HASH:
            case MODE_SMART:
            case MODE_XORMUL:
                printf("// bias = %.17g\n", r);
                hf_print(ops, n, stdout);
                fputc('\n', stdout);
                break;
            case MODE_SBOX:
                /* the table goes to stdout; the bias is echoed on stderr */
                fprintf(stdout, "// bias = %.17g\n", r);
                sbox_print(stdout);
                fputc('\n', stdout);
                fprintf(stderr, "// bias = %.17g\n", r);
                fflush(stderr);
                break;
            case MODE_AXS:
                abort();
            }
            fflush(stdout);
            best = r;
        }
    }
}
570 | 


--------------------------------------------------------------------------------
/prospector.c:
--------------------------------------------------------------------------------
  1 | #define _DEFAULT_SOURCE // MAP_ANONYMOUS
  2 | #include <math.h>
  3 | #include <errno.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <stdint.h>
  7 | #include <string.h>
  8 | 
  9 | #include <fcntl.h>
 10 | #include <dlfcn.h>
 11 | #include <unistd.h>
 12 | #include <sys/mman.h>
 13 | #include <sys/time.h>
 14 | 
 15 | #define ABI __attribute__((sysv_abi))
 16 | 
 17 | #define countof(a) ((int)(sizeof(a) / sizeof(0[a])))
 18 | 
 19 | static uint64_t
 20 | xoroshiro128plus(uint64_t s[2])
 21 | {
 22 |     uint64_t s0 = s[0];
 23 |     uint64_t s1 = s[1];
 24 |     uint64_t result = s0 + s1;
 25 |     s1 ^= s0;
 26 |     s[0] = ((s0 << 24) | (s0 >> 40)) ^ s1 ^ (s1 << 16);
 27 |     s[1] = (s1 << 37) | (s1 >> 27);
 28 |     return result;
 29 | }
 30 | 
/* Kinds of hash-function steps. The 64-bit group lists the same
 * operations in the same order as the 32-bit group, so a 64-bit type is
 * its 32-bit counterpart plus HF64_XOR (parse_template relies on this).
 */
enum hf_type {
    /* 32 bits */
    HF32_XOR,  // x ^= const32
    HF32_MUL,  // x *= const32 (odd)
    HF32_ADD,  // x += const32
    HF32_ROT,  // x  = (x << const5) | (x >> (32 - const5))
    HF32_NOT,  // x  = ~x
    HF32_BSWAP,// x  = bswap32(x)
    HF32_XORL, // x ^= x << const5
    HF32_XORR, // x ^= x >> const5
    HF32_ADDL, // x += x << const5
    HF32_SUBL, // x -= x << const5
    /* 64 bits */
    HF64_XOR,  // x ^= const64
    HF64_MUL,  // x *= const64 (odd)
    HF64_ADD,  // x += const64
    HF64_ROT,  // x  = (x << const6) | (x >> (64 - const6))
    HF64_NOT,  // x  = ~x
    HF64_BSWAP,// x  = bswap64(x)
    HF64_XORL, // x ^= x << const6
    HF64_XORR, // x ^= x >> const6
    HF64_ADDL, // x += x << const6
    HF64_SUBL, // x -= x << const6
};
 55 | 
/* Names of the operations, indexed by enum hf_type. The first two
 * characters are the width ("32" or "64"); parse_template skips them when
 * matching pattern tokens. Rows are 8 bytes because the longest name
 * ("32bswap") is 7 characters plus the terminating NUL.
 */
static const char hf_names[][8] = {
    [HF32_XOR]  = "32xor",
    [HF32_MUL]  = "32mul",
    [HF32_ADD]  = "32add",
    [HF32_ROT]  = "32rot",
    [HF32_NOT]  = "32not",
    [HF32_BSWAP]= "32bswap",
    [HF32_XORL] = "32xorl",
    [HF32_XORR] = "32xorr",
    [HF32_ADDL] = "32addl",
    [HF32_SUBL] = "32subl",
    [HF64_XOR]  = "64xor",
    [HF64_MUL]  = "64mul",
    [HF64_ADD]  = "64add",
    [HF64_ROT]  = "64rot",
    [HF64_NOT]  = "64not",
    [HF64_BSWAP]= "64bswap",
    [HF64_XORL] = "64xorl",
    [HF64_XORR] = "64xorr",
    [HF64_ADDL] = "64addl",
    [HF64_SUBL] = "64subl",
};
 78 | 
/* Set by parse_operand when a pattern supplies an explicit operand;
 * hf_randfunc leaves the constant of a locked operation untouched.
 */
#define FOP_LOCKED  (1 << 0)

/* One step of a hash function. */
struct hf_op {
    enum hf_type type;   // which operation to apply
    uint64_t constant;   // operand: constant or shift/rotate count (0 for not/bswap)
    int flags;           // FOP_* bits
};
 85 | 
 86 | /* Randomize the constants of the given hash operation.
 87 |  */
 88 | static void
 89 | hf_randomize(struct hf_op *op, uint64_t s[2])
 90 | {
 91 |     uint64_t r = xoroshiro128plus(s);
 92 |     switch (op->type) {
 93 |         case HF32_NOT:
 94 |         case HF64_NOT:
 95 |         case HF32_BSWAP:
 96 |         case HF64_BSWAP:
 97 |             op->constant = 0;
 98 |             break;
 99 |         case HF32_XOR:
100 |         case HF32_ADD:
101 |             op->constant = (uint32_t)r;
102 |             break;
103 |         case HF32_MUL:
104 |             op->constant = (uint32_t)r | 1;
105 |             break;
106 |         case HF32_ROT:
107 |         case HF32_XORL:
108 |         case HF32_XORR:
109 |         case HF32_ADDL:
110 |         case HF32_SUBL:
111 |             op->constant = 1 + r % 31;
112 |             break;
113 |         case HF64_XOR:
114 |         case HF64_ADD:
115 |             op->constant = r;
116 |             break;
117 |         case HF64_MUL:
118 |             op->constant = r | 1;
119 |             break;
120 |         case HF64_ROT:
121 |         case HF64_XORL:
122 |         case HF64_XORR:
123 |         case HF64_ADDL:
124 |         case HF64_SUBL:
125 |             op->constant = 1 + r % 63;
126 |             break;
127 |     }
128 | }
129 | 
130 | #define F_U64     (1 << 0)
131 | #define F_TINY    (1 << 1)  // don't use big constants
132 | 
133 | static void
134 | hf_gen(struct hf_op *op, uint64_t s[2], int flags)
135 | {
136 |     uint64_t r = xoroshiro128plus(s);
137 |     int min = flags & F_TINY ? 3 : 0;
138 |     op->type = (r % (9 - min)) + min + (flags & F_U64 ? 9 : 0);
139 |     hf_randomize(op, s);
140 | }
141 | 
142 | /* Return 1 if these operations may be adjacent
143 | */
144 | static int
145 | hf_type_valid(enum hf_type a, enum hf_type b)
146 | {
147 |     switch (a) {
148 |         case HF32_NOT:
149 |         case HF32_BSWAP:
150 |         case HF32_XOR:
151 |         case HF32_MUL:
152 |         case HF32_ADD:
153 |         case HF32_ROT:
154 |         case HF64_NOT:
155 |         case HF64_BSWAP:
156 |         case HF64_XOR:
157 |         case HF64_MUL:
158 |         case HF64_ADD:
159 |         case HF64_ROT:
160 |             return a != b;
161 |         case HF32_XORL:
162 |         case HF32_XORR:
163 |         case HF32_ADDL:
164 |         case HF32_SUBL:
165 |         case HF64_XORL:
166 |         case HF64_XORR:
167 |         case HF64_ADDL:
168 |         case HF64_SUBL:
169 |             return 1;
170 |     }
171 |     abort();
172 | }
173 | 
174 | static void
175 | hf_genfunc(struct hf_op *ops, int n, int flags, uint64_t s[2])
176 | {
177 |     hf_gen(ops, s, flags);
178 |     for (int i = 1; i < n; i++) {
179 |         do {
180 |             hf_gen(ops + i, s, flags);
181 |         } while (!hf_type_valid(ops[i - 1].type, ops[i].type));
182 |     }
183 | }
184 | 
185 | /* Randomize the parameters of the given functoin.
186 |  */
187 | static void
188 | hf_randfunc(struct hf_op *ops, int n, uint64_t s[2])
189 | {
190 |     for (int i = 0; i < n; i++)
191 |         if (!(ops[i].flags & FOP_LOCKED))
192 |             hf_randomize(ops + i, s);
193 | }
194 | 
195 | static void
196 | hf_print(const struct hf_op *op, char *buf)
197 | {
198 |     unsigned long long c = op->constant;
199 |     switch (op->type) {
200 |         case HF32_NOT:
201 |         case HF64_NOT:
202 |             sprintf(buf, "x  = ~x;");
203 |             break;
204 |         case HF32_BSWAP:
205 |             sprintf(buf, "x  = __builtin_bswap32(x);");
206 |             break;
207 |         case HF64_BSWAP:
208 |             sprintf(buf, "x  = __builtin_bswap64(x);");
209 |             break;
210 |         case HF32_XOR:
211 |             sprintf(buf, "x ^= 0x%08llx;", c);
212 |             break;
213 |         case HF32_MUL:
214 |             sprintf(buf, "x *= 0x%08llx;", c);
215 |             break;
216 |         case HF32_ADD:
217 |             sprintf(buf, "x += 0x%08llx;", c);
218 |             break;
219 |         case HF32_ROT:
220 |             sprintf(buf, "x  = (x << %llu) | (x >> %lld);", c, 32 - c);
221 |             break;
222 |         case HF32_XORL:
223 |             sprintf(buf, "x ^= x << %llu;", c);
224 |             break;
225 |         case HF32_XORR:
226 |             sprintf(buf, "x ^= x >> %llu;", c);
227 |             break;
228 |         case HF32_ADDL:
229 |             sprintf(buf, "x += x << %llu;", c);
230 |             break;
231 |         case HF32_SUBL:
232 |             sprintf(buf, "x -= x << %llu;", c);
233 |             break;
234 |         case HF64_XOR:
235 |             sprintf(buf, "x ^= 0x%016llx;", c);
236 |             break;
237 |         case HF64_MUL:
238 |             sprintf(buf, "x *= 0x%016llx;", c);
239 |             break;
240 |         case HF64_ADD:
241 |             sprintf(buf, "x += 0x%016llx;", c);
242 |             break;
243 |         case HF64_ROT:
244 |             sprintf(buf, "x  = (x << %llu) | (x >> %lld);", c, 64 - c);
245 |             break;
246 |         case HF64_XORL:
247 |             sprintf(buf, "x ^= x << %llu;", c);
248 |             break;
249 |         case HF64_XORR:
250 |             sprintf(buf, "x ^= x >> %llu;", c);
251 |             break;
252 |         case HF64_ADDL:
253 |             sprintf(buf, "x += x << %llu;", c);
254 |             break;
255 |         case HF64_SUBL:
256 |             sprintf(buf, "x -= x << %llu;", c);
257 |             break;
258 |     }
259 | }
260 | 
261 | static void
262 | hf_printfunc(const struct hf_op *ops, int n, FILE *f)
263 | {
264 |     if (ops[0].type <= HF32_SUBL)
265 |         fprintf(f, "uint32_t\nhash(uint32_t x)\n{\n");
266 |     else
267 |         fprintf(f, "uint64_t\nhash(uint64_t x)\n{\n");
268 |     for (int i = 0; i < n; i++) {
269 |         char buf[64];
270 |         hf_print(ops + i, buf);
271 |         fprintf(f, "    %s\n", buf);
272 |     }
273 |     fprintf(f, "    return x;\n}\n");
274 | }
275 | 
276 | static unsigned char *
277 | hf_compile(const struct hf_op *ops, int n, unsigned char *buf)
278 | {
279 |     if (ops[0].type <= HF32_SUBL) {
280 |         /* mov eax, edi*/
281 |         *buf++ = 0x89;
282 |         *buf++ = 0xf8;
283 |     } else {
284 |         /* mov rax, rdi*/
285 |         *buf++ = 0x48;
286 |         *buf++ = 0x89;
287 |         *buf++ = 0xf8;
288 |     }
289 | 
290 |     for (int i = 0; i < n; i++) {
291 |         switch (ops[i].type) {
292 |             case HF32_NOT:
293 |                 /* not eax */
294 |                 *buf++ = 0xf7;
295 |                 *buf++ = 0xd0;
296 |                 break;
297 |             case HF32_BSWAP:
298 |                 /* bswap eax */
299 |                 *buf++ = 0x0f;
300 |                 *buf++ = 0xc8;
301 |                 break;
302 |             case HF32_XOR:
303 |                 /* xor eax, imm32 */
304 |                 *buf++ = 0x35;
305 |                 *buf++ = ops[i].constant >>  0;
306 |                 *buf++ = ops[i].constant >>  8;
307 |                 *buf++ = ops[i].constant >> 16;
308 |                 *buf++ = ops[i].constant >> 24;
309 |                 break;
310 |             case HF32_MUL:
311 |                 /* imul eax, eax, imm32 */
312 |                 *buf++ = 0x69;
313 |                 *buf++ = 0xc0;
314 |                 *buf++ = ops[i].constant >>  0;
315 |                 *buf++ = ops[i].constant >>  8;
316 |                 *buf++ = ops[i].constant >> 16;
317 |                 *buf++ = ops[i].constant >> 24;
318 |                 break;
319 |             case HF32_ADD:
320 |                 /* add eax, imm32 */
321 |                 *buf++ = 0x05;
322 |                 *buf++ = ops[i].constant >>  0;
323 |                 *buf++ = ops[i].constant >>  8;
324 |                 *buf++ = ops[i].constant >> 16;
325 |                 *buf++ = ops[i].constant >> 24;
326 |                 break;
327 |             case HF32_ROT:
328 |                 /* rol eax, imm8 */
329 |                 *buf++ = 0xc1;
330 |                 *buf++ = 0xc0;
331 |                 *buf++ = ops[i].constant;
332 |                 break;
333 |             case HF32_XORL:
334 |                 /* mov edi, eax */
335 |                 *buf++ = 0x89;
336 |                 *buf++ = 0xc7;
337 |                 /* shl edi, imm8 */
338 |                 *buf++ = 0xc1;
339 |                 *buf++ = 0xe7;
340 |                 *buf++ = ops[i].constant;
341 |                 /* xor eax, edi */
342 |                 *buf++ = 0x31;
343 |                 *buf++ = 0xf8;
344 |                 break;
345 |             case HF32_XORR:
346 |                 /* mov edi, eax */
347 |                 *buf++ = 0x89;
348 |                 *buf++ = 0xc7;
349 |                 /* shr edi, imm8 */
350 |                 *buf++ = 0xc1;
351 |                 *buf++ = 0xef;
352 |                 *buf++ = ops[i].constant;
353 |                 /* xor eax, edi */
354 |                 *buf++ = 0x31;
355 |                 *buf++ = 0xf8;
356 |                 break;
357 |             case HF32_ADDL:
358 |                 /* mov edi, eax */
359 |                 *buf++ = 0x89;
360 |                 *buf++ = 0xc7;
361 |                 /* shl edi, imm8 */
362 |                 *buf++ = 0xc1;
363 |                 *buf++ = 0xe7;
364 |                 *buf++ = ops[i].constant;
365 |                 /* add eax, edi */
366 |                 *buf++ = 0x01;
367 |                 *buf++ = 0xf8;
368 |                 break;
369 |             case HF32_SUBL:
370 |                 /* mov edi, eax */
371 |                 *buf++ = 0x89;
372 |                 *buf++ = 0xc7;
373 |                 /* shl edi, imm8 */
374 |                 *buf++ = 0xc1;
375 |                 *buf++ = 0xe7;
376 |                 *buf++ = ops[i].constant;
377 |                 /* sub eax, edi */
378 |                 *buf++ = 0x29;
379 |                 *buf++ = 0xf8;
380 |                 break;
381 |             case HF64_NOT:
382 |                 /* not rax */
383 |                 *buf++ = 0x48;
384 |                 *buf++ = 0xf7;
385 |                 *buf++ = 0xd0;
386 |                 break;
387 |             case HF64_BSWAP:
388 |                 /* bswap rax */
389 |                 *buf++ = 0x48;
390 |                 *buf++ = 0x0f;
391 |                 *buf++ = 0xc8;
392 |                 break;
393 |             case HF64_XOR:
394 |                 /* mov rdi, imm64 */
395 |                 *buf++ = 0x48;
396 |                 *buf++ = 0xbf;
397 |                 *buf++ = ops[i].constant >>  0;
398 |                 *buf++ = ops[i].constant >>  8;
399 |                 *buf++ = ops[i].constant >> 16;
400 |                 *buf++ = ops[i].constant >> 24;
401 |                 *buf++ = ops[i].constant >> 32;
402 |                 *buf++ = ops[i].constant >> 40;
403 |                 *buf++ = ops[i].constant >> 48;
404 |                 *buf++ = ops[i].constant >> 56;
405 |                 /* xor rax, rdi */
406 |                 *buf++ = 0x48;
407 |                 *buf++ = 0x31;
408 |                 *buf++ = 0xf8;
409 |                 break;
410 |             case HF64_MUL:
411 |                 /* mov rdi, imm64 */
412 |                 *buf++ = 0x48;
413 |                 *buf++ = 0xbf;
414 |                 *buf++ = ops[i].constant >>  0;
415 |                 *buf++ = ops[i].constant >>  8;
416 |                 *buf++ = ops[i].constant >> 16;
417 |                 *buf++ = ops[i].constant >> 24;
418 |                 *buf++ = ops[i].constant >> 32;
419 |                 *buf++ = ops[i].constant >> 40;
420 |                 *buf++ = ops[i].constant >> 48;
421 |                 *buf++ = ops[i].constant >> 56;
422 |                 /* imul rax, rdi */
423 |                 *buf++ = 0x48;
424 |                 *buf++ = 0x0f;
425 |                 *buf++ = 0xaf;
426 |                 *buf++ = 0xc7;
427 |                 break;
428 |             case HF64_ADD:
429 |                 /* mov rdi, imm64 */
430 |                 *buf++ = 0x48;
431 |                 *buf++ = 0xbf;
432 |                 *buf++ = ops[i].constant >>  0;
433 |                 *buf++ = ops[i].constant >>  8;
434 |                 *buf++ = ops[i].constant >> 16;
435 |                 *buf++ = ops[i].constant >> 24;
436 |                 *buf++ = ops[i].constant >> 32;
437 |                 *buf++ = ops[i].constant >> 40;
438 |                 *buf++ = ops[i].constant >> 48;
439 |                 *buf++ = ops[i].constant >> 56;
440 |                 /* add rax, rdi */
441 |                 *buf++ = 0x48;
442 |                 *buf++ = 0x01;
443 |                 *buf++ = 0xf8;
444 |                 break;
445 |             case HF64_ROT:
446 |                 /* rol rax, imm8 */
447 |                 *buf++ = 0x48;
448 |                 *buf++ = 0xc1;
449 |                 *buf++ = 0xc0;
450 |                 *buf++ = ops[i].constant;
451 |                 break;
452 |             case HF64_XORL:
453 |                 /* mov edi, eax */
454 |                 *buf++ = 0x48;
455 |                 *buf++ = 0x89;
456 |                 *buf++ = 0xc7;
457 |                 /* shl rdi, imm8 */
458 |                 *buf++ = 0x48;
459 |                 *buf++ = 0xc1;
460 |                 *buf++ = 0xe7;
461 |                 *buf++ = ops[i].constant;
462 |                 /* xor rax, rdi */
463 |                 *buf++ = 0x48;
464 |                 *buf++ = 0x31;
465 |                 *buf++ = 0xf8;
466 |                 break;
467 |             case HF64_XORR:
468 |                 /* mov rdi, rax */
469 |                 *buf++ = 0x48;
470 |                 *buf++ = 0x89;
471 |                 *buf++ = 0xc7;
472 |                 /* shr rdi, imm8 */
473 |                 *buf++ = 0x48;
474 |                 *buf++ = 0xc1;
475 |                 *buf++ = 0xef;
476 |                 *buf++ = ops[i].constant;
477 |                 /* xor rax, rdi */
478 |                 *buf++ = 0x48;
479 |                 *buf++ = 0x31;
480 |                 *buf++ = 0xf8;
481 |                 break;
482 |             case HF64_ADDL:
483 |                 /* mov rdi, rax */
484 |                 *buf++ = 0x48;
485 |                 *buf++ = 0x89;
486 |                 *buf++ = 0xc7;
487 |                 /* shl rdi, imm8 */
488 |                 *buf++ = 0x48;
489 |                 *buf++ = 0xc1;
490 |                 *buf++ = 0xe7;
491 |                 *buf++ = ops[i].constant;
492 |                 /* add rax, rdi */
493 |                 *buf++ = 0x48;
494 |                 *buf++ = 0x01;
495 |                 *buf++ = 0xf8;
496 |                 break;
497 |             case HF64_SUBL:
498 |                 /* mov rdi, rax */
499 |                 *buf++ = 0x48;
500 |                 *buf++ = 0x89;
501 |                 *buf++ = 0xc7;
502 |                 /* shl rdi, imm8 */
503 |                 *buf++ = 0x48;
504 |                 *buf++ = 0xc1;
505 |                 *buf++ = 0xe7;
506 |                 *buf++ = ops[i].constant;
507 |                 /* sub rax, rdi */
508 |                 *buf++ = 0x48;
509 |                 *buf++ = 0x29;
510 |                 *buf++ = 0xf8;
511 |                 break;
512 |         }
513 |     }
514 | 
515 |     /* ret */
516 |     *buf++ = 0xc3;
517 |     return buf;
518 | }
519 | 
/* Map one anonymous, private, read/write 4096-byte region to hold
 * generated code. Prints the error and exits if the mapping fails.
 */
static void *
execbuf_alloc(void)
{
    void *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (page != MAP_FAILED)
        return page;

    fprintf(stderr, "prospector: %s\n", strerror(errno));
    exit(EXIT_FAILURE);
}
532 | 
/* Whether the buffer must be toggled between writable and executable.
 * Decided on the first execbuf_lock call: DISABLED if the system grants
 * read+write+exec at once, ENABLED otherwise.
 */
static enum {
    WXR_UNKNOWN, WXR_ENABLED, WXR_DISABLED
} wxr_enabled = WXR_UNKNOWN;

/* Make the code buffer executable.
 *
 * The first call tries read+write+exec. If that is granted, the buffer
 * stays that way and later lock/unlock calls do nothing. Otherwise the
 * buffer is switched to read+exec here (and on every later call); a
 * failure to do so is fatal.
 */
static void
execbuf_lock(void *buf)
{
    if (wxr_enabled == WXR_DISABLED)
        return;

    if (wxr_enabled == WXR_UNKNOWN) {
        int rwx = PROT_READ | PROT_WRITE | PROT_EXEC;
        if (mprotect(buf, 4096, rwx) == 0) {
            wxr_enabled = WXR_DISABLED;
            return;
        }
        wxr_enabled = WXR_ENABLED;
    }

    if (mprotect(buf, 4096, PROT_READ | PROT_EXEC) != 0) {
        fprintf(stderr,
                "prospector: mprotect(PROT_EXEC) failed: %s\n",
                strerror(errno));
        exit(EXIT_FAILURE);
    }
}
560 | 
561 | static void
562 | execbuf_unlock(void *buf)
563 | {
564 |     switch (wxr_enabled) {
565 |         case WXR_UNKNOWN:
566 |             abort();
567 |         case WXR_ENABLED:
568 |             mprotect(buf, 4096, PROT_READ | PROT_WRITE);
569 |             break;
570 |         case WXR_DISABLED:
571 |             break;
572 |     }
573 | }
574 | 
575 | /* Higher quality is slower but has more consistent results. */
576 | static int score_quality = 18;
577 | 
578 | /* Measures how each input bit affects each output bit. This measures
579 |  * both bias and avalanche.
580 |  */
581 | static double
582 | estimate_bias32(uint32_t ABI (*f)(uint32_t), uint64_t rng[2])
583 | {
584 |     long n = 1L << score_quality;
585 |     long bins[32][32] = {{0}};
586 |     for (long i = 0; i < n; i++) {
587 |         uint32_t x = xoroshiro128plus(rng);
588 |         uint32_t h0 = f(x);
589 |         for (int j = 0; j < 32; j++) {
590 |             uint32_t bit = UINT32_C(1) << j;
591 |             uint32_t h1 = f(x ^ bit);
592 |             uint32_t set = h0 ^ h1;
593 |             for (int k = 0; k < 32; k++)
594 |                 bins[j][k] += (set >> k) & 1;
595 |         }
596 |     }
597 |     double mean = 0;
598 |     for (int j = 0; j < 32; j++) {
599 |         for (int k = 0; k < 32; k++) {
600 |             /* FIXME: normalize this somehow */
601 |             double diff = (bins[j][k] - n / 2) / (n / 2.0);
602 |             mean += (diff * diff) / (32 * 32);
603 |         }
604 |     }
605 |     return sqrt(mean) * 1000.0;
606 | }
607 | 
608 | static double
609 | estimate_bias64(uint64_t ABI (*f)(uint64_t), uint64_t rng[2])
610 | {
611 |     long n = 1L << score_quality;
612 |     long bins[64][64] = {{0}};
613 |     for (long i = 0; i < n; i++) {
614 |         uint64_t x = xoroshiro128plus(rng);
615 |         uint64_t h0 = f(x);
616 |         for (int j = 0; j < 64; j++) {
617 |             uint64_t bit = UINT64_C(1) << j;
618 |             uint64_t h1 = f(x ^ bit);
619 |             uint64_t set = h0 ^ h1;
620 |             for (int k = 0; k < 64; k++)
621 |                 bins[j][k] += (set >> k) & 1;
622 |         }
623 |     }
624 |     double mean = 0;
625 |     for (int j = 0; j < 64; j++) {
626 |         for (int k = 0; k < 64; k++) {
627 |             /* FIXME: normalize this somehow */
628 |             double diff = (bins[j][k] - n / 2) / (n / 2.0);
629 |             mean += (diff * diff) / (64 * 64);
630 |         }
631 |     }
632 |     return sqrt(mean) * 1000.0;
633 | }
634 | 
635 | #define EXACT_SPLIT 32  // must be power of two
636 | static double
637 | exact_bias32(uint32_t ABI (*f)(uint32_t))
638 | {
639 |     long long bins[32][32] = {{0}};
640 |     static const uint64_t range = (UINT64_C(1) << 32) / EXACT_SPLIT;
641 |     #pragma omp parallel for
642 |     for (int i = 0; i < EXACT_SPLIT; i++) {
643 |         long long b[32][32] = {{0}};
644 |         for (uint64_t x = i * range; x < (i + 1) * range; x++) {
645 |             uint32_t h0 = f(x);
646 |             for (int j = 0; j < 32; j++) {
647 |                 uint32_t bit = UINT32_C(1) << j;
648 |                 uint32_t h1 = f(x ^ bit);
649 |                 uint32_t set = h0 ^ h1;
650 |                 for (int k = 0; k < 32; k++)
651 |                     b[j][k] += (set >> k) & 1;
652 |             }
653 |         }
654 |         #pragma omp critical
655 |         for (int j = 0; j < 32; j++)
656 |             for (int k = 0; k < 32; k++)
657 |                 bins[j][k] += b[j][k];
658 |     }
659 |     double mean = 0.0;
660 |     for (int j = 0; j < 32; j++) {
661 |         for (int k = 0; k < 32; k++) {
662 |             double diff = (bins[j][k] - 2147483648L) / 2147483648.0;
663 |             mean += (diff * diff) / (32 * 32);
664 |         }
665 |     }
666 |     return sqrt(mean) * 1000.0;
667 | }
668 | 
/* Print the command-line synopsis and option summary to f. The synopsis
 * lists every option described below it, including -q.
 */
static void
usage(FILE *f)
{
    fprintf(f, "usage: prospector "
            "[-E|L|S] [-4|-8] [-ehs] [-l lib] [-p pattern] [-q n] "
            "[-r n:m] [-t x]\n");
    fprintf(f, " -4          Generate 32-bit hash functions (default)\n");
    fprintf(f, " -8          Generate 64-bit hash functions\n");
    fprintf(f, " -e          Measure bias exactly (requires -E)\n");
    fprintf(f, " -h          Print this help message\n");
    fprintf(f, " -l ./lib.so Load hash() from a shared object\n");
    fprintf(f, " -p pattern  Search only a given pattern\n");
    fprintf(f, " -q n        Score quality knob (12-30, default: 18)\n");
    fprintf(f, " -r n:m      Use between n and m operations [3:6]\n");
    fprintf(f, " -s          Don't use large constants\n");
    fprintf(f, " -t x        Initial score threshold [10.0]\n");
    fprintf(f, " -E          Single evaluation mode (requires -p or -l)\n");
    fprintf(f, " -S          Hash function search mode (default)\n");
    fprintf(f, " -L          Enumerate output mode (requires -p or -l)\n");
}
688 | 
689 | static int
690 | parse_operand(struct hf_op *op, char *buf)
691 | {
692 |     op->flags |= FOP_LOCKED;
693 |     switch (op->type) {
694 |         case HF32_NOT:
695 |         case HF64_NOT:
696 |         case HF32_BSWAP:
697 |         case HF64_BSWAP:
698 |             return 0;
699 |         case HF32_XOR:
700 |         case HF32_MUL:
701 |         case HF32_ADD:
702 |         case HF64_XOR:
703 |         case HF64_MUL:
704 |         case HF64_ADD:
705 |             op->constant = strtoull(buf, 0, 16);
706 |             return 1;
707 |         case HF32_ROT:
708 |         case HF32_XORL:
709 |         case HF32_XORR:
710 |         case HF32_ADDL:
711 |         case HF32_SUBL:
712 |         case HF64_ROT:
713 |         case HF64_XORL:
714 |         case HF64_XORR:
715 |         case HF64_ADDL:
716 |         case HF64_SUBL:
717 |             op->constant = atoi(buf);
718 |             return 1;
719 |     }
720 |     return 0;
721 | }
722 | 
723 | static int
724 | parse_template(struct hf_op *ops, int n, char *template, int flags)
725 | {
726 |     int c = 0;
727 |     int offset = flags & F_U64 ? HF64_XOR : 0;
728 | 
729 |     for (char *tok = strtok(template, ","); tok; tok = strtok(0, ",")) {
730 |         if (c == n) return 0;
731 |         int found = 0;
732 |         size_t operand = strcspn(tok, ":");
733 |         int sep = tok[operand];
734 |         tok[operand] = 0;
735 |         ops[c].flags = 0;
736 |         for (int i = 0; i < countof(hf_names); i++) {
737 |             if (!strcmp(hf_names[i] + 2, tok)) {
738 |                 found = 1;
739 |                 ops[c].type = i + offset;
740 |                 break;
741 |             }
742 |         }
743 |         if (!found)
744 |             return 0;
745 |         if (sep == ':' && !parse_operand(ops + c, tok + operand + 1))
746 |             return 0;
747 |         c++;
748 |     }
749 |     return c;
750 | }
751 | 
/* Load the shared object at path so and return the address of its
 * "hash" symbol. Prints a message and exits if the object cannot be
 * loaded or has no such symbol. The object is never unloaded.
 */
static void *
load_function(const char *so)
{
    void *lib = dlopen(so, RTLD_NOW);
    if (lib) {
        void *sym = dlsym(lib, "hash");
        if (sym)
            return sym;
        fprintf(stderr, "prospector: could not find 'hash' in %s\n", so);
    } else {
        fprintf(stderr, "prospector: could not load %s\n", so);
    }
    exit(EXIT_FAILURE);
}
767 | 
/* Current time from gettimeofday(), in microseconds. */
static uint64_t
uepoch(void)
{
    struct timeval now;
    gettimeofday(&now, NULL);

    long long usec = 1000000LL * now.tv_sec;
    usec += now.tv_usec;
    return usec;
}
775 | 
776 | int
777 | main(int argc, char **argv)
778 | {
779 |     int nops = 0;
780 |     int min = 3;
781 |     int max = 6;
782 |     int flags = 0;
783 |     int use_exact = 0;
784 |     double best = 100.0;
785 |     char *dynamic = 0;
786 |     char *template = 0;
787 |     struct hf_op ops[32];
788 |     void *buf = execbuf_alloc();
789 |     uint64_t rng[2] = {0x2a2bc037b59ff989, 0x6d7db86fa2f632ca};
790 | 
791 |     enum {MODE_SEARCH, MODE_EVAL, MODE_LIST} mode = MODE_SEARCH;
792 | 
793 |     int option;
794 |     while ((option = getopt(argc, argv, "48EehLl:q:r:st:p:")) != -1) {
795 |         switch (option) {
796 |             case '4':
797 |                 flags &= ~F_U64;
798 |                 break;
799 |             case '8':
800 |                 flags |= F_U64;
801 |                 break;
802 |             case 'E':
803 |                 mode = MODE_EVAL;
804 |                 break;
805 |             case 'e':
806 |                 use_exact = 1;
807 |                 break;
808 |             case 'h': usage(stdout);
809 |                 exit(EXIT_SUCCESS);
810 |                 break;
811 |             case 'L':
812 |                 mode = MODE_LIST;
813 |                 break;
814 |             case 'l':
815 |                 dynamic = optarg;
816 |                 break;
817 |             case 'p':
818 |                 template = optarg;
819 |                 break;
820 |             case 'r':
821 |                 if (sscanf(optarg, "%d:%d", &min, &max) != 2 ||
822 |                     min < 1 || max > countof(ops) || min > max) {
823 |                     fprintf(stderr, "prospector: invalid range (-r): %s\n",
824 |                             optarg);
825 |                     exit(EXIT_FAILURE);
826 |                 }
827 |                 break;
828 |             case 'q':
829 |                 score_quality = atoi(optarg);
830 |                 if (score_quality < 12 || score_quality > 30) {
831 |                     fprintf(stderr, "prospector: invalid quality: %s\n",
832 |                             optarg);
833 |                     exit(EXIT_FAILURE);
834 |                 }
835 |                 break;
836 |             case 'S':
837 |                 mode = MODE_SEARCH;
838 |                 break;
839 |             case 's':
840 |                 flags |= F_TINY;
841 |                 break;
842 |             case 't':
843 |                 best = strtod(optarg, 0);
844 |                 break;
845 |             default:
846 |                 usage(stderr);
847 |                 exit(EXIT_FAILURE);
848 |         }
849 |     }
850 | 
851 |     /* Get a unique seed */
852 |     FILE *urandom = fopen("/dev/urandom", "rb");
853 |     if (urandom) {
854 |         if (!fread(rng, sizeof(rng), 1, urandom)) {
855 |             fputs("prospector: failed to read /dev/urandom\n", stderr);
856 |             exit(EXIT_FAILURE);
857 |         }
858 |         fclose(urandom);
859 |     }
860 | 
861 |     if (template) {
862 |         nops = parse_template(ops, countof(ops), template, flags);
863 |         if (!nops) {
864 |             fprintf(stderr, "prospector: invalid template\n");
865 |             exit(EXIT_FAILURE);
866 |         }
867 |     }
868 | 
869 |     if (mode == MODE_EVAL) {
870 |         double bias;
871 |         void *hashptr = 0;
872 |         if (template) {
873 |             hf_randfunc(ops, nops, rng);
874 |             hf_compile(ops, nops, buf);
875 |             execbuf_lock(buf);
876 |             hashptr = buf;
877 |         } else if (dynamic) {
878 |             hashptr = load_function(dynamic);
879 |         } else {
880 |             fprintf(stderr, "prospector: must supply -p or -l\n");
881 |             exit(EXIT_FAILURE);
882 |         }
883 | 
884 |         uint64_t nhash;
885 |         uint64_t beg = uepoch();
886 |         if (flags & F_U64) {
887 |             uint64_t ABI (*hash)(uint64_t) = hashptr;
888 |             if (use_exact)
889 |                 fputs("warning: no exact bias for 64-bit\n", stderr);
890 |             bias = estimate_bias64(hash, rng);
891 |             nhash = (1L << score_quality) * 33;
892 |         } else {
893 |             uint32_t ABI (*hash)(uint32_t) = hashptr;
894 |             if (use_exact) {
895 |                 bias = exact_bias32(hash);
896 |                 nhash = (1LL << 32) * 33;
897 |             } else {
898 |                 bias = estimate_bias32(hash, rng);
899 |                 nhash = (1L << score_quality) * 65;
900 |             }
901 |         }
902 |         uint64_t end = uepoch();
903 |         printf("bias      = %.17g\n", bias);
904 |         printf("speed     = %.3f nsec / hash\n", (end - beg) * 1000.0 / nhash);
905 |         return 0;
906 |     }
907 | 
908 |     if (mode == MODE_LIST) {
909 |         void *hashptr = 0;
910 |         if (template) {
911 |             hf_randfunc(ops, nops, rng);
912 |             hf_compile(ops, nops, buf);
913 |             execbuf_lock(buf);
914 |             hashptr = buf;
915 |         } else if (dynamic) {
916 |             hashptr = load_function(dynamic);
917 |         } else {
918 |             fprintf(stderr, "prospector: must supply -p or -l\n");
919 |             exit(EXIT_FAILURE);
920 |         }
921 | 
922 |         if (flags & F_U64) {
923 |             uint64_t ABI (*hash)(uint64_t) = hashptr;
924 |             uint64_t i = 0;
925 |             do
926 |                 printf("%016llx %016llx\n",
927 |                         (unsigned long long)i,
928 |                         (unsigned long long)hash(i));
929 |             while (++i);
930 |         } else {
931 |             uint32_t ABI (*hash)(uint32_t) = hashptr;
932 |             uint32_t i = 0;
933 |             do
934 |                 printf("%08lx %08lx\n",
935 |                         (unsigned long)i,
936 |                         (unsigned long)hash(i));
937 |             while (++i);
938 |         }
939 |         return 0;
940 |     }
941 | 
942 |     for (;;) {
943 |         /* Generate */
944 |         if (template) {
945 |             hf_randfunc(ops, nops, rng);
946 |         } else {
947 |             nops = min + xoroshiro128plus(rng) % (max - min + 1);
948 |             hf_genfunc(ops, nops, flags, rng);
949 |         }
950 | 
951 |         /* Evaluate */
952 |         double score;
953 |         hf_compile(ops, nops, buf);
954 |         execbuf_lock(buf);
955 |         if (flags & F_U64) {
956 |             uint64_t ABI (*hash)(uint64_t) = (void *)buf;
957 |             score = estimate_bias64(hash, rng);
958 |         } else {
959 |             uint32_t ABI (*hash)(uint32_t) = (void *)buf;
960 |             score = estimate_bias32(hash, rng);
961 |         }
962 |         execbuf_unlock(buf);
963 | 
964 |         /* Compare */
965 |         if (score < best) {
966 |             printf("// score = %.17g\n", score);
967 |             hf_printfunc(ops, nops, stdout);
968 |             fflush(stdout);
969 |             best = score;
970 |         }
971 |     }
972 | }
973 | 


--------------------------------------------------------------------------------
/tests/degski64.c:
--------------------------------------------------------------------------------
 1 | /* degski 64-bit hash (same xorshift-multiply structure as the H2 32-bit hash)
 2 |  * https://github.com/h2database/h2database
 3 |  * src/test/org/h2/test/store/CalculateHashConstant.java
 4 |  */
 5 | #include <stdint.h>
 6 | 
 7 | __attribute__((sysv_abi))
 8 | uint64_t
 9 | hash(uint64_t x)
10 | {
11 |     x ^= x >> 32;
12 |     x *= 0xd6e8feb86659fd93;
13 |     x ^= x >> 32;
14 |     x *= 0xd6e8feb86659fd93;
15 |     x ^= x >> 32;
16 |     return x;
17 | }
18 | 
/* Inverse of hash(): unhash(hash(x)) == x for every 64-bit x.
 *
 * The step x ^= x >> 32 is its own inverse, and 0xcfee444d8b59a89b is
 * the multiplicative inverse of hash()'s multiplier modulo 2^64, so
 * running the same step sequence with this constant undoes hash().
 */
__attribute__((sysv_abi))
uint64_t
unhash(uint64_t x)
{
    const uint64_t inv = 0xcfee444d8b59a89b;
    int round;

    for (round = 0; round < 2; round++) {
        x = (x ^ (x >> 32)) * inv;
    }
    return x ^ (x >> 32);
}
30 | 


--------------------------------------------------------------------------------
/tests/h2hash32.c:
--------------------------------------------------------------------------------
 1 | /* H2 32-bit hash
 2 |  * https://github.com/h2database/h2database
 3 |  * src/test/org/h2/test/store/CalculateHashConstant.java
 4 |  */
 5 | #include <stdint.h>
 6 | 
 7 | // exact bias: 1.4249702882580686
 8 | __attribute__((sysv_abi))
 9 | uint32_t
10 | hash(uint32_t x)
11 | {
12 |     x ^= x >> 16;
13 |     x *= 0x45d9f3b;
14 |     x ^= x >> 16;
15 |     x *= 0x45d9f3b;
16 |     x ^= x >> 16;
17 |     return x;
18 | }
19 | 
/* Inverse of hash(): unhash(hash(x)) == x for every 32-bit x.
 *
 * The step x ^= x >> 16 is its own inverse, and 0x119de1f3 is the
 * multiplicative inverse of 0x45d9f3b modulo 2^32, so running the same
 * step sequence with this constant undoes hash().
 */
__attribute__((sysv_abi))
uint32_t
unhash(uint32_t x)
{
    const uint32_t inv = 0x119de1f3;
    int round;

    for (round = 0; round < 2; round++) {
        x = (x ^ (x >> 16)) * inv;
    }
    return x ^ (x >> 16);
}
31 | 


--------------------------------------------------------------------------------
/tests/hash32shift.c:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | // exact bias: 44.000700486813841
 4 | __attribute__((sysv_abi))
 5 | uint32_t
 6 | hash(uint32_t x)
 7 | {
 8 |     x  = ~x + (x << 15);
 9 |     x ^= x >> 12;
10 |     x += x << 2;
11 |     x ^= x >> 4;
12 |     x *= 2057;
13 |     x ^= x >> 16;
14 |     return x;
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/murmurhash3_finalizer32.c:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | // exact bias: 0.26398543281818287
 4 | __attribute__((sysv_abi))
 5 | uint32_t
 6 | hash(uint32_t x)
 7 | {
 8 |     x ^= x >> 16;
 9 |     x *= 0x85ebca6b;
10 |     x ^= x >> 13;
11 |     x *= 0xc2b2ae35;
12 |     x ^= x >> 16;
13 |     return x;
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/splitmix64.c:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | __attribute__((sysv_abi))
 4 | uint64_t
 5 | hash(uint64_t x)
 6 | {
 7 |     x += 0x9e3779b97f4a7c15;
 8 |     x ^= (x >> 30);
 9 |     x *= 0xbf58476d1ce4e5b9;
10 |     x ^= (x >> 27);
11 |     x *= 0x94d049bb133111eb;
12 |     x ^= (x >> 31);
13 |     return x;
14 | }
15 | 


--------------------------------------------------------------------------------