├── .gitignore
├── LICENSE
├── README.md
├── SMHasher
    ├── AvalancheTest.cpp
    ├── AvalancheTest.h
    ├── Bitslice.cpp
    ├── Bitvec.cpp
    ├── Bitvec.h
    ├── CMakeLists.txt
    ├── DifferentialTest.cpp
    ├── DifferentialTest.h
    ├── FarshTest.cpp
    ├── Hashes.cpp
    ├── Hashes.h
    ├── KeysetTest.cpp
    ├── KeysetTest.h
    ├── MurmurHash
    │   ├── MurmurHash3.cpp
    │   └── MurmurHash3.h
    ├── Platform.cpp
    ├── Platform.h
    ├── Poly1305Test.cpp
    ├── Random.cpp
    ├── Random.h
    ├── SHA1
    │   ├── sha1.cpp
    │   └── sha1.h
    ├── SpeedTest.cpp
    ├── SpeedTest.h
    ├── SpookyHash
    │   ├── SpookyV2.cpp
    │   └── SpookyV2.h
    ├── SpookyHashTest.cpp
    ├── Stats.cpp
    ├── Stats.h
    ├── Types.cpp
    ├── Types.h
    ├── UHashTest.cpp
    ├── UMAC
    │   ├── rijndael-alg-fst.c
    │   ├── rijndael-alg-fst.h
    │   ├── umac.c
    │   └── umac.h
    ├── VHashTest.cpp
    ├── VMAC
    │   ├── vmac.c
    │   └── vmac.h
    ├── compile.cmd
    ├── crc.cpp
    ├── main.cpp
    ├── poly1305
    │   ├── poly1305.c
    │   └── poly1305.h
    ├── pstdint.h
    ├── reports
    │   ├── MurMur3c_x86_128.txt
    │   ├── MurMur3c_x86_32.txt
    │   ├── SlowZZH128.txt
    │   ├── SlowZZH32.txt
    │   ├── SlowZZH64.txt
    │   ├── Spooky128.txt
    │   ├── Spooky32.txt
    │   ├── Spooky64.txt
    │   ├── VHash128.txt
    │   ├── VHash64.txt
    │   ├── XXH32.txt
    │   ├── XXH64.txt
    │   ├── ZZH128.txt
    │   ├── ZZH32.txt
    │   ├── ZZH64.txt
    │   ├── crc32.txt
    │   ├── sha1_128.txt
    │   ├── sha1_32.txt
    │   ├── sha1_64.txt
    │   ├── smhasher-ModXXH32-report.txt
    │   ├── smhasher-ModXXH64-report.txt
    │   ├── smhasher-SimdZZH32-report.txt
    │   ├── smhasher-SimdZZH64-report.txt
    │   ├── smhasher-SlowWideZZH32-report.txt
    │   ├── smhasher-SlowWideZZH64-report.txt
    │   ├── smhasher-SlowZZH32-report.txt
    │   ├── smhasher-SlowZZH64-report.txt
    │   ├── smhasher-Spooky128-report.txt
    │   ├── smhasher-Spooky32-report.txt
    │   ├── smhasher-Spooky64-report.txt
    │   ├── smhasher-WideZZH32-report.txt
    │   ├── smhasher-WideZZH64-report.txt
    │   ├── smhasher-XXH32-report.txt
    │   ├── smhasher-XXH64-report.txt
    │   ├── smhasher-ZZH128-2cycles.txt
    │   ├── smhasher-ZZH128-report.txt
    │   ├── smhasher-ZZH32-2cycles.txt
    │   ├── smhasher-ZZH32-report.txt
    │   ├── smhasher-ZZH64-2cycles.txt
    │   ├── smhasher-ZZH64-report.txt
    │   ├── smhasher-farsh128-report.txt
    │   ├── smhasher-farsh256-report.txt
    │   ├── smhasher-farsh32-report.txt
    │   ├── smhasher-farsh64-report.txt
    │   ├── smhasher-murmur3a_x86_32-report.txt
    │   ├── smhasher-murmur3c_x86_128-report.txt
    │   ├── smhasher-murmur3f_x64_128-report.txt
    │   ├── smhasher-poly1305-report.txt
    │   ├── smhasher-uhash128-report.txt
    │   ├── smhasher-uhash32-report.txt
    │   ├── smhasher-uhash64-report.txt
    │   └── smhasher-vhash64-report.txt
    ├── sha1.cpp
    ├── xxHash
    │   ├── xxhash.c
    │   └── xxhash.h
    └── xxHashTest.cpp
├── asm-listings
    ├── gcc-x64-avx2.lst
    ├── gcc-x64-nosimd.lst
    ├── gcc-x64.lst
    ├── gcc-x86-avx2.lst
    ├── gcc-x86-sse2.lst
    ├── gcc-x86.lst
    └── make-listings.cmd
├── benchmark
    ├── CpuID.h
    ├── compile-CpuID.cmd
    ├── compile-other.cmd
    ├── compile.cmd
    ├── main.cpp
    ├── runme.cmd
    └── timer.h
├── farsh.c
└── farsh.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | *.obj
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | 
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 | 
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 | 
30 | 0/*
31 | 1/*
32 | 2/*
33 | 3/*
34 | 4/*
35 | 5/*
36 | 6/*
37 | 7/*
38 | 8/*
39 | 9/*
40 | keys/
41 | asm-listings/aaa.cmd
42 | benchmark/aaa.cmd
43 | SMHasher/aaa.cmd
44 |    
45 | SMHasher/128.cmd
46 | SMHasher/32.cmd
47 | SMHasher/64.cmd
48 | SMHasher/33.cmd
49 | SMHasher/sp.cmd
50 | SMHasher/mur.cmd
51 | SMHasher/all.cmd


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015-16 Bulat Ziganshin <bulat.ziganshin@gmail.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | FARSH stands for Fast and Reliable (but not Secure) 32-bit Hash.
  2 | While it has established [new speed records](#benchmark)
  3 | and [successfully passed](SMHasher/reports/smhasher-farsh32-report.txt) the [SMHasher] testsuite,
  4 | it's not as reliable as the [competition](#competition).
  5 | [Discussion and additional benchmarks](http://encode.ru/threads/2213-FARSH-hashing-30-GB-s!).
  6 | 
  7 | # Features / to-do list
  8 | - [x] compute hashes up to 1024 bits long
  9 | - [x] hashing with user-supplied key material
 10 | - [x] [successfully passed](SMHasher/reports/smhasher-farsh32-report.txt) the [SMHasher] testsuite
 11 | - [ ] even faster and better quality hash mixing
 12 | - [x] SSE2/AVX2 manually-optimized main loop
 13 | - [x] 16-byte aligned key material and (optionally) input data for maximum speed on older CPUs
 14 | - [ ] manual unrolling of main loop (since msvc/icl can't do it themselves) or asm code
 15 | - [ ] try PSLLQ instead of PSHUFD in SSE2 code to [improve speed on older CPUs](http://encode.ru/threads/2213-FARSH-hashing-30-GB-s!?p=43983&viewfull=1#post43983)
 16 | - [ ] `farsh_init/farsh_update/farsh_result` streaming API
 17 | - [ ] `farsh64*/farsh128*` APIs for faster computation of multi-word hashes
 18 | - [ ] `SSE2/AVX2/NEON?` options in the API (+alignment check for SSE2) for selection of the code path instead of compile-time choice
 19 | 
 20 | 
 21 | # [API](farsh.h)
 22 | - `uint32_t farsh(void *data, size_t size, uint64_t seed)`
 23 | returns 32-bit hash of the buffer
 24 | - `void farsh_n(void *data, size_t size, int k, int n, uint64_t seed, void *hash)`
 25 | computes `n` 32-bit hashes starting with the hash number `k`, storing results to the `hash` buffer.
 26 | It's `n` times slower than computing a single 32-bit hash.
 27 | Hash computed by the `farsh` function has number 0. The function aborts if `k+n > 32`.
 28 | - `uint32_t farsh_keyed(void *data, size_t size, void *key, uint64_t seed)`
 29 | computes a 32-bit hash using `key`, which should be 1024 bytes long and aligned to a 16-byte boundary.
 30 | - `void farsh_keyed_n(void *data, size_t size, void *key, int n, uint64_t seed, void *hash)`
 31 | computes `n` 32-bit hashes using `key`, storing results to the `hash` buffer.
 32 | `key` should be `1024+16*(n-1)` bytes long and aligned to 16-byte boundary.
 33 | - Hash functions accept 64-bit `seed` that can be used to "personalize" the hash value. Use seed==0 if you don't need that feature.
 34 | Seeding may have lower quality than in the [competition](#competition) since the seed value is mixed with block hashes rather than raw data.
 35 | - Header file provides symbolic names for the above-mentioned constants:
 36 | `FARSH_MAX_HASHES == 32,  FARSH_BASE_KEY_SIZE == 1024,  FARSH_BASE_KEY_ALIGNMENT == 16,  FARSH_EXTRA_KEY_SIZE == 16`
 37 | 
 38 | 
 39 | # Internals
 40 | The current FARSH version combines two hashing algorithms.
 41 | 
 42 | Low-level hashing algorithm splits all input data into 1024-byte blocks and computes hash value for every block.
 43 | It's the very short cycle borrowed from [UHASH] that combines 1024 bytes of input data with 1024 bytes of key material.
 44 | The hash value returned by this cycle is 64 bits long, and the [UMAC thesis] proved that it has 32 bits of entropy.
 45 | So the low-level algorithm compresses each 1024-byte block of input data into 64-bit value carrying 32 bits of entropy.
 46 | 
 47 | High-level hashing algorithm is a stripped-down version of [xxHash64]. It receives a sequence of 64-bit values from the previous level
 48 | and combines them into final 32-bit hash result. Since the original [xxHash64] algorithm successfully passes all [SMHasher] tests
 49 | while computing 64-bit hash from raw data, it's no surprise that modified algorithm is able to compute high-quality 32-bit hash
 50 | from the sequence of numbers each carrying 32 bits of entropy.
 51 | 
 52 | The power of the FARSH algorithm comes from its inner cycle, that is very short (read: fast) and allows highly-parallel implementations,
 53 | so it can fully exploit power of multi-core, SIMD, VLIW and SIMT (GPU) architectures.
 54 | At the same time, there is a mathematical proof that it can deliver 32 bits of entropy, so we can use it without any doubts.
 55 | 
 56 | 
 57 | ## Universal hashing
 58 | Main loop uses [universal hashing] formula from [UMAC] with a precomputed key material of 1024 bytes (plus 512 bytes for longer hashes).
 59 | FARSH is essentially [UHASH] with the higher-level hashing algorithms replaced with a simpler non-cryptographic one.
 60 | The universal hashing formula used here (and copied intact from UMAC) is as simple as
 61 | ```C
 62 |     uint64_t sum = 0;  uint32_t *data, *key;
 63 |     for (i=0; i < elements; i+=2)
 64 |         sum  +=  uint64_t(data[i] + key[i]) * (data[i+1] + key[i+1]);
 65 | ```
 66 | 
 67 | ## The main loop
 68 | - [Source code](farsh.c#L28)
 69 | - Asm code (can be found by searching for adcl+mull/pmuludq instructions)
 70 |   - [gcc -O3 -funroll-loops -m32](asm-listings/gcc-x86.lst#L340)
 71 |   - [gcc -O3 -funroll-loops -m32 -msse2 -DFARSH_SSE2](asm-listings/gcc-x86-sse2.lst#L349)
 72 |   - [gcc -O3 -funroll-loops -m32 -mavx2 -DFARSH_AVX2](asm-listings/gcc-x86-avx2.lst#L350)
 73 |   - [gcc -O3 -funroll-loops -m64        -DFARSH_SSE2](asm-listings/gcc-x64.lst#L252)
 74 |   - [gcc -O3 -funroll-loops -m64 -mavx2 -DFARSH_AVX2](asm-listings/gcc-x64-avx2.lst#L259)
 75 | 
 76 | 
 77 | # Benchmark
 78 | [Benchmark](benchmark) measures overall hash speed as well as internal loop speed.
 79 | The internal loop speed is a hard limit for the speed of any future FARSH version,
 80 | while the overall speed includes time required for pretty slow high-level hashing.
 81 | Future versions should replace it with a faster algorithm still satisfying the [SMHasher] requirements,
 82 | making overall hash speed within 10% of the internal loop speed.
 83 | 
 84 | Executables were [compiled](benchmark/compile.cmd) with GCC 4.9.2.
 85 | Aligned versions make sure that data being hashed are 64-byte aligned,
 86 | unaligned versions make sure that data are unaligned.
 87 | This makes big difference on Core2 and older Intel CPUs.
 88 | 
 89 | [Intel Haswell i7-4770 3.9 GHz (AVX2)](http://ark.intel.com/products/75122/Intel-Core-i7-4770-Processor-8M-Cache-up-to-3_90-GHz),
 90 | other IvyBridge to Skylake CPUs have pretty close performance/GHz:
 91 | 
 92 | Executable                |  FARSH 0.2 speed             |  Internal loop speed
 93 | --------------------------|-----------------------------:|----------------------------:
 94 | aligned-farsh-x64-avx2    |  54.536 GB/s = 50.790 GiB/s  |  65.645 GB/s = 61.137 GiB/s
 95 | aligned-farsh-x64         |  31.162 GB/s = 29.022 GiB/s  |  35.722 GB/s = 33.269 GiB/s
 96 | aligned-farsh-x86-avx2    |  40.279 GB/s = 37.513 GiB/s  |  61.682 GB/s = 57.446 GiB/s
 97 | aligned-farsh-x86-sse2    |  25.221 GB/s = 23.489 GiB/s  |  33.584 GB/s = 31.277 GiB/s
 98 | aligned-farsh-x86         |   6.255 GB/s =  5.825 GiB/s  |   6.336 GB/s =  5.901 GiB/s
 99 | ||
100 | farsh-x64-avx2            |  46.024 GB/s = 42.863 GiB/s  |  64.967 GB/s = 60.505 GiB/s
101 | farsh-x64                 |  30.335 GB/s = 28.252 GiB/s  |  34.891 GB/s = 32.495 GiB/s
102 | farsh-x86-avx2            |  35.273 GB/s = 32.851 GiB/s  |  57.252 GB/s = 53.320 GiB/s
103 | farsh-x86-sse2            |  24.502 GB/s = 22.820 GiB/s  |  33.325 GB/s = 31.037 GiB/s
104 | farsh-x86                 |   6.283 GB/s =  5.852 GiB/s  |   6.763 GB/s =  6.299 GiB/s
105 | 
106 | 
107 | [Intel Pentium M processor 1.5 GHz (SSE2)](http://ark.intel.com/products/27576/Intel-Pentium-M-Processor-1_50-GHz-1M-Cache-400-MHz-FSB):
108 | 
109 | Executable                |  FARSH 0.2 speed             |  Internal loop speed
110 | --------------------------|-----------------------------:|----------------------------:
111 | aligned-farsh-x86-sse2    |   2.625 GB/s = 2.444 GiB/s   |   2.791 GB/s = 2.5 GiB/s
112 | aligned-farsh-x86         |   1.664 GB/s = 1.550 GiB/s   |   1.946 GB/s = 1.8 GiB/s
113 | ||
114 | farsh-x86-sse2            |   2.025 GB/s = 1.886 GiB/s   |   2.302 GB/s = 2.1 GiB/s
115 | farsh-x86                 |   1.471 GB/s = 1.370 GiB/s   |   1.715 GB/s = 1.5 GiB/s
116 | 
117 | 
118 | K10: [AMD Athlon II X2 220 Processor 2.8 GHz (SSE3)](http://www.cpu-world.com/CPUs/K10/AMD-Athlon%20II%20X2%20220%20-%20ADX220OCK22GM.html):
119 | 
120 | Executable                |  FARSH 0.2 speed             |  Internal loop speed
121 | --------------------------|-----------------------------:|----------------------------:
122 | aligned-farsh-x64         |  11.300 GB/s = 10.524 GiB/s  |  14.446 GB/s = 13.454 GiB/s
123 | aligned-farsh-x86-sse2    |  10.899 GB/s = 10.151 GiB/s  |  13.280 GB/s = 12.368 GiB/s
124 | aligned-farsh-x86         |   3.805 GB/s =  3.544 GiB/s  |   5.089 GB/s =  4.740 GiB/s
125 | ||
126 | farsh-x64                 |  12.823 GB/s = 11.943 GiB/s  |  14.187 GB/s = 13.212 GiB/s
127 | farsh-x86-sse2            |  10.933 GB/s = 10.182 GiB/s  |  12.389 GB/s = 11.538 GiB/s
128 | farsh-x86                 |   3.786 GB/s =  3.526 GiB/s  |   5.825 GB/s =  5.425 GiB/s
129 | 
130 | 
131 | Piledriver: [AMD A8-5500 APU 3.7 GHz (AVX)](http://www.cpu-world.com/CPUs/Bulldozer/AMD-A8-Series%20A8-5500.html):
132 | 
133 | Executable                |  FARSH 0.2 speed             |  Internal loop speed
134 | --------------------------|-----------------------------:|----------------------------:
135 | aligned-farsh-x64         |  17.130 GB/s = 15.953 GiB/s  |  21.394 GB/s = 19.924 GiB/s
136 | aligned-farsh-x86-sse2    |  13.790 GB/s = 12.843 GiB/s  |  20.830 GB/s = 19.400 GiB/s
137 | aligned-farsh-x86         |   3.872 GB/s =  3.606 GiB/s  |   5.457 GB/s =  5.082 GiB/s
138 | ||
139 | farsh-x64                 |  15.313 GB/s = 14.262 GiB/s  |  19.659 GB/s = 18.309 GiB/s
140 | farsh-x86-sse2            |  13.812 GB/s = 12.863 GiB/s  |  18.977 GB/s = 17.674 GiB/s
141 | farsh-x86                 |   3.959 GB/s =  3.687 GiB/s  |   5.056 GB/s =  4.709 GiB/s
142 | 
143 | More results and benchmarking executables are available in those [forum posts](http://encode.ru/threads/2213-FARSH-hashing-30-GB-s!?p=48907&viewfull=1#post48907).
144 | 
145 | 
146 | 
147 | # Competition
148 | Fast non-cryptographic hashes:
149 | - [MumHash](https://github.com/vnmakarov/mum-hash) (2016)
150 | - [HighwayHash](https://github.com/google/highwayhash) (2016)
151 | - [CLHash](http://lemire.me/blog/2015/10/26/crazily-fast-hashing-with-carry-less-multiplications),
152 | even [faster with Broadwell](http://lemire.me/blog/2015/12/24/your-software-should-follow-your-hardware-the-clhash-example) (2015)
153 | - [MetroHash](https://github.com/jandrewrogers/MetroHash) (2015)
154 | - Go language [32-bit](https://github.com/golang/go/blob/master/src/runtime/hash32.go) and [64-bit](https://github.com/golang/go/blob/master/src/runtime/hash64.go) hashes (2014)
155 | - [xxHash][xxHash] (2012) and [xxHash64][xxHash64] (2014)
156 | - [SpookyHash](http://burtleburtle.net/bob/hash/spooky.html): a 128-bit noncryptographic hash (2012)
157 | - The [CityHash](https://github.com/google/cityhash) family of hash functions (2011)
158 | - [MurmurHash3](https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp) (2011)
159 | - [Hasshe2](http://cessu.blogspot.ru/2008/11/hashing-with-sse2-revisited-or-my-hash.html) by Cessu (2008)
160 | 
161 | Further reading:
162 | - [More info](https://github.com/aappleby/smhasher/wiki/SMHasher) about the [SMHasher] testsuite
163 | - [A lot of hashes](https://github.com/rurban/smhasher) tested by SMHasher (see doc subdir)
164 | - Interesting historical [overview](http://blog.reverberate.org/2012/01/state-of-hash-functions-2012.html)
165 | - [SuperFastHash](http://www.azillionmonkeys.com/qed/hash.html)
166 | - Bob Jenkins [1997 Dr Dobbs article](http://www.burtleburtle.net/bob/hash/doobs.html) and its [extended version](http://burtleburtle.net/bob/hash/evahash.html)
167 | 
168 | MAC/PRF, i.e. cryptographically secure keyed hashes:
169 | - [UMAC] and [VMAC]
170 | - The [Poly1305-AES](https://en.wikipedia.org/wiki/Poly1305) message-authentication code
171 | - [SipHash](https://131002.net/siphash/)
172 | - Cryptanalysis of [CityHash64, MurmurHash](https://131002.net/siphash/#at) and [xxHash](http://crypto.stackexchange.com/questions/6408/from-hash-to-cryptographic-hash)
173 | 
174 | 
175 | 
176 | [VMAC]: http://en.wikipedia.org/wiki/VMAC
177 | [UMAC]: http://en.wikipedia.org/wiki/UMAC
178 | [UMAC thesis]: http://fastcrypto.org/umac/umac_thesis.pdf
179 | [UHASH]: https://tools.ietf.org/html/rfc4418#section-5
180 | [universal hashing]: http://en.wikipedia.org/wiki/Universal_hashing
181 | [xxHash]: https://github.com/Cyan4973/xxHash
182 | [xxHash64]: https://github.com/Cyan4973/xxHash
183 | [SMHasher]: https://github.com/aappleby/smhasher
184 | 


--------------------------------------------------------------------------------
/SMHasher/AvalancheTest.cpp:
--------------------------------------------------------------------------------
 1 | #include "AvalancheTest.h"
 2 | 
 3 | //-----------------------------------------------------------------------------
 4 | 
 5 | // Prints an x-by-y character map of the counters in 'bins'. The cell in
 6 | // column j of row k is read from bins[k + j*y]; rows are printed from
 7 | // k = y-1 down to k = 0. Each cell shows |2*bin/reps - 1| * scale as a
 8 | // symbol: '.' below 0.1, '1'..'9' for each further tenth, 'X' from 1.0 up.
 9 | void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
10 | {
11 |   static const char glyphs[] = ".123456789X";
12 |   const double trials = double(reps);
13 | 
14 |   for(int row = y - 1; row >= 0; row--)
15 |   {
16 |     printf("[");
17 | 
18 |     for(int col = 0; col < x; col++)
19 |     {
20 |       const double freq  = double(bins[row + (col*y)]) / trials;
21 |       const double level = fabs(freq*2 - 1) * scale;
22 | 
23 |       // Quantize to tenths, then clamp into the glyph table
24 |       int idx = (int)floor(level*10);
25 |       if(idx > 10) idx = 10;
26 |       if(idx < 0) idx = 0;
27 | 
28 |       printf("%c",glyphs[idx]);
29 |     }
30 | 
31 |     printf("]\n");
32 |   }
33 | }
34 | 
35 | //----------------------------------------------------------------------------
36 | 
37 | // Largest bias over all counters. A counter equal to exactly half of 'reps'
38 | // has bias 0; a counter equal to 0 or to 'reps' has bias 1.
39 | double maxBias ( std::vector<int> & counts, int reps )
40 | {
41 |   const double trials = double(reps);
42 |   double peak = 0;
43 | 
44 |   typedef std::vector<int>::const_iterator count_iter;
45 | 
46 |   for(count_iter it = counts.begin(); it != counts.end(); ++it)
47 |   {
48 |     // Map the ratio count/reps in [0,1] onto its distance from 50%, in [0,1]
49 |     const double bias = fabs((double(*it) / trials) * 2 - 1);
50 | 
51 |     if(bias > peak) peak = bias;
52 |   }
53 |   return peak;
54 | }
55 | 
56 | //-----------------------------------------------------------------------------
57 | 


--------------------------------------------------------------------------------
/SMHasher/AvalancheTest.h:
--------------------------------------------------------------------------------
  1 | //-----------------------------------------------------------------------------
  2 | // Flipping a single bit of a key should cause an "avalanche" of changes in
  3 | // the hash function's output. Ideally, each output bit should flip 50% of
  4 | // the time - if the probability of an output bit flipping is not 50%, that bit
  5 | // is "biased". Too much bias means that patterns applied to the input will
  6 | // cause "echoes" of the patterns in the output, which in turn can cause the
  7 | // hash function to fail to create an even, random distribution of hash values.
  8 | 
  9 | 
 10 | #pragma once
 11 | 
 12 | #include "Types.h"
 13 | #include "Random.h"
 14 | 
 15 | #include <vector>
 16 | #include <stdio.h>
 17 | #include <math.h>
 18 | 
 19 | // Avalanche fails if a bit is biased by more than 1%
 20 | 
 21 | #define AVALANCHE_FAIL 0.01
 22 | 
 23 | double maxBias ( std::vector<int> & counts, int reps );
 24 | 
 25 | //-----------------------------------------------------------------------------
 26 | 
 27 | // Accumulates avalanche statistics for 'hash' over 'reps' random keys:
 28 | // counts[i*hashbits + o] is incremented whenever flipping key bit i changes
 29 | // hash bit o. 'counts' needs keybits*hashbits entries and is not cleared here.
 30 | template < typename keytype, typename hashtype >
 31 | void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
 32 | {
 33 |   const int keybytes = sizeof(keytype);
 34 |   const int hashbytes = sizeof(hashtype);
 35 |   const int keybits = keybytes * 8;
 36 |   const int hashbits = hashbytes * 8;
 37 | 
 38 |   // Progress dot roughly every 10% of the run. The interval is clamped to 1 so
 39 |   // that reps < 10 does not evaluate 'irep % 0' (undefined behaviour).
 40 |   const int tick = (reps >= 10) ? (reps / 10) : 1;
 41 | 
 42 |   keytype K;
 43 |   hashtype A,B;
 44 | 
 45 |   for(int irep = 0; irep < reps; irep++)
 46 |   {
 47 |     if(irep % tick == 0) printf(".");
 48 | 
 49 |     r.rand_p(&K,keybytes);
 50 |     hash(&K,keybytes,0,&A);        // reference hash of the unmodified key
 51 | 
 52 |     int * cursor = &counts[0];
 53 | 
 54 |     for(int iBit = 0; iBit < keybits; iBit++)
 55 |     {
 56 |       flipbit(&K,keybytes,iBit);   // toggle one key bit ...
 57 |       hash(&K,keybytes,0,&B);
 58 |       flipbit(&K,keybytes,iBit);   // ... and restore it for the next pass
 59 | 
 60 |       for(int iOut = 0; iOut < hashbits; iOut++)
 61 |         (*cursor++) += (getbit(&A,hashbytes,iOut) ^ getbit(&B,hashbytes,iOut));
 62 |     }
 63 |   }
 64 | }
 65 | 
 66 | //-----------------------------------------------------------------------------
 67 | 
 68 | // Runs the avalanche test for one key-size / hash-size combination.
 69 | // Collects flip counters via calcBias(), prints a one-line summary and
 70 | // returns false when the worst bias reported by maxBias() exceeds
 71 | // AVALANCHE_FAIL (true otherwise).
 72 | template < typename keytype, typename hashtype >
 73 | bool AvalancheTest ( pfHash hash, const int reps )
 74 | {
 75 |   Rand r(48273);
 76 | 
 77 |   const int keybits  = int(sizeof(keytype))  * 8;
 78 |   const int hashbits = int(sizeof(hashtype)) * 8;
 79 | 
 80 |   printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
 81 | 
 82 |   // One counter per (key bit, hash bit) pair, filled in by calcBias()
 83 |   std::vector<int> bins(keybits*hashbits,0);
 84 |   calcBias<keytype,hashtype>(hash,bins,reps,r);
 85 | 
 86 |   const double worst = maxBias(bins,reps);
 87 | 
 88 |   printf(" worst bias is %f%%",worst * 100.0);
 89 | 
 90 |   const bool failed = (worst > AVALANCHE_FAIL);
 91 | 
 92 |   if(failed)
 93 |   {
 94 |     // Walks every counter whose flip rate lies outside 49%..51%. The report
 95 |     // itself is switched off by the leading '0 &&' in front of printf.
 96 |     const double total = (double)reps;
 97 |     int pos = 0;
 98 | 
 99 |     for(int iBit = 0; iBit < keybits; iBit++)
100 |     {
101 |       for(int iOut = 0; iOut < hashbits; iOut++, pos++)
102 |       {
103 |         const double k = bins[pos]/total;
104 | 
105 |         if (k>0.51 || k<0.49)
106 |           0 && printf(", %d->%d %f%%",iBit,iOut,k * 100.0);   // enable if you need detailed information
107 |       }
108 |     }
109 |   }
110 | 
111 |   printf("\n");
112 | 
113 |   return !failed;
114 | }
115 | 
116 | //----------------------------------------------------------------------------
117 | // Tests the Bit Independence Criterion. Stricter than Avalanche, but slow and
118 | // not really all that useful.
119 | 
120 | // Bit Independence Criterion test for one key bit: hashes 'reps' random keys
121 | // with and without 'keybit' flipped and histograms the four change patterns of
122 | // every ordered pair of distinct output bits. 'maxBias' receives the worst
123 | // deviation; 'maxA'/'maxB' its pair (untouched if no pair has a bias above 0).
124 | template< typename keytype, typename hashtype >
125 | void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
126 | {
127 |   Rand r(11938);
128 | 
129 |   const int keybytes = sizeof(keytype);
130 |   const int hashbytes = sizeof(hashtype);
131 |   const int hashbits = hashbytes * 8;
132 | 
133 |   // Clamp the progress interval so reps < 10 never evaluates 'irep % 0'
134 |   const int tick = (reps >= 10) ? (reps / 10) : 1;
135 |   // 'reps / 2' in integer arithmetic truncates for odd reps and skews the bias
136 |   const double half = reps / 2.0;
137 | 
138 |   // Four counters per (out1,out2) pair, indexed by the 2-bit change pattern
139 |   std::vector<int> bins(hashbits*hashbits*4,0);
140 | 
141 |   keytype key;
142 |   hashtype h1,h2;
143 | 
144 |   for(int irep = 0; irep < reps; irep++)
145 |   {
146 |     if(verbose && (irep % tick == 0)) printf(".");
147 | 
148 |     r.rand_p(&key,keybytes);
149 |     hash(&key,keybytes,0,&h1);
150 |     flipbit(key,keybit);
151 |     hash(&key,keybytes,0,&h2);
152 | 
153 |     hashtype d = h1 ^ h2;   // set bits mark the output bits that changed
154 | 
155 |     for(int out1 = 0; out1 < hashbits; out1++)
156 |     for(int out2 = 0; out2 < hashbits; out2++)
157 |     {
158 |       if(out1 == out2) continue;
159 |       uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
160 |       bins[(out1 * hashbits + out2) * 4 + b]++;
161 |     }
162 |   }
163 | 
164 |   if(verbose) printf("\n");
165 |   maxBias = 0;
166 | 
167 |   for(int out1 = 0; out1 < hashbits; out1++)
168 |   {
169 |     for(int out2 = 0; out2 < hashbits; out2++)
170 |     {
171 |       if(out1 == out2)
172 |       {
173 |         if(verbose) printf("\\");
174 |         continue;
175 |       }
176 | 
177 |       double bias = 0;
178 |       for(int b = 0; b < 4; b++)
179 |       {
180 |         double b2 = fabs((double(bins[(out1 * hashbits + out2) * 4 + b]) / half) * 2 - 1);
181 |         if(b2 > bias) bias = b2;
182 |       }
183 | 
184 |       if(bias > maxBias)
185 |       {
186 |         maxBias = bias;
187 |         maxA = out1;
188 |         maxB = out2;
189 |       }
190 | 
191 |       if(verbose)
192 |       {
193 |         if     (bias < 0.01) printf(".");
194 |         else if(bias < 0.05) printf("o");
195 |         else if(bias < 0.33) printf("O");
196 |         else                 printf("X");
197 |       }
198 |     }
199 | 
200 |     if(verbose) printf("\n");
201 |   }
202 | }
203 | 
204 | //----------
205 | 
206 | // Runs the per-key-bit BicTest (verbosely) for every bit of 'keytype' and
207 | // prints the worst bias found. Returns true when that bias is below 0.05.
208 | template< typename keytype, typename hashtype >
209 | bool BicTest ( pfHash hash, const int reps )
210 | {
211 |   const int keybytes = sizeof(keytype);
212 |   const int keybits = keybytes * 8;
213 | 
214 |   // Progress interval, clamped: keybits/10 is 0 for 1-byte keys ('i % 0' is UB)
215 |   const int tick = (keybits >= 10) ? (keybits / 10) : 1;
216 | 
217 |   double maxBias = 0;
218 |   int maxK = 0;
219 |   int maxA = 0;
220 |   int maxB = 0;
221 | 
222 |   for(int i = 0; i < keybits; i++)
223 |   {
224 |     if(i % tick == 0) printf(".");
225 | 
226 |     double bias;
227 |     int a = 0, b = 0;   // the callee writes these only when it finds a biased pair
228 |     BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
229 | 
230 |     if(bias > maxBias)
231 |     {
232 |       maxBias = bias;
233 |       maxK = i;
234 |       maxA = a;
235 |       maxB = b;
236 |     }
237 |   }
238 | 
239 |   printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
240 |   // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
241 |   return (maxBias < 0.05);
242 | }
243 | 
244 | //-----------------------------------------------------------------------------
245 | // BIC test variant - store all intermediate data in a table, draw diagram
246 | // afterwards (much faster)
247 | 
248 | // BIC test variant that hashes first and analyses afterwards: the counters for
249 | // every key bit are collected into one table (keybits pages of
250 | // hashbits*hashbits*4 ints), then the bias of each output-bit pair with
251 | // out1 < out2 is evaluated per key bit. Prints the worst bias found.
252 | template< typename keytype, typename hashtype >
253 | void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
254 | {
255 |   const int keybytes = sizeof(keytype);
256 |   const int keybits = keybytes * 8;
257 |   const int hashbytes = sizeof(hashtype);
258 |   const int hashbits = hashbytes * 8;
259 |   const int pagesize = hashbits*hashbits*4;
260 | 
261 |   // Progress interval, clamped: keybits/10 is 0 for 1-byte keys ('% 0' is UB)
262 |   const int tick = (keybits >= 10) ? (keybits / 10) : 1;
263 |   // Real-valued half of the trial count; integer 'reps / 2' truncates odd reps
264 |   const double half = reps / 2.0;
265 | 
266 |   Rand r(11938);
267 | 
268 |   double maxBias = 0;
269 |   int maxK = 0;
270 |   int maxA = 0;
271 |   int maxB = 0;
272 | 
273 |   keytype key;
274 |   hashtype h1,h2;
275 | 
276 |   std::vector<int> bins(keybits*pagesize,0);
277 | 
278 |   for(int keybit = 0; keybit < keybits; keybit++)
279 |   {
280 |     if(keybit % tick == 0) printf(".");
281 | 
282 |     int * page = &bins[keybit*pagesize];
283 | 
284 |     for(int irep = 0; irep < reps; irep++)
285 |     {
286 |       r.rand_p(&key,keybytes);
287 |       hash(&key,keybytes,0,&h1);
288 |       flipbit(key,keybit);
289 |       hash(&key,keybytes,0,&h2);
290 | 
291 |       hashtype d = h1 ^ h2;   // set bits mark the output bits that changed
292 |       for(int out1 = 0; out1 < hashbits-1; out1++)
293 |       for(int out2 = out1+1; out2 < hashbits; out2++)
294 |       {
295 |         uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
296 |         page[(out1*hashbits+out2)*4 + x]++;
297 |       }
298 |     }
299 |   }
300 | 
301 |   printf("\n");
302 | 
303 |   for(int out1 = 0; out1 < hashbits-1; out1++)
304 |   {
305 |     for(int out2 = out1+1; out2 < hashbits; out2++)
306 |     {
307 |       if(verbose) printf("(%3d,%3d) - ",out1,out2);
308 | 
309 |       for(int keybit = 0; keybit < keybits; keybit++)
310 |       {
311 |         // The four pattern counters of this (keybit, out1, out2) combination
312 |         const int * pair = &bins[keybit*pagesize + (out1*hashbits+out2)*4];
313 |         double bias = 0;
314 |         for(int b = 0; b < 4; b++)
315 |         {
316 |           double b2 = fabs((double(pair[b]) / half) * 2 - 1);
317 |           if(b2 > bias) bias = b2;
318 |         }
319 | 
320 |         if(bias > maxBias)
321 |         {
322 |           maxBias = bias;
323 |           maxK = keybit;
324 |           maxA = out1;
325 |           maxB = out2;
326 |         }
327 | 
328 |         if(verbose)
329 |         {
330 |           if     (bias < 0.01) printf(".");
331 |           else if(bias < 0.05) printf("o");
332 |           else if(bias < 0.33) printf("O");
333 |           else                 printf("X");
334 |         }
335 |       }
336 | 
337 |       // End of the diagram row for this output-bit pair
338 |       if(verbose) printf("\n");
339 |     }
340 | 
341 |     if(verbose)
342 |     {
343 |       for(int i = 0; i < keybits+12; i++) printf("-");
344 |       printf("\n");
345 |     }
346 |   }
347 | 
348 |   printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
349 | }
350 | 
351 | 
352 | //-----------------------------------------------------------------------------
353 | // BIC test variant - iterate over output bits, then key bits. No temp storage,
354 | // but slooooow
355 | 
356 | // BIC test variant that needs no counter table: for each output-bit pair with
357 | // out1 < out2 and each key bit it hashes 'reps' fresh random keys and keeps
358 | // only four counters. Prints the worst bias; 'verbose' adds a diagram.
359 | template< typename keytype, typename hashtype >
360 | void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
361 | {
362 |   const int keybytes = sizeof(keytype);
363 |   const int keybits = keybytes * 8;
364 |   const int hashbytes = sizeof(hashtype);
365 |   const int hashbits = hashbytes * 8;
366 | 
367 |   // Real-valued half of the trial count; integer 'reps / 2' truncates odd reps
368 |   const double half = reps / 2.0;
369 | 
370 |   Rand r(11938);
371 | 
372 |   double maxBias = 0;
373 |   int maxK = 0;
374 |   int maxA = 0;
375 |   int maxB = 0;
376 | 
377 |   keytype key;
378 |   hashtype h1,h2;
379 | 
380 |   for(int out1 = 0; out1 < hashbits-1; out1++)
381 |   for(int out2 = out1+1; out2 < hashbits; out2++)
382 |   {
383 |     if(verbose) printf("(%3d,%3d) - ",out1,out2);
384 | 
385 |     for(int keybit = 0; keybit < keybits; keybit++)
386 |     {
387 |       int bins[4] = { 0, 0, 0, 0 };
388 | 
389 |       for(int irep = 0; irep < reps; irep++)
390 |       {
391 |         r.rand_p(&key,keybytes);
392 |         hash(&key,keybytes,0,&h1);
393 |         flipbit(key,keybit);
394 |         hash(&key,keybytes,0,&h2);
395 | 
396 |         hashtype d = h1 ^ h2;   // set bits mark the output bits that changed
397 |         uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
398 |         bins[b]++;
399 |       }
400 | 
401 |       double bias = 0;
402 |       for(int b = 0; b < 4; b++)
403 |       {
404 |         double b2 = fabs((double(bins[b]) / half) * 2 - 1);
405 |         if(b2 > bias) bias = b2;
406 |       }
407 | 
408 |       if(bias > maxBias)
409 |       {
410 |         maxBias = bias;
411 |         maxK = keybit;
412 |         maxA = out1;
413 |         maxB = out2;
414 |       }
415 | 
416 |       if(verbose)
417 |       {
418 |         if     (bias < 0.05) printf(".");
419 |         else if(bias < 0.10) printf("o");
420 |         else if(bias < 0.50) printf("O");
421 |         else                 printf("X");
422 |       }
423 |     }
424 | 
425 |     // End of the diagram row for this output-bit pair
426 |     if(verbose) printf("\n");
427 |   }
428 | 
429 |   printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
430 | }
431 | 
432 | //-----------------------------------------------------------------------------
433 | 


--------------------------------------------------------------------------------
/SMHasher/Bitslice.cpp:
--------------------------------------------------------------------------------
  1 | #include "Bitvec.h"
  2 | #include <vector>
  3 | #include <assert.h>
  4 | 
  5 | // handle xnor
  6 | 
  7 | typedef std::vector<uint32_t> slice;
  8 | typedef std::vector<slice> slice_vec;
  9 | 
 10 | int countbits ( slice & v )
 11 | {
 12 |   int c = 0;
 13 | 
 14 |   for(size_t i = 0; i < v.size(); i++)
 15 |   {
 16 |     int d = countbits(v[i]);
 17 | 
 18 |     c += d;
 19 |   }
 20 | 
 21 |   return c;
 22 | }
 23 | 
 24 | int countxor ( slice & a, slice & b )
 25 | {
 26 |   assert(a.size() == b.size());
 27 | 
 28 |   int c = 0;
 29 | 
 30 |   for(size_t i = 0; i < a.size(); i++)
 31 |   {
 32 |     int d = countbits(a[i] ^ b[i]);
 33 | 
 34 |     c += d;
 35 |   }
 36 | 
 37 |   return c;
 38 | }
 39 | 
 40 | void xoreq ( slice & a, slice & b )
 41 | {
 42 |   assert(a.size() == b.size());
 43 | 
 44 |   for(size_t i = 0; i < a.size(); i++)
 45 |   {
 46 |     a[i] ^= b[i];
 47 |   }
 48 | }
 49 | 
 50 | //-----------------------------------------------------------------------------
 51 | // Bitslice a hash set
 52 | 
 53 | template< typename hashtype >
 54 | void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
 55 | {
 56 |   const int hashbytes = sizeof(hashtype);
 57 |   const int hashbits = hashbytes * 8;
 58 |   const int slicelen = ((int)hashes.size() + 31) / 32;
 59 | 
 60 |   slices.clear();
 61 |   slices.resize(hashbits);
 62 | 
 63 |   for(int i = 0; i < (int)slices.size(); i++)
 64 |   {
 65 |     slices[i].resize(slicelen,0);
 66 |   }
 67 | 
 68 |   for(int j = 0; j < hashbits; j++)
 69 |   {
 70 |     void * sliceblob = &(slices[j][0]);
 71 | 
 72 |     for(int i = 0; i < (int)hashes.size(); i++)
 73 |     {
 74 |       int b = getbit(hashes[i],j);
 75 | 
 76 |       setbit(sliceblob,slicelen*4,i,b);
 77 |     }
 78 |   }
 79 | }
 80 | 
 81 | void FactorSlices ( slice_vec & slices )
 82 | {
 83 |   std::vector<int> counts(slices.size(),0);
 84 | 
 85 |   for(size_t i = 0; i < slices.size(); i++)
 86 |   {
 87 |     counts[i] = countbits(slices[i]);
 88 |   }
 89 | 
 90 |   bool changed = true;
 91 | 
 92 |   while(changed)
 93 |   {
 94 |     int bestA = -1;
 95 |     int bestB = -1;
 96 | 
 97 |     for(int j = 0; j < (int)slices.size()-1; j++)
 98 |     {
 99 |       for(int i = j+1; i < (int)slices.size(); i++)
100 |       {
101 |         int d = countxor(slices[i],slices[j]);
102 | 
103 |         if((d < counts[i]) && (d < counts[j]))
104 |         {
105 |           if(counts[i] < counts[j])
106 |           {
107 |             bestA = j;
108 |             bestB = i;
109 |           }
110 |         }
111 |         else if(d < counts[i])
112 |         {
113 |           //bestA = 
114 |         }
115 |       }
116 |     }
117 |   }
118 | }
119 | 
120 | 
121 | void foo ( void )
122 | {
123 |   slice a;
124 |   slice_vec b;
125 | 
126 |   Bitslice(a,b);
127 | }


--------------------------------------------------------------------------------
/SMHasher/Bitvec.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Platform.h"
  4 | 
  5 | #include <vector>
  6 | 
  7 | //-----------------------------------------------------------------------------
  8 | 
  9 | void     printbits   ( const void * blob, int len );
 10 | void     printhex32  ( const void * blob, int len );
 11 | void     printbytes  ( const void * blob, int len );
 12 | void     printbytes2 ( const void * blob, int len );
 13 | 
 14 | uint32_t popcount    ( uint32_t v );
 15 | uint32_t parity      ( uint32_t v );
 16 | 
 17 | uint32_t getbit      ( const void * blob, int len, uint32_t bit );
 18 | uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
 19 | 
 20 | void     setbit      ( void * blob, int len, uint32_t bit );
 21 | void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
 22 | 
 23 | void     clearbit    ( void * blob, int len, uint32_t bit );
 24 | 
 25 | void     flipbit     ( void * blob, int len, uint32_t bit );
 26 | 
 27 | int      countbits   ( uint32_t v );
 28 | int      countbits   ( std::vector<uint32_t> & v );
 29 | 
 30 | int      countbits   ( const void * blob, int len );
 31 | 
 32 | void     invert      ( std::vector<uint32_t> & v );
 33 | 
 34 | //----------
 35 | 
 36 | template< typename T >
 37 | inline uint32_t getbit ( T & blob, uint32_t bit )
 38 | {
 39 |   return getbit(&blob,sizeof(blob),bit);
 40 | }
 41 | 
 42 | template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
 43 | template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }
 44 | 
 45 | //----------
 46 | 
 47 | template< typename T >
 48 | inline void setbit ( T & blob, uint32_t bit )
 49 | {
 50 |   return setbit(&blob,sizeof(blob),bit);
 51 | }
 52 | 
 53 | template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
 54 | template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }
 55 | 
 56 | //----------
 57 | 
 58 | template< typename T >
 59 | inline void flipbit ( T & blob, uint32_t bit )
 60 | {
 61 |   flipbit(&blob,sizeof(blob),bit);
 62 | }
 63 | 
 64 | template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
 65 | template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }
 66 | 
 67 | //-----------------------------------------------------------------------------
 68 | // Left and right shift of blobs. The shift(N) versions work on chunks of N
 69 | // bits at a time (faster)
 70 | 
 71 | void lshift1  ( void * blob, int len, int c );
 72 | void lshift8  ( void * blob, int len, int c );
 73 | void lshift32 ( void * blob, int len, int c );
 74 | 
 75 | void rshift1  ( void * blob, int len, int c );
 76 | void rshift8  ( void * blob, int len, int c );
 77 | void rshift32 ( void * blob, int len, int c );
 78 | 
 79 | inline void lshift ( void * blob, int len, int c )
 80 | {
 81 |   if((len & 3) == 0)
 82 |   {
 83 |     lshift32(blob,len,c);
 84 |   }
 85 |   else
 86 |   {
 87 |     lshift8(blob,len,c);
 88 |   }
 89 | }
 90 | 
 91 | inline void rshift ( void * blob, int len, int c )
 92 | {
 93 |   if((len & 3) == 0)
 94 |   {
 95 |     rshift32(blob,len,c);
 96 |   }
 97 |   else
 98 |   {
 99 |     rshift8(blob,len,c);
100 |   }
101 | }
102 | 
// Typed left shift: shifts the whole object `blob` left by `c`, picking the
// 32-bit-chunk routine when sizeof(T) is a multiple of four bytes.
template < typename T >
inline void lshift ( T & blob, int c )
{
  if((sizeof(T) & 3) == 0)
  {
    lshift32(&blob,sizeof(T),c);
  }
  else
  {
    lshift8(&blob,sizeof(T),c);
  }
}

// Typed right shift: shifts the whole object `blob` right by `c`, picking the
// 32-bit-chunk routine when sizeof(T) is a multiple of four bytes.
// (This previously forwarded to the lshift routines and so shifted the wrong way.)
template < typename T >
inline void rshift ( T & blob, int c )
{
  if((sizeof(T) & 3) == 0)
  {
    rshift32(&blob,sizeof(T),c);
  }
  else
  {
    rshift8(&blob,sizeof(T),c);
  }
}

// Native integers use the built-in shift operators directly.
template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }
template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }
template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }
133 | 
134 | //-----------------------------------------------------------------------------
135 | // Left and right rotate of blobs. The rot(N) versions work on chunks of N
136 | // bits at a time (faster)
137 | 
138 | void lrot1    ( void * blob, int len, int c );
139 | void lrot8    ( void * blob, int len, int c );
140 | void lrot32   ( void * blob, int len, int c );
141 | 
142 | void rrot1    ( void * blob, int len, int c );
143 | void rrot8    ( void * blob, int len, int c );
144 | void rrot32   ( void * blob, int len, int c );
145 | 
146 | inline void lrot ( void * blob, int len, int c )
147 | {
148 |   if((len & 3) == 0)
149 |   {
150 |     return lrot32(blob,len,c);
151 |   }
152 |   else
153 |   {
154 |     return lrot8(blob,len,c);
155 |   }
156 | }
157 | 
158 | inline void rrot ( void * blob, int len, int c )
159 | {
160 |   if((len & 3) == 0)
161 |   {
162 |     return rrot32(blob,len,c);
163 |   }
164 |   else
165 |   {
166 |     return rrot8(blob,len,c);
167 |   }
168 | }
169 | 
170 | template < typename T >
171 | inline void lrot ( T & blob, int c )
172 | {
173 |   if((sizeof(T) & 3) == 0)
174 |   {
175 |     return lrot32(&blob,sizeof(T),c);
176 |   }
177 |   else
178 |   {
179 |     return lrot8(&blob,sizeof(T),c);
180 |   }
181 | }
182 | 
183 | template < typename T >
184 | inline void rrot ( T & blob, int c )
185 | {
186 |   if((sizeof(T) & 3) == 0)
187 |   {
188 |     return rrot32(&blob,sizeof(T),c);
189 |   }
190 |   else
191 |   {
192 |     return rrot8(&blob,sizeof(T),c);
193 |   }
194 | }
195 | 
196 | template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
197 | template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }
198 | template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }
199 | template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }
200 | 
201 | //-----------------------------------------------------------------------------
202 | // Bit-windowing functions - select some N-bit subset of the input blob
203 | 
204 | uint32_t window1  ( void * blob, int len, int start, int count );
205 | uint32_t window8  ( void * blob, int len, int start, int count );
206 | uint32_t window32 ( void * blob, int len, int start, int count );
207 | 
208 | inline uint32_t window ( void * blob, int len, int start, int count )
209 | {
210 |   if(len & 3)
211 |   {
212 |     return window8(blob,len,start,count);
213 |   }
214 |   else
215 |   {
216 |     return window32(blob,len,start,count);
217 |   }
218 | }
219 | 
220 | template < typename T >
221 | inline uint32_t window ( T & blob, int start, int count )
222 | {
223 |   if((sizeof(T) & 3) == 0)
224 |   {
225 |     return window32(&blob,sizeof(T),start,count);
226 |   }
227 |   else
228 |   {
229 |     return window8(&blob,sizeof(T),start,count);
230 |   }
231 | }
232 | 
233 | template<> 
234 | inline uint32_t window ( uint32_t & blob, int start, int count )
235 | {
236 |   return ROTR32(blob,start) & ((1<<count)-1);
237 | }
238 | 
239 | template<> 
240 | inline uint32_t window ( uint64_t & blob, int start, int count )
241 | {
242 |   return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
243 | }
244 | 
245 | //-----------------------------------------------------------------------------
246 | 


--------------------------------------------------------------------------------
/SMHasher/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(SMHasher)
 2 | 
 3 | cmake_minimum_required(VERSION 2.4)
 4 | 
 5 | set(CMAKE_BUILD_TYPE Release)
 6 | 
 7 | add_library(
 8 |   SMHasherSupport
 9 |   AvalancheTest.cpp
10 |   Bitslice.cpp
11 |   Bitvec.cpp
12 |   DifferentialTest.cpp
13 |   FarshTest.cpp
14 |   Hashes.cpp
15 |   KeysetTest.cpp
16 |   Platform.cpp
17 |   Random.cpp
18 |   SpeedTest.cpp
19 |   Stats.cpp
20 |   Types.cpp
21 | )
22 | 
23 | add_executable(
24 |   SMHasher
25 |   main.cpp
26 | )
27 | 
28 | target_link_libraries(
29 |   SMHasher
30 |   SMHasherSupport
31 | )
32 | 


--------------------------------------------------------------------------------
/SMHasher/DifferentialTest.cpp:
--------------------------------------------------------------------------------
1 | #include "DifferentialTest.h"
2 | 
3 | //----------------------------------------------------------------------------
4 | 


--------------------------------------------------------------------------------
/SMHasher/DifferentialTest.h:
--------------------------------------------------------------------------------
  1 | //-----------------------------------------------------------------------------
  2 | // Differential collision & distribution tests - generate a bunch of random keys,
  3 | // see what happens to the hash value when we flip a few bits of the key.
  4 | 
  5 | #pragma once
  6 | 
  7 | #include "Types.h"
  8 | #include "Stats.h"      // for chooseUpToK
  9 | #include "KeysetTest.h" // for SparseKeygenRecurse
 10 | #include "Random.h"
 11 | 
 12 | #include <vector>
 13 | #include <algorithm>
 14 | #include <stdio.h>
 15 | 
 16 | //-----------------------------------------------------------------------------
 17 | // Sort through the differentials, ignoring collisions that only occurred once
 18 | // (these could be false positives). If we find collisions of 3 or more, the
 19 | // differential test fails.
 20 | 
 21 | template < class keytype >
 22 | bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
 23 | {
 24 |   std::sort(diffs.begin(), diffs.end());
 25 | 
 26 |   int count = 1;
 27 |   int ignore = 0;
 28 | 
 29 |   bool result = true;
 30 | 
 31 |   if(diffs.size())
 32 |   {
 33 |     keytype kp = diffs[0];
 34 | 
 35 |     for(int i = 1; i < (int)diffs.size(); i++)
 36 |     {
 37 |       if(diffs[i] == kp)
 38 |       {
 39 |         count++;
 40 |         continue;
 41 |       }
 42 |       else
 43 |       {
 44 |         if(count > 1)
 45 |         {
 46 |           result = false;
 47 | 
 48 |           double pct = 100 * (double(count) / double(reps));
 49 | 
 50 |           if(dumpCollisions)
 51 |           {
 52 |             printbits((unsigned char*)&kp,sizeof(kp));
 53 |             printf(" - %4.2f%%\n", pct );
 54 |           }
 55 |         }
 56 |         else 
 57 |         {
 58 |           ignore++;
 59 |         }
 60 | 
 61 |         kp = diffs[i];
 62 |         count = 1;
 63 |       }
 64 |     }
 65 | 
 66 |     if(count > 1)
 67 |     {
 68 |       double pct = 100 * (double(count) / double(reps));
 69 | 
 70 |       if(dumpCollisions)
 71 |       {
 72 |         printbits((unsigned char*)&kp,sizeof(kp));
 73 |         printf(" - %4.2f%%\n", pct );
 74 |       }
 75 |     }
 76 |     else 
 77 |     {
 78 |       ignore++;
 79 |     }
 80 |   }
 81 | 
 82 |   printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
 83 | 
 84 |   if(result == false)
 85 |   {
 86 |     printf(" !!!!! ");
 87 |   }
 88 | 
 89 |   printf("\n");
 90 |   printf("\n");
 91 | 
 92 |   return result;
 93 | }
 94 | 
 95 | //-----------------------------------------------------------------------------
 96 | // Check all possible keybits-choose-N differentials for collisions, report
 97 | // ones that occur significantly more often than expected.
 98 | 
 99 | // Random collisions can happen with probability 1 in 2^32 - if we do more than
100 | // 2^32 tests, we'll probably see some spurious random collisions, so don't report
101 | // them.
102 | 
103 | template < typename keytype, typename hashtype >
104 | void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
105 | {
106 |   const int bits = sizeof(keytype)*8;
107 | 
108 |   for(int i = start; i < bits; i++)
109 |   {
110 |     flipbit(&k2,sizeof(k2),i);
111 |     bitsleft--;
112 | 
113 |     hash(&k2,sizeof(k2),0,&h2);
114 | 
115 |     if(h1 == h2)
116 |     {
117 |       diffs.push_back(k1 ^ k2);
118 |     }
119 | 
120 |     if(bitsleft)
121 |     {
122 |       DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
123 |     }
124 | 
125 |     flipbit(&k2,sizeof(k2),i);
126 |     bitsleft++;
127 |   }
128 | }
129 | 
130 | //----------
131 | 
132 | template < typename keytype, typename hashtype >
133 | bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
134 | {
135 |   const int keybits = sizeof(keytype) * 8;
136 |   const int hashbits = sizeof(hashtype) * 8;
137 | 
138 |   double diffcount = chooseUpToK(keybits,diffbits);
139 |   double testcount = (diffcount * double(reps));
140 |   double expected  = testcount / pow(2.0,double(hashbits));
141 | 
142 |   Rand r(100);
143 | 
144 |   std::vector<keytype> diffs;
145 | 
146 |   keytype k1,k2;
147 |   hashtype h1,h2;
148 | 
149 |   printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
150 |   printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
151 | 
152 |   for(int i = 0; i < reps; i++)
153 |   {
154 |     if(i % (reps/10) == 0) printf(".");
155 | 
156 |     r.rand_p(&k1,sizeof(keytype));
157 |     k2 = k1;
158 | 
159 |     hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
160 | 
161 |     DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
162 |   }
163 |   printf("\n");
164 | 
165 |   bool result = true;
166 | 
167 |   result &= ProcessDifferentials(diffs,reps,dumpCollisions);
168 | 
169 |   return result;
170 | }
171 | 
172 | //-----------------------------------------------------------------------------
173 | // Differential distribution test - for each N-bit input differential, generate
174 | // a large set of differential key pairs, hash them, and test the output 
175 | // differentials using our distribution test code.
176 | 
177 | // This is a very hard test to pass - even if the hash values are well-distributed,
178 | // the differences between hash values may not be. It's also not entirely relevant
179 | // for testing hash functions, but it's still interesting.
180 | 
181 | // This test is a _lot_ of work, as it's essentially a full keyset test for
182 | // each of a potentially huge number of input differentials. To speed things
183 | // along, we do only a few distribution tests per keyset instead of the full
184 | // grid.
185 | 
186 | // #TODO - put diagram drawing back on
187 | 
188 | template < typename keytype, typename hashtype >
189 | void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
190 | {
191 |   std::vector<keytype>  keys(trials);
192 |   std::vector<hashtype> A(trials),B(trials);
193 | 
194 |   for(int i = 0; i < trials; i++)
195 |   {
196 |     rand_p(&keys[i],sizeof(keytype));
197 | 
198 |     hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
199 |   }
200 | 
201 |   //----------
202 | 
203 |   std::vector<keytype> diffs;
204 | 
205 |   keytype temp(0);
206 | 
207 |   SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
208 | 
209 |   //----------
210 | 
211 |   worst = 0;
212 |   avg = 0;
213 | 
214 |   hashtype h2;
215 | 
216 |   for(size_t j = 0; j < diffs.size(); j++)
217 |   {
218 |     keytype & d = diffs[j];
219 | 
220 |     for(int i = 0; i < trials; i++)
221 |     {
222 |       keytype k2 = keys[i] ^ d;
223 | 
224 |       hash(&k2,sizeof(k2),0,&h2);
225 | 
226 |       B[i] = A[i] ^ h2;
227 |     }
228 | 
229 |     double dworst,davg;
230 | 
231 |     TestDistributionFast(B,dworst,davg);
232 | 
233 |     avg += davg;
234 |     worst = (dworst > worst) ? dworst : worst;
235 |   }
236 | 
237 |   avg /= double(diffs.size());
238 | }
239 | 
240 | //-----------------------------------------------------------------------------
241 | // Simpler differential-distribution test - for all 1-bit differentials,
242 | // generate random key pairs and run full distribution/collision tests on the
243 | // hash differentials
244 | 
245 | template < typename keytype, typename hashtype >
246 | bool DiffDistTest2 ( pfHash hash  )
247 | {
248 |   Rand r(857374);
249 | 
250 |   int keybits = sizeof(keytype) * 8;
251 |   const int keycount = 256*256*32;
252 |   keytype k;
253 |   
254 |   std::vector<hashtype> hashes(keycount);
255 |   hashtype h1,h2;
256 | 
257 |   bool result = true;
258 | 
259 |   for(int keybit = 0; keybit < keybits; keybit++)
260 |   {
261 |     printf("Testing bit %d\n",keybit);
262 | 
263 |     for(int i = 0; i < keycount; i++)
264 |     {
265 |       r.rand_p(&k,sizeof(keytype));
266 |       
267 |       hash(&k,sizeof(keytype),0,&h1);
268 |       flipbit(&k,sizeof(keytype),keybit);
269 |       hash(&k,sizeof(keytype),0,&h2);
270 | 
271 |       hashes[i] = h1 ^ h2;
272 |     }
273 | 
274 |     result &= TestHashList<hashtype>(hashes,true,true,true);
275 |     printf("\n");
276 |   }
277 | 
278 |   return result;
279 | }
280 | 
281 | //----------------------------------------------------------------------------
282 | 


--------------------------------------------------------------------------------
/SMHasher/FarshTest.cpp:
--------------------------------------------------------------------------------
 1 | #include "../farsh.c"
 2 | 
 3 | void farsh32_test ( const void * key, int len, unsigned seed, void * out )
 4 | {
 5 |   farsh_n(key,len,0,1,seed,out);
 6 | }
 7 | 
 8 | void farsh64_test ( const void * key, int len, unsigned seed, void * out )
 9 | {
10 |   farsh_n(key,len,0,2,seed,out);
11 | }
12 | 
13 | void farsh128_test ( const void * key, int len, unsigned seed, void * out )
14 | {
15 |   farsh_n(key,len,0,4,seed,out);
16 | }
17 | 
18 | void farsh256_test ( const void * key, int len, unsigned seed, void * out )
19 | {
20 |   farsh_n(key,len,0,8,seed,out);
21 | }
22 | 


--------------------------------------------------------------------------------
/SMHasher/Hashes.cpp:
--------------------------------------------------------------------------------
 1 | #include "Hashes.h"
 2 | 
 3 | #include "Random.h"
 4 | 
 5 | 
 6 | //----------------------------------------------------------------------------
 7 | // fake / bad hashes
 8 | 
 9 | void DoNothingHash ( const void *, int, uint32_t, void * )
10 | {
11 | }
12 | 
13 | //-----------------------------------------------------------------------------
14 | // One-byte-at-a-time hash based on Murmur's mix
15 | 
// Byte-at-a-time hash. Starting from `seed`, each input byte is XORed into the
// state, the state is multiplied by 0x5bd1e995, and then folded with itself
// shifted right by 15. Returns the final 32-bit state (the seed itself when
// len is zero or negative).
uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )
{
  const uint32_t multiplier = 0x5bd1e995;

  const uint8_t * cursor = (const uint8_t*)key;

  uint32_t state = seed;

  for(int remaining = len; remaining > 0; remaining--)
  {
    state = (state ^ *cursor++) * multiplier;
    state ^= state >> 15;
  }

  return state;
}
31 | 
32 | 
33 | //-----------------------------------------------------------------------------
34 | // 32-bit parts of MurmurHash3_x86_128
35 | 
36 | void Murmur3c_32 ( const void * key, const int len, uint32_t seed, void * out )
37 | {
38 |   uint32_t full_out[4];
39 |   MurmurHash3_x86_128 ( key, len, seed, full_out );
40 |   *(uint32_t*)out = full_out[0];
41 | }
42 | 
43 | void Murmur3c_32a ( const void * key, const int len, uint32_t seed, void * out )
44 | {
45 |   uint32_t full_out[4];
46 |   MurmurHash3_x86_128 ( key, len, seed, full_out );
47 |   *(uint32_t*)out = full_out[1];
48 | }
49 | 
50 | void Murmur3c_32b ( const void * key, const int len, uint32_t seed, void * out )
51 | {
52 |   uint32_t full_out[4];
53 |   MurmurHash3_x86_128 ( key, len, seed, full_out );
54 |   *(uint32_t*)out = full_out[2];
55 | }
56 | 
57 | void Murmur3c_32c ( const void * key, const int len, uint32_t seed, void * out )
58 | {
59 |   uint32_t full_out[4];
60 |   MurmurHash3_x86_128 ( key, len, seed, full_out );
61 |   *(uint32_t*)out = full_out[3];
62 | }
63 | 


--------------------------------------------------------------------------------
/SMHasher/Hashes.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Types.h"
  4 | #include "MurmurHash3.h"
  5 | 
  6 | //----------
  7 | // These are _not_ hash functions (even though people tend to use crc32 as one...)
  8 | 
  9 | void DoNothingHash ( const void * key, int len, uint32_t seed, void * out );
 10 | void crc32         ( const void * key, int len, uint32_t seed, void * out );
 11 | 
 12 | //----------
 13 | // 32/64/128-bit parts of SHA1
 14 | 
 15 | void sha1_32  ( const void * key, int len, uint32_t seed, void * out );
 16 | void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
 17 | void sha1_32b ( const void * key, int len, uint32_t seed, void * out );
 18 | void sha1_32c ( const void * key, int len, uint32_t seed, void * out );
 19 | void sha1_64  ( const void * key, int len, uint32_t seed, void * out );
 20 | void sha1_64a ( const void * key, int len, uint32_t seed, void * out );
 21 | void sha1_128 ( const void * key, int len, uint32_t seed, void * out );
 22 | 
 23 | //----------
 24 | // General purpose hashes
 25 | 
 26 | uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
 27 | 
 28 | void farsh32_test  ( const void * key, int len, unsigned seed, void * out );
 29 | void farsh64_test  ( const void * key, int len, unsigned seed, void * out );
 30 | void farsh128_test ( const void * key, int len, unsigned seed, void * out );
 31 | void farsh256_test ( const void * key, int len, unsigned seed, void * out );
 32 | 
 33 | void uhash32_test  ( const void * key, int len, unsigned seed, void * out );
 34 | 
 35 | void vhash64_test  ( const void * key, int len, unsigned seed, void * out );
 36 | void vhash128_test ( const void * key, int len, unsigned seed, void * out );
 37 | 
 38 | void poly1305_test ( const void * key, int len, unsigned seed, void * out );
 39 | 
 40 | //-----------------------------------------------------------------------------
 41 | // SpookyHashV2 and its 32-bit parts
 42 | 
 43 | void SpookyHash32_test ( const void * key, int len, unsigned seed, void * out );
 44 | void SpookyHash32a_test( const void * key, int len, unsigned seed, void * out );
 45 | void SpookyHash32b_test( const void * key, int len, unsigned seed, void * out );
 46 | void SpookyHash32c_test( const void * key, int len, unsigned seed, void * out );
 47 | void SpookyHash64_test ( const void * key, int len, unsigned seed, void * out );
 48 | void SpookyHash128_test( const void * key, int len, unsigned seed, void * out );
 49 | 
 50 | //-----------------------------------------------------------------------------
 51 | // 32-bit parts of MurmurHash3_x86_128
 52 | 
 53 | void Murmur3c_32 ( const void * key, const int len, uint32_t seed, void * out );
 54 | void Murmur3c_32a( const void * key, const int len, uint32_t seed, void * out );
 55 | void Murmur3c_32b( const void * key, const int len, uint32_t seed, void * out );
 56 | void Murmur3c_32c( const void * key, const int len, uint32_t seed, void * out );
 57 | 
 58 | //-----------------------------------------------------------------------------
 59 | // xxHash
 60 | 
 61 | void XXH32_test ( const void * key, int len, unsigned seed, void * out );
 62 | void XXH64_test ( const void * key, int len, unsigned seed, void * out );
 63 | 
 64 | //-----------------------------------------------------------------------------
 65 | // XXH32 with XXH64 backend
 66 | 
 67 | void ModXXH32_test   ( const void * key, int len, unsigned seed, void * out );
 68 | void ModXXH32a_test  ( const void * key, int len, unsigned seed, void * out );
 69 | void ModXXH64_test   ( const void * key, int len, unsigned seed, void * out );
 70 | 
 71 | //-----------------------------------------------------------------------------
 72 | // ZZH & SlowZZH: my experimental x86-optimized hashes
 73 | 
 74 | void ZZH32_test      ( const void * key, int len, unsigned seed, void * out );
 75 | void ZZH32a_test     ( const void * key, int len, unsigned seed, void * out );
 76 | void ZZH32b_test     ( const void * key, int len, unsigned seed, void * out );
 77 | void ZZH32c_test     ( const void * key, int len, unsigned seed, void * out );
 78 | void ZZH64_test      ( const void * key, int len, unsigned seed, void * out );
 79 | void ZZH64a_test     ( const void * key, int len, unsigned seed, void * out );
 80 | void ZZH128_test     ( const void * key, int len, unsigned seed, void * out );
 81 | 
 82 | void SlowZZH32_test  ( const void * key, int len, unsigned seed, void * out );
 83 | void SlowZZH32a_test ( const void * key, int len, unsigned seed, void * out );
 84 | void SlowZZH32b_test ( const void * key, int len, unsigned seed, void * out );
 85 | void SlowZZH32c_test ( const void * key, int len, unsigned seed, void * out );
 86 | void SlowZZH64_test  ( const void * key, int len, unsigned seed, void * out );
 87 | void SlowZZH64a_test ( const void * key, int len, unsigned seed, void * out );
 88 | void SlowZZH128_test ( const void * key, int len, unsigned seed, void * out );
 89 | 
 90 | //-----------------------------------------------------------------------------
 91 | // WideZZH & SlowWideZZH: my experimental x64-optimized hashes
 92 | 
 93 | void WideZZH32_test  ( const void * key, int len, unsigned seed, void * out );
 94 | void WideZZH32a_test ( const void * key, int len, unsigned seed, void * out );
 95 | void WideZZH64_test  ( const void * key, int len, unsigned seed, void * out );
 96 | 
 97 | void SlowWideZZH32_test  ( const void * key, int len, unsigned seed, void * out );
 98 | void SlowWideZZH32a_test ( const void * key, int len, unsigned seed, void * out );
 99 | void SlowWideZZH64_test  ( const void * key, int len, unsigned seed, void * out );
100 | 
101 | //-----------------------------------------------------------------------------
102 | // SimdZZH: my experimental SIMD-optimized hash
103 | 
104 | void SimdZZH32_test  ( const void * key, int len, unsigned seed, void * out );
105 | void SimdZZH32a_test ( const void * key, int len, unsigned seed, void * out );
106 | void SimdZZH64_test  ( const void * key, int len, unsigned seed, void * out );
107 | 


--------------------------------------------------------------------------------
/SMHasher/KeysetTest.cpp:
--------------------------------------------------------------------------------
  1 | #include "KeysetTest.h"
  2 | 
  3 | #include "Platform.h"
  4 | #include "Random.h"
  5 | 
  6 | #include <map>
  7 | #include <set>
  8 | 
  9 | //-----------------------------------------------------------------------------
  10 | // This should hopefully be a thorough and unambiguous test of whether a hash
 11 | // is correctly implemented on a given platform
 12 | 
 13 | bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
 14 | {
 15 |   const int hashbytes = hashbits / 8;
 16 | 
 17 |   uint8_t * key    = new uint8_t[256];
 18 |   uint8_t * hashes = new uint8_t[hashbytes * 256];
 19 |   uint8_t * final  = new uint8_t[hashbytes];
 20 | 
 21 |   memset(key,0,256);
 22 |   memset(hashes,0,hashbytes*256);
 23 |   memset(final,0,hashbytes);
 24 | 
 25 |   // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
 26 |   // the seed
 27 | 
 28 |   for(int i = 0; i < 256; i++)
 29 |   {
 30 |     key[i] = (uint8_t)i;
 31 | 
 32 |     hash(key,i,256-i,&hashes[i*hashbytes]);
 33 |   }
 34 | 
 35 |   // Then hash the result array
 36 | 
 37 |   hash(hashes,hashbytes*256,0,final);
 38 | 
 39 |   // The first four bytes of that hash, interpreted as a little-endian integer, is our
 40 |   // verification value
 41 | 
 42 |   uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
 43 | 
 44 |   delete [] key;
 45 |   delete [] hashes;
 46 |   delete [] final;
 47 | 
 48 |   //----------
 49 | 
 50 |   if(expected != verification)
 51 |   {
 52 |     if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
 53 |     return false;
 54 |   }
 55 |   else
 56 |   {
 57 |     if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
 58 |     return true;
 59 |   }
 60 | }
 61 | 
 62 | //----------------------------------------------------------------------------
 63 | // Basic sanity checks -
 64 | 
 65 | // A hash function should not be reading outside the bounds of the key.
 66 | 
 67 | // Flipping a bit of a key should, with overwhelmingly high probability,
 68 | // result in a different hash.
 69 | 
 70 | // Hashing the same key twice should always produce the same result.
 71 | 
 72 | // The memory alignment of the key should not affect the hash result.
 73 | 
 74 | bool SanityTest ( pfHash hash, const int hashbits )
 75 | {
 76 |   printf("Running sanity check 1");
 77 | 
 78 |   Rand r(883741);
 79 | 
 80 |   bool result = true;
 81 | 
 82 |   const int hashbytes = hashbits/8;
 83 |   const int reps = 10;
 84 |   const int keymax = 256;
 85 |   const int pad = 16;
 86 |   const int buflen = keymax + pad*3;
 87 | 
 88 |   uint8_t * buffer1 = new uint8_t[buflen];
 89 |   uint8_t * buffer2 = new uint8_t[buflen];
 90 | 
 91 |   uint8_t * hash1 = new uint8_t[hashbytes];
 92 |   uint8_t * hash2 = new uint8_t[hashbytes];
 93 | 
 94 |   //----------
 95 | 
 96 |   for(int irep = 0; irep < reps; irep++)
 97 |   {
 98 |     if(irep % (reps/10) == 0) printf(".");
 99 | 
100 |     for(int len = 4; len <= keymax; len++)
101 |     {
102 |       for(int offset = pad; offset < pad*2; offset++)
103 |       {
104 |         uint8_t * key1 = &buffer1[pad];
105 |         uint8_t * key2 = &buffer2[pad+offset];
106 | 
107 |         r.rand_p(buffer1,buflen);
108 |         r.rand_p(buffer2,buflen);
109 | 
110 |         memcpy(key2,key1,len);
111 | 
112 |         hash(key1,len,0,hash1);
113 | 
114 |         for(int bit = 0; bit < (len * 8); bit++)
115 |         {
116 |           // Flip a bit, hash the key -> we should get a different result.
117 | 
118 |           flipbit(key2,len,bit);
119 |           hash(key2,len,0,hash2);
120 | 
121 |           if(memcmp(hash1,hash2,hashbytes) == 0)
122 |           {
123 |             result = false;
124 |           }
125 | 
126 |           // Flip it back, hash again -> we should get the original result.
127 | 
128 |           flipbit(key2,len,bit);
129 |           hash(key2,len,0,hash2);
130 | 
131 |           if(memcmp(hash1,hash2,hashbytes) != 0)
132 |           {
133 |             result = false;
134 |           }
135 |         }
136 |       }
137 |     }
138 |   }
139 | 
140 |   if(result == false)
141 |   {
142 |     printf("*********FAIL*********\n");
143 |   }
144 |   else
145 |   {
146 |     printf("PASS\n");
147 |   }
148 | 
149 |   delete [] buffer1;
150 |   delete [] buffer2;
151 | 
152 |   delete [] hash1;
153 |   delete [] hash2;
154 | 
155 |   return result;
156 | }
157 | 
158 | //----------------------------------------------------------------------------
159 | // Appending zero bytes to a key should always cause it to produce a different
160 | // hash value
161 | 
162 | void AppendedZeroesTest ( pfHash hash, const int hashbits )
163 | {
164 |   printf("Running sanity check 2");
165 | 
166 |   Rand r(173994);
167 | 
168 |   const int hashbytes = hashbits/8;
169 | 
170 |   for(int rep = 0; rep < 100; rep++)
171 |   {
172 |     if(rep % 10 == 0) printf(".");
173 | 
174 |     unsigned char key[256];
175 | 
176 |     memset(key,0,sizeof(key));
177 | 
178 |     r.rand_p(key,32);
179 | 
180 |     uint32_t h1[16];
181 |     uint32_t h2[16];
182 | 
183 |     memset(h1,0,hashbytes);
184 |     memset(h2,0,hashbytes);
185 | 
186 |     for(int i = 0; i < 32; i++)
187 |     {
188 |       hash(key,32+i,0,h1);
189 | 
190 |       if(memcmp(h1,h2,hashbytes) == 0)
191 |       {
192 |         printf("\n*********FAIL*********\n");
193 |         return;
194 |       }
195 | 
196 |       memcpy(h2,h1,hashbytes);
197 |     }
198 |   }
199 | 
200 |   printf("PASS\n");
201 | }
202 | 
203 | //-----------------------------------------------------------------------------
204 | // Generate all keys of up to N bytes containing two non-zero bytes
205 | 
206 | void TwoBytesKeygen ( int maxlen, KeyCallback & c )
207 | {
208 |   //----------
209 |   // Compute # of keys
210 | 
211 |   int keycount = 0;
212 | 
213 |   for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
214 | 
215 |   keycount *= 255*255;
216 | 
217 |   for(int i = 2; i <= maxlen; i++) keycount += i*255;
218 | 
219 |   printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
220 | 
221 |   c.reserve(keycount);
222 | 
223 |   //----------
224 |   // Add all keys with one non-zero byte
225 | 
226 |   uint8_t key[256];
227 | 
228 |   memset(key,0,256);
229 | 
230 |   for(int keylen = 2; keylen <= maxlen; keylen++)
231 |   for(int byteA = 0; byteA < keylen; byteA++)
232 |   {
233 |     for(int valA = 1; valA <= 255; valA++)
234 |     {
235 |       key[byteA] = (uint8_t)valA;
236 | 
237 |       c(key,keylen);
238 |     }
239 | 
240 |     key[byteA] = 0;
241 |   }
242 | 
243 |   //----------
244 |   // Add all keys with two non-zero bytes
245 | 
246 |   for(int keylen = 2; keylen <= maxlen; keylen++)
247 |   for(int byteA = 0; byteA < keylen-1; byteA++)
248 |   for(int byteB = byteA+1; byteB < keylen; byteB++)
249 |   {
250 |     for(int valA = 1; valA <= 255; valA++)
251 |     {
252 |       key[byteA] = (uint8_t)valA;
253 | 
254 |       for(int valB = 1; valB <= 255; valB++)
255 |       {
256 |         key[byteB] = (uint8_t)valB;
257 |         c(key,keylen);
258 |       }
259 | 
260 |       key[byteB] = 0;
261 |     }
262 | 
263 |     key[byteA] = 0;
264 |   }
265 | }
266 | 
267 | //-----------------------------------------------------------------------------
268 | 
269 | template< typename hashtype >
270 | void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
271 | {
272 |   typedef CollisionMap<hashtype,ByteVec> cmap_t;
273 | 
274 |   for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
275 |   {
276 |     const hashtype & hash = (*it).first;
277 | 
278 |     printf("Hash - ");
279 |     printbytes(&hash,sizeof(hashtype));
280 |     printf("\n");
281 | 
282 |     std::vector<ByteVec> & keys = (*it).second;
283 | 
284 |     for(int i = 0; i < (int)keys.size(); i++)
285 |     {
286 |       ByteVec & key = keys[i];
287 | 
288 |       printf("Key  - ");
289 |       printbytes(&key[0],(int)key.size());
290 |       printf("\n");
291 |     }
292 |     printf("\n");
293 |   }
294 | 
295 | }
296 | 
297 | // test code
298 | 
299 | void ReportCollisions ( pfHash hash )
300 | {
301 |   printf("Hashing keyset\n");
302 | 
303 |   std::vector<uint128_t> hashes;
304 | 
305 |   HashCallback<uint128_t> c(hash,hashes);
306 | 
307 |   TwoBytesKeygen(20,c);
308 | 
309 |   printf("%d hashes\n",(int)hashes.size());
310 | 
311 |   printf("Finding collisions\n");
312 | 
313 |   HashSet<uint128_t> collisions;
314 | 
315 |   FindCollisions(hashes,collisions,1000);
316 | 
317 |   printf("%d collisions\n",(int)collisions.size());
318 | 
319 |   printf("Mapping collisions\n");
320 | 
321 |   CollisionMap<uint128_t,ByteVec> cmap;
322 | 
323 |   CollisionCallback<uint128_t> c2(hash,collisions,cmap);
324 | 
325 |   TwoBytesKeygen(20,c2);
326 | 
327 |   printf("Dumping collisions\n");
328 | 
329 |   DumpCollisionMap(cmap);
330 | }
331 | 


--------------------------------------------------------------------------------
/SMHasher/MurmurHash/MurmurHash3.cpp:
--------------------------------------------------------------------------------
  1 | //-----------------------------------------------------------------------------
  2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
  3 | // domain. The author hereby disclaims copyright to this source code.
  4 | 
  5 | // Note - The x86 and x64 versions do _not_ produce the same results, as the
  6 | // algorithms are optimized for their respective platforms. You can still
  7 | // compile and run any of them on any platform, but your performance with the
  8 | // non-native version will be less than optimal.
  9 | 
 10 | #include "MurmurHash3.h"
 11 | 
 12 | //-----------------------------------------------------------------------------
 13 | // Platform-specific functions and macros
 14 | 
 15 | // Microsoft Visual Studio
 16 | 
 17 | #if defined(_MSC_VER)
 18 | 
 19 | #define FORCE_INLINE	__forceinline
 20 | 
 21 | #include <stdlib.h>
 22 | 
 23 | #define ROTL32(x,y)	_rotl(x,y)
 24 | #define ROTL64(x,y)	_rotl64(x,y)
 25 | 
 26 | #define BIG_CONSTANT(x) (x)
 27 | 
 28 | // Other compilers
 29 | 
 30 | #else	// defined(_MSC_VER)
 31 | 
 32 | #define	FORCE_INLINE inline __attribute__((always_inline))
 33 | 
 34 | inline uint32_t rotl32 ( uint32_t x, int8_t r )
 35 | {
 36 |   return (x << r) | (x >> (32 - r));
 37 | }
 38 | 
 39 | inline uint64_t rotl64 ( uint64_t x, int8_t r )
 40 | {
 41 |   return (x << r) | (x >> (64 - r));
 42 | }
 43 | 
 44 | #define	ROTL32(x,y)	rotl32(x,y)
 45 | #define ROTL64(x,y)	rotl64(x,y)
 46 | 
 47 | #define BIG_CONSTANT(x) (x##LLU)
 48 | 
 49 | #endif // !defined(_MSC_VER)
 50 | 
 51 | //-----------------------------------------------------------------------------
 52 | // Block read - if your platform needs to do endian-swapping or can only
 53 | // handle aligned reads, do the conversion here
 54 | 
 55 | FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
 56 | {
 57 |   return p[i];
 58 | }
 59 | 
 60 | FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
 61 | {
 62 |   return p[i];
 63 | }
 64 | 
 65 | //-----------------------------------------------------------------------------
 66 | // Finalization mix - force all bits of a hash block to avalanche
 67 | 
 68 | FORCE_INLINE uint32_t fmix32 ( uint32_t h )
 69 | {
 70 |   h ^= h >> 16;
 71 |   h *= 0x85ebca6b;
 72 |   h ^= h >> 13;
 73 |   h *= 0xc2b2ae35;
 74 |   h ^= h >> 16;
 75 | 
 76 |   return h;
 77 | }
 78 | 
 79 | //----------
 80 | 
 81 | FORCE_INLINE uint64_t fmix64 ( uint64_t k )
 82 | {
 83 |   k ^= k >> 33;
 84 |   k *= BIG_CONSTANT(0xff51afd7ed558ccd);
 85 |   k ^= k >> 33;
 86 |   k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
 87 |   k ^= k >> 33;
 88 | 
 89 |   return k;
 90 | }
 91 | 
 92 | //-----------------------------------------------------------------------------
 93 | 
 94 | void MurmurHash3_x86_32 ( const void * key, int len,
 95 |                           uint32_t seed, void * out )
 96 | {
 97 |   const uint8_t * data = (const uint8_t*)key;
 98 |   const int nblocks = len / 4;
 99 | 
100 |   uint32_t h1 = seed;
101 | 
102 |   const uint32_t c1 = 0xcc9e2d51;
103 |   const uint32_t c2 = 0x1b873593;
104 | 
105 |   //----------
106 |   // body
107 | 
108 |   const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
109 | 
110 |   for(int i = -nblocks; i; i++)
111 |   {
112 |     uint32_t k1 = getblock32(blocks,i);
113 | 
114 |     k1 *= c1;
115 |     k1 = ROTL32(k1,15);
116 |     k1 *= c2;
117 |     
118 |     h1 ^= k1;
119 |     h1 = ROTL32(h1,13); 
120 |     h1 = h1*5+0xe6546b64;
121 |   }
122 | 
123 |   //----------
124 |   // tail
125 | 
126 |   const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
127 | 
128 |   uint32_t k1 = 0;
129 | 
130 |   switch(len & 3)
131 |   {
132 |   case 3: k1 ^= tail[2] << 16;
133 |   case 2: k1 ^= tail[1] << 8;
134 |   case 1: k1 ^= tail[0];
135 |           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
136 |   };
137 | 
138 |   //----------
139 |   // finalization
140 | 
141 |   h1 ^= len;
142 | 
143 |   h1 = fmix32(h1);
144 | 
145 |   *(uint32_t*)out = h1;
146 | } 
147 | 
148 | //-----------------------------------------------------------------------------
149 | 
150 | void MurmurHash3_x86_128 ( const void * key, const int len,
151 |                            uint32_t seed, void * out )
152 | {
153 |   const uint8_t * data = (const uint8_t*)key;
154 |   const int nblocks = len / 16;
155 | 
156 |   uint32_t h1 = seed;
157 |   uint32_t h2 = seed;
158 |   uint32_t h3 = seed;
159 |   uint32_t h4 = seed;
160 | 
161 |   const uint32_t c1 = 0x239b961b; 
162 |   const uint32_t c2 = 0xab0e9789;
163 |   const uint32_t c3 = 0x38b34ae5; 
164 |   const uint32_t c4 = 0xa1e38b93;
165 | 
166 |   //----------
167 |   // body
168 | 
169 |   const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
170 | 
171 |   for(int i = -nblocks; i; i++)
172 |   {
173 |     uint32_t k1 = getblock32(blocks,i*4+0);
174 |     uint32_t k2 = getblock32(blocks,i*4+1);
175 |     uint32_t k3 = getblock32(blocks,i*4+2);
176 |     uint32_t k4 = getblock32(blocks,i*4+3);
177 | 
178 |     k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
179 | 
180 |     h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
181 | 
182 |     k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
183 | 
184 |     h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
185 | 
186 |     k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
187 | 
188 |     h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
189 | 
190 |     k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
191 | 
192 |     h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
193 |   }
194 | 
195 |   //----------
196 |   // tail
197 | 
198 |   const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
199 | 
200 |   uint32_t k1 = 0;
201 |   uint32_t k2 = 0;
202 |   uint32_t k3 = 0;
203 |   uint32_t k4 = 0;
204 | 
205 |   switch(len & 15)
206 |   {
207 |   case 15: k4 ^= tail[14] << 16;
208 |   case 14: k4 ^= tail[13] << 8;
209 |   case 13: k4 ^= tail[12] << 0;
210 |            k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
211 | 
212 |   case 12: k3 ^= tail[11] << 24;
213 |   case 11: k3 ^= tail[10] << 16;
214 |   case 10: k3 ^= tail[ 9] << 8;
215 |   case  9: k3 ^= tail[ 8] << 0;
216 |            k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
217 | 
218 |   case  8: k2 ^= tail[ 7] << 24;
219 |   case  7: k2 ^= tail[ 6] << 16;
220 |   case  6: k2 ^= tail[ 5] << 8;
221 |   case  5: k2 ^= tail[ 4] << 0;
222 |            k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
223 | 
224 |   case  4: k1 ^= tail[ 3] << 24;
225 |   case  3: k1 ^= tail[ 2] << 16;
226 |   case  2: k1 ^= tail[ 1] << 8;
227 |   case  1: k1 ^= tail[ 0] << 0;
228 |            k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
229 |   };
230 | 
231 |   //----------
232 |   // finalization
233 | 
234 |   h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
235 | 
236 |   h1 += h2; h1 += h3; h1 += h4;
237 |   h2 += h1; h3 += h1; h4 += h1;
238 | 
239 |   h1 = fmix32(h1);
240 |   h2 = fmix32(h2);
241 |   h3 = fmix32(h3);
242 |   h4 = fmix32(h4);
243 | 
244 |   h1 += h2; h1 += h3; h1 += h4;
245 |   h2 += h1; h3 += h1; h4 += h1;
246 | 
247 |   ((uint32_t*)out)[0] = h1;
248 |   ((uint32_t*)out)[1] = h2;
249 |   ((uint32_t*)out)[2] = h3;
250 |   ((uint32_t*)out)[3] = h4;
251 | }
252 | 
253 | //-----------------------------------------------------------------------------
254 | 
255 | void MurmurHash3_x64_128 ( const void * key, const int len,
256 |                            const uint32_t seed, void * out )
257 | {
258 |   const uint8_t * data = (const uint8_t*)key;
259 |   const int nblocks = len / 16;
260 | 
261 |   uint64_t h1 = seed;
262 |   uint64_t h2 = seed;
263 | 
264 |   const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
265 |   const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
266 | 
267 |   //----------
268 |   // body
269 | 
270 |   const uint64_t * blocks = (const uint64_t *)(data);
271 | 
272 |   for(int i = 0; i < nblocks; i++)
273 |   {
274 |     uint64_t k1 = getblock64(blocks,i*2+0);
275 |     uint64_t k2 = getblock64(blocks,i*2+1);
276 | 
277 |     k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
278 | 
279 |     h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
280 | 
281 |     k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
282 | 
283 |     h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
284 |   }
285 | 
286 |   //----------
287 |   // tail
288 | 
289 |   const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
290 | 
291 |   uint64_t k1 = 0;
292 |   uint64_t k2 = 0;
293 | 
294 |   switch(len & 15)
295 |   {
296 |   case 15: k2 ^= ((uint64_t)tail[14]) << 48;
297 |   case 14: k2 ^= ((uint64_t)tail[13]) << 40;
298 |   case 13: k2 ^= ((uint64_t)tail[12]) << 32;
299 |   case 12: k2 ^= ((uint64_t)tail[11]) << 24;
300 |   case 11: k2 ^= ((uint64_t)tail[10]) << 16;
301 |   case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
302 |   case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
303 |            k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
304 | 
305 |   case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
306 |   case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
307 |   case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
308 |   case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
309 |   case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
310 |   case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
311 |   case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
312 |   case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
313 |            k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
314 |   };
315 | 
316 |   //----------
317 |   // finalization
318 | 
319 |   h1 ^= len; h2 ^= len;
320 | 
321 |   h1 += h2;
322 |   h2 += h1;
323 | 
324 |   h1 = fmix64(h1);
325 |   h2 = fmix64(h2);
326 | 
327 |   h1 += h2;
328 |   h2 += h1;
329 | 
330 |   ((uint64_t*)out)[0] = h1;
331 |   ((uint64_t*)out)[1] = h2;
332 | }
333 | 
334 | //-----------------------------------------------------------------------------
335 | 
336 | 


--------------------------------------------------------------------------------
/SMHasher/MurmurHash/MurmurHash3.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
 3 | // domain. The author hereby disclaims copyright to this source code.
 4 | 
 5 | #ifndef _MURMURHASH3_H_
 6 | #define _MURMURHASH3_H_
 7 | 
 8 | //-----------------------------------------------------------------------------
 9 | // Platform-specific functions and macros
10 | 
11 | // Microsoft Visual Studio
12 | 
13 | #if defined(_MSC_VER) && (_MSC_VER < 1600)
14 | 
15 | typedef unsigned char uint8_t;
16 | typedef unsigned int uint32_t;
17 | typedef unsigned __int64 uint64_t;
18 | 
19 | // Other compilers
20 | 
21 | #else	// defined(_MSC_VER)
22 | 
23 | #include <stdint.h>
24 | 
25 | #endif // !defined(_MSC_VER)
26 | 
27 | //-----------------------------------------------------------------------------
28 | 
29 | void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
30 | 
31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
32 | 
33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
34 | 
35 | //-----------------------------------------------------------------------------
36 | 
37 | #endif // _MURMURHASH3_H_
38 | 


--------------------------------------------------------------------------------
/SMHasher/Platform.cpp:
--------------------------------------------------------------------------------
 1 | #include "Platform.h"
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | void testRDTSC ( void )
 6 | {
 7 |   int64_t temp = rdtsc();
 8 | 
 9 |   printf("%d",(int)temp);
10 | }
11 | 
12 | #if defined(_WIN32)
13 | 
14 | #include <windows.h>
15 | 
16 | static DWORD_PTR process_mask = 1, system_mask = 1;
17 | int thread_priority = THREAD_PRIORITY_NORMAL;
18 | 
19 | void SetAffinity ( int cpu )
20 | {
21 |   GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask);   // i don't know why, but it can't fetch the process mask as set by the "start" command
22 |   thread_priority = GetThreadPriority (GetCurrentThread ());
23 | 
24 |   SetThreadAffinityMask(GetCurrentThread(),cpu);
25 |   SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
26 | }
27 | 
28 | void ResetAffinity()
29 | {
30 |   SetThreadAffinityMask(GetCurrentThread(),process_mask);
31 |   SetThreadPriority(GetCurrentThread(), thread_priority);
32 | }
33 | 
34 | #else
35 | 
36 | #include <sched.h>
37 | 
// Non-Windows: pins the calling process to CPU 2 via sched_setaffinity and
// prints a warning if that fails. The 'cpu' argument is ignored, and nothing
// at all is done on Cygwin and Apple platforms.
void SetAffinity ( int /*cpu*/ )
{
#if !defined(__CYGWIN__) && !defined(__APPLE__)
  cpu_set_t cpus;

  CPU_ZERO(&cpus);
  CPU_SET(2,&cpus);

  const int rc = sched_setaffinity(0,sizeof(cpus),&cpus);

  if(rc == -1)
  {
    printf("WARNING: Could not set CPU affinity\n");
  }
#endif
}
53 | 
// Non-Windows: intentionally a no-op for now - the affinity applied by
// SetAffinity is left in place.
void ResetAffinity()
{
  // #TODO: restore the affinity that was in effect before SetAffinity
}
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/SMHasher/Platform.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Platform-specific functions and macros
 3 | 
 4 | #pragma once
 5 | 
 6 | void SetAffinity ( int cpu );
 7 | void ResetAffinity();
 8 | 
 9 | //-----------------------------------------------------------------------------
10 | // Microsoft Visual Studio
11 | 
12 | #if defined(_MSC_VER)
13 | 
14 | #define FORCE_INLINE	__forceinline
15 | #define	NEVER_INLINE  __declspec(noinline)
16 | 
17 | #include <stdlib.h>
18 | #include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
19 | #include <intrin.h> // for __rdtsc
20 | #include "pstdint.h"
21 | 
22 | #define ROTL32(x,y)	_rotl(x,y)
23 | #define ROTL64(x,y)	_rotl64(x,y)
24 | #define ROTR32(x,y)	_rotr(x,y)
25 | #define ROTR64(x,y)	_rotr64(x,y)
26 | 
27 | #pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
28 | #pragma warning(disable : 4100)
29 | #pragma warning(disable : 4702)
30 | 
31 | #define BIG_CONSTANT(x) (x)
32 | 
33 | // RDTSC == Read Time Stamp Counter
34 | 
35 | #define rdtsc() __rdtsc()
36 | 
37 | //-----------------------------------------------------------------------------
38 | // Other compilers
39 | 
40 | #else	//	defined(_MSC_VER)
41 | 
42 | #include <stdint.h>
43 | 
44 | #define	FORCE_INLINE inline __attribute__((always_inline))
45 | #define	NEVER_INLINE __attribute__((noinline))
46 | 
// Rotates the 32-bit value x left by r bits.
// Both shift counts are masked into [0,31], so r == 0 (or any multiple of 32)
// returns x unchanged instead of shifting by the full word width, which is
// undefined behaviour. For r in 1..31 the result is unchanged from before.
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned s = (unsigned)r & 31u;

  return (x << s) | (x >> ((32u - s) & 31u));
}
51 | 
// Rotates the 64-bit value x left by r bits.
// Both shift counts are masked into [0,63], so r == 0 (or any multiple of 64)
// returns x unchanged instead of shifting by the full word width, which is
// undefined behaviour. For r in 1..63 the result is unchanged from before.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned s = (unsigned)r & 63u;

  return (x << s) | (x >> ((64u - s) & 63u));
}
56 | 
// Rotates the 32-bit value x right by r bits.
// Both shift counts are masked into [0,31], so r == 0 (or any multiple of 32)
// returns x unchanged instead of shifting by the full word width, which is
// undefined behaviour. For r in 1..31 the result is unchanged from before.
inline uint32_t rotr32 ( uint32_t x, int8_t r )
{
  const unsigned s = (unsigned)r & 31u;

  return (x >> s) | (x << ((32u - s) & 31u));
}
61 | 
// Rotates the 64-bit value x right by r bits.
// Both shift counts are masked into [0,63], so r == 0 (or any multiple of 64)
// returns x unchanged instead of shifting by the full word width, which is
// undefined behaviour. For r in 1..63 the result is unchanged from before.
inline uint64_t rotr64 ( uint64_t x, int8_t r )
{
  const unsigned s = (unsigned)r & 63u;

  return (x >> s) | (x << ((64u - s) & 63u));
}
66 | 
67 | #define	ROTL32(x,y)	rotl32(x,y)
68 | #define ROTL64(x,y)	rotl64(x,y)
69 | #define	ROTR32(x,y)	rotr32(x,y)
70 | #define ROTR64(x,y)	rotr64(x,y)
71 | 
72 | #define BIG_CONSTANT(x) (x##LLU)
73 | 
// Returns the value produced by the x86 'rdtsc' instruction.
// On x86-64 the instruction's two 32-bit outputs (EAX = low half, EDX = high
// half) are combined into one 64-bit value; on 32-bit x86 the "=A" constraint
// delivers the EDX:EAX pair directly. On any other architecture the function
// returns 0 and NO_CYCLE_COUNTER is defined.
__inline__ unsigned long long int rdtsc()
{
#if defined(__x86_64__)
    unsigned int lo, hi;

    __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));

    const unsigned long long upper = (unsigned long long)hi << 32;

    return (unsigned long)lo | upper;
#elif defined(__i386__)
    unsigned long long int counter;

    __asm__ volatile ("rdtsc" : "=A" (counter));

    return counter;
#else
#define NO_CYCLE_COUNTER
    return 0;
#endif
}
89 | 
90 | #include <strings.h>
91 | #define _stricmp strcasecmp
92 | 
93 | #endif	//	!defined(_MSC_VER)
94 | 
95 | //-----------------------------------------------------------------------------
96 | 


--------------------------------------------------------------------------------
/SMHasher/Poly1305Test.cpp:
--------------------------------------------------------------------------------
 1 | #include "poly1305/poly1305.c"
 2 | 
 3 | const unsigned int poly1305_key[POLY1305_KEYLEN/4] = {
 4 |   // 16-byte AES key k
 5 |   0xb8fe6c39,0x23a44bbe,0x7c01812c,0xf721ad1c,
 6 |   // r[3], r[7], r[11], r[15] are required to have their top four bits clear, and r[4], r[8], r[12] are required to have their bottom two bits clear
 7 |   0xed46de8, 0x39097d8, 0x240a4a4, 0x7b3671c};
 8 | 
 9 | void poly1305_test ( const void * key, int len, unsigned seed, void * out )
10 | {
11 |   poly1305_auth ((unsigned char*)out, (const unsigned char*)key, len, (const unsigned char*)poly1305_key);
12 | }
13 | 


--------------------------------------------------------------------------------
/SMHasher/Random.cpp:
--------------------------------------------------------------------------------
1 | #include "Random.h"
2 | 
3 | Rand g_rand1(1);
4 | Rand g_rand2(2);
5 | Rand g_rand3(3);
6 | Rand g_rand4(4);
7 | 
8 | //-----------------------------------------------------------------------------
9 | 


--------------------------------------------------------------------------------
/SMHasher/Random.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Types.h"
  4 | 
  5 | //-----------------------------------------------------------------------------
  6 | // Xorshift RNG based on code by George Marsaglia
  7 | // http://en.wikipedia.org/wiki/Xorshift
  8 | 
  9 | struct Rand
 10 | {
 11 |   uint32_t x;
 12 |   uint32_t y;
 13 |   uint32_t z;
 14 |   uint32_t w;
 15 | 
 16 |   Rand()
 17 |   {
 18 |     reseed(uint32_t(0));
 19 |   }
 20 | 
 21 |   Rand( uint32_t seed )
 22 |   {
 23 |     reseed(seed);
 24 |   }
 25 | 
 26 |   void reseed ( uint32_t seed )
 27 |   {
 28 |     x = 0x498b3bc5 ^ seed;
 29 |     y = 0;
 30 |     z = 0;
 31 |     w = 0;
 32 | 
 33 |     for(int i = 0; i < 10; i++) mix();
 34 |   }
 35 | 
 36 |   void reseed ( uint64_t seed )
 37 |   {
 38 |     x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
 39 |     y = 0x5a05089a ^ (uint32_t)(seed >> 32);
 40 |     z = 0;
 41 |     w = 0;
 42 | 
 43 |     for(int i = 0; i < 10; i++) mix();
 44 |   }
 45 | 
 46 |   //-----------------------------------------------------------------------------
 47 | 
 48 |   void mix ( void )
 49 |   {
 50 |     uint32_t t = x ^ (x << 11);
 51 |     x = y; y = z; z = w;
 52 |     w = w ^ (w >> 19) ^ t ^ (t >> 8); 
 53 |   }
 54 | 
 55 |   uint32_t rand_u32 ( void )
 56 |   {
 57 |     mix();
 58 | 
 59 |     return x;
 60 |   }
 61 | 
 62 |   uint64_t rand_u64 ( void ) 
 63 |   {
 64 |     mix();
 65 | 
 66 |     uint64_t a = x;
 67 |     uint64_t b = y;
 68 | 
 69 |     return (a << 32) | b;
 70 |   }
 71 | 
 72 |   void rand_p ( void * blob, int bytes )
 73 |   {
 74 |     uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);
 75 | 
 76 |     while(bytes >= 4)
 77 |     {
 78 |       blocks[0] = rand_u32();
 79 |       blocks++;
 80 |       bytes -= 4;
 81 |     }
 82 | 
 83 |     uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);
 84 | 
 85 |     for(int i = 0; i < bytes; i++)
 86 |     {
 87 |       tail[i] = (uint8_t)rand_u32();
 88 |     }
 89 |   }
 90 | };
 91 | 
 92 | //-----------------------------------------------------------------------------
 93 | 
 94 | extern Rand g_rand1;
 95 | 
 96 | inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }
 97 | inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }
 98 | 
 99 | inline void rand_p ( void * blob, int bytes )
100 | {
101 |   uint32_t * blocks = (uint32_t*)blob;
102 | 
103 |   while(bytes >= 4)
104 |   {
105 |     *blocks++ = rand_u32();
106 |     bytes -= 4;
107 |   }
108 | 
109 |   uint8_t * tail = (uint8_t*)blocks;
110 | 
111 |   for(int i = 0; i < bytes; i++)
112 |   {
113 |     tail[i] = (uint8_t)rand_u32();
114 |   }
115 | }
116 | 
117 | //-----------------------------------------------------------------------------
118 | 


--------------------------------------------------------------------------------
/SMHasher/SHA1/sha1.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | SHA-1 in C
  3 | By Steve Reid <sreid@sea-to-sky.net>
  4 | 100% Public Domain
  5 | 
  6 | -----------------
  7 | Modified 7/98
  8 | By James H. Brown <jbrown@burgoyne.com>
  9 | Still 100% Public Domain
 10 | 
 11 | Corrected a problem which generated improper hash values on 16 bit machines
 12 | Routine SHA1Update changed from
 13 |   void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
 14 | len)
 15 | to
 16 |   void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
 17 | long len)
 18 | 
 19 | The 'len' parameter was declared an int which works fine on 32 bit machines.
 20 | However, on 16 bit machines an int is too small for the shifts being done
 21 | against
 22 | it.  This caused the hash function to generate incorrect values if len was
 23 | greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
 24 | 
 25 | Since the file IO in main() reads 16K at a time, any file 8K or larger would
 26 | be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
 27 | "a"s).
 28 | 
 29 | I also changed the declaration of variables i & j in SHA1Update to
 30 | unsigned long from unsigned int for the same reason.
 31 | 
 32 | These changes should make no difference to any 32 bit implementations since
 33 | an
 34 | int and a long are the same size in those environments.
 35 | 
 36 | --
 37 | I also corrected a few compiler warnings generated by Borland C.
 38 | 1. Added #include <process.h> for exit() prototype
 39 | 2. Removed unused variable 'j' in SHA1Final
 40 | 3. Changed exit(0) to return(0) at end of main.
 41 | 
 42 | ALL changes I made can be located by searching for comments containing 'JHB'
 43 | -----------------
 44 | Modified 8/98
 45 | By Steve Reid <sreid@sea-to-sky.net>
 46 | Still 100% public domain
 47 | 
 48 | 1- Removed #include <process.h> and used return() instead of exit()
 49 | 2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
 50 | 3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
 51 | 
 52 | -----------------
 53 | Modified 4/01
 54 | By Saul Kravitz <Saul.Kravitz@celera.com>
 55 | Still 100% PD
 56 | Modified to run on Compaq Alpha hardware.
 57 | 
 58 | -----------------
 59 | Modified 07/2002
 60 | By Ralph Giles <giles@ghostscript.com>
 61 | Still 100% public domain
 62 | modified for use with stdint types, autoconf
 63 | code cleanup, removed attribution comments
 64 | switched SHA1Final() argument order for consistency
 65 | use SHA1_ prefix for public api
 66 | move public api to sha1.h
 67 | */
 68 | 
 69 | /*
 70 | Test Vectors (from FIPS PUB 180-1)
 71 | "abc"
 72 |   A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
 73 | "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
 74 |   84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
 75 | A million repetitions of "a"
 76 |   34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
 77 | */
 78 | 
 79 | #include <stdio.h>
 80 | #include <string.h>
 81 | #include <stdlib.h>
 82 | 
 83 | #include "sha1.h"
 84 | 
 85 | #if defined(_MSC_VER)
 86 | #pragma warning(disable : 4267)
 87 | #pragma warning(disable : 4996)
 88 | #pragma warning(disable : 4100)
 89 | #endif
 90 | 
 91 | void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
 92 | 
 93 | #define rol ROTL32
 94 | /* All macros below expand inside SHA1_Transform and use its local `block`. */
 95 | /* blk0() and blk() perform the initial expand. */
 96 | /* I got the idea of expanding during the round function from SSLeay */
 97 | /* FIXME: can we do this in an endian-proof way? */
 98 | /* blk0 yields word i; without WORDS_BIGENDIAN it first rewrites that word in place (a byte swap, given rol is a 32-bit left rotate). */
 99 | #ifdef WORDS_BIGENDIAN
100 | #define blk0(i) block->l[i]
101 | #else
102 | #define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))
103 | #endif
104 | #define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))
105 | /* blk stores its result back into slot i&15, so the 16 words of `block` act as a sliding window. */
106 | /* (R0+R1), R2, R3, R4 are the different operations used in SHA1: each adds a function of (w,x,y), a message word, a constant and rol(v,5) to z, then sets w=rol(w,30) */
107 | #define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
108 | #define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
109 | #define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
110 | #define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
111 | #define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
112 | 
113 | 
114 | /* Hash a single 512-bit block. This is the core of the algorithm.
115 |  * The block is copied into a local work area first, because blk0()/blk()
116 |  * overwrite the words they expand and `buffer` may be the caller's const
117 |  * (and possibly unaligned) message data, which must stay untouched. */
118 | void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
119 | {
120 |     uint32_t a, b, c, d, e;
121 |     typedef union {
122 |         uint8_t c[64];
123 |         uint32_t l[16];
124 |     } CHAR64LONG16;
125 |     CHAR64LONG16 block[1];   /* one-element array, so the macros' block->l[] still works */
126 | 
127 |     memcpy(block, buffer, 64);
128 | 
129 |     /* Copy context->state[] to working vars */
130 |     a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4];
131 | 
132 |     /* 4 rounds of 20 operations each. Loop unrolled. */
133 |     R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
134 |     R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
135 |     R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
136 |     R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
137 |     R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
138 |     R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
139 |     R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
140 |     R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
141 |     R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
142 |     R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
143 |     R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
144 |     R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
145 |     R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
146 |     R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
147 |     R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
148 |     R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
149 |     R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
150 |     R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
151 |     R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
152 |     R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
153 | 
154 |     /* Add the working vars back into context.state[] */
155 |     state[0] += a;
156 |     state[1] += b;
157 |     state[2] += c;
158 |     state[3] += d;
159 |     state[4] += e;
160 | 
161 |     /* Wipe variables, including the local copy of the message block */
162 |     a = b = c = d = e = 0;
163 |     memset(block, 0, sizeof(block));
164 | }
165 | 
166 | 
167 | /* SHA1_Init - reset a context to the start-of-message state */
168 | void SHA1_Init(SHA1_CTX* context)
169 | {
170 |     /* SHA1 initialization constants */
171 |     static const uint32_t kInitialState[5] = {
172 |         0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
173 |     };
174 | 
175 |     for (int w = 0; w < 5; w++) context->state[w] = kInitialState[w];
176 |     /* no message bits consumed yet */
177 |     context->count[0] = context->count[1] = 0;
178 | }
179 | 
180 | 
181 | /* Absorb `len` bytes of message data: whole 64-byte blocks are hashed
182 |  * immediately, any tail is kept in context->buffer for the next call. */
183 | void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
184 | {
185 |     size_t i, j;
186 |     /* Low 32 bits of the added bit count. Truncating before the carry test
187 |      * matters when size_t is 64 bits: comparing count[0] with the untruncated
188 |      * (len << 3) would report a carry for every len >= 2^29. */
189 |     const uint32_t lowbits = (uint32_t)(len << 3);
190 | 
191 |     j = (context->count[0] >> 3) & 63;    /* bytes already buffered */
192 |     if ((context->count[0] += lowbits) < lowbits) context->count[1]++;
193 |     context->count[1] += (uint32_t)(len >> 29);
194 | 
195 |     if ((j + len) > 63) {
196 |         memcpy(&context->buffer[j], data, (i = 64-j));
197 |         SHA1_Transform(context->state, context->buffer);
198 |         for ( ; i + 63 < len; i += 64) {
199 |             SHA1_Transform(context->state, data + i);
200 |         }
201 |         j = 0;
202 |     }
203 |     else i = 0;
204 |     memcpy(&context->buffer[j], &data[i], len - i);
205 | }
206 | 
207 | 
208 | /* Append the padding and the message length, then write out the digest. */
209 | void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
210 | {
211 |     uint32_t n;
212 |     uint8_t  lengthBytes[8];
213 | 
214 |     /* Message bit count as 8 big-endian bytes, high word (count[1]) first;
215 |      * captured now because the padding below keeps advancing the counter. */
216 |     for (n = 0; n < 8; n++) {
217 |         const uint32_t word = (n < 4) ? context->count[1] : context->count[0];
218 |         lengthBytes[n] = (unsigned char)((word >> ((3 - (n & 3)) * 8)) & 255);
219 |     }
220 |     /* 0x80 marker, then zero bytes until 8 bytes remain in the 64-byte block */
221 |     SHA1_Update(context, (uint8_t *)"\200", 1);
222 |     while ((context->count[0] & 504) != 448) {
223 |         SHA1_Update(context, (uint8_t *)"\0", 1);
224 |     }
225 |     SHA1_Update(context, lengthBytes, 8);  /* Should cause a SHA1_Transform() */
226 |     for (n = 0; n < SHA1_DIGEST_SIZE; n++) {
227 |         const uint32_t word = context->state[n >> 2];
228 |         digest[n] = (uint8_t)((word >> ((3 - (n & 3)) * 8)) & 255);
229 |     }
230 |     /* Wipe variables */
231 |     n = 0;
232 |     memset(context->buffer, 0, 64); memset(context->state, 0, 20);
233 |     memset(context->count, 0, 8);   memset(lengthBytes, 0, 8);
234 | }
235 | 
236 | //-----------------------------------------------------------------------------
237 | // self test
238 | 
239 | //#define TEST
240 | 
241 | #ifdef TEST
242 | 
243 | static const char *test_data[] = {      /* const: string literals are const char[] in C++ */
244 |     "abc",
245 |     "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
246 |     "A million repetitions of 'a'"};    /* label only; main() generates this input itself */
247 | static const char *test_results[] = {   /* expected digests, in the same order as test_data */
248 |     "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
249 |     "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
250 |     "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
251 | 
252 | 
253 | /* Format the digest as upper-case hex, in space-separated groups of 4 bytes. */
254 | void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
255 | {
256 |     char *cursor = output;
257 | 
258 |     for (int n = 0; n < SHA1_DIGEST_SIZE; n++) {
259 |         cursor += sprintf(cursor, "%02X", digest[n]);
260 |         if ((n & 3) == 3) {
261 |             /* close each group of four bytes with a separator */
262 |             cursor += sprintf(cursor, " ");
263 |         }
264 |     }
265 |     /* the separator after the last group becomes the terminator */
266 |     *(cursor - 1) = '\0';
267 | }
268 | 
269 | /* Print the mismatch report for one test vector; returns the exit status. */
270 | static int report_failure(const char *input, const char *got, const char *want)
271 | {
272 |     fprintf(stdout, "FAIL\n");
273 |     fprintf(stderr,"* hash of \"%s\" incorrect:\n", input);
274 |     fprintf(stderr,"\t%s returned\n", got);
275 |     fprintf(stderr,"\t%s is correct\n", want);
276 |     return 1;
277 | }
278 | 
279 | /* Self test: hash the three test vectors and compare with test_results. */
280 | int main(int argc, char** argv)
281 | {
282 |     SHA1_CTX context;
283 |     uint8_t digest[20];
284 |     char output[80];
285 | 
286 |     fprintf(stdout, "verifying SHA-1 implementation... ");
287 | 
288 |     /* the two short vectors are hashed with a single update each */
289 |     for (int v = 0; v < 2; v++) {
290 |         SHA1_Init(&context);
291 |         SHA1_Update(&context, (uint8_t*)test_data[v], strlen(test_data[v]));
292 |         SHA1_Final(&context, digest);
293 |         digest_to_hex(digest, output);
294 |         if (strcmp(output, test_results[v]) != 0)
295 |             return report_failure(test_data[v], output, test_results[v]);
296 |     }
297 |     /* million 'a' vector we feed separately, one byte per update */
298 |     SHA1_Init(&context);
299 |     for (int n = 0; n < 1000000; n++)
300 |         SHA1_Update(&context, (uint8_t*)"a", 1);
301 |     SHA1_Final(&context, digest);
302 |     digest_to_hex(digest, output);
303 |     if (strcmp(output, test_results[2]) != 0)
304 |         return report_failure(test_data[2], output, test_results[2]);
305 | 
306 |     /* success */
307 |     fprintf(stdout, "ok\n");
308 |     return(0);
309 | }
310 | #endif /* TEST */
311 | 


--------------------------------------------------------------------------------
/SMHasher/SHA1/sha1.h:
--------------------------------------------------------------------------------
 1 | /* public api for steve reid's public domain SHA-1 implementation */
 2 | /* this file is in the public domain */
 3 | 
 4 | #pragma once
 5 | 
 6 | #include "../Platform.h"
 7 | 
 8 | struct SHA1_CTX                /* streaming state passed to SHA1_Init / SHA1_Update / SHA1_Final */
 9 | {
10 |     uint32_t state[5];         /* five 32-bit chaining words; SHA1_Final reads the digest from them */
11 |     uint32_t count[2];         /* message length in bits: [0] is the low word, [1] the high word */
12 |     uint8_t  buffer[64];       /* input bytes not yet hashed (less than one 64-byte block) */
13 | };
14 | 
15 | #define SHA1_DIGEST_SIZE 20
16 | 
17 | void SHA1_Init(SHA1_CTX* context);
18 | void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
19 | void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
20 | 


--------------------------------------------------------------------------------
/SMHasher/SpeedTest.cpp:
--------------------------------------------------------------------------------
  1 | #include "SpeedTest.h"
  2 | 
  3 | #include "Random.h"
  4 | 
  5 | #include <stdio.h>   // for printf
  6 | #include <memory.h>  // for memset
  7 | #include <math.h>    // for sqrt
  8 | #include <algorithm> // for sort
  9 | 
 10 | //-----------------------------------------------------------------------------
 11 | // We view our timing values as a series of random variables V that has been
 12 | // contaminated with occasional outliers due to cache misses, thread
 13 | // preemption, etcetera. To filter out the outliers, we search for the largest
 14 | // subset of V such that all its values are within three standard deviations
 15 | // of the mean.
 16 | 
 17 | // Arithmetic mean of every element of v. An empty vector divides 0 by 0,
 18 | // which is NaN under IEEE arithmetic.
 19 | double CalcMean ( std::vector<double> & v )
 20 | {
 21 |   double total = 0;
 22 | 
 23 |   for(std::vector<double>::const_iterator it = v.begin(); it != v.end(); ++it)
 24 |   {
 25 |     total += *it;
 26 |   }
 27 | 
 28 |   return total / double(v.size());
 29 | }
 30 | 
 31 | // Arithmetic mean of the inclusive index range v[a..b].
 32 | double CalcMean ( std::vector<double> & v, int a, int b )
 33 | {
 34 |   const int count = b-a+1;
 35 |   double total = 0;
 36 | 
 37 |   for(int idx = a; idx <= b; idx++)
 38 |   {
 39 |     total += v[idx];
 40 |   }
 41 | 
 42 |   return total / count;
 43 | }
 44 | 
 45 | // Standard deviation of the inclusive index range v[a..b], dividing the
 46 | // summed squared deviations by the element count (not count-1).
 47 | double CalcStdv ( std::vector<double> & v, int a, int b )
 48 | {
 49 |   const double center = CalcMean(v,a,b);
 50 |   const int count = b-a+1;
 51 | 
 52 |   double sumSquares = 0;
 53 | 
 54 |   for(int idx = a; idx <= b; idx++)
 55 |   {
 56 |     const double delta = v[idx] - center;
 57 |     sumSquares += delta*delta;
 58 |   }
 59 | 
 60 |   return sqrt(sumSquares / count);
 61 | }
 62 | 
 63 | // Return true if the largest value in v[0,len) is more than three
 64 | // standard deviations from the mean. v[len-1] is taken as the largest
 65 | // value, so v must be sorted ascending. An empty range has no outlier.
 66 | bool ContainsOutlier ( std::vector<double> & v, size_t len )
 67 | {
 68 |   // v[len-1] below would index out of bounds for len == 0
 69 |   if(len == 0) return false;
 70 | 
 71 |   double mean = 0;
 72 |   for(size_t i = 0; i < len; i++)
 73 |   {
 74 |     mean += v[i];
 75 |   }
 76 |   mean /= double(len);
 77 | 
 78 |   // spread of the same prefix around that mean
 79 |   double stdv = 0;
 80 |   for(size_t i = 0; i < len; i++)
 81 |   {
 82 |     double x = v[i] - mean;
 83 |     stdv += x*x;
 84 |   }
 85 |   stdv = sqrt(stdv / double(len));
 86 | 
 87 |   double cutoff = mean + stdv*3;
 88 | 
 89 |   return v[len-1] > cutoff;
 90 | }
 91 | 
 92 | // Do a binary search to find the largest prefix of the sorted v that does
 93 | // not contain outliers, and truncate v to it. The length is built up one
 94 | // bit at a time, from the highest candidate bit down.
 95 | void FilterOutliers ( std::vector<double> & v )
 96 | {
 97 |   std::sort(v.begin(),v.end());
 98 | 
 99 |   size_t len = 0;
100 | 
101 |   for(size_t x = 0x40000000; x; x = x >> 1 )
102 |   {
103 |     // Lengths up to and including v.size() are valid candidates, since
104 |     // ContainsOutlier reads v[0,len). Excluding v.size() would always drop
105 |     // the largest sample and would empty a one-element vector.
106 |     if((len | x) > v.size()) continue;
107 | 
108 |     if(!ContainsOutlier(v,len | x)) len |= x;
109 |   }
110 | 
111 |   v.resize(len);
112 | }
113 | 
114 | // Iteratively tighten the set to find a subset that does not contain
115 | // outliers. I'm not positive this works correctly in all cases.
116 | // Runs a fixed 10 passes; each pass trims, from both ends of the sorted
117 | // window v[lo..hi], values beyond three standard deviations of its mean.
118 | void FilterOutliers2 ( std::vector<double> & v )
119 | {
120 |   std::sort(v.begin(),v.end());
121 | 
122 |   int lo = 0;
123 |   int hi = (int)(v.size() - 1);
124 | 
125 |   for(int pass = 0; pass < 10; pass++)
126 |   {
127 |     const double center = CalcMean(v,lo,hi);
128 |     const double spread = CalcStdv(v,lo,hi) * 3;
129 | 
130 |     // cut-offs on either side of the current window mean
131 |     const double lowCut  = center - spread;
132 |     const double highCut = center + spread;
133 | 
134 |     // trim from both ends, stopping once the bounds meet
135 |     while((lo < hi) && (v[lo] < lowCut)) lo++;
136 |     while((hi > lo) && (v[hi] > highCut)) hi--;
137 |   }
138 | 
139 |   // keep only the surviving window
140 |   std::vector<double> kept(v.begin()+lo, v.begin()+hi+1);
141 | 
142 |   v.swap(kept);
143 | }
144 | 
145 | //-----------------------------------------------------------------------------
146 | // We really want the rdtsc() calls to bracket the function call as tightly
147 | // as possible, but that's hard to do portably. We'll try and get as close as
148 | // possible by marking the function as NEVER_INLINE (to keep the optimizer from
149 | // moving it) and marking the timing variables as volatile.
150 | // Returns the rdtsc() delta around `repeats` calls of hash. Throughput mode
151 | // reuses one seed; latency mode feeds each result into the next call's seed.
152 | NEVER_INLINE int64_t timehash ( pfHash hash, int hashsize, const void * key, int len, int seed, const int repeats, bool measure_throughput )
153 | {
154 |   volatile int64_t begin,end;   // no 'register': that specifier is ill-formed since C++17
155 |   uint32_t temp[16] = {0};      // zeroed so a result narrower than 32 bits leaves temp[0] defined
156 | 
157 |   begin = rdtsc();
158 | 
159 |   if (measure_throughput) {
160 |     for(int i = 0; i < repeats; i++) hash(key,len,seed,temp);
161 |   } else {  // measure back-to-back latency
162 |     switch (hashsize)
163 |     {
164 |       case  32: for(int i = 0; i < repeats; i++)
165 |                 {
166 |                     hash(key,len,seed,temp);
167 |                     seed = temp[0];                // ensure that new seed depends on ALL bits of hash result
168 |                 }
169 |                 break;
170 |       case  64: for(int i = 0; i < repeats; i++)
171 |                 {
172 |                     hash(key,len,seed,temp);
173 |                     seed  =  (sizeof(size_t) == 4?  temp[0] + temp[1]
174 |                                                  :  (*(uint64_t*)temp >> 1));
175 |                 }
176 |                 break;
177 |       case 128: for(int i = 0; i < repeats; i++)
178 |                 {
179 |                     hash(key,len,seed,temp);
180 |                     seed  =  temp[0] + temp[1] + temp[2] + temp[3];
181 |                 }
182 |                 break;
183 |       case 256: for(int i = 0; i < repeats; i++)
184 |                 {
185 |                     hash(key,len,seed,temp);
186 |                     seed = temp[0];
187 |                     for (int j=1; j < 256/32; j++)
188 |                         seed += temp[j];
189 |                 }
190 |                 break;
191 |       case 512: for(int i = 0; i < repeats; i++)
192 |                 {
193 |                     hash(key,len,seed,temp);
194 |                     seed = temp[0];
195 |                     for (int j=1; j < 512/32; j++)
196 |                         seed += temp[j];
197 |                 }
198 |                 break;
199 |       default:  // any other width: still run the hash, folding its 32-bit words (at most all 16 of temp)
200 |                 for(int i = 0; i < repeats; i++)
201 |                 {
202 |                     hash(key,len,seed,temp);
203 |                     seed = temp[0];
204 |                     for (int j=1; j < hashsize/32 && j < 16; j++) seed += temp[j];
205 |                 }
206 |                 break;
207 |     }
208 |   }
209 | 
210 |   end = rdtsc();
211 | 
212 |   return end-begin;
213 | }
214 | 
215 | //-----------------------------------------------------------------------------
216 | 
217 | // Runs `trials` timehash() measurements (each `repeats` hash calls) over a
218 | // `blocksize`-byte block placed `align` bytes past a 256-byte boundary, and
219 | // returns the outlier-filtered mean rdtsc delta per hash call.
220 | double SpeedTest ( pfHash hash, int hashsize, uint32_t seed, const int trials, const int repeats, const int blocksize, const int align, bool measure_throughput )
221 | {
222 |   Rand r(seed);
223 | 
224 |   // over-allocate so the block can be moved up to the next 256-byte boundary
225 |   uint8_t * buf = new uint8_t[blocksize + 512];
226 | 
227 |   uint64_t t1 = reinterpret_cast<uint64_t>(buf);
228 |   t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
229 |   t1 += align;
230 | 
231 |   uint8_t * block = reinterpret_cast<uint8_t*>(t1);
232 | 
233 |   r.rand_p(block,blocksize);
234 | 
235 |   //----------
236 | 
237 |   std::vector<double> times;
238 |   times.reserve(trials);
239 | 
240 |   for(int itrial = 0; itrial < trials; itrial++)
241 |   {
242 |     r.rand_p(block,blocksize);
243 | 
244 |     double t = (double)timehash(hash,hashsize,block,blocksize,itrial,repeats,measure_throughput);
245 |     if(t > 0) times.push_back(t);   // non-positive deltas are discarded
246 |   }
247 | 
248 |   //----------
249 |   // FilterOutliers sorts its argument itself, so no separate sort is needed.
250 |   FilterOutliers(times);
251 | 
252 |   delete [] buf;
253 | 
254 |   return CalcMean(times)/repeats;
255 | }
256 | 
257 | //-----------------------------------------------------------------------------
258 | // 256k blocks seem to give the best results.
259 | // Prints the throughput measured for key alignments 0..7, in bytes/cycle
260 | // and as MiB/sec for an assumed 3 GHz clock.
261 | void BulkSpeedTest ( pfHash hash, int hashsize, uint32_t seed )
262 | {
263 |   const int  kTrials     = 2999;
264 |   const int  kRepeats    = 1;
265 |   const int  kBlockSize  = 256 * 1024;
266 |   const bool kThroughput = true;
267 | 
268 |   printf("Bulk speed test - %d-byte keys\n",kBlockSize);
269 | 
270 |   for(int align = 0; align < 8; align++)
271 |   {
272 |     const double cyclesPerBlock = SpeedTest(hash,hashsize,seed,kTrials,kRepeats,kBlockSize,align,kThroughput);
273 |     const double bytesPerCycle  = double(kBlockSize)/cyclesPerBlock;
274 |     const double mibPerSecond   = (bytesPerCycle * 3000000000.0 / 1048576.0);
275 | 
276 |     printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bytesPerCycle,mibPerSecond);
277 |   }
278 | }
279 | 
280 | //-----------------------------------------------------------------------------
281 | 
282 | // Reports, for every key size 0..max_keysize, the lowest latency and
283 | // throughput figures seen over 10 rounds of SpeedTest measurements.
284 | void TinySpeedTest ( pfHash hash, int hashsize, int max_keysize, uint32_t seed, bool verbose )
285 | {
286 |   const int trials = 1000;
287 |   const int repeats = 1000;
288 |   const int rounds = 10;
289 |   std::vector<double> bestLatency(max_keysize+1);
290 |   std::vector<double> bestThroughput(max_keysize+1);
291 | 
292 |   printf("Small key speed test");
293 | 
294 |   for (int round=0; round<rounds; round++)
295 |   {
296 |     if(verbose) printf(".");
297 |     const bool first = (round == 0);   // the first round seeds the minima
298 | 
299 |     for(int keysize = 0; keysize <= max_keysize; keysize++)
300 |     {
301 |       const double latency = SpeedTest(hash,hashsize,seed,trials,repeats,keysize,0,false);
302 |       if (first || latency < bestLatency[keysize]) bestLatency[keysize] = latency;
303 | 
304 |       const double throughput = SpeedTest(hash,hashsize,seed,trials,repeats,keysize,0,true);
305 |       if (first || throughput < bestThroughput[keysize]) bestThroughput[keysize] = throughput;
306 |     }
307 |   }
308 |   printf("\n");
309 | 
310 |   for(int keysize = 0; keysize <= max_keysize; keysize++)
311 |   {
312 |     printf("%4d-byte keys - latency %8.2f cycles/hash,  throughput %8.2f cycles/hash\n",
313 |       keysize, bestLatency[keysize], bestThroughput[keysize]);
314 |   }
315 | }
316 | 
317 | //-----------------------------------------------------------------------------
318 | 


--------------------------------------------------------------------------------
/SMHasher/SpeedTest.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "Types.h"
4 | 
5 | void BulkSpeedTest ( pfHash hash, int hashsize, uint32_t seed );
6 | void TinySpeedTest ( pfHash hash, int hashsize, int max_keysize, uint32_t seed, bool verbose );
7 | 
8 | //-----------------------------------------------------------------------------
9 | 


--------------------------------------------------------------------------------
/SMHasher/SpookyHash/SpookyV2.cpp:
--------------------------------------------------------------------------------
  1 | // Spooky Hash
  2 | // A 128-bit noncryptographic hash, for checksums and table lookup
  3 | // By Bob Jenkins.  Public domain.
  4 | //   Oct 31 2010: published framework, disclaimer ShortHash isn't right
  5 | //   Nov 7 2010: disabled ShortHash
  6 | //   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
  7 | //   April 10 2012: buffer overflow on platforms without unaligned reads
  8 | //   July 12 2012: was passing out variables in final to in/out in short
  9 | //   July 30 2012: I reintroduced the buffer overflow
 10 | //   August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash
 11 | 
 12 | #include <memory.h>
 13 | #include "SpookyV2.h"
 14 | 
 15 | #define ALLOW_UNALIGNED_READS 1
 16 | 
 17 | // Short hash: it could be used on any message, but Spooky uses it just
 18 | // for short messages. *hash1 and *hash2 are in/out: they seed the state
 19 | // words a and b on entry and receive a and b after the final mixing.
 20 | //
 21 | void SpookyHash::Short(
 22 |     const void *message,
 23 |     size_t length,
 24 |     uint64 *hash1,
 25 |     uint64 *hash2)
 26 | {
 27 |     uint64 buf[2*sc_numVars];   // aligned copy of the message, used only when unaligned reads are disallowed
 28 |     union 
 29 |     { 
 30 |         const uint8 *p8; 
 31 |         uint32 *p32;
 32 |         uint64 *p64; 
 33 |         size_t i; 
 34 |     } u;                        // one read cursor, viewed as bytes, 32/64-bit words or an integer address
 35 | 
 36 |     u.p8 = (const uint8 *)message;
 37 |     
 38 |     if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
 39 |     {
 40 |         memcpy(buf, message, length);
 41 |         u.p64 = buf;
 42 |     }
 43 | 
 44 |     size_t remainder = length%32;
 45 |     uint64 a=*hash1;
 46 |     uint64 b=*hash2;
 47 |     uint64 c=sc_const;
 48 |     uint64 d=sc_const;
 49 | 
 50 |     if (length > 15)
 51 |     {
 52 |         const uint64 *end = u.p64 + (length/32)*4;
 53 |         
 54 |         // handle all complete sets of 32 bytes
 55 |         for (; u.p64 < end; u.p64 += 4)
 56 |         {
 57 |             c += u.p64[0];
 58 |             d += u.p64[1];
 59 |             ShortMix(a,b,c,d);
 60 |             a += u.p64[2];
 61 |             b += u.p64[3];
 62 |         }
 63 |         
 64 |         //Handle the case of 16+ remaining bytes.
 65 |         if (remainder >= 16)
 66 |         {
 67 |             c += u.p64[0];
 68 |             d += u.p64[1];
 69 |             ShortMix(a,b,c,d);
 70 |             u.p64 += 2;
 71 |             remainder -= 16;
 72 |         }
 73 |     }
 74 |     
 75 |     // Handle the last 0..15 bytes, and its length
 76 |     d += ((uint64)length) << 56;    // the length is added into the top byte of d
 77 |     switch (remainder)              // cases without a break fall through on purpose
 78 |     {
 79 |     case 15:
 80 |     d += ((uint64)u.p8[14]) << 48;
 81 |     case 14:
 82 |         d += ((uint64)u.p8[13]) << 40;
 83 |     case 13:
 84 |         d += ((uint64)u.p8[12]) << 32;
 85 |     case 12:
 86 |         d += u.p32[2];
 87 |         c += u.p64[0];
 88 |         break;
 89 |     case 11:
 90 |         d += ((uint64)u.p8[10]) << 16;
 91 |     case 10:
 92 |         d += ((uint64)u.p8[9]) << 8;
 93 |     case 9:
 94 |         d += (uint64)u.p8[8];
 95 |     case 8:
 96 |         c += u.p64[0];
 97 |         break;
 98 |     case 7:
 99 |         c += ((uint64)u.p8[6]) << 48;
100 |     case 6:
101 |         c += ((uint64)u.p8[5]) << 40;
102 |     case 5:
103 |         c += ((uint64)u.p8[4]) << 32;
104 |     case 4:
105 |         c += u.p32[0];
106 |         break;
107 |     case 3:
108 |         c += ((uint64)u.p8[2]) << 16;
109 |     case 2:
110 |         c += ((uint64)u.p8[1]) << 8;
111 |     case 1:
112 |         c += (uint64)u.p8[0];
113 |         break;
114 |     case 0:                         // no tail bytes: add the constant to c and d instead
115 |         c += sc_const;
116 |         d += sc_const;
117 |     }
118 |     ShortEnd(a,b,c,d);
119 |     *hash1 = a;
120 |     *hash2 = b;
121 | }
122 | 
123 | 
124 | 
125 | 
126 | // do the whole hash in one call; *hash1 and *hash2 carry the seed in and the result out
127 | void SpookyHash::Hash128(
128 |     const void *message, 
129 |     size_t length, 
130 |     uint64 *hash1, 
131 |     uint64 *hash2)
132 | {
133 |     if (length < sc_bufSize)    // messages below sc_bufSize are handled entirely by Short()
134 |     {
135 |         Short(message, length, hash1, hash2);
136 |         return;
137 |     }
138 | 
139 |     uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
140 |     uint64 buf[sc_numVars];     // scratch block: aligned copies of input and the padded final block
141 |     uint64 *end;
142 |     union 
143 |     { 
144 |         const uint8 *p8; 
145 |         uint64 *p64; 
146 |         size_t i; 
147 |     } u;
148 |     size_t remainder;
149 |     
150 |     h0=h3=h6=h9  = *hash1;      // start state: the two seed words and sc_const, repeated over the 12 variables
151 |     h1=h4=h7=h10 = *hash2;
152 |     h2=h5=h8=h11 = sc_const;
153 |     
154 |     u.p8 = (const uint8 *)message;
155 |     end = u.p64 + (length/sc_blockSize)*sc_numVars;
156 | 
157 |     // handle all whole sc_blockSize blocks of bytes
158 |     if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
159 |     {
160 |         while (u.p64 < end)
161 |         { 
162 |             Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
163 | 	    u.p64 += sc_numVars;
164 |         }
165 |     }
166 |     else
167 |     {
168 |         while (u.p64 < end)
169 |         {
170 |             memcpy(buf, u.p64, sc_blockSize);   // copy each block to aligned storage before mixing
171 |             Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
172 | 	    u.p64 += sc_numVars;
173 |         }
174 |     }
175 | 
176 |     // handle the last partial block of sc_blockSize bytes
177 |     remainder = (length - ((const uint8 *)end-(const uint8 *)message));
178 |     memcpy(buf, end, remainder);
179 |     memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
180 |     ((uint8 *)buf)[sc_blockSize-1] = remainder;     // the last byte of the zero-padded block records the tail length
181 |     
182 |     // do some final mixing 
183 |     End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
184 |     *hash1 = h0;
185 |     *hash2 = h1;
186 | }
187 | 
188 | 
189 | 
190 | // init spooky state: store the two seeds, with nothing hashed or buffered yet
191 | void SpookyHash::Init(uint64 seed1, uint64 seed2)
192 | {
193 |     m_state[0] = seed1;
194 |     m_state[1] = seed2;
195 |     m_remainder = 0;
196 |     m_length = 0;
197 | }
198 | 
199 | 
200 | // add a message fragment to the state; input is buffered in m_data until sc_bufSize bytes are available
201 | void SpookyHash::Update(const void *message, size_t length)
202 | {
203 |     uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
204 |     size_t newLength = length + m_remainder;    // buffered bytes plus this fragment
205 |     uint8  remainder;
206 |     union 
207 |     { 
208 |         const uint8 *p8; 
209 |         uint64 *p64; 
210 |         size_t i; 
211 |     } u;
212 |     const uint64 *end;
213 |     
214 |     // Is this message fragment too short?  If it is, stuff it away.
215 |     if (newLength < sc_bufSize)
216 |     {
217 |         memcpy(&((uint8 *)m_data)[m_remainder], message, length);
218 |         m_length = length + m_length;
219 |         m_remainder = (uint8)newLength;
220 |         return;
221 |     }
222 |     
223 |     // init the variables
224 |     if (m_length < sc_bufSize)  // total so far is below sc_bufSize: start from the seeds in m_state[0..1]
225 |     {
226 |         h0=h3=h6=h9  = m_state[0];
227 |         h1=h4=h7=h10 = m_state[1];
228 |         h2=h5=h8=h11 = sc_const;
229 |     }
230 |     else                        // otherwise resume from the twelve saved state words
231 |     {
232 |         h0 = m_state[0];
233 |         h1 = m_state[1];
234 |         h2 = m_state[2];
235 |         h3 = m_state[3];
236 |         h4 = m_state[4];
237 |         h5 = m_state[5];
238 |         h6 = m_state[6];
239 |         h7 = m_state[7];
240 |         h8 = m_state[8];
241 |         h9 = m_state[9];
242 |         h10 = m_state[10];
243 |         h11 = m_state[11];
244 |     }
245 |     m_length = length + m_length;
246 |     
247 |     // if we've got anything stuffed away, use it now
248 |     if (m_remainder)
249 |     {
250 |         uint8 prefix = sc_bufSize-m_remainder;  // bytes taken from this fragment to fill m_data up to sc_bufSize
251 |         memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix);
252 |         u.p64 = m_data;
253 |         Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
254 |         Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
255 |         u.p8 = ((const uint8 *)message) + prefix;
256 |         length -= prefix;
257 |     }
258 |     else
259 |     {
260 |         u.p8 = (const uint8 *)message;
261 |     }
262 |     
263 |     // handle all whole blocks of sc_blockSize bytes
264 |     end = u.p64 + (length/sc_blockSize)*sc_numVars;
265 |     remainder = (uint8)(length-((const uint8 *)end-u.p8));
266 |     if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
267 |     {
268 |         while (u.p64 < end)
269 |         { 
270 |             Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
271 | 	    u.p64 += sc_numVars;
272 |         }
273 |     }
274 |     else
275 |     {
276 |         while (u.p64 < end)
277 |         { 
278 |             memcpy(m_data, u.p8, sc_blockSize);     // m_data doubles as aligned scratch space here
279 |             Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
280 | 	    u.p64 += sc_numVars;
281 |         }
282 |     }
283 | 
284 |     // stuff away the last few bytes
285 |     m_remainder = remainder;
286 |     memcpy(m_data, end, remainder);
287 |     
288 |     // stuff away the variables
289 |     m_state[0] = h0;
290 |     m_state[1] = h1;
291 |     m_state[2] = h2;
292 |     m_state[3] = h3;
293 |     m_state[4] = h4;
294 |     m_state[5] = h5;
295 |     m_state[6] = h6;
296 |     m_state[7] = h7;
297 |     m_state[8] = h8;
298 |     m_state[9] = h9;
299 |     m_state[10] = h10;
300 |     m_state[11] = h11;
301 | }
302 | 
303 | 
304 | // report the hash for the concatenation of all message fragments so far
305 | void SpookyHash::Final(uint64 *hash1, uint64 *hash2)
306 | {
307 |     // init the variables
308 |     if (m_length < sc_bufSize)  // short total: hash the buffered bytes with Short(), seeded from m_state[0..1]
309 |     {
310 |         *hash1 = m_state[0];
311 |         *hash2 = m_state[1];
312 |         Short( m_data, m_length, hash1, hash2);
313 |         return;
314 |     }
315 |     
316 |     const uint64 *data = (const uint64 *)m_data;
317 |     uint8 remainder = m_remainder;
318 |     
319 |     uint64 h0 = m_state[0];
320 |     uint64 h1 = m_state[1];
321 |     uint64 h2 = m_state[2];
322 |     uint64 h3 = m_state[3];
323 |     uint64 h4 = m_state[4];
324 |     uint64 h5 = m_state[5];
325 |     uint64 h6 = m_state[6];
326 |     uint64 h7 = m_state[7];
327 |     uint64 h8 = m_state[8];
328 |     uint64 h9 = m_state[9];
329 |     uint64 h10 = m_state[10];
330 |     uint64 h11 = m_state[11];
331 | 
332 |     if (remainder >= sc_blockSize)
333 |     {
334 |         // m_data can contain two blocks; handle any whole first block
335 |         Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
336 |         data += sc_numVars;
337 |         remainder -= sc_blockSize;
338 |     }
339 | 
340 |     // mix in the last partial block, and the length mod sc_blockSize
341 |     memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder));   // zero-pads inside m_data itself
342 | 
343 |     ((uint8 *)data)[sc_blockSize-1] = remainder;    // the last byte of the padded block records the tail length
344 |     
345 |     // do some final mixing
346 |     End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
347 | 
348 |     *hash1 = h0;
349 |     *hash2 = h1;
350 | }
351 | 
352 | 


--------------------------------------------------------------------------------
/SMHasher/SpookyHash/SpookyV2.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // SpookyHash: a 128-bit noncryptographic hash function
  3 | // By Bob Jenkins, public domain
  4 | //   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
  5 | //   Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right
  6 | //   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
  7 | //   Feb  2 2012: production, same bits as beta
  8 | //   Feb  5 2012: adjusted definitions of uint* to be more portable
  9 | //   Mar 30 2012: 3 bytes/cycle, not 4.  Alpha was 4 but wasn't thorough enough.
 10 | //   August 5 2012: SpookyV2 (different results)
 11 | // 
 12 | // Up to 3 bytes/cycle for long messages.  Reasonably fast for short messages.
 13 | // All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
 14 | //
 15 | // This was developed for and tested on 64-bit x86-compatible processors.
 16 | // It assumes the processor is little-endian.  There is a macro
 17 | // controlling whether unaligned reads are allowed (by default they are).
 18 | // This should be an equally good hash on big-endian machines, but it will
 19 | // compute different results on them than on little-endian machines.
 20 | //
 21 | // Google's CityHash has similar specs to SpookyHash, and CityHash is faster
 22 | // on new Intel boxes.  MD4 and MD5 also have similar specs, but they are orders
 23 | // of magnitude slower.  CRCs are two or more times slower, but unlike 
 24 | // SpookyHash, they have nice math for combining the CRCs of pieces to form 
 25 | // the CRCs of wholes.  There are also cryptographic hashes, but those are even 
 26 | // slower than MD5.
 27 | //
 28 | 
 29 | #include <stddef.h>
 30 | 
 31 | #ifdef _MSC_VER
 32 | # define INLINE __forceinline
 33 |   typedef  unsigned __int64 uint64;
 34 |   typedef  unsigned __int32 uint32;
 35 |   typedef  unsigned __int16 uint16;
 36 |   typedef  unsigned __int8  uint8;
 37 | #else
 38 | # include <stdint.h>
 39 | # define INLINE inline
 40 |   typedef  uint64_t  uint64;
 41 |   typedef  uint32_t  uint32;
 42 |   typedef  uint16_t  uint16;
 43 |   typedef  uint8_t   uint8;
 44 | #endif
 45 | 
 46 | 
 47 | class SpookyHash
 48 | {
 49 | public:
 50 |     //
 51 |     // Hash128: hash a single message in one call, produce 128-bit output
 52 |     //
 53 |     static void Hash128(
 54 |         const void *message,  // message to hash
 55 |         size_t length,        // length of message in bytes
 56 |         uint64 *hash1,        // in/out: in seed 1, out hash value 1
 57 |         uint64 *hash2);       // in/out: in seed 2, out hash value 2
 58 | 
 59 |     //
 60 |     // Hash64: hash a single message in one call, return 64-bit output
 61 |     // (the first Hash128 word; the seed is passed as both seed 1 and seed 2)
 62 |     static uint64 Hash64(
 63 |         const void *message,  // message to hash
 64 |         size_t length,        // length of message in bytes
 65 |         uint64 seed)          // seed
 66 |     {
 67 |         uint64 hash1 = seed;
 68 |         Hash128(message, length, &hash1, &seed);
 69 |         return hash1;
 70 |     }
 71 | 
 72 |     //
 73 |     // Hash32: hash a single message in one call, produce 32-bit output
 74 |     // (the low 32 bits of the first Hash128 word; the seed feeds both seeds)
 75 |     static uint32 Hash32(
 76 |         const void *message,  // message to hash
 77 |         size_t length,        // length of message in bytes
 78 |         uint32 seed)          // seed
 79 |     {
 80 |         uint64 hash1 = seed, hash2 = seed;
 81 |         Hash128(message, length, &hash1, &hash2);
 82 |         return (uint32)hash1;
 83 |     }
 84 | 
 85 |     //
 86 |     // Init: initialize the context of a SpookyHash
 87 |     //
 88 |     void Init(
 89 |         uint64 seed1,       // any 64-bit value will do, including 0
 90 |         uint64 seed2);      // different seeds produce independent hashes
 91 |     
 92 |     //
 93 |     // Update: add a piece of a message to a SpookyHash state
 94 |     //
 95 |     void Update(
 96 |         const void *message,  // message fragment
 97 |         size_t length);       // length of message fragment in bytes
 98 | 
 99 | 
100 |     //
101 |     // Final: compute the hash for the current SpookyHash state
102 |     //
103 |     // This does not modify the state; you can keep updating it afterward
104 |     //
105 |     // The result is the same as if Hash128() had been called with
106 |     // all the pieces concatenated into one message.
107 |     //
108 |     void Final(
109 |         uint64 *hash1,    // out only: first 64 bits of hash value.
110 |         uint64 *hash2);   // out only: second 64 bits of hash value.
111 | 
112 |     //
113 |     // left rotate a 64-bit value by k bits; k must be in 1..63
114 |     // (k == 0 would make the right shift a shift by 64)
115 |     static INLINE uint64 Rot64(uint64 x, int k)
116 |     {
117 |         return (x << k) | (x >> (64 - k));
118 |     }
119 | 
120 |     //
121 |     // This is used if the input is 96 bytes long or longer.
122 |     //
123 |     // The internal state is fully overwritten every 96 bytes.
124 |     // Every input bit appears to cause at least 128 bits of entropy
125 |     // before 96 other bytes are combined, when run forward or backward
126 |     //   For every input bit,
127 |     //   Two inputs differing in just that input bit
128 |     //   Where "differ" means xor or subtraction
129 |     //   And the base value is random
130 |     //   When run forward or backwards one Mix
131 |     // I tried 3 pairs of each; they all differed by at least 212 bits.
132 |     // Reads data[0..11], i.e. one full block of sc_numVars 64-bit words.
133 |     static INLINE void Mix(
134 |         const uint64 *data, 
135 |         uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3,
136 |         uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7,
137 |         uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11)
138 |     {
139 |       s0 += data[0];    s2 ^= s10;    s11 ^= s0;    s0 = Rot64(s0,11);    s11 += s1;
140 |       s1 += data[1];    s3 ^= s11;    s0 ^= s1;    s1 = Rot64(s1,32);    s0 += s2;
141 |       s2 += data[2];    s4 ^= s0;    s1 ^= s2;    s2 = Rot64(s2,43);    s1 += s3;
142 |       s3 += data[3];    s5 ^= s1;    s2 ^= s3;    s3 = Rot64(s3,31);    s2 += s4;
143 |       s4 += data[4];    s6 ^= s2;    s3 ^= s4;    s4 = Rot64(s4,17);    s3 += s5;
144 |       s5 += data[5];    s7 ^= s3;    s4 ^= s5;    s5 = Rot64(s5,28);    s4 += s6;
145 |       s6 += data[6];    s8 ^= s4;    s5 ^= s6;    s6 = Rot64(s6,39);    s5 += s7;
146 |       s7 += data[7];    s9 ^= s5;    s6 ^= s7;    s7 = Rot64(s7,57);    s6 += s8;
147 |       s8 += data[8];    s10 ^= s6;    s7 ^= s8;    s8 = Rot64(s8,55);    s7 += s9;
148 |       s9 += data[9];    s11 ^= s7;    s8 ^= s9;    s9 = Rot64(s9,54);    s8 += s10;
149 |       s10 += data[10];    s0 ^= s8;    s9 ^= s10;    s10 = Rot64(s10,22);    s9 += s11;
150 |       s11 += data[11];    s1 ^= s9;    s10 ^= s11;    s11 = Rot64(s11,46);    s10 += s0;
151 |     }
152 | 
153 |     //
154 |     // Mix all 12 inputs together so that h0, h1 are a hash of them all.
155 |     //
156 |     // For two inputs differing in just the input bits
157 |     // Where "differ" means xor or subtraction
158 |     // And the base value is random, or a counting value starting at that bit
159 |     // The final result will have each bit of h0, h1 flip
160 |     // For every input bit,
161 |     // with probability 50 +- .3%
162 |     // For every pair of input bits,
163 |     // with probability 50 +- 3%
164 |     //
165 |     // This does not rely on the last Mix() call having already mixed some.
166 |     // Two iterations was almost good enough for a 64-bit result, but a
167 |     // 128-bit result is reported, so End() does three iterations.
168 |     //
169 |     static INLINE void EndPartial(
170 |         uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
171 |         uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
172 |         uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
173 |     {
174 |         h11+= h1;    h2 ^= h11;   h1 = Rot64(h1,44);
175 |         h0 += h2;    h3 ^= h0;    h2 = Rot64(h2,15);
176 |         h1 += h3;    h4 ^= h1;    h3 = Rot64(h3,34);
177 |         h2 += h4;    h5 ^= h2;    h4 = Rot64(h4,21);
178 |         h3 += h5;    h6 ^= h3;    h5 = Rot64(h5,38);
179 |         h4 += h6;    h7 ^= h4;    h6 = Rot64(h6,33);
180 |         h5 += h7;    h8 ^= h5;    h7 = Rot64(h7,10);
181 |         h6 += h8;    h9 ^= h6;    h8 = Rot64(h8,13);
182 |         h7 += h9;    h10^= h7;    h9 = Rot64(h9,38);
183 |         h8 += h10;   h11^= h8;    h10= Rot64(h10,53);
184 |         h9 += h11;   h0 ^= h9;    h11= Rot64(h11,42);
185 |         h10+= h0;    h1 ^= h10;   h0 = Rot64(h0,54);
186 |     }
187 |     // End: add one block (data[0..11]) to the state, then run EndPartial 3x.
188 |     static INLINE void End(
189 |         const uint64 *data, 
190 |         uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
191 |         uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
192 |         uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
193 |     {
194 |         h0 += data[0];   h1 += data[1];   h2 += data[2];   h3 += data[3];
195 |         h4 += data[4];   h5 += data[5];   h6 += data[6];   h7 += data[7];
196 |         h8 += data[8];   h9 += data[9];   h10 += data[10]; h11 += data[11];
197 |         EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
198 |         EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
199 |         EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
200 |     }
201 | 
202 |     //
203 |     // The goal is for each bit of the input to expand into 128 bits of 
204 |     //   apparent entropy before it is fully overwritten.
205 |     // n trials both set and cleared at least m bits of h0 h1 h2 h3
206 |     //   n: 2   m: 29
207 |     //   n: 3   m: 46
208 |     //   n: 4   m: 57
209 |     //   n: 5   m: 107
210 |     //   n: 6   m: 146
211 |     //   n: 7   m: 152
212 |     // when run forwards or backwards
213 |     // for all 1-bit and 2-bit diffs
214 |     // with diffs defined by either xor or subtraction
215 |     // with a base of all zeros plus a counter, or plus another bit, or random
216 |     //
217 |     static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
218 |     {
219 |         h2 = Rot64(h2,50);  h2 += h3;  h0 ^= h2;
220 |         h3 = Rot64(h3,52);  h3 += h0;  h1 ^= h3;
221 |         h0 = Rot64(h0,30);  h0 += h1;  h2 ^= h0;
222 |         h1 = Rot64(h1,41);  h1 += h2;  h3 ^= h1;
223 |         h2 = Rot64(h2,54);  h2 += h3;  h0 ^= h2;
224 |         h3 = Rot64(h3,48);  h3 += h0;  h1 ^= h3;
225 |         h0 = Rot64(h0,38);  h0 += h1;  h2 ^= h0;
226 |         h1 = Rot64(h1,37);  h1 += h2;  h3 ^= h1;
227 |         h2 = Rot64(h2,62);  h2 += h3;  h0 ^= h2;
228 |         h3 = Rot64(h3,34);  h3 += h0;  h1 ^= h3;
229 |         h0 = Rot64(h0,5);   h0 += h1;  h2 ^= h0;
230 |         h1 = Rot64(h1,36);  h1 += h2;  h3 ^= h1;
231 |     }
232 | 
233 |     //
234 |     // Mix all 4 inputs together so that h0, h1 are a hash of them all.
235 |     //
236 |     // For two inputs differing in just the input bits
237 |     // Where "differ" means xor or subtraction
238 |     // And the base value is random, or a counting value starting at that bit
239 |     // The final result will have each bit of h0, h1 flip
240 |     // For every input bit,
241 |     // with probability 50 +- .3% (it is probably better than that)
242 |     // For every pair of input bits,
243 |     // with probability 50 +- .75% (the worst case is approximately that)
244 |     //
245 |     static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
246 |     {
247 |         h3 ^= h2;  h2 = Rot64(h2,15);  h3 += h2;
248 |         h0 ^= h3;  h3 = Rot64(h3,52);  h0 += h3;
249 |         h1 ^= h0;  h0 = Rot64(h0,26);  h1 += h0;
250 |         h2 ^= h1;  h1 = Rot64(h1,51);  h2 += h1;
251 |         h3 ^= h2;  h2 = Rot64(h2,28);  h3 += h2;
252 |         h0 ^= h3;  h3 = Rot64(h3,9);   h0 += h3;
253 |         h1 ^= h0;  h0 = Rot64(h0,47);  h1 += h0;
254 |         h2 ^= h1;  h1 = Rot64(h1,54);  h2 += h1;
255 |         h3 ^= h2;  h2 = Rot64(h2,32);  h3 += h2;
256 |         h0 ^= h3;  h3 = Rot64(h3,25);  h0 += h3;
257 |         h1 ^= h0;  h0 = Rot64(h0,63);  h1 += h0;
258 |     }
259 |     
260 | private:
261 | 
262 |     //
263 |     // Short is used for messages under 192 bytes in length
264 |     // Short has a low startup cost, the normal mode is good for long
265 |     // keys, the cost crossover is at about 192 bytes.  The two modes were
266 |     // held to the same quality bar.
267 |     // 
268 |     static void Short(
269 |         const void *message,  // message (array of bytes, not necessarily aligned)
270 |         size_t length,        // length of message (in bytes)
271 |         uint64 *hash1,        // in/out: in the seed, out the hash value
272 |         uint64 *hash2);       // in/out: in the seed, out the hash value
273 | 
274 |     // number of uint64's in internal state
275 |     static const size_t sc_numVars = 12;
276 | 
277 |     // size of the internal state, in bytes (sc_numVars 8-byte words = 96)
278 |     static const size_t sc_blockSize = sc_numVars*8;
279 | 
280 |     // size of buffer of unhashed data, in bytes (two blocks = 192)
281 |     static const size_t sc_bufSize = 2*sc_blockSize;
282 | 
283 |     //
284 |     // sc_const: a constant which:
285 |     //  * is not zero
286 |     //  * is odd
287 |     //  * is a not-very-regular mix of 1's and 0's
288 |     //  * does not need any other special mathematical properties
289 |     //
290 |     static const uint64 sc_const = 0xdeadbeefdeadbeefLL;
291 | 
292 |     uint64 m_data[2*sc_numVars];   // unhashed data, for partial messages
293 |     uint64 m_state[sc_numVars];  // internal state of the hash
294 |     size_t m_length;             // total length of the input so far
295 |     uint8  m_remainder;          // length of unhashed data stashed in m_data
296 | };
297 | 
298 | 
299 | 
300 | 


--------------------------------------------------------------------------------
/SMHasher/SpookyHashTest.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | #include "SpookyHash/SpookyV2.cpp"
 3 | // SMHasher adapter: stores the 32-bit SpookyHash of key (seeded) at out.
 4 | void SpookyHash32_test ( const void * key, int len, unsigned seed, void * out )
 5 | {
 6 |   static_cast<uint32_t*>(out)[0] = SpookyHash::Hash32(key, len, seed);
 7 | }
 8 | // SMHasher adapter: upper 32 bits of the first Hash128 word (seed 2 is 0).
 9 | void SpookyHash32a_test ( const void * key, int len, unsigned seed, void * out )
10 | {
11 |   uint64_t first  = seed;   // in: seed 1, out: first 64 hash bits
12 |   uint64_t second = 0;      // in: seed 2, out: second 64 hash bits
13 |   SpookyHash::Hash128(key, len, &first, &second);
14 | 
15 |   *static_cast<uint32_t*>(out) = static_cast<uint32_t>(first >> 32);
16 | }
17 | // SMHasher adapter: lower 32 bits of the second Hash128 word (seed 2 is 0).
18 | void SpookyHash32b_test ( const void * key, int len, unsigned seed, void * out )
19 | {
20 |   uint64_t first  = seed;   // in: seed 1, out: first 64 hash bits
21 |   uint64_t second = 0;      // in: seed 2, out: second 64 hash bits
22 |   SpookyHash::Hash128(key, len, &first, &second);
23 | 
24 |   *static_cast<uint32_t*>(out) = static_cast<uint32_t>(second);
25 | }
26 | // SMHasher adapter: upper 32 bits of the second Hash128 word (seed 2 is 0).
27 | void SpookyHash32c_test ( const void * key, int len, unsigned seed, void * out )
28 | {
29 |   uint64_t first  = seed;   // in: seed 1, out: first 64 hash bits
30 |   uint64_t second = 0;      // in: seed 2, out: second 64 hash bits
31 |   SpookyHash::Hash128(key, len, &first, &second);
32 | 
33 |   *static_cast<uint32_t*>(out) = static_cast<uint32_t>(second >> 32);
34 | }
35 | // SMHasher adapter: stores the 64-bit SpookyHash of key (seeded) at out.
36 | void SpookyHash64_test ( const void * key, int len, unsigned seed, void * out )
37 | {
38 |   static_cast<uint64_t*>(out)[0] = SpookyHash::Hash64(key, len, seed);
39 | }
40 | // SMHasher adapter: writes both Hash128 words (16 bytes) directly into out.
41 | void SpookyHash128_test ( const void * key, int len, unsigned seed, void * out )
42 | {
43 |   uint64_t * const h1 = static_cast<uint64_t*>(out);
44 |   uint64_t * const h2 = h1 + 1;
45 |   *h1 = seed;  *h2 = 0;     // seeds go in through the output slots
46 |   SpookyHash::Hash128(key, len, h1, h2);
47 | }
48 | 


--------------------------------------------------------------------------------
/SMHasher/Stats.cpp:
--------------------------------------------------------------------------------
  1 | #include "Stats.h"
  2 | 
  3 | //-----------------------------------------------------------------------------
  4 | 
  5 | double chooseK ( int n, int k )
  6 | {
  7 |   // Binomial coefficient C(n,k) computed in floating point.  The smaller
  8 |   // of k and n-k is used as the factor count, since C(n,k) == C(n,n-k).
  9 |   const int terms = (k > (n - k)) ? (n - k) : k;
 10 |   double product = 1;
 11 | 
 12 |   for(int step = 0; step < terms; ++step)
 13 |   {
 14 |     // multiply in the next ratio (n-step)/(step+1)
 15 |     product *= double(n - step) / double(step + 1);
 16 |   }
 17 | 
 18 |   return product;
 19 | }
 20 | 
 21 | double chooseUpToK ( int n, int k )
 22 | {
 23 |   // Sum of chooseK(n,i) for i = 1..k; the i = 0 term is not included.
 24 |   double total = 0;
 25 |   int i = 1;
 26 |   while(i <= k)
 27 |   {
 28 |     total += chooseK(n, i++);
 29 |   }
 30 |   return total;
 31 | }
 32 | 
 33 | //-----------------------------------------------------------------------------
 34 | // Distribution "score"
 35 | // TODO - big writeup of what this score means
 36 | 
 37 | // Basically, we're computing a constant that says "The test distribution is as
 38 | // uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
 39 | // the bins". This makes for a nice uniform way to rate a distribution that isn't
 40 | // dependent on the number of bins or the number of keys
 41 | 
 42 | // (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
 43 | // as distribution weaknesses)
 44 | 
 45 | double calcScore ( const int * bins, const int bincount, const int keycount )
 46 | {
 47 |   const double binTotal = bincount;
 48 |   const double keyTotal = keycount;
 49 | 
 50 |   // accumulate the squared occupancy of every bin
 51 | 
 52 |   double squares = 0;
 53 | 
 54 |   for(int idx = 0; idx < bincount; ++idx)
 55 |   {
 56 |     const double occupancy = bins[idx];
 57 | 
 58 |     squares += occupancy * occupancy;
 59 |   }
 60 | 
 61 |   const double rms = sqrt(squares / binTotal);
 62 | 
 63 |   // fill factor derived from the key count and the RMS bin occupancy
 64 | 
 65 |   const double fill = (keyTotal*keyTotal - 1) / (binTotal*rms*rms - keyTotal);
 66 | 
 67 |   // rescale so that 0 = good and 1 = bad
 68 | 
 69 |   return 1 - (fill / binTotal);
 70 | }
 71 | 
 72 | 
 73 | //----------------------------------------------------------------------------
 74 | 
 75 | void plot ( double n )
 76 | {
 77 |   // Print one character for score n: '.' when the truncated percentage is
 78 |   // 0, the digit '1'..'9' for 1..9 percent, and 'X' for anything higher.
 79 |   double pct = n;
 80 | 
 81 |   // clamp negatives to zero, scale to percent, then cap at 64
 82 |   if(pct < 0) pct = 0;
 83 |   pct *= 100;
 84 |   if(pct > 64) pct = 64;
 85 | 
 86 |   const int level = (int)pct;
 87 | 
 88 |   if(level == 0)
 89 |   {
 90 |     printf(".");
 91 |     return;
 92 |   }
 93 | 
 94 |   const char glyph = '0' + char(level);
 95 | 
 96 |   printf("%c",(glyph > '9') ? 'X' : glyph);
 97 | }
 98 | 
 99 | //-----------------------------------------------------------------------------
100 | 


--------------------------------------------------------------------------------
/SMHasher/Stats.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Types.h"
  4 | 
  5 | #include <math.h>
  6 | #include <vector>
  7 | #include <map>
  8 | #include <algorithm>   // for std::sort
  9 | #include <string.h>    // for memset
 10 | #include <stdio.h>     // for printf
 11 | 
 12 | double calcScore ( const int * bins, const int bincount, const int ballcount );
 13 | 
 14 | void plot ( double n );
 15 | // balls minus the expected number of occupied bins (uniform random throws)
 16 | inline double ExpectedCollisions ( double balls, double bins )
 17 | { // bins * (1 - 1/bins)^balls is the expected number of bins left empty
 18 |   return (balls - bins) + (bins * pow(1 - 1/bins, balls));
 19 | }
 20 | 
 21 | double chooseK ( int b, int k );
 22 | double chooseUpToK ( int n, int k );
 23 | 
 24 | //-----------------------------------------------------------------------------
 25 | 
 26 | inline uint32_t f3mix ( uint32_t k )
 27 | {
 28 |   // 32-bit bit mixer: xor-shift rounds alternating with constant multiplies
 29 |   uint32_t h = k;
 30 |   h = (h ^ (h >> 16)) * 0x85ebca6b;
 31 |   h = (h ^ (h >> 13)) * 0xc2b2ae35;
 32 |   h ^= h >> 16;
 33 | 
 34 |   return h;
 35 | }
 36 | 
 37 | //-----------------------------------------------------------------------------
 38 | // Sort the hash list, count the total number of collisions and return
 39 | // the first N collisions for further processing
 40 | 
 41 | template< typename hashtype >
 42 | int FindCollisions ( std::vector<hashtype> & hashes, 
 43 |                      HashSet<hashtype> & collisions,
 44 |                      int maxCollisions )
 45 | {
 46 |   // After sorting, equal hashes are adjacent, so each element equal to its
 47 |   // predecessor counts as one collision.  Note that 'hashes' is reordered.
 48 |   std::sort(hashes.begin(),hashes.end());
 49 | 
 50 |   int duplicates = 0;
 51 | 
 52 |   for(size_t pos = 1; pos < hashes.size(); ++pos)
 53 |   {
 54 |     const bool repeated = (hashes[pos] == hashes[pos-1]);
 55 |     if(!repeated) continue;
 56 | 
 57 |     ++duplicates;
 58 | 
 59 |     // record the value only while the set holds fewer than maxCollisions
 60 |     const bool room = (int)collisions.size() < maxCollisions;
 61 |     if(room) collisions.insert(hashes[pos]);
 62 |   }
 63 |   return duplicates;
 64 | }
 65 | 
 66 | //-----------------------------------------------------------------------------
 67 | 
 68 | template < class keytype, typename hashtype >
 69 | int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 70 | {
 71 |   // Hash every key with seed 0 and print each key whose hash equals that of
 72 |   // an earlier key, next to that earlier key.  Returns the number of matches.
 73 |   int collcount = 0;
 74 | 
 75 |   typedef std::map<hashtype,keytype> htab;
 76 |   htab tab;
 77 | 
 78 |   for(size_t i = 0; i < keys.size(); i++)   // start at 0 so the first key is included
 79 |   {
 80 |     keytype & k1 = keys[i];
 81 |     // same four-argument call as TestKeyList: the result is written through the last pointer
 82 |     hashtype h;
 83 |     hash(&k1,sizeof(keytype),0,&h);
 84 |     typename htab::iterator it = tab.find(h);
 85 |     if(it != tab.end())
 86 |     {
 87 |       keytype & k2 = (*it).second;
 88 |       collcount++;
 89 |       printf("A: ");
 90 |       printbits(&k1,sizeof(keytype));
 91 |       printf("B: ");
 92 |       printbits(&k2,sizeof(keytype));
 93 |     }
 94 |     else
 95 |     {
 96 |       tab.insert( std::make_pair(h,k1) );
 97 |     }
 98 |   }
 99 |   return collcount;
100 | }
101 | 
102 | //----------------------------------------------------------------------------
103 | // Measure the distribution "score" for each possible N-bit span up to 20 bits
104 | 
105 | template< typename hashtype >
106 | double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
107 | {
108 |   printf("Testing distribution - ");
109 | 
110 |   if(drawDiagram) printf("\n");
111 | 
112 |   const int hashbits = sizeof(hashtype) * 8;
113 | 
114 |   int maxwidth = 20;
115 | 
116 |   // We need at least 5 keys per bin to reliably test distribution biases
117 |   // down to 1%, so don't bother to test sparser distributions than that.
118 |   // Stop at width 0: with fewer than 5 hashes the loop would reach 1 << -1.
119 |   while(maxwidth > 0 && double(hashes.size()) / double(1 << maxwidth) < 5.0)
120 |   {
121 |     maxwidth--;
122 |   }
123 | 
124 |   std::vector<int> bins;
125 |   bins.resize(1 << maxwidth);
126 | 
127 |   double worst = 0;
128 |   int worstStart = -1;
129 |   int worstWidth = -1;
130 |   // One pass per starting bit: histogram the maxwidth-bit window, then score.
131 |   for(int start = 0; start < hashbits; start++)
132 |   {
133 |     int width = maxwidth;
134 |     int bincount = (1 << width);
135 | 
136 |     memset(&bins[0],0,sizeof(int)*bincount);
137 | 
138 |     for(size_t j = 0; j < hashes.size(); j++)
139 |     {
140 |       hashtype & hash = hashes[j];
141 | 
142 |       uint32_t index = window(&hash,sizeof(hash),start,width);
143 | 
144 |       bins[index]++;
145 |     }
146 | 
147 |     // Test the distribution, then fold the bins in half,
148 |     // repeat until we're down to 256 bins
149 | 
150 |     if(drawDiagram) printf("[");
151 | 
152 |     while(bincount >= 256)
153 |     {
154 |       double n = calcScore(&bins[0],bincount,(int)hashes.size());
155 | 
156 |       if(drawDiagram) plot(n);
157 | 
158 |       if(n > worst)
159 |       {
160 |         worst = n;
161 |         worstStart = start;
162 |         worstWidth = width;
163 |       }
164 | 
165 |       width--;
166 |       bincount /= 2;
167 | 
168 |       if(width < 8) break;
169 |       // fold: add the upper half of the bins onto the lower half
170 |       for(int i = 0; i < bincount; i++)
171 |       {
172 |         bins[i] += bins[i+bincount];
173 |       }
174 |     }
175 | 
176 |     if(drawDiagram) printf("]\n");
177 |   }
178 | 
179 |   double pct = worst * 100.0;
180 | 
181 |   printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
182 |   if(pct >= 1.0) printf(" !!!!! ");
183 |   printf("\n");
184 |   // worst stays 0 (and width/start stay -1) when no window was scored
185 |   return worst;
186 | }
187 | 
188 | //----------------------------------------------------------------------------
189 | 
190 | template < typename hashtype >
191 | bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
192 | {
193 |   // Collision check followed by an optional distribution check.  Only the
194 |   // collision check influences the returned pass/fail flag.  NOTE(review):
195 |   // the 'collisions' parameter is never touched; a local set is used instead.
196 |   bool passed = true;
197 | 
198 |   const size_t total = hashes.size();
199 | 
200 |   // expected collisions: total*(total-1) / 2^(hashbits+1)
201 |   const double pairs = double(total) * double(total-1);
202 |   const double expected = pairs / pow(2.0,double(sizeof(hashtype) * 8 + 1));
203 | 
204 |   printf("Testing collisions   - Expected %8.2f, ",expected);
205 | 
206 |   HashSet<hashtype> found;
207 | 
208 |   const double actual = FindCollisions(hashes,found,1000);
209 | 
210 |   printf("actual %8.2f (%5.2fx)",actual, actual / expected);
211 | 
212 |   // 32-bit hashes fail above 2x the expected collision count; for all
213 |   // larger hashes _any_ collision is a failure.
214 |   //
215 |   // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
216 |   // of a scale factor, otherwise we fail erroneously if there are a small expected number
217 |   // of collisions
218 |   bool failed;
219 | 
220 |   if(sizeof(hashtype) == sizeof(uint32_t))
221 |   {
222 |     failed = (actual / expected) > 2.0;
223 |   }
224 |   else
225 |   {
226 |     failed = actual > 0;
227 |   }
228 | 
229 |   if(failed)
230 |   {
231 |     printf(" !!!!! ");
232 |     passed = false;
233 |   }
234 | 
235 |   printf("\n");
236 | 
237 |   //----------
238 | 
239 |   if(testDist)
240 |   {
241 |     // the returned worst-bias score does not affect the result
242 |     TestDistribution(hashes,drawDiagram);
243 |   }
244 | 
245 |   return passed;
246 | }
247 | 
248 | //----------
249 | 
250 | template < typename hashtype >
251 | bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
252 | {
253 |   // Convenience overload: supplies a throwaway collision vector.
254 |   std::vector<hashtype> scratch;
255 |   return TestHashList(hashes, scratch, testDist, drawDiagram);
256 | }
257 | 
258 | //-----------------------------------------------------------------------------
259 | 
260 | template < class keytype, typename hashtype >
261 | bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
262 | {
263 |   int keycount = (int)keys.size();
264 | 
265 |   // One progress dot per ~10% of the keys.  The step is clamped to 1 so
266 |   // that fewer than 10 keys cannot produce a modulo by zero.
267 |   const int dotStep = (keycount >= 10) ? (keycount / 10) : 1;
268 | 
269 |   std::vector<hashtype> hashes;
270 |   hashes.resize(keycount);
271 | 
272 |   printf("Hashing");
273 | 
274 |   for(int i = 0; i < keycount; i++)
275 |   {
276 |     if(i % dotStep == 0) printf(".");
277 | 
278 |     keytype & k = keys[i];
279 |     hash(&k,sizeof(k),0,&hashes[i]);   // seed 0, result stored in hashes[i]
280 |   }
281 | 
282 |   printf("\n");
283 | 
284 |   bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
285 |   printf("\n");
286 |   return result;
287 | }
288 | 
289 | //-----------------------------------------------------------------------------
290 | // Bytepair test - generate 16-bit indices from all possible non-overlapping
291 | // 8-bit sections of the hash value, check distribution on all of them.
292 | 
293 | // This is a very good test for catching weak intercorrelations between bits - 
294 | // much harder to pass than the normal distribution test. However, it doesn't
295 | // really model the normal usage of hash functions in hash table lookup, so
296 | // I'm not sure it's that useful (and hash functions that fail this test but
297 | // pass the normal distribution test still work well in practice)
298 | 
299 | template < typename hashtype >
300 | double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
301 | {
302 |   const int nbytes = sizeof(hashtype);
303 |   const int hashbits = nbytes * 8;
304 |   // one bin per (8-bit window at a, 8-bit window at b) pair
305 |   const int nbins = 65536;
306 | 
307 |   std::vector<int> bins(nbins,0);
308 | 
309 |   double worst = 0;
310 | 
311 |   for(int a = 0; a < hashbits; a++)
312 |   {
313 |     if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
314 | 
315 |     if(drawDiagram) printf("[");
316 | 
317 |     for(int b = 0; b < hashbits; b++)
318 |     {
319 |       if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
320 |       // reset the histogram for this (a,b) pair
321 |       bins.clear();
322 |       bins.resize(nbins,0);
323 | 
324 |       for(size_t i = 0; i < hashes.size(); i++)
325 |       {
326 |         hashtype & hash = hashes[i];
327 | 
328 |         uint32_t pa = window(&hash,sizeof(hash),a,8);
329 |         uint32_t pb = window(&hash,sizeof(hash),b,8);
330 | 
331 |         bins[pa | (pb << 8)]++;
332 |       }
333 |       // calcScore takes a raw int array and int counts, as in TestDistributionFast
334 |       double s = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
335 | 
336 |       if(drawDiagram) plot(s);
337 | 
338 |       if(s > worst)
339 |       {
340 |         worst = s;
341 |       }
342 |     }
343 | 
344 |     if(drawDiagram) printf("]\n");
345 |   }
346 |   // largest score seen over all (a,b) pairs
347 |   return worst;
348 | }
349 | 
350 | //-----------------------------------------------------------------------------
351 | // Simplified test - only check 64k distributions, and only on byte boundaries
352 | 
353 | template < typename hashtype >
354 | void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
355 | {
356 |   // Scores one 16-bit window per byte offset of the hash; dworst receives
357 |   // the largest score and davg the mean score over those windows.
358 |   const int hashbits = sizeof(hashtype) * 8;
359 |   const int windowCount = hashbits / 8;
360 |   const int nbins = 65536;
361 | 
362 |   std::vector<int> bins(nbins,0);
363 | 
364 |   dworst = -1.0e90;
365 |   davg = 0;
366 | 
367 |   for(int w = 0; w < windowCount; w++)
368 |   {
369 |     // start every window from an all-zero histogram of nbins entries
370 |     bins.assign(nbins,0);
371 | 
372 |     for(typename std::vector<hashtype>::iterator it = hashes.begin(); it != hashes.end(); ++it)
373 |     {
374 |       hashtype & hash = *it;
375 | 
376 |       const uint32_t slot = window(&hash,sizeof(hash),w * 8,16);
377 |       bins[slot]++;
378 |     }
379 | 
380 |     const double score = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
381 | 
382 |     davg += score;
383 |     if(score > dworst) dworst = score;
384 |   }
385 |   davg /= double(windowCount);
386 | }
387 | 
388 | //-----------------------------------------------------------------------------
389 | 


--------------------------------------------------------------------------------
/SMHasher/Types.cpp:
--------------------------------------------------------------------------------
  1 | #include "Types.h"
  2 | 
  3 | #include "Random.h"
  4 | 
  5 | #include <stdio.h>
  6 | 
  7 | uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
  8 | 
  9 | //-----------------------------------------------------------------------------
 10 | 
 11 | #if defined(_MSC_VER)
 12 | #pragma optimize( "", off )
 13 | #endif
 14 | 
 15 | void blackhole ( uint32_t )  // accepts a value and does nothing with it
 16 | {  // NOTE(review): sits between the MSVC "#pragma optimize off/on" pair above and below -
 17 | }  // presumably so calls to it are not optimized away; confirm against callers
 18 | 
 19 | uint32_t whitehole ( void )  // always returns 0
 20 | {  // NOTE(review): defined with MSVC optimization switched off by the surrounding
 21 |   return 0;  // pragmas - presumably so the compiler cannot assume the value; confirm
 22 | }
 23 | 
 24 | #if defined(_MSC_VER)
 25 | #pragma optimize( "", on ) 
 26 | #endif
 27 | 
 28 | uint32_t g_verify = 1;  // running verification code, starts at 1
 29 | // Folds blob into g_verify: the current g_verify is passed as the seed to MurmurOAAT.
 30 | void MixVCode ( const void * blob, int len )
 31 | {
 32 | 	g_verify = MurmurOAAT(blob,len,g_verify);
 33 | }
 34 | 
 35 | //-----------------------------------------------------------------------------
 36 | 
 37 | bool isprime ( uint32_t x )
 38 | {
 39 |   // Trial-division primality test, exact for every 32-bit value: returns
 40 |   // true iff x is prime (0 and 1 are not prime, table primes are prime).
 41 |   static const uint32_t p[] = 
 42 |   {
 43 |     2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,
 44 |     103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,
 45 |     199,211,223,227,229,233,239,241,251
 46 |   };
 47 | 
 48 |   if(x < 2) return false;
 49 | 
 50 |   for(size_t i=0; i < sizeof(p)/sizeof(p[0]); i++)
 51 |   { 
 52 |     if(x == p[i]) return true;          // x is itself one of the small primes
 53 |     if((x % p[i]) == 0) return false;
 54 |   } 
 55 | 
 56 |   // Odd divisors up to sqrt(x); i*i is 64-bit so the bound cannot overflow.
 57 |   for(uint64_t i = 257; i * i <= x; i += 2) 
 58 |   { 
 59 |     if((x % i) == 0) return false;
 60 |   }
 61 | 
 62 |   return true;
 63 | }
 64 | // Searches random 32-bit values for 100 mixing-constant candidates and prints them.
 65 | void GenerateMixingConstants ( void )
 66 | {
 67 |   // Fixed seed (presumably so the candidate sequence is repeatable).
 68 |   Rand rng(8350147);
 69 | 
 70 |   int found = 0;
 71 | 
 72 |   // Rejection statistics, reported after the search.
 73 |   int trials = 0;
 74 |   int popfail = 0;
 75 |   int bitfail = 0;
 76 |   int matchfail = 0;
 77 |   int primefail = 0;
 78 | 
 79 |   while(found < 100)
 80 |   {
 81 |     trials++;
 82 | 
 83 |     const uint32_t b = rng.rand_u32();
 84 | 
 85 |     //----------
 86 |     // must have exactly 16 set bits
 87 | 
 88 |     if(popcount(b) != 16)
 89 |     {
 90 |       popfail++;
 91 |       continue;
 92 |     }
 93 | 
 94 |     //----------
 95 |     // must have 3-5 bits set per 8-bit window
 96 | 
 97 |     bool windowsOk = true;
 98 | 
 99 |     for(int rot = 0; windowsOk && rot < 32; rot++)
100 |     {
101 |       const uint32_t window = ROTL32(b,rot) & 0xFF;
102 | 
103 |       windowsOk = (popcount(window) >= 3) && (popcount(window) <= 5);
104 |     }
105 | 
106 |     if(!windowsOk) { bitfail++; continue; }
107 | 
108 |     //----------
109 |     // all 8-bit windows must be different
110 | 
111 |     uint8_t seen[256];
112 | 
113 |     memset(seen,0,sizeof(seen));
114 | 
115 |     bool distinct = true;
116 | 
117 |     for(int rot = 0; distinct && rot < 32; rot++)
118 |     {
119 |       const uint32_t window = ROTL32(b,rot) & 0xFF;
120 | 
121 |       distinct = (seen[window] == 0);
122 |       seen[window] = 1;
123 |     }
124 | 
125 |     if(!distinct) { matchfail++; continue; }
126 | 
127 |     //----------
128 |     // must be prime
129 | 
130 |     if(!isprime(b))
131 |     {
132 |       primefail++;
133 |       continue;
134 |     }
135 | 
136 |     //----------
137 |     // survivor: print the constant next to its bitwise complement
138 | 
139 |     printf("0x%08x : 0x%08x\n",b,~b);
140 | 
141 |     found++;
142 |   }
143 | 
144 |   // trials, then the four rejection counts, then the number accepted
145 |   printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,found);
146 | }
147 | 
148 | //-----------------------------------------------------------------------------
149 | 


--------------------------------------------------------------------------------
/SMHasher/Types.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "Platform.h"
  4 | #include "Bitvec.h"
  5 | 
  6 | #include <memory.h>
  7 | #include <vector>
  8 | #include <map>
  9 | #include <set>
 10 | 
 11 | //-----------------------------------------------------------------------------
 12 | // If the optimizer detects that a value in a speed test is constant or unused,
 13 | // the optimizer may remove references to it or otherwise create code that
 14 | // would not occur in a real-world application. To prevent the optimizer from
 15 | // doing this we declare two trivial functions that either sink or source data,
 16 | // and bar the compiler from optimizing them.
 17 | 
 18 | void     blackhole ( uint32_t x );
 19 | uint32_t whitehole ( void );
 20 | 
 21 | //-----------------------------------------------------------------------------
 22 | // We want to verify that every test produces the same result on every platform
 23 | // To do this, we hash the results of every test to produce an overall
 24 | // verification value for the whole test suite. If two runs produce the same
 25 | // verification value, then every test in both run produced the same results
 26 | 
 27 | extern uint32_t g_verify;
 28 | 
 29 | // Mix the given blob of data into the verification code
 30 | 
 31 | void MixVCode ( const void * blob, int len );
 32 | 
 33 | 
 34 | //-----------------------------------------------------------------------------
 35 | 
 36 | typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
 37 | // Owning copy of a key's bytes. A null key or len <= 0 gives an empty vector.
 38 | struct ByteVec : public std::vector<uint8_t>
 39 | {
 40 |   ByteVec ( const void * key, int len )
 41 |   {
 42 |     // Guarded: the original called front() on an empty vector when len == 0.
 43 |     if(key && len > 0) assign((const uint8_t*)key,(const uint8_t*)key + len);
 44 |   }
 45 | };
 46 | // Associates each hash value with the vector of keys stored under it.
 47 | template< typename hashtype, typename keytype >
 48 | struct CollisionMap : public std::map< hashtype, std::vector<keytype> >
 49 | {
 50 | };
 51 | // A std::set of hash values under a project-local name.
 52 | template< typename hashtype >
 53 | struct HashSet : public std::set<hashtype>
 54 | {
 55 | };
 56 | 
 57 | //-----------------------------------------------------------------------------
 58 | // Typed wrapper around a raw pfHash pointer; T is the hash's output type.
 59 | template < class T >
 60 | class hashfunc
 61 | {
 62 | public:
 63 | 
 64 |   // Wrapped function pointer (public, as before).
 65 |   pfHash m_hash;
 66 | 
 67 |   hashfunc ( pfHash fn ) : m_hash(fn) {}
 68 | 
 69 |   // Recover the raw pointer.
 70 |   operator pfHash ( void ) const { return m_hash; }
 71 | 
 72 |   // Hash len bytes at key with seed, writing the result to out.
 73 |   void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
 74 |   {
 75 |     m_hash(key,len,seed,out);
 76 |   }
 77 | 
 78 |   // Same, but return the result by value.
 79 |   T operator () ( const void * key, const int len, const uint32_t seed )
 80 |   {
 81 |     T digest;
 82 | 
 83 |     m_hash(key,len,seed,(uint32_t*)&digest);
 84 | 
 85 |     return digest;
 86 |   }
 87 | 
 88 | };
 89 | 
 90 | //-----------------------------------------------------------------------------
 91 | // Key-processing callback objects. Simplifies keyset testing a bit.
 92 | 
 93 | struct KeyCallback
 94 | {
 95 |   // Number of keys seen by the base operator().
 96 |   int m_count;
 97 | 
 98 |   KeyCallback() : m_count(0) {}
 99 | 
100 |   virtual ~KeyCallback() {}
101 | 
102 |   // Default handler: only counts the call; key and len are not inspected.
103 |   virtual void operator() ( const void * key, int len )
104 |   {
105 |     ++m_count;
106 |   }
107 | 
108 |   // Capacity hint for subclasses that store per-key results; no-op here.
109 |   virtual void reserve ( int keycount )
110 |   {
111 |   }
112 | 
113 | };
114 | 
115 | //----------
116 | 
117 | template<typename hashtype>
118 | struct HashCallback : public KeyCallback
119 | {
120 |   typedef std::vector<hashtype> hashvec;
121 | 
122 |   // Binds the hash function and the caller-owned output vector, which is
123 |   // emptied here.
124 |   HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
125 |   {
126 |     m_hashes.clear();
127 |   }
128 | 
129 |   // Hashes the key with seed 0 into a freshly appended slot of m_hashes.
130 |   virtual void operator () ( const void * key, int len )
131 |   {
132 |     m_hashes.push_back(hashtype());
133 | 
134 |     m_pfHash(key,len,0,&m_hashes.back());
135 |   }
136 | 
137 |   virtual void reserve ( int keycount )
138 |   {
139 |     m_hashes.reserve(keycount);
140 |   }
141 | 
142 |   hashvec & m_hashes;
143 |   pfHash m_pfHash;
144 | 
145 | private:
146 | 
147 |   // Declared but not defined: assignment is disabled (reference member).
148 |   HashCallback & operator = ( const HashCallback & );
149 | };
150 | 
151 | //----------
152 | 
153 | template<typename hashtype>
154 | struct CollisionCallback : public KeyCallback
155 | {
156 |   typedef HashSet<hashtype> hashset;
157 |   typedef CollisionMap<hashtype,ByteVec> collmap;
158 | 
159 |   // collisions: hash values of interest (only queried here).
160 |   // cmap: receives a copy of every key whose hash is in that set.
161 |   CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap )
162 |   : m_pfHash(hash),
163 |     m_collisions(collisions),
164 |     m_collmap(cmap)
165 |   {
166 |   }
167 | 
168 |   virtual void operator () ( const void * key, int len )
169 |   {
170 |     hashtype digest;
171 | 
172 |     m_pfHash(key,len,0,&digest);
173 | 
174 |     // Keys whose hash is not in the set are ignored.
175 |     if(m_collisions.find(digest) == m_collisions.end()) return;
176 | 
177 |     m_collmap[digest].push_back( ByteVec(key,len) );
178 |   }
179 | 
180 |   pfHash m_pfHash;
181 |   hashset & m_collisions;
182 |   collmap & m_collmap;
183 | 
184 | private:
185 | 
186 |   CollisionCallback & operator = ( const CollisionCallback & c );
187 | };
188 | 
189 | //-----------------------------------------------------------------------------
190 | // Fixed-width value of _bits bits, stored as (_bits+7)/8 raw bytes.
191 | // Used via the uint128_t / uint256_t typedefs below.
192 | template < int _bits >
193 | class Blob
194 | {
195 | public:
196 | 
197 |   // All bytes zero.
198 |   Blob()
199 |   {
200 |     for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] = 0;
201 |   }
202 | 
203 |   // Zero blob whose leading bytes hold x in the host's native byte order.
204 |   Blob ( int x )
205 |   {
206 |     for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] = 0;
207 | 
208 |     // memcpy rather than *(int*)bytes = x: makes no alignment or aliasing
209 |     // assumption and cannot write past the array when the blob is narrower
210 |     // than an int. The byte layout is unchanged for wider blobs.
211 |     memcpy(bytes,&x,sizeof(x) < sizeof(bytes) ? sizeof(x) : sizeof(bytes));
212 |   }
213 | 
214 |   Blob ( const Blob & k )
215 |   {
216 |     for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] = k.bytes[i];
217 |   }
218 | 
219 |   Blob & operator = ( const Blob & k )
220 |   {
221 |     for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] = k.bytes[i];
222 | 
223 |     return *this;
224 |   }
225 | 
226 |   // Loads a then b (16 bytes, native order); truncated or zero-padded by set().
227 |   Blob ( uint64_t a, uint64_t b )
228 |   {
229 |     uint64_t t[2] = {a,b};
230 |     set(t,sizeof(t));
231 |   }
232 | 
233 |   // Copies up to sizeof(bytes) bytes from blob and zero-fills the remainder.
234 |   void set ( const void * blob, size_t len )
235 |   {
236 |     const uint8_t * k = (const uint8_t*)blob;
237 | 
238 |     len = len > sizeof(bytes) ? sizeof(bytes) : len;
239 | 
240 |     for(size_t i = 0; i < len; i++)
241 |     {
242 |       bytes[i] = k[i];
243 |     }
244 | 
245 |     for(size_t i = len; i < sizeof(bytes); i++)
246 |     {
247 |       bytes[i] = 0;
248 |     }
249 |   }
250 | 
251 |   // Unchecked byte access.
252 |   uint8_t & operator [] ( int i )
253 |   {
254 |     return bytes[i];
255 |   }
256 | 
257 |   const uint8_t & operator [] ( int i ) const
258 |   {
259 |     return bytes[i];
260 |   }
261 | 
262 |   //----------
263 |   // boolean operations
264 | 
265 |   // Lexicographic byte-wise comparison, bytes[0] first: a consistent ordering,
266 |   // not a numeric one on little-endian hosts.
267 |   bool operator < ( const Blob & k ) const
268 |   {
269 |     for(size_t i = 0; i < sizeof(bytes); i++)
270 |     {
271 |       if(bytes[i] < k.bytes[i]) return true;
272 |       if(bytes[i] > k.bytes[i]) return false;
273 |     }
274 | 
275 |     return false;
276 |   }
277 | 
278 |   bool operator == ( const Blob & k ) const
279 |   {
280 |     for(size_t i = 0; i < sizeof(bytes); i++)
281 |     {
282 |       if(bytes[i] != k.bytes[i]) return false;
283 |     }
284 | 
285 |     return true;
286 |   }
287 | 
288 |   bool operator != ( const Blob & k ) const
289 |   {
290 |     return !(*this == k);
291 |   }
292 | 
293 |   //----------
294 |   // bitwise operations
295 | 
296 |   Blob operator ^ ( const Blob & k ) const
297 |   {
298 |     Blob t;
299 | 
300 |     for(size_t i = 0; i < sizeof(bytes); i++)
301 |     {
302 |       t.bytes[i] = bytes[i] ^ k.bytes[i];
303 |     }
304 | 
305 |     return t;
306 |   }
307 | 
308 |   Blob & operator ^= ( const Blob & k )
309 |   {
310 |     for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] ^= k.bytes[i];
311 | 
312 |     return *this;
313 |   }
314 | 
315 |   // Masks the leading bytes, read as a native int (same layout as Blob(int)).
316 |   int operator & ( int x ) const
317 |   {
318 |     int low = 0;
319 |     memcpy(&low,bytes,sizeof(low) < sizeof(bytes) ? sizeof(low) : sizeof(bytes));
320 |     return low & x;
321 |   }
322 | 
323 |   Blob & operator &= ( const Blob & k )
324 |   {
325 |     for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] &= k.bytes[i];
326 | 
327 |     // The original fell off the end here without returning (undefined behaviour).
328 |     return *this;
329 |   }
330 | 
331 |   // Shifts delegate to the lshift/rshift byte-array helpers.
332 |   Blob operator << ( int c )
333 |   {
334 |     Blob t = *this;
335 | 
336 |     lshift(&t.bytes[0],sizeof(bytes),c);
337 | 
338 |     return t;
339 |   }
340 | 
341 |   Blob operator >> ( int c )
342 |   {
343 |     Blob t = *this;
344 | 
345 |     rshift(&t.bytes[0],sizeof(bytes),c);
346 | 
347 |     return t;
348 |   }
349 | 
350 |   Blob & operator <<= ( int c )
351 |   {
352 |     lshift(&bytes[0],sizeof(bytes),c);
353 | 
354 |     return *this;
355 |   }
356 | 
357 |   Blob & operator >>= ( int c )
358 |   {
359 |     rshift(&bytes[0],sizeof(bytes),c);
360 | 
361 |     return *this;
362 |   }
363 | 
364 |   //----------
365 | 
366 | private:
367 | 
368 |   uint8_t bytes[(_bits+7)/8];
369 | };
370 | 
371 | typedef Blob<128> uint128_t;
372 | typedef Blob<256> uint256_t;
373 | 
374 | //-----------------------------------------------------------------------------
375 | 


--------------------------------------------------------------------------------
/SMHasher/UHashTest.cpp:
--------------------------------------------------------------------------------
 1 | #include "UMAC/umac.c"
 2 | #include "UMAC/rijndael-alg-fst.c"
 3 | 
 4 | char uhash_key[] = "123456789abcdef";
 5 | uhash_ctx_t ahc = uhash_alloc(uhash_key);
 6 | // Hash hook with the (key, len, seed, out) shape; runs UHASH on the file-level context ahc. seed is not used.
 7 | void uhash32_test ( const void * key, int len, unsigned seed, void * out )
 8 | {
 9 |   uhash_reset (ahc);
10 |   uhash_update (ahc, (char*)key, len);
11 |   uhash_final (ahc, (char*)out); // NOTE(review): bytes written are decided inside umac.c - confirm out is large enough
12 | }
13 | 


--------------------------------------------------------------------------------
/SMHasher/UMAC/rijndael-alg-fst.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * rijndael-alg-fst.h
 3 |  *
 4 |  * @version 3.0 (December 2000)
 5 |  *
 6 |  * Optimised ANSI C code for the Rijndael cipher (now AES)
 7 |  *
 8 |  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
 9 |  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
10 |  * @author Paulo Barreto <paulo.barreto@terra.com.br>
11 |  *
12 |  * This code is hereby placed in the public domain.
13 |  *
14 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
15 |  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
18 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21 |  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23 |  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  */
26 | #ifndef __RIJNDAEL_ALG_FST_H
27 | #define __RIJNDAEL_ALG_FST_H
28 | 
29 | #define MAXKC	(256/32)
30 | #define MAXKB	(256/8)
31 | #define MAXNR	14
32 | 
33 | typedef unsigned char	u8;	
34 | typedef unsigned short	u16;	
35 | typedef unsigned int	u32;
36 | 
37 | int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
38 | int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
39 | void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]);
40 | void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]);
41 | 
42 | #ifdef INTERMEDIATE_VALUE_KAT
43 | void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds);
44 | void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds);
45 | #endif /* INTERMEDIATE_VALUE_KAT */
46 | 
47 | #endif /* __RIJNDAEL_ALG_FST_H */
48 | 


--------------------------------------------------------------------------------
/SMHasher/UMAC/umac.h:
--------------------------------------------------------------------------------
  1 | /* -----------------------------------------------------------------------
  2 |  * 
  3 |  * umac.h -- C Implementation UMAC Message Authentication
  4 |  *
  5 |  * Version 0.90 of draft-krovetz-umac-03.txt -- 2004 October
  6 |  *
  7 |  * For a full description of UMAC message authentication see the UMAC
  8 |  * world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac
  9 |  * Please report bugs and suggestions to the UMAC webpage.
 10 |  *
 11 |  * Copyright (c) 1999-2004 Ted Krovetz
 12 |  *                                                                 
 13 |  * Permission to use, copy, modify, and distribute this software and
 14 |  * its documentation for any purpose and with or without fee, is hereby
 15 |  * granted provided that the above copyright notice appears in all copies
 16 |  * and in supporting documentation, and that the name of the copyright
 17 |  * holder not be used in advertising or publicity pertaining to
 18 |  * distribution of the software without specific, written prior permission.
 19 |  *
 20 |  * Comments should be directed to Ted Krovetz (tdk@acm.org)                                        
 21 |  *                                                                   
 22 |  * ---------------------------------------------------------------------- */
 23 |  
 24 |  /* ////////////////////// IMPORTANT NOTES /////////////////////////////////
 25 |   *
 26 |   * 1) This version does not work properly on messages larger than 16MB
 27 |   *
 28 |   * 2) If you set the switch to use SSE2, then all data must be 16-byte
 29 |   *    aligned
 30 |   *
 31 |   * 3) When calling the function umac(), it is assumed that msg is in
 32 |   * a writable buffer of length divisible by 32 bytes. The message itself
 33 |   * does not have to fill the entire buffer, but bytes beyond msg may be
 34 |   * zeroed.
 35 |   *
 36 |   * 4) Two free AES implementations are supported by this implementation of
 37 |   * UMAC. Paulo Barreto's version is in the public domain and can be found
 38 |   * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for
 39 |   * "Barreto"). The only two files needed are rijndael-alg-fst.c and
 40 |   * rijndael-alg-fst.h.
 41 |   * Brian Gladman's version is distributed with GNU Public lisence
 42 |   * and can be found at http://fp.gladman.plus.com/AES/index.htm. It
 43 |   * includes a fast IA-32 assembly version.
 44 |   *
 45 |   /////////////////////////////////////////////////////////////////////// */
 46 | 
 47 | 
 48 | #ifdef __cplusplus
 49 |     extern "C" {
 50 | #endif
 51 | 
 52 | typedef struct umac_ctx *umac_ctx_t;
 53 | 
 54 | umac_ctx_t umac_new(char key[]);
 55 | /* Dynamically allocate a umac_ctx struct, initialize variables, 
 56 |  * generate subkeys from key.
 57 |  */
 58 | 
 59 | int umac_reset(umac_ctx_t ctx);
 60 | /* Reset a umac_ctx to begin authenicating a new message */
 61 | 
 62 | int umac_update(umac_ctx_t ctx, char *input, long len);
 63 | /* Incorporate len bytes pointed to by input into context ctx */
 64 | 
 65 | int umac_final(umac_ctx_t ctx, char tag[], char nonce[8]);
 66 | /* Incorporate any pending data and the ctr value, and return tag. 
 67 |  * This function returns error code if ctr < 0. 
 68 |  */
 69 | 
 70 | int umac_delete(umac_ctx_t ctx);
 71 | /* Deallocate the context structure */
 72 | 
 73 | int umac(umac_ctx_t ctx, char *input, 
 74 |          long len, char tag[],
 75 |          char nonce[8]);
 76 | /* All-in-one implementation of the functions Reset, Update and Final */
 77 | 
 78 | 
 79 | /* uhash.h */
 80 | 
 81 | 
 82 | typedef struct uhash_ctx *uhash_ctx_t;
 83 |   /* The uhash_ctx structure is defined by the implementation of the    */
 84 |   /* UHASH functions.                                                   */
 85 |  
 86 | uhash_ctx_t uhash_alloc(char key[16]);
 87 |   /* Dynamically allocate a uhash_ctx struct and generate subkeys using */
 88 |   /* the kdf and kdf_key passed in. If kdf_key_len is 0 then RC6 is     */
 89 |   /* used to generate key with a fixed key. If kdf_key_len > 0 but kdf  */
 90 |   /* is NULL then the first 16 bytes pointed at by kdf_key is used as a */
 91 |   /* key for an RC6 based KDF.                                          */
 92 |   
 93 | int uhash_free(uhash_ctx_t ctx);
 94 | 
 95 | int uhash_set_params(uhash_ctx_t ctx,
 96 |                    void       *params);
 97 | 
 98 | int uhash_reset(uhash_ctx_t ctx);
 99 | 
100 | int uhash_update(uhash_ctx_t ctx,
101 |                char       *input,
102 |                long        len);
103 | 
104 | int uhash_final(uhash_ctx_t ctx,
105 |               char        ouput[]);
106 | 
107 | int uhash(uhash_ctx_t ctx,
108 |         char       *input,
109 |         long        len,
110 |         char        output[]);
111 | 
112 | #ifdef __cplusplus
113 |     }
114 | #endif
115 | 


--------------------------------------------------------------------------------
/SMHasher/VHashTest.cpp:
--------------------------------------------------------------------------------
 1 | #include "VMAC/vmac.c"
 2 | 
 3 | unsigned char vhash_key1[] = "0123456789abcdef";
 4 | unsigned char vhash_key2[] = "fedcba9876543210";
 5 | // 64-bit VHASH hook: keys a function-local context with vhash_key1 on first use; seed is not used.
 6 | void vhash64_test ( const void * key, int len, unsigned seed, void * out )
 7 | {
 8 |   static vmac_ctx_t ctx[1];
 9 |   static int inited=0;
10 |   if (!inited) { inited=1; vmac_set_key(vhash_key1, &ctx[0]); }
11 |   const uint64_t digest = vhash((unsigned char*)key, len, NULL, &ctx[0]);
12 |   *(uint64_t*) out = digest;
13 | }
14 | // 128-bit hook: two VHASH passes keyed with vhash_key1 and vhash_key2, one per 64-bit half of out; seed is not used.
15 | void vhash128_test ( const void * key, int len, unsigned seed, void * out )
16 | {
17 |   static vmac_ctx_t ctx[2];
18 |   static int inited=0;
19 |   if (!inited) { inited=1; vmac_set_key(vhash_key1, &ctx[0]); vmac_set_key(vhash_key2, &ctx[1]); }
20 |   uint64_t * halves = (uint64_t*)out;
21 |   halves[0] = vhash((unsigned char*)key, len, NULL, &ctx[0]);
22 |   halves[1] = vhash((unsigned char*)key, len, NULL, &ctx[1]);
23 | }
24 | 


--------------------------------------------------------------------------------
/SMHasher/VMAC/vmac.h:
--------------------------------------------------------------------------------
  1 | #ifndef HEADER_VMAC_H
  2 | #define HEADER_VMAC_H
  3 | 
  4 | /* --------------------------------------------------------------------------
  5 |  * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
  6 |  * This implementation is herby placed in the public domain.
  7 |  * The authors offers no warranty. Use at your own risk.
  8 |  * Please send bug reports to the authors.
  9 |  * Last modified: 17 APR 08, 1700 PDT
 10 |  * ----------------------------------------------------------------------- */
 11 | 
 12 | /* --------------------------------------------------------------------------
 13 |  * User definable settings.
 14 |  * ----------------------------------------------------------------------- */
 15 | #ifndef VMAC_TAG_LEN
 16 | #define VMAC_TAG_LEN   64 /* Must be 64 or 128 - 64 sufficient for most    */
 17 | #endif
 18 | #ifndef VMAC_KEY_LEN
 19 | #define VMAC_KEY_LEN  128 /* Must be 128, 192 or 256                       */
 20 | #endif
 21 | #ifndef VMAC_NHBYTES
 22 | #define VMAC_NHBYTES  128 /* Must 2^i for any 3 < i < 13. Standard = 128   */
 23 | #endif
 24 | #define VMAC_PREFER_BIG_ENDIAN  0  /* Prefer non-x86 */
 25 | 
 26 | #define VMAC_USE_OPENSSL  0 /* Set to non-zero to use OpenSSL's AES        */
 27 | #define VMAC_CACHE_NONCES 1 /* Set to non-zero to cause caching            */
 28 |                             /* of consecutive nonces on 64-bit tags        */
 29 | 
 30 | #ifndef VMAC_RUN_TESTS
 31 | #define VMAC_RUN_TESTS 0  /* Set to non-zero to check vectors and speed    */
 32 | #endif
 33 | #define VMAC_HZ (4600e6)  /* Set to hz of host machine to get speed        */
 34 | #ifndef VMAC_HASH_ONLY
 35 | #define VMAC_HASH_ONLY 0  /* Set to non-zero to time hash only (not-mac)   */
 36 | #endif
 37 | /* Speeds of cpus I have access to
 38 | #define hz (2400e6)  glyme Core 2 "Conroe"
 39 | #define hz (2000e6)  jupiter G5
 40 | #define hz (1592e6)  titan
 41 | #define hz (2793e6)  athena/gaia
 42 | #define hz (1250e6)  isis G4
 43 | #define hz (2160e6)  imac Core 2 "Merom"
 44 | #define hz (266e6)   ppc/arm
 45 | #define hz (400e6)   mips
 46 | */
 47 | 
 48 | /* --------------------------------------------------------------------------
 49 |  * This implementation uses uint32_t and uint64_t as names for unsigned 32-
 50 |  * and 64-bit integer types. These are defined in C99 stdint.h. The
 51 |  * following may need adaptation if you are not running a C99 or
 52 |  * Microsoft C environment.
 53 |  * ----------------------------------------------------------------------- */
 54 | #ifndef VMAC_USE_STDINT
 55 | #define VMAC_USE_STDINT 1  /* Set to zero if system has no stdint.h        */
 56 | #endif
 57 | 
 58 | #if VMAC_USE_STDINT && !_MSC_VER /* Try stdint.h if non-Microsoft          */
 59 | #ifdef  __cplusplus
 60 | #define __STDC_CONSTANT_MACROS
 61 | #endif
 62 | #include <stdint.h>
 63 | #ifndef UINT64_C
 64 | #define UINT64_C(v) v ## ULL
 65 | #endif
 66 | #elif (_MSC_VER)                  /* Microsoft C does not have stdint.h    */
 67 | typedef unsigned __int32 uint32_t;
 68 | typedef unsigned __int64 uint64_t;
 69 | #ifndef UINT64_C
 70 | #define UINT64_C(v) v ## UI64
 71 | #endif
 72 | #else                             /* Guess sensibly - may need adaptation  */
 73 | typedef unsigned int uint32_t;
 74 | typedef unsigned long long uint64_t;
 75 | #ifndef UINT64_C
 76 | #define UINT64_C(v) v ## ULL
 77 | #endif
 78 | #endif
 79 | 
 80 | /* --------------------------------------------------------------------------
 81 |  * This implementation supports two free AES implementations: OpenSSL's and
 82 |  * Paulo Barreto's. To use OpenSSL's, you will need to include the OpenSSL
 83 |  * crypto library (eg, gcc -lcrypto foo.c). For Barreto's, you will need
 84 |  * to compile rijndael-alg-fst.c, last seen at http://www.iaik.tu-graz.ac.at/
 85 |  * research/krypto/AES/old/~rijmen/rijndael/rijndael-fst-3.0.zip and
 86 |  * http://homes.esat.kuleuven.be/~rijmen/rijndael/rijndael-fst-3.0.zip.
 87 |  * To use a different implementation, use these definitions as a model.
 88 |  * ----------------------------------------------------------------------- */
 89 | #if VMAC_USE_LIB_TOM_CRYPT
 90 | 
 91 | #define LTC_NO_CIPHERS
 92 | #define   LTC_RIJNDAEL
 93 | #define     ENCRYPT_ONLY
 94 | #include "crypt/crypt_argchk.c"
 95 | #include "ciphers/aes/aes.c"
 96 | 
 97 | typedef symmetric_key aes_int_key;
 98 | 
 99 | #define aes_encryption(in,out,int_key)                  \
100 | 	    	aes_enc_ecb_encrypt((unsigned char *)(in),(unsigned char *)(out),(int_key))
101 | #define aes_key_setup(key,int_key)                      \
102 | 	    	aes_enc_setup((key),VMAC_KEY_LEN/CHAR_BIT,0,(int_key))
103 | 
104 | #elif VMAC_USE_OPENSSL
105 | 
106 | #include <openssl/aes.h>
107 | typedef AES_KEY aes_int_key;
108 | 
109 | #define aes_encryption(in,out,int_key)                  \
110 | 	    	AES_encrypt((unsigned char *)(in),(unsigned char *)(out),(int_key))
111 | #define aes_key_setup(key,int_key)                      \
112 | 	    	AES_set_encrypt_key((key),VMAC_KEY_LEN,(int_key))
113 | 
114 | #else
115 | 
116 | #include "rijndael-alg-fst.h"
117 | typedef u32 aes_int_key[4*(VMAC_KEY_LEN/32+7)];
118 | 
119 | #define aes_encryption(in,out,int_key)                  \
120 | 	    	rijndaelEncrypt((u32 *)(int_key),           \
121 | 	                        ((VMAC_KEY_LEN/32)+6),      \
122 | 	    				    (u8 *)(in), (u8 *)(out))
123 | #define aes_key_setup(user_key,int_key)                 \
124 | 	    	rijndaelKeySetupEnc((u32 *)(int_key),       \
125 | 	    	                    (u8 *)(user_key), \
126 | 	    	                    VMAC_KEY_LEN)
127 | #endif
128 | 
129 | /* --------------------------------------------------------------------- */
130 | 
131 | typedef struct {
132 | 	uint64_t nhkey  [(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
133 | 	uint64_t polykey[2*VMAC_TAG_LEN/64];
134 | 	uint64_t l3key  [2*VMAC_TAG_LEN/64];
135 | 	uint64_t polytmp[2*VMAC_TAG_LEN/64];
136 | 	aes_int_key cipher_key;
137 | 	#if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES)
138 | 	uint64_t cached_nonce[2];
139 | 	uint64_t cached_aes[2];
140 | 	#endif
141 | 	int first_block_processed;
142 | } vmac_ctx_t;
143 | 
144 | /* --------------------------------------------------------------------- */
145 | #ifdef  __cplusplus
146 | extern "C" {
147 | #endif
148 | /* --------------------------------------------------------------------------
149 |  *                        <<<<< USAGE NOTES >>>>>
150 |  *
151 |  * Given msg m (mbytes in length) and nonce buffer n
152 |  * this function returns a tag as its output. The tag is returned as
153 |  * a number. When VMAC_TAG_LEN == 64, the 'return'ed integer is the tag,
154 |  * and *tagl is meaningless. When VMAC_TAG_LEN == 128 the tag is the
155 |  * number y * 2^64 + *tagl where y is the function's return value.
156 |  * If you want to consider tags to be strings, then you must do so with
157 |  * an agreed upon endian orientation for interoperability, and convert
158 |  * the results appropriately. VHASH hashes m without creating any tag.
159 |  * Consecutive substrings forming a prefix of a message may be passed
160 |  * to vhash_update, with vhash or vmac being called with the remainder
161 |  * to produce the output.
162 |  *
163 |  * Requirements:
164 |  * - The first bit of the nonce buffer n must be 0. An i byte nonce, is made
165 |  *   as the first 16-i bytes of n being zero, and the final i the nonce.
166 |  * - vhash_update MUST have mbytes be a positive multiple of VMAC_NHBYTES
167 |  *
168 |  * The following requirements was removed by the changes made by Bulat Ziganshin:
169 |  * - On 32-bit architectures with SSE2 instructions, ctx and m MUST be
170 |  *   begin on 16-byte memory boundaries.
171 |  * - m MUST be your message followed by zeroes to the nearest 16-byte
172 |  *   boundary. If m is a length multiple of 16 bytes, then it is already
173 |  *   at a 16-byte boundary and needs no padding. mbytes should be your
174 |  *   message length without any padding.
175 |  * ----------------------------------------------------------------------- */
176 | 
177 | #define vmac_update vhash_update
178 | 
179 | void vhash_update(unsigned char m[],
180 |           unsigned int mbytes,
181 |           vmac_ctx_t *ctx);
182 | 
183 | uint64_t vmac(unsigned char m[],
184 |          unsigned int mbytes,
185 |          unsigned char n[16],
186 |          uint64_t *tagl,
187 |          vmac_ctx_t *ctx);
188 | 
189 | uint64_t vhash(unsigned char m[],
190 |           unsigned int mbytes,
191 |           uint64_t *tagl,
192 |           vmac_ctx_t *ctx);
193 | 
194 | /* --------------------------------------------------------------------------
195 |  * When passed a VMAC_KEY_LEN bit user_key, this function initialazies ctx.
196 |  * ----------------------------------------------------------------------- */
197 | 
198 | void vmac_set_key(unsigned char user_key[], vmac_ctx_t *ctx);
199 | 
200 | /* --------------------------------------------------------------------------
201 |  * This function aborts current hash and resets ctx, ready for a new message.
202 |  * ----------------------------------------------------------------------- */
203 | 
204 | void vhash_abort(vmac_ctx_t *ctx);
205 | 
206 | /* --------------------------------------------------------------------- */
207 | 
208 | #ifdef  __cplusplus
209 | }
210 | #endif
211 | 
212 | #endif /* HEADER_AES_H */
213 | 


--------------------------------------------------------------------------------
/SMHasher/compile.cmd:
--------------------------------------------------------------------------------
1 | g++ -O3 -funroll-loops -s -static -m64 -msse4 -IMurmurHash -IUMAC crc.cpp sha1.cpp MurmurHash/MurmurHash3.cpp SpookyHashTest.cpp xxHashTest.cpp Poly1305Test.cpp VHashTest.cpp UHashTest.cpp FarshTest.cpp Hashes.cpp AvalancheTest.cpp Bitslice.cpp Bitvec.cpp DifferentialTest.cpp KeysetTest.cpp main.cpp Platform.cpp Random.cpp SpeedTest.cpp Stats.cpp Types.cpp
2 | 


--------------------------------------------------------------------------------
/SMHasher/crc.cpp:
--------------------------------------------------------------------------------
  1 | #include "Platform.h"
  2 | 
  3 | /*
  4 |  * This file is derived from crc32.c from the zlib-1.1.3 distribution
  5 |  * by Jean-loup Gailly and Mark Adler.
  6 |  */
  7 | 
  8 | /* crc32.c -- compute the CRC-32 of a data stream
  9 |  * Copyright (C) 1995-1998 Mark Adler
 10 |  * For conditions of distribution and use, see copyright notice in zlib.h
 11 |  */
 12 | 
 13 | 
 14 | /* ========================================================================
 15 |  * Table of CRC-32's of all single-byte values (made by make_crc_table)
 16 |  */
 17 | static const uint32_t crc_table[256] = {
 18 |   0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
 19 |   0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
 20 |   0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
 21 |   0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
 22 |   0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
 23 |   0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
 24 |   0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
 25 |   0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
 26 |   0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
 27 |   0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
 28 |   0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
 29 |   0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
 30 |   0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
 31 |   0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
 32 |   0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
 33 |   0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
 34 |   0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
 35 |   0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
 36 |   0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
 37 |   0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
 38 |   0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
 39 |   0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
 40 |   0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
 41 |   0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
 42 |   0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
 43 |   0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
 44 |   0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
 45 |   0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
 46 |   0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
 47 |   0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
 48 |   0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
 49 |   0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
 50 |   0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
 51 |   0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
 52 |   0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
 53 |   0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
 54 |   0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
 55 |   0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
 56 |   0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
 57 |   0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
 58 |   0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
 59 |   0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
 60 |   0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
 61 |   0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
 62 |   0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
 63 |   0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
 64 |   0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
 65 |   0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
 66 |   0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
 67 |   0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
 68 |   0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
 69 |   0x2d02ef8dL
 70 | };
 71 | 
 72 | /* ========================================================================= */
 73 | 
 74 | #define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
 75 | #define DO2(buf)  DO1(buf); DO1(buf);
 76 | #define DO4(buf)  DO2(buf); DO2(buf);
 77 | #define DO8(buf)  DO4(buf); DO4(buf);
 78 | 
 79 | /* ========================================================================= */
 80 | 
 81 | void crc32 ( const void * key, int len, uint32_t seed, void * out )
 82 | {
 83 |   uint8_t * buf = (uint8_t*)key;
 84 |   uint32_t crc = seed ^ 0xffffffffL;
 85 | 
 86 |   while (len >= 8)
 87 |   {
 88 |     DO8(buf);
 89 |     len -= 8;
 90 |   }
 91 | 
 92 |   while(len--)
 93 |   {
 94 |     DO1(buf);
 95 |   } 
 96 | 
 97 |   crc ^= 0xffffffffL;
 98 | 
 99 |   *(uint32_t*)out = crc;
100 | }
101 | 


--------------------------------------------------------------------------------
/SMHasher/poly1305/poly1305.c:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Public Domain poly1305 from Andrew Moon
  3 |  * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna
  4 |  */
  5 | 
  6 | /* $OpenBSD: poly1305.c,v 1.3 2013/12/19 22:57:13 djm Exp $ */
  7 | 
  8 | #include <sys/types.h>
  9 | #include <stdint.h>
 10 | 
 11 | #include "poly1305.h"
 12 | /* 32x32 -> 64-bit product: `a` is widened to uint64_t before the multiply. */
 13 | #define mul32x32_64(a,b) ((uint64_t)(a) * (b))
 14 | /* Assemble a uint32_t from the four bytes at p, least-significant byte first. */
 15 | #define U8TO32_LE(p) \
 16 | 	(((uint32_t)((p)[0])) | \
 17 | 	 ((uint32_t)((p)[1]) <<  8) | \
 18 | 	 ((uint32_t)((p)[2]) << 16) | \
 19 | 	 ((uint32_t)((p)[3]) << 24))
 20 | /* Store the low 32 bits of v into the four bytes at p, least-significant byte first. */
 21 | #define U32TO8_LE(p, v) \
 22 | 	do { \
 23 | 		(p)[0] = (uint8_t)((v)); \
 24 | 		(p)[1] = (uint8_t)((v) >>  8); \
 25 | 		(p)[2] = (uint8_t)((v) >> 16); \
 26 | 		(p)[3] = (uint8_t)((v) >> 24); \
 27 | 	} while (0)
 28 | /* poly1305_auth: one-shot Poly1305 tag of m[0..inlen). key[0..15] is the (clamped) multiplier r, key[16..31] is added at the end; the 16-byte tag is written to out. */
 29 | void
 30 | poly1305_auth(unsigned char out[POLY1305_TAGLEN], const unsigned char *m, size_t inlen, const unsigned char key[POLY1305_KEYLEN]) {
 31 | 	uint32_t t0,t1,t2,t3;		/* four little-endian words of the key half / current block */
 32 | 	uint32_t h0,h1,h2,h3,h4;	/* accumulator h as five 26-bit limbs */
 33 | 	uint32_t r0,r1,r2,r3,r4;	/* multiplier r as five 26-bit limbs */
 34 | 	uint32_t s1,s2,s3,s4;		/* 5 * r1..r4 */
 35 | 	uint32_t b, nb;			/* carry / selection mask and its complement */
 36 | 	size_t j;
 37 | 	uint64_t t[5];			/* 64-bit limb products of h * r */
 38 | 	uint64_t f0,f1,f2,f3;		/* final 32-bit words plus carries */
 39 | 	uint32_t g0,g1,g2,g3,g4;	/* h + 5 - 2^130, used for the final reduction */
 40 | 	uint64_t c;
 41 | 	unsigned char mp[16];		/* padded copy of the final partial block */
 42 | 
 43 | 	/* load the first 16 key bytes as four little-endian words */
 44 | 	t0 = U8TO32_LE(key+0);
 45 | 	t1 = U8TO32_LE(key+4);
 46 | 	t2 = U8TO32_LE(key+8);
 47 | 	t3 = U8TO32_LE(key+12);
 48 | 
 49 | 	/* split them into 26-bit limbs r0..r4; the narrower masks also clear the clamped key bits */
 50 | 	r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
 51 | 	r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
 52 | 	r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
 53 | 	r3 = t2 & 0x3f03fff; t3 >>= 8;
 54 | 	r4 = t3 & 0x00fffff;
 55 | 	/* s_i = 5 * r_i: multipliers for the product terms that wrap around past limb 4 */
 56 | 	s1 = r1 * 5;
 57 | 	s2 = r2 * 5;
 58 | 	s3 = r3 * 5;
 59 | 	s4 = r4 * 5;
 60 | 
 61 | 	/* init state */
 62 | 	h0 = 0;
 63 | 	h1 = 0;
 64 | 	h2 = 0;
 65 | 	h3 = 0;
 66 | 	h4 = 0;
 67 | 
 68 | 	/* full blocks */
 69 | 	if (inlen < 16) goto poly1305_donna_atmost15bytes;
 70 | poly1305_donna_16bytes:
 71 | 	m += 16;
 72 | 	inlen -= 16;
 73 | 	/* m has already been advanced, so the block being consumed is m[-16..-1] */
 74 | 	t0 = U8TO32_LE(m-16);
 75 | 	t1 = U8TO32_LE(m-12);
 76 | 	t2 = U8TO32_LE(m-8);
 77 | 	t3 = U8TO32_LE(m-4);
 78 | 	/* add the block to h limb by limb; (1 << 24) in limb 4 is the 2^128 bit appended to every full block */
 79 | 	h0 += t0 & 0x3ffffff;
 80 | 	h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
 81 | 	h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
 82 | 	h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
 83 | 	h4 += (t3 >> 8) | (1 << 24);
 84 | 
 85 | 	/* h = h * r: schoolbook limb products, wrapped terms use s1..s4 */
 86 | poly1305_donna_mul:
 87 | 	t[0]  = mul32x32_64(h0,r0) + mul32x32_64(h1,s4) + mul32x32_64(h2,s3) + mul32x32_64(h3,s2) + mul32x32_64(h4,s1);
 88 | 	t[1]  = mul32x32_64(h0,r1) + mul32x32_64(h1,r0) + mul32x32_64(h2,s4) + mul32x32_64(h3,s3) + mul32x32_64(h4,s2);
 89 | 	t[2]  = mul32x32_64(h0,r2) + mul32x32_64(h1,r1) + mul32x32_64(h2,r0) + mul32x32_64(h3,s4) + mul32x32_64(h4,s3);
 90 | 	t[3]  = mul32x32_64(h0,r3) + mul32x32_64(h1,r2) + mul32x32_64(h2,r1) + mul32x32_64(h3,r0) + mul32x32_64(h4,s4);
 91 | 	t[4]  = mul32x32_64(h0,r4) + mul32x32_64(h1,r3) + mul32x32_64(h2,r2) + mul32x32_64(h3,r1) + mul32x32_64(h4,r0);
 92 | 	/* carry chain: keep 26 bits per limb, push the excess upward; the carry out of limb 4 re-enters limb 0 times 5 */
 93 | 	                h0 = (uint32_t)t[0] & 0x3ffffff; c =           (t[0] >> 26);
 94 | 	t[1] += c;      h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26);
 95 | 	t[2] += b;      h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26);
 96 | 	t[3] += b;      h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26);
 97 | 	t[4] += b;      h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26);
 98 | 	h0 += b * 5;
 99 | 
100 | 	if (inlen >= 16) goto poly1305_donna_16bytes;
101 | 
102 | 	/* final bytes */
103 | poly1305_donna_atmost15bytes:
104 | 	if (!inlen) goto poly1305_donna_finish;
105 | 	/* copy the tail into mp, append a single 1 byte, zero-fill up to 16 bytes */
106 | 	for (j = 0; j < inlen; j++) mp[j] = m[j];
107 | 	mp[j++] = 1;
108 | 	for (; j < 16; j++)	mp[j] = 0;
109 | 	inlen = 0;
110 | 	/* inlen is now 0, so after the jump to poly1305_donna_mul below control reaches poly1305_donna_finish */
111 | 	t0 = U8TO32_LE(mp+0);
112 | 	t1 = U8TO32_LE(mp+4);
113 | 	t2 = U8TO32_LE(mp+8);
114 | 	t3 = U8TO32_LE(mp+12);
115 | 	/* same limb split as for a full block, but without the 2^128 bit: the 1 byte stored in mp marks the end */
116 | 	h0 += t0 & 0x3ffffff;
117 | 	h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
118 | 	h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
119 | 	h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
120 | 	h4 += (t3 >> 8);
121 | 
122 | 	goto poly1305_donna_mul;
123 | 	/* propagate all pending carries so every limb is below 2^26 (limb 1 may keep one extra carry) */
124 | poly1305_donna_finish:
125 | 	             b = h0 >> 26; h0 = h0 & 0x3ffffff;
126 | 	h1 +=     b; b = h1 >> 26; h1 = h1 & 0x3ffffff;
127 | 	h2 +=     b; b = h2 >> 26; h2 = h2 & 0x3ffffff;
128 | 	h3 +=     b; b = h3 >> 26; h3 = h3 & 0x3ffffff;
129 | 	h4 +=     b; b = h4 >> 26; h4 = h4 & 0x3ffffff;
130 | 	h0 += b * 5; b = h0 >> 26; h0 = h0 & 0x3ffffff;
131 | 	h1 +=     b;
132 | 	/* g = h + 5 - 2^130 (the 1 << 26 in limb 4 is bit 130) */
133 | 	g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
134 | 	g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
135 | 	g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
136 | 	g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
137 | 	g4 = h4 + b - (1 << 26);
138 | 	/* b = 0 if g4 wrapped (top bit set), all ones otherwise; keep h in the first case, take g in the second, without branching */
139 | 	b = (g4 >> 31) - 1;
140 | 	nb = ~b;
141 | 	h0 = (h0 & nb) | (g0 & b);
142 | 	h1 = (h1 & nb) | (g1 & b);
143 | 	h2 = (h2 & nb) | (g2 & b);
144 | 	h3 = (h3 & nb) | (g3 & b);
145 | 	h4 = (h4 & nb) | (g4 & b);
146 | 	/* repack the limbs into four 32-bit words and add the second key half, key[16..31] */
147 | 	f0 = ((h0      ) | (h1 << 26)) + (uint64_t)U8TO32_LE(&key[16]);
148 | 	f1 = ((h1 >>  6) | (h2 << 20)) + (uint64_t)U8TO32_LE(&key[20]);
149 | 	f2 = ((h2 >> 12) | (h3 << 14)) + (uint64_t)U8TO32_LE(&key[24]);
150 | 	f3 = ((h3 >> 18) | (h4 <<  8)) + (uint64_t)U8TO32_LE(&key[28]);
151 | 	/* write the tag little-endian; bits above 32 of each sum carry into the next word */
152 | 	U32TO8_LE(&out[ 0], f0); f1 += (f0 >> 32);
153 | 	U32TO8_LE(&out[ 4], f1); f2 += (f1 >> 32);
154 | 	U32TO8_LE(&out[ 8], f2); f3 += (f2 >> 32);
155 | 	U32TO8_LE(&out[12], f3);
156 | }
157 | 


--------------------------------------------------------------------------------
/SMHasher/poly1305/poly1305.h:
--------------------------------------------------------------------------------
 1 | /* $OpenBSD: poly1305.h,v 1.4 2014/05/02 03:27:54 djm Exp $ */
 2 | 
 3 | /*
 4 |  * Public Domain poly1305 from Andrew Moon
 5 |  * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna
 6 |  */
 7 | 
 8 | #ifndef POLY1305_H
 9 | #define POLY1305_H
10 | #include <stddef.h>	/* size_t -- lets this header compile even when it is included first */
11 | #define POLY1305_KEYLEN		32	/* length in bytes of the key array taken by poly1305_auth */
12 | #define POLY1305_TAGLEN		16	/* length in bytes of the tag array written by poly1305_auth */
13 | /* Compute the POLY1305_TAGLEN-byte Poly1305 tag of m[0..inlen) under key and store it in out. */
14 | void poly1305_auth(unsigned char out[POLY1305_TAGLEN], const unsigned char *m, size_t inlen, const unsigned char key[POLY1305_KEYLEN]);
15 | 
16 | #endif	/* POLY1305_H */
17 | 


--------------------------------------------------------------------------------
/SMHasher/sha1.cpp:
--------------------------------------------------------------------------------
 1 | // Test 32/64/128-bit parts of SHA1
 2 | 
 3 | #include "Hashes.h"
 4 | #include "SHA1/sha1.cpp"
 5 | // sha1: SHA-1 over the four little-endian seed bytes followed by key[0..len); copies `count` 32-bit words of the digest, starting at word `start`, to `out`.
 6 | void sha1 ( const void * key, int len, uint32_t seed, void * out, int start, int count )
 7 | {
 8 |   SHA1_CTX context;
 9 |   // Explicit casts: brace-initialising uint8_t elements from uint32_t expressions is a narrowing conversion, which is ill-formed since C++11.
10 |   uint8_t digest[20], seed8[4] = {(uint8_t)seed, (uint8_t)(seed>>8), (uint8_t)(seed>>16), (uint8_t)(seed>>24)};
11 | 
12 |   SHA1_Init(&context);
13 |   SHA1_Update(&context, seed8, 4);              // the seed is hashed as a 4-byte prefix
14 |   SHA1_Update(&context, (uint8_t*)key, len);
15 |   SHA1_Final(&context, digest);
16 |   // Callers must keep (start+count)*4 <= sizeof(digest) == 20; every wrapper in this file does.
17 |   memcpy(out, &digest[start*4], count*4);
18 | }
19 | // Fixed-width entry points: each returns a 32-, 64- or 128-bit slice of the seeded SHA-1 digest, selected by the start word and word count.
20 | void sha1_32  ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 0, /*count*/ 1); }
21 | void sha1_32a ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 1, /*count*/ 1); }
22 | void sha1_32b ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 2, /*count*/ 1); }
23 | void sha1_32c ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 3, /*count*/ 1); }
24 | void sha1_64  ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 0, /*count*/ 2); }
25 | void sha1_64a ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 2, /*count*/ 2); }
26 | void sha1_128 ( const void * key, int len, uint32_t seed, void * out )   { sha1(key, len, seed, out, /*start*/ 0, /*count*/ 4); }
27 | 


--------------------------------------------------------------------------------
/asm-listings/make-listings.cmd:
--------------------------------------------------------------------------------
1 | @rem Write GCC assembler listings (.lst) for ../farsh.c: 32-bit and 64-bit, each without SIMD, with SSE2 and with AVX2.
2 | @rem NOTE(review): benchmark/compile.cmd passes -DFARSH_SSE2 and -DFARSH_AVX2; confirm that -DSSE2 and -DAVX2 used here are the macros farsh.c actually tests.
3 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m32               -Wa,-adhlns=gcc-x86.lst
4 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m32 -msse2 -DSSE2 -Wa,-adhlns=gcc-x86-sse2.lst
5 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m32 -mavx2 -DAVX2 -Wa,-adhlns=gcc-x86-avx2.lst
6 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m64               -Wa,-adhlns=gcc-x64-nosimd.lst
7 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m64 -msse2 -DSSE2 -Wa,-adhlns=gcc-x64.lst
8 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m64 -mavx2 -DAVX2 -Wa,-adhlns=gcc-x64-avx2.lst
9 | 


--------------------------------------------------------------------------------
/benchmark/CpuID.h:
--------------------------------------------------------------------------------
  1 | // Single-file micro-library implementing CPUID-based CPU feature test
  2 | // as well as providing CPU name and highest SIMD version strings.
  3 | // You can find usage examples at the end of file.
  4 | // (c) Bulat Ziganshin & Unknown author
  5 | // Placed into public domain
  6 | 
  7 | #pragma once
  8 | 
  9 | #include <stdint.h>
 10 | #include <memory.h>
 11 | #include <string.h>
 12 | #if defined(_MSC_VER)
 13 | # include <intrin.h>
 14 | #endif
 15 | // CPUID leaf numbers, i.e. the values passed in EAX to run_cpuid.
 16 | enum CPUIDInfoType
 17 | {
 18 |     RequestLastID = 0,  FeatureSupport = 1,  NewestFeatureSupport = 7,   // basic leaves: highest leaf, feature bits, newer feature bits
 19 |     RequestLastExtendedID = 0x80000000,  BrandNameFirst = 0x80000002,  BrandNameLast = 0x80000004   // extended leaves: highest extended leaf, brand-string parts
 20 | };
 21 | // CpuidFeatures: raw CPUID output (CPUInfo) overlaid with named bit-fields and two strings. NOTE(review): the overlay assumes bit-fields are packed LSB-first into 32-bit units -- implementation-defined, confirm per compiler.
 22 | struct CpuidFeatures
 23 | {
 24 |     union
 25 |     {
 26 |         uint32_t CPUInfo[5*4];   // five CPUID results (EAX,EBX,ECX,EDX each): leaf 1, leaf 7, then the three brand-string leaves
 27 |         struct
 28 |         {
 29 |             // FeatureSupport: EAX
 30 |             unsigned SteppingID     :4;//4      0-3
 31 |             unsigned Model          :4;//8      4-7
 32 |             unsigned Family         :4;//12     8-11
 33 |             unsigned TypeItl        :2;//14     12-13
 34 |             unsigned Reserved11     :2;//16     14-15
 35 |             unsigned ExtendedModel  :4;//20     16-19
 36 |             unsigned ExtendedFamily :8;//28     20-27
 37 |             unsigned Reserved12     :3;//32     28-31
 38 |             // FeatureSupport: EBX
 39 |             unsigned BrandIndex     :8;//8      0-7
 40 |             unsigned QwordCFLUSH    :8;//16     8-15
 41 |             unsigned LogicProcCount :8;//24     16-23
 42 |             unsigned ApicID         :8;//32     24-31
 43 |             // FeatureSupport: ECX
 44 |             unsigned SSE3           :1;//1      0
 45 |             unsigned PCLMULQDQ      :1;//2      1
 46 |             unsigned DTES64         :1;//3      2
 47 |             unsigned MWAIT          :1;//4      3
 48 |             unsigned CPLDebug       :1;//5      4
 49 |             unsigned VMExt          :1;//6      5
 50 |             unsigned SafeModeExt    :1;//7      6
 51 |             unsigned IntelSpeedStep :1;//8      7
 52 |             unsigned ThermalMonitor :1;//9      8
 53 |             unsigned SupplSSE3      :1;//10     9
 54 |             unsigned L1CtxID        :1;//11     10
 55 |             unsigned SDBG           :1;//12     11
 56 |             unsigned FMA3           :1;//13     12
 57 |             unsigned CMPXCHG16B     :1;//14     13
 58 |             unsigned xTPR           :1;//15     14
 59 |             unsigned MSRDebug       :1;//16     15
 60 |             unsigned Reserved32     :1;//17     16
 61 |             unsigned ProcContextID  :1;//18     17
 62 |             unsigned DirectCacheAcc :1;//19     18
 63 |             unsigned SSE41          :1;//20     19
 64 |             unsigned SSE42          :1;//21     20
 65 |             unsigned x2APIC         :1;//22     21
 66 |             unsigned MOVBE          :1;//23     22
 67 |             unsigned POPCNT         :1;//24     23
 68 |             unsigned TSC_DEADLINE   :1;//25     24
 69 |             unsigned AES_NI         :1;//26     25
 70 |             unsigned XSAVE          :1;//27     26
 71 |             unsigned OSXSAVE        :1;//28     27
 72 |             unsigned AVX            :1;//29     28
 73 |             unsigned F16C           :1;//30     29
 74 |             unsigned RDRND          :1;//31     30
 75 |             unsigned HYPERVISOR     :1;//32     31
 76 |             // FeatureSupport: EDX
 77 |             unsigned FPU            :1;//1      0
 78 |             unsigned VME            :1;//2      1
 79 |             unsigned DE             :1;//3      2
 80 |             unsigned PSE            :1;//4      3
 81 |             unsigned TSC            :1;//5      4
 82 |             unsigned MSR            :1;//6      5
 83 |             unsigned PAE            :1;//7      6
 84 |             unsigned MCE            :1;//8      7
 85 |             unsigned Cx8            :1;//9      8
 86 |             unsigned APIC           :1;//10     9
 87 |             unsigned Reserved41     :1;//11     10
 88 |             unsigned SEP            :1;//12     11
 89 |             unsigned MTTR           :1;//13     12
 90 |             unsigned PGE            :1;//14     13
 91 |             unsigned MCA            :1;//15     14
 92 |             unsigned CMOV           :1;//16     15
 93 |             unsigned PAT            :1;//17     16
 94 |             unsigned PSE36          :1;//18     17
 95 |             unsigned PSN            :1;//19     18
 96 |             unsigned CFLUSH         :1;//20     19
 97 |             unsigned Reserved42     :1;//21     20
 98 |             unsigned DS             :1;//22     21
 99 |             unsigned ACPI           :1;//23     22
100 |             unsigned MMX            :1;//24     23
101 |             unsigned FXSR           :1;//25     24
102 |             unsigned SSE            :1;//26     25
103 |             unsigned SSE2           :1;//27     26
104 |             unsigned SS             :1;//28     27
105 |             unsigned HTT            :1;//29     28
106 |             unsigned TM             :1;//30     29
107 |             unsigned IA64           :1;//31     30
108 |             unsigned PBE            :1;//32     31
109 | 
110 |             // NewestFeatureSupport: EAX
111 |             unsigned Reserved51     :32;//32    0-31
112 |             // NewestFeatureSupport: EBX
113 |             unsigned FSGSBASE       :1;//1      0
114 |             unsigned TSC_ADJUST     :1;//2      1
115 |             unsigned SGX            :1;//3      2
116 |             unsigned BMI1           :1;//4      3
117 |             unsigned HLE            :1;//5      4
118 |             unsigned AVX2           :1;//6      5
119 |             unsigned Reserved61     :1;//7      6
120 |             unsigned SMEP           :1;//8      7
121 |             unsigned BMI2           :1;//9      8
122 |             unsigned ERMS           :1;//10     9
123 |             unsigned INVPCID        :1;//11     10
124 |             unsigned RTM            :1;//12     11
125 |             unsigned PQM            :1;//13     12
126 |             unsigned FPU_CS_DS_depr :1;//14     13
127 |             unsigned MPX            :1;//15     14
128 |             unsigned PQE            :1;//16     15
129 |             unsigned AVX512F        :1;//17     16
130 |             unsigned AVX512DQ       :1;//18     17
131 |             unsigned RDSEED         :1;//19     18
132 |             unsigned ADX            :1;//20     19
133 |             unsigned SMAP           :1;//21     20
134 |             unsigned AVX512IFMA     :1;//22     21
135 |             unsigned PCOMMIT        :1;//23     22
136 |             unsigned CLFLUSHOPT     :1;//24     23
137 |             unsigned CLWB           :1;//25     24
138 |             unsigned ProcessorTrace :1;//26     25
139 |             unsigned AVX512PF       :1;//27     26
140 |             unsigned AVX512ER       :1;//28     27
141 |             unsigned AVX512CD       :1;//29     28
142 |             unsigned SHA            :1;//30     29
143 |             unsigned AVX512BW       :1;//31     30
144 |             unsigned AVX512VL       :1;//32     31
145 |             // NewestFeatureSupport: ECX
146 |             unsigned PREFETCHWT1    :1;//1      0
147 |             unsigned AVX512VBMI     :1;//2      1
148 |             unsigned Reserved71     :30;//32    2-31
149 |             // NewestFeatureSupport: EDX
150 |             unsigned Reserved81     :32;//32    0-31
151 | 
152 |             char IDString[3*4*4];                 // CPU brand string: 3 leaves x 4 registers x 4 bytes
153 |             char HighestSupportedSimdString[48];  // round up the entire structure size to 128 bytes
154 |         };
155 |     };
156 | };
157 | // run_cpuid: execute CPUID with EAX=eax and ECX=ecx and store the resulting EAX,EBX,ECX,EDX in abcd[0..3].
158 | // `static` gives every translation unit its own definition, so C99-mode builds still link when the call is not inlined.
159 | static inline void run_cpuid (uint32_t eax /*function_id*/, uint32_t ecx /*subfunction_id*/, uint32_t* abcd /*results*/)
160 | {
161 | #if defined(_MSC_VER)
162 |     __cpuidex((int*)abcd, (int)eax, (int)ecx);   // the intrinsic is declared with int parameters
163 | #else
164 |     uint32_t ebx, edx;
165 | # if defined( __i386__ ) && defined ( __PIC__ )
166 |      /* in case of PIC under 32-bit EBX cannot be clobbered */
167 |     __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
168 | # else
169 |     __asm__ ( "cpuid" : "=b" (ebx),   // output-only: ebx holds no meaningful value before CPUID runs
170 | # endif
171 |               "+a" (eax), "+c" (ecx), "=d" (edx) );
172 |     abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
173 | #endif
174 | }
175 | // GetCpuidFeatures: zero *featureStruct, fill its feature bit-fields from CPUID leaves 1 and 7 (when the CPU reports them),
176 | // derive HighestSupportedSimdString from those bits, and read the CPU brand string into IDString.
177 | static inline void GetCpuidFeatures (struct CpuidFeatures *featureStruct)
178 | {
179 |     uint32_t cpuInfo[4] = {0};  uint32_t i;
180 |     memset (featureStruct, 0, sizeof(struct CpuidFeatures));
181 | 
182 |     // Calling run_cpuid with 0 as the function_id argument
183 |     // gets the number of the highest valid function ID.
184 |     run_cpuid (RequestLastID, 0, cpuInfo);
185 | 
186 |     // Request bit fields describing features supported by the CPU
187 |     if (cpuInfo[0] >= FeatureSupport)
188 |         run_cpuid (FeatureSupport, 0, featureStruct->CPUInfo);
189 | 
190 |     if (cpuInfo[0] >= NewestFeatureSupport)
191 |         run_cpuid (NewestFeatureSupport, 0, featureStruct->CPUInfo + 4);
192 | 
193 |     // Compute HighestSupportedSimdString from bit fields: the first set flag in this list wins
194 |     strcpy (featureStruct->HighestSupportedSimdString,
195 |                 featureStruct->AVX512VBMI?"AVX-512 VBMI" :
196 |                 featureStruct->AVX512BW?  "AVX-512 BW" :
197 |                 featureStruct->AVX512DQ?  "AVX-512 DQ" :
198 |                 featureStruct->AVX512F?   "AVX-512F" :
199 |                 featureStruct->AVX2?      "AVX2" :
200 |                 featureStruct->AVX?       "AVX" :
201 |                 featureStruct->AES_NI?    "AES-NI" :
202 |                 featureStruct->SSE42?     "SSE 4.2" :
203 |                 featureStruct->SSE41?     "SSE 4.1" :
204 |                 featureStruct->SupplSSE3? "Supplemental SSE3" :
205 |                 featureStruct->SSE3?      "SSE3" :
206 |                 featureStruct->SSE2?      "SSE2" :
207 |                 featureStruct->SSE?       "SSE"  :
208 |                 featureStruct->MMX?       "MMX"  : "no MMX");
209 | 
210 | 
211 |     // Calling run_cpuid with 0x80000000 as the function_id argument
212 |     // gets the number of the highest valid extended ID.
213 |     run_cpuid (RequestLastExtendedID, 0, cpuInfo);
214 | 
215 |     // Interpret CPU brand string if reported: the three leaves fill CPUInfo[8..19], the words the union places IDString on
216 |     if (cpuInfo[0] >= BrandNameLast) {
217 |         for (i=BrandNameFirst; i<=BrandNameLast; i++)
218 |             run_cpuid (i, 0, featureStruct->CPUInfo + 4*(i+2-BrandNameFirst));
219 |     } else {
220 |         strcpy (featureStruct->IDString, "Ancient CPU");
221 |     }
222 | }
223 | 
224 | 
225 | #ifdef CPUID_MAIN
226 | // Demo program; build with "gcc -x c CpuID.h -DCPUID_MAIN -s -static -oCpuID"
227 | #include <stdio.h>
228 | int main(void)
229 | {
230 |     struct CpuidFeatures cpu;
231 | 
232 |     GetCpuidFeatures(&cpu);
233 |     // Print "<brand string>: <highest SIMD level>"
234 |     printf("%s: %s\n", cpu.IDString, cpu.HighestSupportedSimdString);
235 | 
236 |     // Individual feature bits can be tested as well, e.g.:
237 |     // if (cpu.AVX2)  run_AVX2_specific_code();
238 | }
239 | #endif
240 | 


--------------------------------------------------------------------------------
/benchmark/compile-CpuID.cmd:
--------------------------------------------------------------------------------
1 | @rem Compile CpuID.h as a C program (-x c) with its CPUID_MAIN demo enabled, producing the static executable CpuID.
2 | gcc -x c CpuID.h -DCPUID_MAIN -s -static -oCpuID
3 | 


--------------------------------------------------------------------------------
/benchmark/compile-other.cmd:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | set options=%* main.cpp
 3 | set options_ms=-MP -Gy -GL -GR- -nologo %options% user32.lib shell32.lib ole32.lib advapi32.lib -link -LARGEADDRESSAWARE
 4 | set options_ms_cl=-O2 -GL -Gy -EHsc %options_ms%
 5 | set options_ms_icl=-w -O3 -Qipo -Qunroll64 %options_ms%
 6 | set options_ms_x86=-MACHINE:x86 -SUBSYSTEM:CONSOLE,5.01
 7 | set options_ms_x64=-MACHINE:x64 -SUBSYSTEM:CONSOLE,5.02
 8 | set options_gcc4=-O3 -msse2 -funroll-loops -std=c++11 -s -static -lstdc++ %options%
 9 | rem Builds main.cpp, plus any options given on the command line, with three toolchains. First GCC: farsh32.exe and farsh64.exe
10 | gcc -m32 %options_gcc4% -ofarsh32.exe
11 | gcc -m64 %options_gcc4% -ofarsh64.exe
12 | rem Microsoft Visual C++ 14.0, 32-bit: farsh32m.exe
13 | call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86
14 | cl -Fefarsh32m.exe -arch:SSE2 %options_ms_cl% %options_ms_x86%
15 | rem Microsoft Visual C++ 14.0, 64-bit: farsh64m.exe
16 | call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64
17 | cl -Fefarsh64m.exe %options_ms_cl% %options_ms_x64%
18 | rem Intel C++ (Composer XE 2013 SP1), 32-bit: farsh32i.exe, then post-processed by iccpatch.exe
19 | call "C:\Program Files (x86)\Intel\Composer XE 2013 SP1\bin\ipsxe-comp-vars.bat" ia32
20 | icl -Fefarsh32i.exe -arch:SSE2 %options_ms_icl% %options_ms_x86%
21 | iccpatch.exe farsh32i.exe >nul
22 | rem Intel C++ (Composer XE 2013 SP1), 64-bit: farsh64i.exe, then post-processed by iccpatch.exe
23 | call "C:\Program Files (x86)\Intel\Composer XE 2013 SP1\bin\ipsxe-comp-vars.bat" intel64
24 | icl -Fefarsh64i.exe %options_ms_icl% %options_ms_x64%
25 | iccpatch.exe farsh64i.exe >nul
26 | rem Remove intermediate files; errors for missing files are suppressed
27 | del *.exe.bak *.obj *.res >nul 2>nul
28 | 


--------------------------------------------------------------------------------
/benchmark/compile.cmd:
--------------------------------------------------------------------------------
 1 | @rem Build the FARSH benchmark in twelve variants: aligned or unaligned input, x86 or x64, and no SIMD, SSE2 or AVX2.
 2 | @rem The output names match the program name that main.cpp builds from FARSH_ALIGNED_INPUT, the pointer size and the SIMD macro.
 3 | gcc -O3 -funroll-loops -s -static -m32 -march=pentium3     main.cpp -oaligned-farsh-x86         -DFARSH_ALIGNED_INPUT
 4 | gcc -O3 -funroll-loops -s -static -m32 -msse2 -DFARSH_SSE2 main.cpp -oaligned-farsh-x86-sse2    -DFARSH_ALIGNED_INPUT
 5 | gcc -O3 -funroll-loops -s -static -m32 -mavx2 -DFARSH_AVX2 main.cpp -oaligned-farsh-x86-avx2    -DFARSH_ALIGNED_INPUT
 6 | gcc -O3 -funroll-loops -s -static -m64                     main.cpp -oaligned-farsh-x64-nosimd  -DFARSH_ALIGNED_INPUT
 7 | gcc -O3 -funroll-loops -s -static -m64 -msse2 -DFARSH_SSE2 main.cpp -oaligned-farsh-x64         -DFARSH_ALIGNED_INPUT
 8 | gcc -O3 -funroll-loops -s -static -m64 -mavx2 -DFARSH_AVX2 main.cpp -oaligned-farsh-x64-avx2    -DFARSH_ALIGNED_INPUT
 9 | gcc -O3 -funroll-loops -s -static -m32 -march=pentium3     main.cpp -ofarsh-x86
10 | gcc -O3 -funroll-loops -s -static -m32 -msse2 -DFARSH_SSE2 main.cpp -ofarsh-x86-sse2
11 | gcc -O3 -funroll-loops -s -static -m32 -mavx2 -DFARSH_AVX2 main.cpp -ofarsh-x86-avx2
12 | gcc -O3 -funroll-loops -s -static -m64                     main.cpp -ofarsh-x64-nosimd
13 | gcc -O3 -funroll-loops -s -static -m64 -msse2 -DFARSH_SSE2 main.cpp -ofarsh-x64
14 | gcc -O3 -funroll-loops -s -static -m64 -mavx2 -DFARSH_AVX2 main.cpp -ofarsh-x64-avx2
15 | 


--------------------------------------------------------------------------------
/benchmark/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdint.h>
  3 | #include "timer.h"
  4 | #if defined(FARSH_AVX2) || defined(FARSH_SSE2)
  5 | #include "CpuID.h"
  6 | #endif
  7 | 
  8 | #include "../farsh.c"
  9 | // ALIGN(n): declaration prefix requesting n-byte alignment; expands to nothing on compilers that are neither GNU-compatible nor MSVC.
 10 | #if __GNUC__
 11 | #define ALIGN(n)      __attribute__ ((aligned(n)))
 12 | #elif _MSC_VER
 13 | #define ALIGN(n)      __declspec(align(n))
 14 | #else
 15 | #define ALIGN(n)
 16 | #endif
 17 | // Benchmark driver: checks the first FARSH hash value, then times farsh() over DATASET bytes and the bare farsh_full_block() loop. Exit codes: 0 ok, 1 required SIMD missing, 2 wrong hash, 3 bad argument.
 18 | int main (int argc, char **argv)
 19 | {
 20 |     bool print_table  =  (argc > 1);    // if any cmdline parameter was given
 21 |     bool x64  =  (sizeof(void*) == 8);  // check for 64-bit platform
 22 |     // simdext is the SIMD suffix of the program name; SIMD builds refuse to run on CPUs lacking the instruction set
 23 | #ifdef FARSH_AVX2
 24 |     char simdext[] = "-avx2";
 25 |     struct CpuidFeatures features;  GetCpuidFeatures(&features);
 26 |     if (! features.AVX2)  {if (!print_table) printf("AVX2 not found!\n"); return 1;}
 27 | #elif defined(FARSH_SSE2)
 28 |     const char *simdext = x64? "":"-sse2";
 29 |     struct CpuidFeatures features;  GetCpuidFeatures(&features);
 30 |     if (! features.SSE2)  {if (!print_table) printf("SSE2 not found!\n"); return 1;}
 31 | #else
 32 |     const char *simdext = x64? "-nosimd":"";
 33 | #endif
 34 | 
 35 | #ifdef FARSH_ALIGNED_INPUT
 36 |     bool ALIGNED_INPUT = true;
 37 | #else
 38 |     bool ALIGNED_INPUT = false;
 39 | #endif
 40 | 
 41 |     // Choose the display format for results: 0 = verbose (no argument), 1 or 2 = table row, -1 = unrecognised argument
 42 |     int format = argc==1? 0 :
 43 |                  strcmp(argv[1],"1")==0? 1 :
 44 |                  strcmp(argv[1],"2")==0? 2 : -1;
 45 |     if (format <= 0) {
 46 |         printf("FARSH 0.2 Benchmark. See https://github.com/Bulat-Ziganshin/FARSH\n"
 47 |                "  Usage: farsh [1|2] - choose display format\n");
 48 |         if (format < 0)  return 3;
 49 |     }
 50 |     // Name of this build variant, printed as the row label in table formats
 51 |     char progname[100];
 52 |     sprintf (progname, "%sfarsh-%s%s", ALIGNED_INPUT? "aligned-":"",
 53 |                                        x64? "x64":"x86",
 54 |                                        simdext);
 55 | 
 56 |     // CHECK THE ZEROES HASHING (currently disabled: the loop body is commented out)
 57 |     const size_t ZEROES = 64*1024;
 58 |     ALIGN(64) static char zero[ZEROES] = {0};
 59 |     for (int i=0; i<=ZEROES; i++)
 60 |     {
 61 |         //uint32_t h = farsh (zero, i);
 62 |         //printf("%5d %08x\n", i, h);
 63 |         //printf("%4d %08x %08x %08x %08x :: ", minbytes, (UINT)(h), (UINT)(h>>32), sum1, sum2);
 64 |     }
 65 | 
 66 | 
 67 |     // PREPARE TEST DATA. DATASIZE+FARSH_BASE_KEY_SIZE should be less than the L1 cache size, otherwise speed may be limited by memory reads
 68 |     const size_t DATASIZE = 12*1024;
 69 |     ALIGN(64) static char data_array[DATASIZE+1];   // one spare byte so that `data` can start at offset 1
 70 |     char *data  =  ALIGNED_INPUT? data_array : data_array + 1;
 71 |     for (int i=0; i<DATASIZE; i++)
 72 |         data[i] = char((123456791u*i) >> ((i%16)+8));
 73 | 
 74 | 
 75 | #ifndef FARSH_ALIGNED_INPUT
 76 |     // CHECK FOR POSSIBLE DATA ALIGNMENT PROBLEMS: hash from every start offset 0..64; each range ends at data+DATASIZE, the last byte of data_array
 77 |     for (int i=0; i<=64; i++)
 78 |     {
 79 |         uint32_t h = farsh (data+i, DATASIZE-i, 0);
 80 |         if (h==42)  break;   // anti-optimization trick
 81 | 
 82 |         char out[32*4];
 83 |         for (int j=1; j<=32; j++)
 84 |             farsh_n (data+i, DATASIZE-i, 0, j, 0, out);
 85 |     }
 86 | #endif
 87 | 
 88 | 
 89 |     // BENCHMARK
 90 |     const uint64_t DATASET = uint64_t(100)<<30;
 91 |     if (format > 0)   printf("%-24s  |", progname);
 92 |     else              printf("Hashing %d GiB:", int(DATASET>>30));
 93 |     const int EXTRA_LOOPS = (100<<20) / DATASIZE;   // These extra loops are required to enable the SIMD engine and switch CPU core to the maximum frequency
 94 |     Timer t;
 95 |     uint32_t h = 0;
 96 | 
 97 |     // Warm-up passes run first and are not timed; each hash is fed back as the third farsh() argument of the next call
 98 |     for (int i=0; i < EXTRA_LOOPS+DATASET/DATASIZE; i++)
 99 |     {
100 |         if (i == EXTRA_LOOPS)
101 |             t.Start();
102 | 
103 |         h = farsh (data, DATASIZE, h);
104 | 
105 |         if (i == 0  &&  h != 0xd300ddd8) {   // check hash correctness
106 |             printf("\nWrong hash value: 0x%08X !!!\n", h);
107 |             return 2;
108 |         }
109 |     }
110 |     t.Stop();  double speed = DATASET / t.Elapsed();
111 |     if (print_table)  printf("%8.3lf GB/s =%7.3lf GiB/s", speed/1e9, speed/(1<<30));
112 |     else              printf(" %.3lf milliseconds =%7.3lf GB/s =%7.3lf GiB/s\n", t.Elapsed()*1000, speed/1e9, speed/(1<<30));
113 |     double t1 = t.Elapsed();
114 | 
115 |     // Second measurement: the bare farsh_full_block() loop over the same number of bytes
116 |     const uint32_t *keys = FARSH_KEYS;
117 |     if (t.Elapsed() == 1e42)   data++, keys++;   // anti-optimization trick
118 | 
119 |     if (format==0)  printf("Internal loop:  ");
120 |     t.Start();
121 |     for (int i=0; i < DATASET/FARSH_BASE_KEY_SIZE; i++)
122 |     {
123 |         uint64_t h = farsh_full_block ((uint32_t*)data, keys);
124 |         if (h==42)  data[0] = i;    // anti-optimization trick
125 |     }
126 |     t.Stop();  speed = DATASET / t.Elapsed();
127 |     if (print_table)  printf("  |%8.3lf GB/s =%7.3lf GiB/s", speed/1e9, speed/(1<<30));
128 |     else              printf(" %.3lf milliseconds =%7.3lf GB/s =%7.3lf GiB/s\n", t.Elapsed()*1000, speed/1e9, speed/(1<<30));
129 |     // "External loop" = time of the full hash minus time of the internal loop alone
130 |     t1 -= t.Elapsed();
131 |     speed = DATASET / t1;
132 |     if      (format==2)  printf("  |%9.3lf GB/s =%8.3lf GiB/s", speed/1e9, speed/(1<<30));
133 |     else if (format==0)  printf("External loop:   %.3lf milliseconds = %.3lf GB/s = %.3lf GiB/s", t1*1000, speed/1e9, speed/(1<<30));
134 |     printf("\n");
135 | 
136 |     return 0;
137 | }
138 | 


--------------------------------------------------------------------------------
/benchmark/runme.cmd:
--------------------------------------------------------------------------------
1 | for %%e in (*.exe) do @start /b /wait  /realtime %%e 1
2 | @rem The line above runs every .exe in the current directory in turn (start /wait), without opening a new window (/b), at realtime priority, passing "1" as its only argument


--------------------------------------------------------------------------------
/benchmark/timer.h:
--------------------------------------------------------------------------------
 1 | /* Measures the time required to execute XXX(). Use it in the following way:
 2 | 
 3 |     Timer t;
 4 |     t.Start();
 5 |     XXX();
 6 |     t.Stop();
 7 |     double seconds = t.Elapsed();
 8 | */
 9 | 
10 | #pragma once
11 | 
12 | 
13 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
14 | 
15 | #include <windows.h>
16 | 
17 | struct Timer
18 | {
19 |     // Reads the counter frequency and calibrates the cost of an empty Start()/Stop() pair
20 |     Timer()
21 |     {
22 |         const BOOL have_freq = QueryPerformanceFrequency(&m_freq);
23 |         if (!have_freq)
24 |             printf("QueryPerformanceFrequency failed!\n");
25 |         // Two back-to-back counter reads: their distance is the timer overhead in ticks
26 |         Start();
27 |         Stop();
28 |         m_overhead = m_stop.QuadPart - m_start.QuadPart;
29 |     }
30 | 
31 |     // Records the counter value at the beginning of the measured interval
32 |     void Start()
33 |     {
34 |         QueryPerformanceCounter(&m_start);
35 |     }
36 | 
37 |     // Records the counter value at the end of the measured interval
38 |     void Stop()
39 |     {
40 |         QueryPerformanceCounter(&m_stop);
41 |     }
42 | 
43 |     // Returns the Start()..Stop() interval in seconds, less the calibrated overhead
44 |     double Elapsed()
45 |     {
46 |         const LONGLONG ticks = m_stop.QuadPart - m_start.QuadPart - m_overhead;
47 |         return ticks / double(m_freq.QuadPart);
48 |     }
49 | 
50 | private:
51 |     LARGE_INTEGER m_start;      // counter value captured by Start()
52 |     LARGE_INTEGER m_stop;       // counter value captured by Stop()
53 |     LARGE_INTEGER m_freq;       // value filled in by QueryPerformanceFrequency (ticks per second)
54 |     LONGLONG      m_overhead;   // ticks consumed by an empty Start()/Stop() pair
55 | };
56 | 
57 | 
58 | #else // this should handle any Unixes
59 | 
60 | #include <sys/time.h>
61 | 
62 | struct Timer
63 | {
64 |     // Calibrates the cost of an empty Start()/Stop() pair so that Elapsed() can subtract it
65 |     Timer() : overhead_sec(0)
66 |     {
67 |         Start();
68 |         Stop();
69 |         overhead_sec = Elapsed();   // measured while the overhead is still 0
70 |     }
71 | 
72 |     // Records the time of day at the beginning of the measured interval
73 |     void Start()
74 |     {
75 |         gettimeofday (&started, NULL);
76 |     }
77 | 
78 |     // Records the time of day at the end of the measured interval
79 |     void Stop()
80 |     {
81 |         gettimeofday (&stopped, NULL);
82 |     }
83 | 
84 |     // Returns the Start()..Stop() interval in seconds, less the calibrated overhead
85 |     double Elapsed()
86 |     {
87 |         struct timeval delta;
88 |         timersub (&stopped, &started, &delta);
89 |         return (delta.tv_sec + delta.tv_usec/1e6 - overhead_sec);
90 |     }
91 | 
92 | private:
93 |     struct timeval started, stopped;   // timestamps captured by Start() and Stop()
94 |     double overhead_sec;               // seconds, measured once in the constructor
95 | };
96 | 
97 | #endif
98 | 


--------------------------------------------------------------------------------
/farsh.c:
--------------------------------------------------------------------------------
  1 | #include "farsh.h"
  2 | 
  3 | #include <stddef.h>   /* for size_t */
  4 | #include <stdint.h>   /* for uint32_t & uint64_t */
  5 | #include <stdlib.h>   /* for abort() */
  6 | #include <memory.h>   /* for memcpy() */
  7 | 
  8 | #if __GNUC__
  9 | #include <x86intrin.h>
 10 | #define ALIGN(n)      __attribute__ ((aligned(n)))
 11 | #elif _MSC_VER
 12 | #include <intrin.h>
 13 | #define ALIGN(n)      __declspec(align(n))
 14 | #else
 15 | #define ALIGN(n)
 16 | #endif
 17 | 
 18 | #define STRIPE          FARSH_BASE_KEY_SIZE
 19 | #define STRIPE_ELEMENTS (STRIPE/sizeof(uint32_t))  /* should be power of 2 due to use of 'x % STRIPE_ELEMENTS' below */
 20 | #define EXTRA_ELEMENTS  (((FARSH_MAX_HASHES-1) * FARSH_EXTRA_KEY_SIZE) / sizeof(uint32_t))
 21 | 
 22 | ALIGN(64) static const uint32_t FARSH_KEYS [STRIPE_ELEMENTS + EXTRA_ELEMENTS] = {  /* STRIPE bytes of key material plus extra keys for hashes up to 1024 bits long */
 23 |     0xb8fe6c39,0x23a44bbe,0x7c01812c,0xf721ad1c,0xded46de9,0x839097db,0x7240a4a4,0xb7b3671f,0xcb79e64e,0xccc0e578,0x825ad07d,0xccff7221,0xb8084674,0xf743248e,0xe03590e6,0x813a264c,0x3c2852bb,0x91c300cb,0x88d0658b,0x1b532ea3,0x71644897,0xa20df94e,0x3819ef46,0xa9deacd8,0xa8fa763f,0xe39c343f,0xf9dcbbc7,0xc70b4f1d,0x8a51e04b,0xcdb45931,0xc89f7ec9,0xd9787364,0x4f6a0752,0xa79b079c,0x8fc49499,0x8ec9b7a9,0x33c92249,0x4eb6404f,0xfb2afb4e,0xa4814255,0x2f0e1b98,0xace93b24,0x188850cd,0x6c5c74a7,0x66fa4404,0xeac5ac83,0x34d3ebc3,0xc581a0ff,0xfa1363eb,0x170ddd51,0xb7f0da49,0xd3165526,0x29d4689e,0x2b16be58,0x7d47a1fc,0x8ff8b8d1,0x7ad031ce,0x45cb3a8f,0x95160428,0xafd7fbca,0xbb4b407e,0x995274a4,0xeb9a2d93,0x3be78908,0xed475f6c,0x919cd8f2,0xd3861e5a,0x6e31390c,0xfe6a3a49,0xdcad0914,0x06508beb,0xa88399f3,0xb058112f,0xe8b0fa79,0x29b4da06,0xedc253fb,0xc3e96dad,0x6e372b83,0x4f78b153,0xfffa6e86,0x21beeeec,0x01caea02,0x1267e50d,0x11e6092f,0xe819d298,0x832f80dd,0x0c4e2477,0xbc7886eb,0x01506637,0x8ba89668,0x6d11e7a0,0xfc12fd15,0x86a54c19,0x593ce3dd,0xd2b13fe5,0x8e772b53,0xae4a60cc,0x647a3b1b,0x547786e0,0x3ec4378e,0x8d7acf89,0xca36f947,0x0e89d5ef,0xaada6a3c,0x6da4a109,0x9ac6e11c,0x686691ef,0xa357bd2b,0xd16f1b9a,0x38c70303,0x7d4622b3,0x2968fa8f,0x8ca5bcb9,0xfcd61005,0x228b5e96,0x2c9dcc19,0x57cf243c,0x3c53f9c1,0x0cc7952c,0x686de4f0,0x93a747b5,0x4e87a510,0x975e91ae,0x4c10b98e,0x8a7f068c,0x346b19ab,0x353ca625,0xf20a50e0,0xce9921f6,0xdf66e014,0x0a11ef4b,0x8bc84ddf,0x84d25d22,0xc823936d,0x94741ec3,0x88278a60,0xb8649331,0x7a707a10,0x7292cad6,0xa7c644c2,0xbd156bfa,0x646c9578,0xb7f4dfd5,0x9f8277a7,0x7013924e,0xad674cc3,0x2cae9d05,0x912a9a22,0xf67c53fa,0x8d7e22a9,0x59ae372b,0x850199f3,0x63a2102c,0xd6ff1261,0x56738ee1,0xaa95145b,0xfdd12832,0x5b684deb,0x0784de94,0xaa62390e,0xbb7ccf19,0x0fefd572,0x565b41ca,0x2206d202,0x2d608479,0x4c0fcd3d,0xd36d3be3,0x155a9a65,0x10f9e732,0xac9b0f1e,0x1f72a03b,0xea9440ae,0x5b674b4f,0x31a827d1,0xecca954f,0x3d2cd61e,0x768d3da4,0x93745ac1,0x1d5d58cb,0x4b86f3b6,0x2aba923a
,0x0e65814c,0x8ae063d9,0xcd6969b0,0x36641585,0x742af59d,0x613a1316,0x338ea471,0x47861af3,0x30479dc3,0x1270a481,0x08771069,0xe3c4f0d2,0x0229874c,0x5a8a3bc1,0xe30d9733,0xd05be5a2,0xe2af31ba,0x222049f9,0x9f923b6a,0x033f64ec,0xe528b62b,0x8201efbd,0x2107d877,0xd8312ef1,0xa5679f99,0x1730b51b,0x752616d2,0x05305909,0x0dca440b,0x2093cdd9,0x6409ab50,0xba5c8ecc,0x8d4708ea,0x429f0917,0xb762fab0,0x5161ea75,0x45eba0eb,0xb6f34b41,0x52047123,0xe4181523,0x8d74e90a,0x54fa401c,0xddda0cc7,0x63df182a,0xc6403ef6,0x348ec6e8,0xb9ff57f5,0xf652b8bd,0x0f86b0f3,0xfb3a088a,0x4dc71533,0x7b3617d2,0xa34e87eb,0xba2a9bdd,0xe3381306,0x14bad6bb,0xc96dc7c2,0x333b54b6,0x9be47cfa,0x1dcf9299,0xe7ea5f99,0xb38feacd,0xc3cfe2f7,0x5b87e822,0x39c5ab56,0x18f4a18f,0x2d484d9c,0x4163d519,0x79769e98,0xf58a67f0,0x40590c02,0x319671c0,0x266b133a,0xaf81b287,0x6a31f737,
 24 |     0xe3bc0197,0x55079913,0x9f72c696,0x363e00c8,0x53153947,0xebfd127f,0x00f60519,0x46a6b62a,0x93b83380,0x3fe29324,0xdfc67091,0x0f62386d,0xdc375e79,0x8fea3f3e,0xdf8463d0,0x3702fa7b,0x3954435e,0x87caa648,0xa9158bee,0x08f30c25,0x66b82936,0xe7fc3feb,0x183c5450,0xd7ef4345,0x798c7963,0xc02cf557,0x098553d1,0xfa4312aa,0xe29ef883,0x7caf128d,0x74b3a07d,0xc8efdf5b,0x8db23782,0x2c409f4a,0xdae469da,0x4d3e1b3f,0x2e7b9a58,0xc83e3753,0xcefd96a6,0x44ddb068,0x5faed141,0xdee7d0f1,0xc223dbb4,0x7bfbe104,0x114d6e1d,0x52039cd5,0x307c0a9c,0xa6289c12,0x20ee8b3e,0x03724b0b,0xba68ae4a,0x93c5f2a1,0x9af27bb2,0x480f0eba,0xc14c6bbe,0xe7331f87,0xf0104df4,0x22c05363,0xb7e6d08a,0x6f15c449,0x4b9ee2cd,0x6b2c78ae,0x25ed2673,0xb6256596,0x99ad4803,0x654f8f10,0xe89eca64,0xd9a506df,0x530dc5fa,0xfe75be5c,0xa543833d,0xf739fd45,0x1605b488,0xe50f614a,0xe930df83,0x4540195d,0xf2da0f32,0x6b04f79c,0xe3c73c99,0xb3a5265c,0x5a1be07d,0xbda13d2a,0xeddc281c,0xe9d9a39a,0xde9beff1,0x573c1747,0x40be5b3e,0x3756e968,0x968077b6,0x6525a28f,0x747d0735,0x8a0ec11d,0x49c03af5,0xf3def45b,0xc3c9214d,0x9ea2e76d,0xfad3a715,0xcaa7ad89,0xde828e4c,0xa5769bd5,0x467cdb5a,0xd5f2cacb,0x68ebd182,0x8d40341a,0x21556887,0x000a5f6f,0x5ad8a473,0xafe7e886,0x98997d39,0x945ad218,0x46be0c93,0x93a5bd3a,0x3ffa4a8c,0xd834d936,0x2f022a2a,0x20791c6b,0x5db51516,0x8defeed2,0x9dee28a5,0x5188eba7,0xab4f8c67,0x48ceac96,0x2a11e16f,0xc1593b6d
 25 |     };
 26 | 
 27 | /* Internal: hash exactly STRIPE bytes of `data` against the first STRIPE bytes of `key`. The scalar path sums (data[i]+key[i]) * (data[i+1]+key[i+1]) over consecutive uint32_t pairs into a 64-bit accumulator; the SIMD paths are annotated per lane below */
 28 | static uint64_t farsh_full_block (const uint32_t *data, const uint32_t *key)
 29 | {
 30 | #ifdef FARSH_AVX2
 31 |     __m256i sum = _mm256_setzero_si256();  __m128i sum128;  int i;
 32 |     const __m256i *xdata = (const __m256i *) data;
 33 |     const __m256i *xkey  = (const __m256i *) key;
 34 |     // AVX2 path: 32 bytes per iteration; data and key are both read with unaligned loads
 35 |     for (i=0; i < STRIPE/sizeof(__m256i); i++)
 36 |     {
 37 |         __m256i d = _mm256_loadu_si256 (xdata+i);
 38 |         __m256i k = _mm256_loadu_si256 (xkey+i);
 39 |         __m256i dk = _mm256_add_epi32(d,k);                                     // uint32 dk[8]  = {d0+k0, d1+k1 .. d7+k7}
 40 |         __m256i res = _mm256_mul_epu32 (dk, _mm256_shuffle_epi32 (dk,0x31));    // uint64 res[4] = {dk0*dk1, dk2*dk3, dk4*dk5, dk6*dk7}
 41 |         sum = _mm256_add_epi64(sum,res);
 42 |     }
 43 |     sum = _mm256_add_epi64 (sum, _mm256_shuffle_epi32(sum,3*4+2));              // within each 128-bit lane, add the high 64-bit half to the low half
 44 |     sum128 = _mm_add_epi64 (_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum,1));   // add the two lanes: the low 64 bits now hold the sum of all four 64-bit values
 45 |     return *(uint64_t*) &sum128;   // return the low 64 bits of sum128
 46 | #elif defined(FARSH_SSE2)
 47 |     __m128i sum = _mm_setzero_si128();  int i;
 48 |     const __m128i *xdata = (const __m128i *) data;
 49 |     const __m128i *xkey  = (const __m128i *) key;
 50 |     // SSE2 path: 16 bytes per iteration; the key is always read with an aligned load, the data only when FARSH_ALIGNED_INPUT is defined
 51 |     for (i=0; i < STRIPE/sizeof(__m128i); i++)
 52 |     {
 53 | #ifdef FARSH_ALIGNED_INPUT
 54 |         __m128i d = _mm_load_si128 (xdata+i);
 55 | #else
 56 |         __m128i d = _mm_loadu_si128 (xdata+i);
 57 | #endif
 58 |         __m128i k = _mm_load_si128 (xkey+i);
 59 |         __m128i dk = _mm_add_epi32(d,k);                                        // uint32 dk[4]  = {d0+k0, d1+k1, d2+k2, d3+k3}
 60 |         __m128i res = _mm_mul_epu32 (dk, _mm_shuffle_epi32 (dk,0x31));          // uint64 res[2] = {dk0*dk1,dk2*dk3}
 61 |         sum = _mm_add_epi64(sum,res);
 62 |     }
 63 |     sum = _mm_add_epi64 (sum, _mm_shuffle_epi32(sum,3*4+2));                    // add the high 64-bit half to the low half: the low 64 bits now hold the sum of both 64-bit values
 64 |     return *(uint64_t*) &sum;   // return the low 64 bits of sum
 65 | #else
 66 |     uint64_t sum = 0;  int i;   /* portable scalar path, used when neither FARSH_AVX2 nor FARSH_SSE2 is defined */
 67 |     for (i=0; i < STRIPE_ELEMENTS; i+=2)
 68 |         sum += (data[i] + key[i]) * (uint64_t)(data[i+1] + key[i+1]);
 69 |     return sum;
 70 | #endif
 71 | }
 72 | 
 73 | /* Internal: hash a block shorter than STRIPE bytes; an incomplete uint32_t pair at the end of the buffer is copied out and zero-padded before it is hashed */
 74 | static uint64_t farsh_partial_block (const uint32_t *data, size_t bytes, const uint32_t *key)
 75 | {
 76 |     const size_t paired     = (bytes / sizeof(uint32_t)) & ~(size_t)1;   /* number of uint32_t elements that form whole pairs */
 77 |     const size_t tail_bytes = bytes - paired*sizeof(uint32_t);           /* bytes left over after the whole pairs */
 78 |     uint32_t tail[2] = {0, 0};                                           /* the leftover bytes, zero-padded to one full pair */
 79 |     uint64_t acc = 0;  size_t pos = 0;
 80 |     memcpy (tail, data+paired, tail_bytes);
 81 |     while (pos < paired) {
 82 |         acc += (data[pos] + key[pos]) * (uint64_t)(data[pos+1] + key[pos+1]);
 83 |         pos += 2;
 84 |     }
 85 |     if (tail_bytes != 0)   /* pos == paired here, so the tail uses the next two key words */
 86 |         acc += (tail[0] + key[pos]) * (uint64_t)(tail[1] + key[pos+1]);
 87 |     return acc;
 88 | }
 89 | 
 90 | /* ////////////////////////////////////////////////////////////////////////// */
 91 | /* Hash mixing code, including all constants, was kidnapped from the xxHash64 */
 92 | 
 93 | /* Internal: fold the 64-bit hash `h` of one block into the running hashsum `sum` and return the new hashsum */
 94 | static uint64_t farsh_combine (uint64_t sum, uint64_t h)
 95 | {
 96 |     const uint64_t mul_a = 11400714785074694791ULL;   /* PRIME64_1 */
 97 |     const uint64_t mul_b = 14029467366897019727ULL;   /* PRIME64_2 */
 98 |     const uint64_t add_c =  9650029242287828579ULL;   /* PRIME64_4 */
 99 |     uint64_t block, mixed;
100 |     block  = h * mul_b;                               /* scramble the block hash on its own first */
101 |     block  = (block + (block >> 31)) * mul_a;
102 |     mixed  = sum ^ block;                             /* then merge it into the running hashsum */
103 |     mixed += mixed >> 27;
104 |     return mixed * mul_a + add_c;
105 | }
106 | 
107 | /* Internal: scramble the 64-bit hashsum and fold its two halves into the final 32-bit hash value */
108 | static uint32_t farsh_final (uint64_t sum)
109 | {
110 |     const uint64_t mul_a = 14029467366897019727ULL;   /* PRIME64_2 */
111 |     const uint64_t mul_b =  1609587929392839161ULL;   /* PRIME64_3 */
112 |     uint64_t v = (sum ^ (sum >> 33)) * mul_a;
113 |     uint32_t lo, hi;
114 |     v  = (v ^ (v >> 29)) * mul_b;
115 |     lo = (uint32_t) v;  hi = (uint32_t)(v >> 32);
116 |     return lo ^ hi;
117 | }
118 | /* End of hash mixing code kidnapped from the xxHash64 */
119 | /* ////////////////////////////////////////////////////////////////////////// */
120 | 
121 | 
122 | /* Public API functions documented in farsh.h */
123 | 
124 | uint32_t farsh_keyed (const void *data, size_t bytes, const void *key, uint64_t seed)
125 | {   /* Hash `bytes` bytes at `data` with `key` (size/alignment requirements are in farsh.h); `seed` is the initial value of the running hashsum */
126 |     uint64_t sum = seed;  const size_t total_bytes = bytes;   /* remember the original length: `bytes` is consumed below and ends up 0 */
127 |     const char *ptr     = (const char*) data;
128 |     const uint32_t *key_ptr = (const uint32_t*) key;
129 |     while (bytes >= STRIPE)   /* all whole STRIPE-sized blocks */
130 |     {
131 |         size_t chunk = STRIPE;
132 |         uint64_t h = farsh_full_block ((const uint32_t*)ptr, key_ptr);
133 |         sum = farsh_combine (sum, h);
134 |         ptr += chunk;  bytes -= chunk;
135 |     }
136 |     if (bytes)                /* the last, shorter-than-STRIPE block, if any */
137 |     {
138 |         size_t chunk = bytes;
139 |         uint64_t h = farsh_partial_block ((const uint32_t*)ptr, chunk, key_ptr);
140 |         sum = farsh_combine (sum, h);
141 |         ptr += chunk;  bytes -= chunk;
142 |     }
143 |     return farsh_final(sum) ^ key_ptr[total_bytes%STRIPE_ELEMENTS];   /* ensure that zeroes at the end of data will affect the hash value: the key word is selected by the ORIGINAL length. Indexing with `bytes` (always 0 at this point) always selected key_ptr[0], so e.g. all-zero inputs of 1..8 bytes collided. NOTE: hash values change for lengths that are not a multiple of STRIPE_ELEMENTS */
144 | }
145 | 
146 | void farsh_keyed_n (const void *data, size_t bytes, const void *key, int n, uint64_t seed, void *hash)
147 | {   /* Each successive hash uses a key window that starts FARSH_EXTRA_KEY_SIZE bytes further into `key`; the n results go to consecutive uint32_t slots of `hash` */
148 |     uint32_t *out = (uint32_t*)hash;  const char *window = (const char*)key;  int left;
149 |     for (left = n;  left > 0;  left--, window += FARSH_EXTRA_KEY_SIZE)
150 |         *out++ = farsh_keyed (data, bytes, window, seed);
151 | }
152 | 
153 | void farsh_n (const void *data, size_t bytes, int k, int n, uint64_t seed, void *hash)
154 | {   /* Compute hashes number k..k+n-1 with the built-in key table, storing n uint32_t values to `hash`; aborts on an out-of-range request */
155 |     if ((k < 0 && n > 0)  ||  (int64_t)k + n > FARSH_MAX_HASHES)  abort();  /* FARSH_KEYS contains only material for the hashes 0..FARSH_MAX_HASHES-1; a negative k would read key material before the table, and the sum is computed in 64 bits so that it cannot overflow */
156 |     farsh_keyed_n (data, bytes, (const char*)FARSH_KEYS + k*FARSH_EXTRA_KEY_SIZE, n, seed, hash);
157 | }
158 | 
159 | uint32_t farsh (const void *data, size_t bytes, uint64_t seed)
160 | {   /* Keyed hash that uses the start of the built-in FARSH_KEYS table as its key */
161 |     const void *builtin_key = FARSH_KEYS;  return farsh_keyed (data, bytes, builtin_key, seed);
162 | }
163 | 
164 | #undef EXTRA_ELEMENTS
165 | #undef STRIPE
166 | #undef STRIPE_ELEMENTS
167 | #undef ALIGN
168 | 


--------------------------------------------------------------------------------
/farsh.h:
--------------------------------------------------------------------------------
 1 | #include <stddef.h>   /* for size_t */
 2 | #include <stdint.h>   /* for uint32_t & uint64_t */
 3 | 
 4 | /* Return 32-bit hash of the buffer */
 5 | uint32_t farsh (const void *data, size_t bytes, uint64_t seed);
 6 | 
 7 | /* Compute `n` 32-bit hashes starting with the hash number `k`, storing results to the `hash` buffer.
 8 | It's `n` times slower than computing a single 32-bit hash.
 9 | Hash computed by the `farsh` function has number 0. The function aborts if `k+n > 32`. */
10 | void farsh_n (const void *data, size_t bytes, int k, int n, uint64_t seed, void *hash);
11 | 
12 | /* Compute a 32-bit hash using `key`, which should be 1024 bytes long and aligned to a 16-byte boundary. */
13 | uint32_t farsh_keyed (const void *data, size_t bytes, const void *key, uint64_t seed);
14 | 
15 | /* Compute `n` 32-bit hashes using `key`, storing results to the `hash` buffer.
16 | `key` should be `1024+16*(n-1)` bytes long and aligned to 16-byte boundary. */
17 | void farsh_keyed_n (const void *data, size_t bytes, const void *key, int n, uint64_t seed, void *hash);
18 | 
19 | /* Hash functions accept 64-bit `seed` that can be used to "personalize" the hash value. Use seed==0 if you don't need that feature.
20 | Seeding may have lower quality than in xxHash&co since the seed value is mixed with block hashes rather than with raw data. */
21 | 
22 | /* Symbolic names for the above-mentioned constants */
23 | #define FARSH_MAX_HASHES             32  /* number of 32-bit hashes supported by the built-in key */
24 | #define FARSH_BASE_KEY_SIZE        1024  /* size of user-supplied key required to compute 32-bit hash with index 0 */
25 | #define FARSH_EXTRA_KEY_SIZE         16  /* extra bytes required to compute 32-bit hash with every next index */
26 | #define FARSH_BASE_KEY_ALIGNMENT     16  /* user-supplied key should be aligned to this size, otherwise SSE2 code may fail. For maximum speed, it's recommended to align key to 64 bytes. */
27 | 


--------------------------------------------------------------------------------