├── .gitignore ├── LICENSE ├── README.md ├── SMHasher ├── AvalancheTest.cpp ├── AvalancheTest.h ├── Bitslice.cpp ├── Bitvec.cpp ├── Bitvec.h ├── CMakeLists.txt ├── DifferentialTest.cpp ├── DifferentialTest.h ├── FarshTest.cpp ├── Hashes.cpp ├── Hashes.h ├── KeysetTest.cpp ├── KeysetTest.h ├── MurmurHash │ ├── MurmurHash3.cpp │ └── MurmurHash3.h ├── Platform.cpp ├── Platform.h ├── Poly1305Test.cpp ├── Random.cpp ├── Random.h ├── SHA1 │ ├── sha1.cpp │ └── sha1.h ├── SpeedTest.cpp ├── SpeedTest.h ├── SpookyHash │ ├── SpookyV2.cpp │ └── SpookyV2.h ├── SpookyHashTest.cpp ├── Stats.cpp ├── Stats.h ├── Types.cpp ├── Types.h ├── UHashTest.cpp ├── UMAC │ ├── rijndael-alg-fst.c │ ├── rijndael-alg-fst.h │ ├── umac.c │ └── umac.h ├── VHashTest.cpp ├── VMAC │ ├── vmac.c │ └── vmac.h ├── compile.cmd ├── crc.cpp ├── main.cpp ├── poly1305 │ ├── poly1305.c │ └── poly1305.h ├── pstdint.h ├── reports │ ├── MurMur3c_x86_128.txt │ ├── MurMur3c_x86_32.txt │ ├── SlowZZH128.txt │ ├── SlowZZH32.txt │ ├── SlowZZH64.txt │ ├── Spooky128.txt │ ├── Spooky32.txt │ ├── Spooky64.txt │ ├── VHash128.txt │ ├── VHash64.txt │ ├── XXH32.txt │ ├── XXH64.txt │ ├── ZZH128.txt │ ├── ZZH32.txt │ ├── ZZH64.txt │ ├── crc32.txt │ ├── sha1_128.txt │ ├── sha1_32.txt │ ├── sha1_64.txt │ ├── smhasher-ModXXH32-report.txt │ ├── smhasher-ModXXH64-report.txt │ ├── smhasher-SimdZZH32-report.txt │ ├── smhasher-SimdZZH64-report.txt │ ├── smhasher-SlowWideZZH32-report.txt │ ├── smhasher-SlowWideZZH64-report.txt │ ├── smhasher-SlowZZH32-report.txt │ ├── smhasher-SlowZZH64-report.txt │ ├── smhasher-Spooky128-report.txt │ ├── smhasher-Spooky32-report.txt │ ├── smhasher-Spooky64-report.txt │ ├── smhasher-WideZZH32-report.txt │ ├── smhasher-WideZZH64-report.txt │ ├── smhasher-XXH32-report.txt │ ├── smhasher-XXH64-report.txt │ ├── smhasher-ZZH128-2cycles.txt │ ├── smhasher-ZZH128-report.txt │ ├── smhasher-ZZH32-2cycles.txt │ ├── smhasher-ZZH32-report.txt │ ├── smhasher-ZZH64-2cycles.txt │ ├── smhasher-ZZH64-report.txt │ ├── 
smhasher-farsh128-report.txt │ ├── smhasher-farsh256-report.txt │ ├── smhasher-farsh32-report.txt │ ├── smhasher-farsh64-report.txt │ ├── smhasher-murmur3a_x86_32-report.txt │ ├── smhasher-murmur3c_x86_128-report.txt │ ├── smhasher-murmur3f_x64_128-report.txt │ ├── smhasher-poly1305-report.txt │ ├── smhasher-uhash128-report.txt │ ├── smhasher-uhash32-report.txt │ ├── smhasher-uhash64-report.txt │ └── smhasher-vhash64-report.txt ├── sha1.cpp ├── xxHash │ ├── xxhash.c │ └── xxhash.h └── xxHashTest.cpp ├── asm-listings ├── gcc-x64-avx2.lst ├── gcc-x64-nosimd.lst ├── gcc-x64.lst ├── gcc-x86-avx2.lst ├── gcc-x86-sse2.lst ├── gcc-x86.lst └── make-listings.cmd ├── benchmark ├── CpuID.h ├── compile-CpuID.cmd ├── compile-other.cmd ├── compile.cmd ├── main.cpp ├── runme.cmd └── timer.h ├── farsh.c └── farsh.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | 0/* 31 | 1/* 32 | 2/* 33 | 3/* 34 | 4/* 35 | 5/* 36 | 6/* 37 | 7/* 38 | 8/* 39 | 9/* 40 | keys/ 41 | asm-listings/aaa.cmd 42 | benchmark/aaa.cmd 43 | SMHasher/aaa.cmd 44 | 45 | SMHasher/128.cmd 46 | SMHasher/32.cmd 47 | SMHasher/64.cmd 48 | SMHasher/33.cmd 49 | SMHasher/sp.cmd 50 | SMHasher/mur.cmd 51 | SMHasher/all.cmd -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-16 Bulat Ziganshin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FARSH stands for Fast and Reliable (but not Secure) 32-bit Hash. 2 | While established [new speed records](#benchmark) 3 | and [successfully passed](SMHasher/reports/smhasher-farsh32-report.txt) the [SMHasher] testsuite, 4 | it's not as reliable as the [competition](#competition). 5 | [Discussion and additional benchmarks](http://encode.ru/threads/2213-FARSH-hashing-30-GB-s!). 
6 | 7 | # Features / to-do list 8 | - [x] compute hashes up to 1024 bits long 9 | - [x] hashing with user-supplied key material 10 | - [x] [successfully passed](SMHasher/reports/smhasher-farsh32-report.txt) the [SMHasher] testsuite 11 | - [ ] even faster and better quality hash mixing 12 | - [x] SSE2/AVX2 manually-optimized main loop 13 | - [x] 16-byte aligned key material and (optionally) input data for maximum speed on older CPUs 14 | - [ ] manual unrolling of main loop (since msvc/icl can't do it themselves) or asm code 15 | - [ ] try PSLLQ instead of PSHUFD in SSE2 code to [improve speed on older CPUs](http://encode.ru/threads/2213-FARSH-hashing-30-GB-s!?p=43983&viewfull=1#post43983) 16 | - [ ] `farsh_init/farsh_update/farsh_result` streaming API 17 | - [ ] `farsh64*/farsh128*` APIs for faster computation of multi-word hashes 18 | - [ ] `SSE2/AVX2/NEON?` options in the API (+alignment check for SSE2) for selection of the code path instead of compile-time choice 19 | 20 | 21 | # [API](farsh.h) 22 | - `uint32_t farsh(void *data, size_t size, uint64_t seed)` 23 | returns 32-bit hash of the buffer 24 | - `void farsh_n(void *data, size_t size, int k, int n, uint64_t seed, void *hash)` 25 | computes `n` 32-bit hashes starting with the hash number `k`, storing results to the `hash` buffer. 26 | It's `n` times slower than computation of single 32-bit hash. 27 | Hash computed by the `farsh` function has number 0. The function aborts if `k+n > 32`. 28 | - `uint32_t farsh_keyed(void *data, size_t size, void *key, uint64_t seed)` 29 | computes 32-bit hash using `key`, that should be 1024-byte long and aligned to 16-byte boundary. 30 | - `void farsh_keyed_n(void *data, size_t size, void *key, int n, uint64_t seed, void *hash)` 31 | computes `n` 32-bit hashes using `key`, storing results to the `hash` buffer. 32 | `key` should be `1024+16*(n-1)` bytes long and aligned to 16-byte boundary. 
33 | - Hash functions accept a 64-bit `seed` that can be used to "personalize" the hash value. Use seed==0 if you don't need that feature. 34 | Seeding may have lower quality than in the [competition](#competition) since the seed value is mixed with block hashes rather than raw data. 35 | - Header file provides symbolic names for the above-mentioned constants: 36 | `FARSH_MAX_HASHES == 32, FARSH_BASE_KEY_SIZE == 1024, FARSH_BASE_KEY_ALIGNMENT == 16, FARSH_EXTRA_KEY_SIZE == 16` 37 | 38 | 39 | # Internals 40 | The current FARSH version combines two hashing algorithms. 41 | 42 | The low-level hashing algorithm splits all input data into 1024-byte blocks and computes a hash value for every block. 43 | It's a very short cycle borrowed from [UHASH] that combines 1024 bytes of input data with 1024 bytes of key material. 44 | The hash value returned by this cycle is 64 bits long, and [UMAC thesis] proved that it has 32 bits of entropy. 45 | So the low-level algorithm compresses each 1024-byte block of input data into a 64-bit value carrying 32 bits of entropy. 46 | 47 | The high-level hashing algorithm is a stripped-down version of [xxHash64]. It receives the sequence of 64-bit values from the previous level 48 | and combines them into the final 32-bit hash result. Since the original [xxHash64] algorithm successfully passes all [SMHasher] tests 49 | while computing a 64-bit hash from raw data, it's no surprise that the modified algorithm is able to compute a high-quality 32-bit hash 50 | from the sequence of numbers each carrying 32 bits of entropy. 51 | 52 | The power of the FARSH algorithm comes from its inner cycle, which is very short (read: fast) and allows highly-parallel implementations, 53 | so it can fully exploit the power of multi-core, SIMD, VLIW and SIMT (GPU) architectures. 54 | At the same time, there is a mathematical proof that it can deliver 32 bits of entropy so we can use it without any doubts. 
55 | 56 | 57 | ## Universal hashing 58 | The main loop uses the [universal hashing] formula from [UMAC] with precomputed key material of 1024 bytes (plus 512 bytes for longer hashes). 59 | FARSH is essentially [UHASH] with higher-level hashing algorithms replaced with a simpler non-cryptographic one. 60 | The universal hashing formula used here (and copied intact from UMAC) is as simple as 61 | ```C 62 | uint64_t sum = 0; uint32_t *data, *key; 63 | for (i=0; i < elements; i+=2) 64 | sum += uint64_t(data[i] + key[i]) * (data[i+1] + key[i+1]); 65 | ``` 66 | 67 | ## The main loop 68 | - [Source code](farsh.c#L28) 69 | - Asm code (can be found by searching for adcl+mull/pmuludq instructions) 70 | - [gcc -O3 -funroll-loops -m32](asm-listings/gcc-x86.lst#L340) 71 | - [gcc -O3 -funroll-loops -m32 -msse2 -DFARSH_SSE2](asm-listings/gcc-x86-sse2.lst#L349) 72 | - [gcc -O3 -funroll-loops -m32 -mavx2 -DFARSH_AVX2](asm-listings/gcc-x86-avx2.lst#L350) 73 | - [gcc -O3 -funroll-loops -m64 -DFARSH_SSE2](asm-listings/gcc-x64.lst#L252) 74 | - [gcc -O3 -funroll-loops -m64 -mavx2 -DFARSH_AVX2](asm-listings/gcc-x64-avx2.lst#L259) 75 | 76 | 77 | # Benchmark 78 | [Benchmark](benchmark) measures overall hash speed as well as internal loop speed. 79 | The internal loop speed is a hard limit for the speed of any future FARSH version, 80 | while the overall speed includes the time required for pretty slow high-level hashing. 81 | Future versions should replace it with a faster algorithm still satisfying the [SMHasher] requirements, 82 | making overall hash speed within 10% of the internal loop speed. 83 | 84 | Executables were [compiled](benchmark/compile.cmd) with GCC 4.9.2. 85 | Aligned versions make sure that data being hashed are 64-byte aligned, 86 | unaligned versions make sure that data are unaligned. 87 | This makes a big difference on Core2 and older Intel CPUs. 
88 | 89 | [Intel Haswell i7-4770 3.9 GHz (AVX2)](http://ark.intel.com/products/75122/Intel-Core-i7-4770-Processor-8M-Cache-up-to-3_90-GHz), 90 | other IvyBridge to Skylake CPUs has pretty close performance/GHz: 91 | 92 | Executable | FARSH 0.2 speed | Internal loop speed 93 | --------------------------|-----------------------------:|----------------------------: 94 | aligned-farsh-x64-avx2 | 54.536 GB/s = 50.790 GiB/s | 65.645 GB/s = 61.137 GiB/s 95 | aligned-farsh-x64 | 31.162 GB/s = 29.022 GiB/s | 35.722 GB/s = 33.269 GiB/s 96 | aligned-farsh-x86-avx2 | 40.279 GB/s = 37.513 GiB/s | 61.682 GB/s = 57.446 GiB/s 97 | aligned-farsh-x86-sse2 | 25.221 GB/s = 23.489 GiB/s | 33.584 GB/s = 31.277 GiB/s 98 | aligned-farsh-x86 | 6.255 GB/s = 5.825 GiB/s | 6.336 GB/s = 5.901 GiB/s 99 | || 100 | farsh-x64-avx2 | 46.024 GB/s = 42.863 GiB/s | 64.967 GB/s = 60.505 GiB/s 101 | farsh-x64 | 30.335 GB/s = 28.252 GiB/s | 34.891 GB/s = 32.495 GiB/s 102 | farsh-x86-avx2 | 35.273 GB/s = 32.851 GiB/s | 57.252 GB/s = 53.320 GiB/s 103 | farsh-x86-sse2 | 24.502 GB/s = 22.820 GiB/s | 33.325 GB/s = 31.037 GiB/s 104 | farsh-x86 | 6.283 GB/s = 5.852 GiB/s | 6.763 GB/s = 6.299 GiB/s 105 | 106 | 107 | [Intel Pentium M processor 1.5 GHz (SSE2)](http://ark.intel.com/products/27576/Intel-Pentium-M-Processor-1_50-GHz-1M-Cache-400-MHz-FSB): 108 | 109 | Executable | FARSH 0.2 speed | Internal loop speed 110 | --------------------------|-----------------------------:|----------------------------: 111 | aligned-farsh-x86-sse2 | 2.625 GB/s = 2.444 GiB/s | 2.791 GB/s = 2.5 GiB/s 112 | aligned-farsh-x86 | 1.664 GB/s = 1.550 GiB/s | 1.946 GB/s = 1.8 GiB/s 113 | || 114 | farsh-x86-sse2 | 2.025 GB/s = 1.886 GiB/s | 2.302 GB/s = 2.1 GiB/s 115 | farsh-x86 | 1.471 GB/s = 1.370 GiB/s | 1.715 GB/s = 1.5 GiB/s 116 | 117 | 118 | K10: [AMD Athlon II X2 220 Processor 2.8 GHz (SSE3)](http://www.cpu-world.com/CPUs/K10/AMD-Athlon%20II%20X2%20220%20-%20ADX220OCK22GM.html): 119 | 120 | Executable | FARSH 0.2 speed | Internal 
loop speed 121 | --------------------------|-----------------------------:|----------------------------: 122 | aligned-farsh-x64 | 11.300 GB/s = 10.524 GiB/s | 14.446 GB/s = 13.454 GiB/s 123 | aligned-farsh-x86-sse2 | 10.899 GB/s = 10.151 GiB/s | 13.280 GB/s = 12.368 GiB/s 124 | aligned-farsh-x86 | 3.805 GB/s = 3.544 GiB/s | 5.089 GB/s = 4.740 GiB/s 125 | || 126 | farsh-x64 | 12.823 GB/s = 11.943 GiB/s | 14.187 GB/s = 13.212 GiB/s 127 | farsh-x86-sse2 | 10.933 GB/s = 10.182 GiB/s | 12.389 GB/s = 11.538 GiB/s 128 | farsh-x86 | 3.786 GB/s = 3.526 GiB/s | 5.825 GB/s = 5.425 GiB/s 129 | 130 | 131 | Piledriver: [AMD A8-5500 APU 3.7 GHz (AVX)](http://www.cpu-world.com/CPUs/Bulldozer/AMD-A8-Series%20A8-5500.html): 132 | 133 | Executable | FARSH 0.2 speed | Internal loop speed 134 | --------------------------|-----------------------------:|----------------------------: 135 | aligned-farsh-x64 | 17.130 GB/s = 15.953 GiB/s | 21.394 GB/s = 19.924 GiB/s 136 | aligned-farsh-x86-sse2 | 13.790 GB/s = 12.843 GiB/s | 20.830 GB/s = 19.400 GiB/s 137 | aligned-farsh-x86 | 3.872 GB/s = 3.606 GiB/s | 5.457 GB/s = 5.082 GiB/s 138 | || 139 | farsh-x64 | 15.313 GB/s = 14.262 GiB/s | 19.659 GB/s = 18.309 GiB/s 140 | farsh-x86-sse2 | 13.812 GB/s = 12.863 GiB/s | 18.977 GB/s = 17.674 GiB/s 141 | farsh-x86 | 3.959 GB/s = 3.687 GiB/s | 5.056 GB/s = 4.709 GiB/s 142 | 143 | More results and benchmarking executables are available in those [forum posts](http://encode.ru/threads/2213-FARSH-hashing-30-GB-s!?p=48907&viewfull=1#post48907). 
144 | 145 | 146 | 147 | # Competition 148 | Fast non-cryptographic hashes: 149 | - [MumHash](https://github.com/vnmakarov/mum-hash) (2016) 150 | - [HighwayHash](https://github.com/google/highwayhash) (2016) 151 | - [CLHash](http://lemire.me/blog/2015/10/26/crazily-fast-hashing-with-carry-less-multiplications), 152 | even [faster with Broadwell](http://lemire.me/blog/2015/12/24/your-software-should-follow-your-hardware-the-clhash-example) (2015) 153 | - [MetroHash](https://github.com/jandrewrogers/MetroHash) (2015) 154 | - Go language [32-bit](https://github.com/golang/go/blob/master/src/runtime/hash32.go) and [64-bit](https://github.com/golang/go/blob/master/src/runtime/hash64.go) hashes (2014) 155 | - [xxHash][xxHash] (2012) and [xxHash64][xxHash64] (2014) 156 | - [SpookyHash](http://burtleburtle.net/bob/hash/spooky.html): a 128-bit noncryptographic hash (2012) 157 | - The [CityHash](https://github.com/google/cityhash) family of hash functions (2011) 158 | - [MurmurHash3](https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp) (2011) 159 | - [Hasshe2](http://cessu.blogspot.ru/2008/11/hashing-with-sse2-revisited-or-my-hash.html) by Cessu (2008) 160 | 161 | Further reading: 162 | - [More info](https://github.com/aappleby/smhasher/wiki/SMHasher) about the [SMHasher] testsuite 163 | - [A lot of hashes](https://github.com/rurban/smhasher) tested by SMHasher (see doc subdir) 164 | - Interesting historical [overview](http://blog.reverberate.org/2012/01/state-of-hash-functions-2012.html) 165 | - [SuperFastHash](http://www.azillionmonkeys.com/qed/hash.html) 166 | - Bob Jenkins [1997 Dr Dobbs article](http://www.burtleburtle.net/bob/hash/doobs.html) and its [extended version](http://burtleburtle.net/bob/hash/evahash.html) 167 | 168 | MAC/PRF, i.e. 
cryptographically secure keyed hashes: 169 | - [UMAC] and [VMAC] 170 | - The [Poly1305-AES](https://en.wikipedia.org/wiki/Poly1305) message-authentication code 171 | - [SipHash](https://131002.net/siphash/) 172 | - Cryptanalysis of [CityHash64, MurmurHash](https://131002.net/siphash/#at) and [xxHash](http://crypto.stackexchange.com/questions/6408/from-hash-to-cryptographic-hash) 173 | 174 | 175 | 176 | [VMAC]: http://en.wikipedia.org/wiki/VMAC 177 | [UMAC]: http://en.wikipedia.org/wiki/UMAC 178 | [UMAC thesis]: http://fastcrypto.org/umac/umac_thesis.pdf 179 | [UHASH]: https://tools.ietf.org/html/rfc4418#section-5 180 | [universal hashing]: http://en.wikipedia.org/wiki/Universal_hashing 181 | [xxHash]: https://github.com/Cyan4973/xxHash 182 | [xxHash64]: https://github.com/Cyan4973/xxHash 183 | [SMHasher]: https://github.com/aappleby/smhasher 184 | -------------------------------------------------------------------------------- /SMHasher/AvalancheTest.cpp: -------------------------------------------------------------------------------- 1 | #include "AvalancheTest.h" 2 | 3 | //----------------------------------------------------------------------------- 4 | 5 | void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins ) 6 | { 7 | const char * symbols = ".123456789X"; 8 | 9 | for(int i = 0; i < y; i++) 10 | { 11 | printf("["); 12 | for(int j = 0; j < x; j++) 13 | { 14 | int k = (y - i) -1; 15 | 16 | int bin = bins[k + (j*y)]; 17 | 18 | double b = double(bin) / double(reps); 19 | b = fabs(b*2 - 1); 20 | 21 | b *= scale; 22 | 23 | int s = (int)floor(b*10); 24 | 25 | if(s > 10) s = 10; 26 | if(s < 0) s = 0; 27 | 28 | printf("%c",symbols[s]); 29 | } 30 | 31 | printf("]\n"); 32 | } 33 | } 34 | 35 | //---------------------------------------------------------------------------- 36 | 37 | double maxBias ( std::vector & counts, int reps ) 38 | { 39 | double worst = 0; 40 | 41 | for(int i = 0; i < (int)counts.size(); i++) 42 | { 43 | double c = 
double(counts[i]) / double(reps); 44 | 45 | double d = fabs(c * 2 - 1); 46 | 47 | if(d > worst) 48 | { 49 | worst = d; 50 | } 51 | } 52 | 53 | return worst; 54 | } 55 | 56 | //----------------------------------------------------------------------------- 57 | -------------------------------------------------------------------------------- /SMHasher/AvalancheTest.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Flipping a single bit of a key should cause an "avalanche" of changes in 3 | // the hash function's output. Ideally, each output bits should flip 50% of 4 | // the time - if the probability of an output bit flipping is not 50%, that bit 5 | // is "biased". Too much bias means that patterns applied to the input will 6 | // cause "echoes" of the patterns in the output, which in turn can cause the 7 | // hash function to fail to create an even, random distribution of hash values. 
8 | 9 | 10 | #pragma once 11 | 12 | #include "Types.h" 13 | #include "Random.h" 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | // Avalanche fails if a bit is biased by more than 1% 20 | 21 | #define AVALANCHE_FAIL 0.01 22 | 23 | double maxBias ( std::vector & counts, int reps ); 24 | 25 | //----------------------------------------------------------------------------- 26 | 27 | template < typename keytype, typename hashtype > 28 | void calcBias ( pfHash hash, std::vector & counts, int reps, Rand & r ) 29 | { 30 | const int keybytes = sizeof(keytype); 31 | const int hashbytes = sizeof(hashtype); 32 | 33 | const int keybits = keybytes * 8; 34 | const int hashbits = hashbytes * 8; 35 | 36 | keytype K; 37 | hashtype A,B; 38 | 39 | for(int irep = 0; irep < reps; irep++) 40 | { 41 | if(irep % (reps/10) == 0) printf("."); 42 | 43 | r.rand_p(&K,keybytes); 44 | 45 | hash(&K,keybytes,0,&A); 46 | 47 | int * cursor = &counts[0]; 48 | 49 | for(int iBit = 0; iBit < keybits; iBit++) 50 | { 51 | flipbit(&K,keybytes,iBit); 52 | hash(&K,keybytes,0,&B); 53 | flipbit(&K,keybytes,iBit); 54 | 55 | for(int iOut = 0; iOut < hashbits; iOut++) 56 | { 57 | int bitA = getbit(&A,hashbytes,iOut); 58 | int bitB = getbit(&B,hashbytes,iOut); 59 | 60 | (*cursor++) += (bitA ^ bitB); 61 | } 62 | } 63 | } 64 | } 65 | 66 | //----------------------------------------------------------------------------- 67 | 68 | template < typename keytype, typename hashtype > 69 | bool AvalancheTest ( pfHash hash, const int reps ) 70 | { 71 | Rand r(48273); 72 | 73 | const int keybytes = sizeof(keytype); 74 | const int hashbytes = sizeof(hashtype); 75 | 76 | const int keybits = keybytes * 8; 77 | const int hashbits = hashbytes * 8; 78 | 79 | printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps); 80 | 81 | //---------- 82 | 83 | std::vector bins(keybits*hashbits,0); 84 | 85 | calcBias(hash,bins,reps,r); 86 | 87 | //---------- 88 | 89 | bool result = true; 90 | 91 | double b = 
maxBias(bins,reps); 92 | 93 | printf(" worst bias is %f%%",b * 100.0); 94 | 95 | if(b > AVALANCHE_FAIL) 96 | { 97 | int * cursor = &bins[0]; 98 | 99 | for(int iBit = 0; iBit < keybits; iBit++) 100 | { 101 | for(int iOut = 0; iOut < hashbits; iOut++) 102 | { 103 | double k = *cursor++/(double)reps; 104 | if (k>0.51 || k<0.49) 105 | 0 && printf(", %d->%d %f%%",iBit,iOut,k * 100.0); // enable if you need detailed information 106 | } 107 | } 108 | result = false; 109 | } 110 | 111 | printf("\n"); 112 | 113 | return result; 114 | } 115 | 116 | //---------------------------------------------------------------------------- 117 | // Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and 118 | // not really all that useful. 119 | 120 | template< typename keytype, typename hashtype > 121 | void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose ) 122 | { 123 | Rand r(11938); 124 | 125 | const int keybytes = sizeof(keytype); 126 | const int hashbytes = sizeof(hashtype); 127 | const int hashbits = hashbytes * 8; 128 | 129 | std::vector bins(hashbits*hashbits*4,0); 130 | 131 | keytype key; 132 | hashtype h1,h2; 133 | 134 | for(int irep = 0; irep < reps; irep++) 135 | { 136 | if(verbose) 137 | { 138 | if(irep % (reps/10) == 0) printf("."); 139 | } 140 | 141 | r.rand_p(&key,keybytes); 142 | hash(&key,keybytes,0,&h1); 143 | 144 | flipbit(key,keybit); 145 | hash(&key,keybytes,0,&h2); 146 | 147 | hashtype d = h1 ^ h2; 148 | 149 | for(int out1 = 0; out1 < hashbits; out1++) 150 | for(int out2 = 0; out2 < hashbits; out2++) 151 | { 152 | if(out1 == out2) continue; 153 | 154 | uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); 155 | 156 | bins[(out1 * hashbits + out2) * 4 + b]++; 157 | } 158 | } 159 | 160 | if(verbose) printf("\n"); 161 | 162 | maxBias = 0; 163 | 164 | for(int out1 = 0; out1 < hashbits; out1++) 165 | { 166 | for(int out2 = 0; out2 < hashbits; out2++) 167 | { 168 | if(out1 == out2) 169 | { 
170 | if(verbose) printf("\\"); 171 | continue; 172 | } 173 | 174 | double bias = 0; 175 | 176 | for(int b = 0; b < 4; b++) 177 | { 178 | double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2); 179 | b2 = fabs(b2 * 2 - 1); 180 | 181 | if(b2 > bias) bias = b2; 182 | } 183 | 184 | if(bias > maxBias) 185 | { 186 | maxBias = bias; 187 | maxA = out1; 188 | maxB = out2; 189 | } 190 | 191 | if(verbose) 192 | { 193 | if (bias < 0.01) printf("."); 194 | else if(bias < 0.05) printf("o"); 195 | else if(bias < 0.33) printf("O"); 196 | else printf("X"); 197 | } 198 | } 199 | 200 | if(verbose) printf("\n"); 201 | } 202 | } 203 | 204 | //---------- 205 | 206 | template< typename keytype, typename hashtype > 207 | bool BicTest ( pfHash hash, const int reps ) 208 | { 209 | const int keybytes = sizeof(keytype); 210 | const int keybits = keybytes * 8; 211 | 212 | double maxBias = 0; 213 | int maxK = 0; 214 | int maxA = 0; 215 | int maxB = 0; 216 | 217 | for(int i = 0; i < keybits; i++) 218 | { 219 | if(i % (keybits/10) == 0) printf("."); 220 | 221 | double bias; 222 | int a,b; 223 | 224 | BicTest(hash,i,reps,bias,a,b,true); 225 | 226 | if(bias > maxBias) 227 | { 228 | maxBias = bias; 229 | maxK = i; 230 | maxA = a; 231 | maxB = b; 232 | } 233 | } 234 | 235 | printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); 236 | 237 | // Bit independence is harder to pass than avalanche, so we're a bit more lax here. 
238 | 239 | bool result = (maxBias < 0.05); 240 | 241 | return result; 242 | } 243 | 244 | //----------------------------------------------------------------------------- 245 | // BIC test variant - store all intermediate data in a table, draw diagram 246 | // afterwards (much faster) 247 | 248 | template< typename keytype, typename hashtype > 249 | void BicTest3 ( pfHash hash, const int reps, bool verbose = true ) 250 | { 251 | const int keybytes = sizeof(keytype); 252 | const int keybits = keybytes * 8; 253 | const int hashbytes = sizeof(hashtype); 254 | const int hashbits = hashbytes * 8; 255 | const int pagesize = hashbits*hashbits*4; 256 | 257 | Rand r(11938); 258 | 259 | double maxBias = 0; 260 | int maxK = 0; 261 | int maxA = 0; 262 | int maxB = 0; 263 | 264 | keytype key; 265 | hashtype h1,h2; 266 | 267 | std::vector bins(keybits*pagesize,0); 268 | 269 | for(int keybit = 0; keybit < keybits; keybit++) 270 | { 271 | if(keybit % (keybits/10) == 0) printf("."); 272 | 273 | int * page = &bins[keybit*pagesize]; 274 | 275 | for(int irep = 0; irep < reps; irep++) 276 | { 277 | r.rand_p(&key,keybytes); 278 | hash(&key,keybytes,0,&h1); 279 | flipbit(key,keybit); 280 | hash(&key,keybytes,0,&h2); 281 | 282 | hashtype d = h1 ^ h2; 283 | 284 | for(int out1 = 0; out1 < hashbits-1; out1++) 285 | for(int out2 = out1+1; out2 < hashbits; out2++) 286 | { 287 | int * b = &page[(out1*hashbits+out2)*4]; 288 | 289 | uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1); 290 | 291 | b[x]++; 292 | } 293 | } 294 | } 295 | 296 | printf("\n"); 297 | 298 | for(int out1 = 0; out1 < hashbits-1; out1++) 299 | { 300 | for(int out2 = out1+1; out2 < hashbits; out2++) 301 | { 302 | if(verbose) printf("(%3d,%3d) - ",out1,out2); 303 | 304 | for(int keybit = 0; keybit < keybits; keybit++) 305 | { 306 | int * page = &bins[keybit*pagesize]; 307 | int * bins = &page[(out1*hashbits+out2)*4]; 308 | 309 | double bias = 0; 310 | 311 | for(int b = 0; b < 4; b++) 312 | { 313 | double b2 = double(bins[b]) / 
double(reps / 2); 314 | b2 = fabs(b2 * 2 - 1); 315 | 316 | if(b2 > bias) bias = b2; 317 | } 318 | 319 | if(bias > maxBias) 320 | { 321 | maxBias = bias; 322 | maxK = keybit; 323 | maxA = out1; 324 | maxB = out2; 325 | } 326 | 327 | if(verbose) 328 | { 329 | if (bias < 0.01) printf("."); 330 | else if(bias < 0.05) printf("o"); 331 | else if(bias < 0.33) printf("O"); 332 | else printf("X"); 333 | } 334 | } 335 | 336 | // Finished keybit 337 | 338 | if(verbose) printf("\n"); 339 | } 340 | 341 | if(verbose) 342 | { 343 | for(int i = 0; i < keybits+12; i++) printf("-"); 344 | printf("\n"); 345 | } 346 | } 347 | 348 | printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); 349 | } 350 | 351 | 352 | //----------------------------------------------------------------------------- 353 | // BIC test variant - iterate over output bits, then key bits. No temp storage, 354 | // but slooooow 355 | 356 | template< typename keytype, typename hashtype > 357 | void BicTest2 ( pfHash hash, const int reps, bool verbose = true ) 358 | { 359 | const int keybytes = sizeof(keytype); 360 | const int keybits = keybytes * 8; 361 | const int hashbytes = sizeof(hashtype); 362 | const int hashbits = hashbytes * 8; 363 | 364 | Rand r(11938); 365 | 366 | double maxBias = 0; 367 | int maxK = 0; 368 | int maxA = 0; 369 | int maxB = 0; 370 | 371 | keytype key; 372 | hashtype h1,h2; 373 | 374 | for(int out1 = 0; out1 < hashbits-1; out1++) 375 | for(int out2 = out1+1; out2 < hashbits; out2++) 376 | { 377 | if(verbose) printf("(%3d,%3d) - ",out1,out2); 378 | 379 | for(int keybit = 0; keybit < keybits; keybit++) 380 | { 381 | int bins[4] = { 0, 0, 0, 0 }; 382 | 383 | for(int irep = 0; irep < reps; irep++) 384 | { 385 | r.rand_p(&key,keybytes); 386 | hash(&key,keybytes,0,&h1); 387 | flipbit(key,keybit); 388 | hash(&key,keybytes,0,&h2); 389 | 390 | hashtype d = h1 ^ h2; 391 | 392 | uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); 393 | 394 | bins[b]++; 395 | } 396 | 397 | double bias = 0; 398 
| 399 | for(int b = 0; b < 4; b++) 400 | { 401 | double b2 = double(bins[b]) / double(reps / 2); 402 | b2 = fabs(b2 * 2 - 1); 403 | 404 | if(b2 > bias) bias = b2; 405 | } 406 | 407 | if(bias > maxBias) 408 | { 409 | maxBias = bias; 410 | maxK = keybit; 411 | maxA = out1; 412 | maxB = out2; 413 | } 414 | 415 | if(verbose) 416 | { 417 | if (bias < 0.05) printf("."); 418 | else if(bias < 0.10) printf("o"); 419 | else if(bias < 0.50) printf("O"); 420 | else printf("X"); 421 | } 422 | } 423 | 424 | // Finished keybit 425 | 426 | if(verbose) printf("\n"); 427 | } 428 | 429 | printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); 430 | } 431 | 432 | //----------------------------------------------------------------------------- 433 | -------------------------------------------------------------------------------- /SMHasher/Bitslice.cpp: -------------------------------------------------------------------------------- 1 | #include "Bitvec.h" 2 | #include 3 | #include 4 | 5 | // handle xnor 6 | 7 | typedef std::vector slice; 8 | typedef std::vector slice_vec; 9 | 10 | int countbits ( slice & v ) 11 | { 12 | int c = 0; 13 | 14 | for(size_t i = 0; i < v.size(); i++) 15 | { 16 | int d = countbits(v[i]); 17 | 18 | c += d; 19 | } 20 | 21 | return c; 22 | } 23 | 24 | int countxor ( slice & a, slice & b ) 25 | { 26 | assert(a.size() == b.size()); 27 | 28 | int c = 0; 29 | 30 | for(size_t i = 0; i < a.size(); i++) 31 | { 32 | int d = countbits(a[i] ^ b[i]); 33 | 34 | c += d; 35 | } 36 | 37 | return c; 38 | } 39 | 40 | void xoreq ( slice & a, slice & b ) 41 | { 42 | assert(a.size() == b.size()); 43 | 44 | for(size_t i = 0; i < a.size(); i++) 45 | { 46 | a[i] ^= b[i]; 47 | } 48 | } 49 | 50 | //----------------------------------------------------------------------------- 51 | // Bitslice a hash set 52 | 53 | template< typename hashtype > 54 | void Bitslice ( std::vector & hashes, slice_vec & slices ) 55 | { 56 | const int hashbytes = sizeof(hashtype); 57 | const int hashbits 
= hashbytes * 8; 58 | const int slicelen = ((int)hashes.size() + 31) / 32; 59 | 60 | slices.clear(); 61 | slices.resize(hashbits); 62 | 63 | for(int i = 0; i < (int)slices.size(); i++) 64 | { 65 | slices[i].resize(slicelen,0); 66 | } 67 | 68 | for(int j = 0; j < hashbits; j++) 69 | { 70 | void * sliceblob = &(slices[j][0]); 71 | 72 | for(int i = 0; i < (int)hashes.size(); i++) 73 | { 74 | int b = getbit(hashes[i],j); 75 | 76 | setbit(sliceblob,slicelen*4,i,b); 77 | } 78 | } 79 | } 80 | 81 | void FactorSlices ( slice_vec & slices ) 82 | { 83 | std::vector counts(slices.size(),0); 84 | 85 | for(size_t i = 0; i < slices.size(); i++) 86 | { 87 | counts[i] = countbits(slices[i]); 88 | } 89 | 90 | bool changed = true; 91 | 92 | while(changed) 93 | { 94 | int bestA = -1; 95 | int bestB = -1; 96 | 97 | for(int j = 0; j < (int)slices.size()-1; j++) 98 | { 99 | for(int i = j+1; i < (int)slices.size(); i++) 100 | { 101 | int d = countxor(slices[i],slices[j]); 102 | 103 | if((d < counts[i]) && (d < counts[j])) 104 | { 105 | if(counts[i] < counts[j]) 106 | { 107 | bestA = j; 108 | bestB = i; 109 | } 110 | } 111 | else if(d < counts[i]) 112 | { 113 | //bestA = 114 | } 115 | } 116 | } 117 | } 118 | } 119 | 120 | 121 | void foo ( void ) 122 | { 123 | slice a; 124 | slice_vec b; 125 | 126 | Bitslice(a,b); 127 | } -------------------------------------------------------------------------------- /SMHasher/Bitvec.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Platform.h" 4 | 5 | #include 6 | 7 | //----------------------------------------------------------------------------- 8 | 9 | void printbits ( const void * blob, int len ); 10 | void printhex32 ( const void * blob, int len ); 11 | void printbytes ( const void * blob, int len ); 12 | void printbytes2 ( const void * blob, int len ); 13 | 14 | uint32_t popcount ( uint32_t v ); 15 | uint32_t parity ( uint32_t v ); 16 | 17 | uint32_t getbit ( const void * blob, int 
len, uint32_t bit ); 18 | uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit ); 19 | 20 | void setbit ( void * blob, int len, uint32_t bit ); 21 | void setbit ( void * blob, int len, uint32_t bit, uint32_t val ); 22 | 23 | void clearbit ( void * blob, int len, uint32_t bit ); 24 | 25 | void flipbit ( void * blob, int len, uint32_t bit ); 26 | 27 | int countbits ( uint32_t v ); 28 | int countbits ( std::vector & v ); 29 | 30 | int countbits ( const void * blob, int len ); 31 | 32 | void invert ( std::vector & v ); 33 | 34 | //---------- 35 | 36 | template< typename T > 37 | inline uint32_t getbit ( T & blob, uint32_t bit ) 38 | { 39 | return getbit(&blob,sizeof(blob),bit); 40 | } 41 | 42 | template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; } 43 | template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; } 44 | 45 | //---------- 46 | 47 | template< typename T > 48 | inline void setbit ( T & blob, uint32_t bit ) 49 | { 50 | return setbit(&blob,sizeof(blob),bit); 51 | } 52 | 53 | template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); } 54 | template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); } 55 | 56 | //---------- 57 | 58 | template< typename T > 59 | inline void flipbit ( T & blob, uint32_t bit ) 60 | { 61 | flipbit(&blob,sizeof(blob),bit); 62 | } 63 | 64 | template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); } 65 | template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); } 66 | 67 | //----------------------------------------------------------------------------- 68 | // Left and right shift of blobs. 
// The shift(N) versions work on chunks of N
// bits at a time (faster)

void lshift1  ( void * blob, int len, int c );
void lshift8  ( void * blob, int len, int c );
void lshift32 ( void * blob, int len, int c );

void rshift1  ( void * blob, int len, int c );
void rshift8  ( void * blob, int len, int c );
void rshift32 ( void * blob, int len, int c );

// Left-shift a blob of len bytes by c. Blobs whose byte length is a multiple
// of four take the 32-bit-chunk path, everything else the 8-bit-chunk path.
inline void lshift ( void * blob, int len, int c )
{
  if((len & 3) == 0)
  {
    lshift32(blob,len,c);
  }
  else
  {
    lshift8(blob,len,c);
  }
}

// Right-shift counterpart of lshift(void*,int,int).
inline void rshift ( void * blob, int len, int c )
{
  if((len & 3) == 0)
  {
    rshift32(blob,len,c);
  }
  else
  {
    rshift8(blob,len,c);
  }
}

// Typed convenience wrapper: left-shift any object, treating it as a blob of
// sizeof(T) bytes.
template < typename T >
inline void lshift ( T & blob, int c )
{
  if((sizeof(T) & 3) == 0)
  {
    lshift32(&blob,sizeof(T),c);
  }
  else
  {
    lshift8(&blob,sizeof(T),c);
  }
}

// Typed convenience wrapper: right-shift any object, treating it as a blob of
// sizeof(T) bytes. (This previously dispatched to lshift32/lshift8, i.e. it
// shifted in the wrong direction for every type without a specialization.)
template < typename T >
inline void rshift ( T & blob, int c )
{
  if((sizeof(T) & 3) == 0)
  {
    rshift32(&blob,sizeof(T),c);
  }
  else
  {
    rshift8(&blob,sizeof(T),c);
  }
}

// Native integer types use the built-in shift operators directly.
template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }
template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }
template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }

//-----------------------------------------------------------------------------
// Left and right rotate of blobs.
The rot(N) versions work on chunks of N 136 | // bits at a time (faster) 137 | 138 | void lrot1 ( void * blob, int len, int c ); 139 | void lrot8 ( void * blob, int len, int c ); 140 | void lrot32 ( void * blob, int len, int c ); 141 | 142 | void rrot1 ( void * blob, int len, int c ); 143 | void rrot8 ( void * blob, int len, int c ); 144 | void rrot32 ( void * blob, int len, int c ); 145 | 146 | inline void lrot ( void * blob, int len, int c ) 147 | { 148 | if((len & 3) == 0) 149 | { 150 | return lrot32(blob,len,c); 151 | } 152 | else 153 | { 154 | return lrot8(blob,len,c); 155 | } 156 | } 157 | 158 | inline void rrot ( void * blob, int len, int c ) 159 | { 160 | if((len & 3) == 0) 161 | { 162 | return rrot32(blob,len,c); 163 | } 164 | else 165 | { 166 | return rrot8(blob,len,c); 167 | } 168 | } 169 | 170 | template < typename T > 171 | inline void lrot ( T & blob, int c ) 172 | { 173 | if((sizeof(T) & 3) == 0) 174 | { 175 | return lrot32(&blob,sizeof(T),c); 176 | } 177 | else 178 | { 179 | return lrot8(&blob,sizeof(T),c); 180 | } 181 | } 182 | 183 | template < typename T > 184 | inline void rrot ( T & blob, int c ) 185 | { 186 | if((sizeof(T) & 3) == 0) 187 | { 188 | return rrot32(&blob,sizeof(T),c); 189 | } 190 | else 191 | { 192 | return rrot8(&blob,sizeof(T),c); 193 | } 194 | } 195 | 196 | template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); } 197 | template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); } 198 | template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); } 199 | template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); } 200 | 201 | //----------------------------------------------------------------------------- 202 | // Bit-windowing functions - select some N-bit subset of the input blob 203 | 204 | uint32_t window1 ( void * blob, int len, int start, int count ); 205 | uint32_t window8 ( void * blob, int len, int start, int count ); 206 | uint32_t 
window32 ( void * blob, int len, int start, int count ); 207 | 208 | inline uint32_t window ( void * blob, int len, int start, int count ) 209 | { 210 | if(len & 3) 211 | { 212 | return window8(blob,len,start,count); 213 | } 214 | else 215 | { 216 | return window32(blob,len,start,count); 217 | } 218 | } 219 | 220 | template < typename T > 221 | inline uint32_t window ( T & blob, int start, int count ) 222 | { 223 | if((sizeof(T) & 3) == 0) 224 | { 225 | return window32(&blob,sizeof(T),start,count); 226 | } 227 | else 228 | { 229 | return window8(&blob,sizeof(T),start,count); 230 | } 231 | } 232 | 233 | template<> 234 | inline uint32_t window ( uint32_t & blob, int start, int count ) 235 | { 236 | return ROTR32(blob,start) & ((1< 240 | inline uint32_t window ( uint64_t & blob, int start, int count ) 241 | { 242 | return (uint32_t)ROTR64(blob,start) & ((1< 13 | #include 14 | #include 15 | 16 | //----------------------------------------------------------------------------- 17 | // Sort through the differentials, ignoring collisions that only occured once 18 | // (these could be false positives). If we find collisions of 3 or more, the 19 | // differential test fails. 
20 | 21 | template < class keytype > 22 | bool ProcessDifferentials ( std::vector & diffs, int reps, bool dumpCollisions ) 23 | { 24 | std::sort(diffs.begin(), diffs.end()); 25 | 26 | int count = 1; 27 | int ignore = 0; 28 | 29 | bool result = true; 30 | 31 | if(diffs.size()) 32 | { 33 | keytype kp = diffs[0]; 34 | 35 | for(int i = 1; i < (int)diffs.size(); i++) 36 | { 37 | if(diffs[i] == kp) 38 | { 39 | count++; 40 | continue; 41 | } 42 | else 43 | { 44 | if(count > 1) 45 | { 46 | result = false; 47 | 48 | double pct = 100 * (double(count) / double(reps)); 49 | 50 | if(dumpCollisions) 51 | { 52 | printbits((unsigned char*)&kp,sizeof(kp)); 53 | printf(" - %4.2f%%\n", pct ); 54 | } 55 | } 56 | else 57 | { 58 | ignore++; 59 | } 60 | 61 | kp = diffs[i]; 62 | count = 1; 63 | } 64 | } 65 | 66 | if(count > 1) 67 | { 68 | double pct = 100 * (double(count) / double(reps)); 69 | 70 | if(dumpCollisions) 71 | { 72 | printbits((unsigned char*)&kp,sizeof(kp)); 73 | printf(" - %4.2f%%\n", pct ); 74 | } 75 | } 76 | else 77 | { 78 | ignore++; 79 | } 80 | } 81 | 82 | printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore); 83 | 84 | if(result == false) 85 | { 86 | printf(" !!!!! "); 87 | } 88 | 89 | printf("\n"); 90 | printf("\n"); 91 | 92 | return result; 93 | } 94 | 95 | //----------------------------------------------------------------------------- 96 | // Check all possible keybits-choose-N differentials for collisions, report 97 | // ones that occur significantly more often than expected. 98 | 99 | // Random collisions can happen with probability 1 in 2^32 - if we do more than 100 | // 2^32 tests, we'll probably see some spurious random collisions, so don't report 101 | // them. 
102 | 103 | template < typename keytype, typename hashtype > 104 | void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector & diffs ) 105 | { 106 | const int bits = sizeof(keytype)*8; 107 | 108 | for(int i = start; i < bits; i++) 109 | { 110 | flipbit(&k2,sizeof(k2),i); 111 | bitsleft--; 112 | 113 | hash(&k2,sizeof(k2),0,&h2); 114 | 115 | if(h1 == h2) 116 | { 117 | diffs.push_back(k1 ^ k2); 118 | } 119 | 120 | if(bitsleft) 121 | { 122 | DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs); 123 | } 124 | 125 | flipbit(&k2,sizeof(k2),i); 126 | bitsleft++; 127 | } 128 | } 129 | 130 | //---------- 131 | 132 | template < typename keytype, typename hashtype > 133 | bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions ) 134 | { 135 | const int keybits = sizeof(keytype) * 8; 136 | const int hashbits = sizeof(hashtype) * 8; 137 | 138 | double diffcount = chooseUpToK(keybits,diffbits); 139 | double testcount = (diffcount * double(reps)); 140 | double expected = testcount / pow(2.0,double(hashbits)); 141 | 142 | Rand r(100); 143 | 144 | std::vector diffs; 145 | 146 | keytype k1,k2; 147 | hashtype h1,h2; 148 | 149 | printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits); 150 | printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected); 151 | 152 | for(int i = 0; i < reps; i++) 153 | { 154 | if(i % (reps/10) == 0) printf("."); 155 | 156 | r.rand_p(&k1,sizeof(keytype)); 157 | k2 = k1; 158 | 159 | hash(&k1,sizeof(k1),0,(uint32_t*)&h1); 160 | 161 | DiffTestRecurse(hash,k1,k2,h1,h2,0,diffbits,diffs); 162 | } 163 | printf("\n"); 164 | 165 | bool result = true; 166 | 167 | result &= ProcessDifferentials(diffs,reps,dumpCollisions); 168 | 169 | return result; 170 | } 171 | 172 | //----------------------------------------------------------------------------- 173 | // Differential distribution 
test - for each N-bit input differential, generate 174 | // a large set of differential key pairs, hash them, and test the output 175 | // differentials using our distribution test code. 176 | 177 | // This is a very hard test to pass - even if the hash values are well-distributed, 178 | // the differences between hash values may not be. It's also not entirely relevant 179 | // for testing hash functions, but it's still interesting. 180 | 181 | // This test is a _lot_ of work, as it's essentially a full keyset test for 182 | // each of a potentially huge number of input differentials. To speed things 183 | // along, we do only a few distribution tests per keyset instead of the full 184 | // grid. 185 | 186 | // #TODO - put diagram drawing back on 187 | 188 | template < typename keytype, typename hashtype > 189 | void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg ) 190 | { 191 | std::vector keys(trials); 192 | std::vector A(trials),B(trials); 193 | 194 | for(int i = 0; i < trials; i++) 195 | { 196 | rand_p(&keys[i],sizeof(keytype)); 197 | 198 | hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]); 199 | } 200 | 201 | //---------- 202 | 203 | std::vector diffs; 204 | 205 | keytype temp(0); 206 | 207 | SparseKeygenRecurse(0,diffbits,true,temp,diffs); 208 | 209 | //---------- 210 | 211 | worst = 0; 212 | avg = 0; 213 | 214 | hashtype h2; 215 | 216 | for(size_t j = 0; j < diffs.size(); j++) 217 | { 218 | keytype & d = diffs[j]; 219 | 220 | for(int i = 0; i < trials; i++) 221 | { 222 | keytype k2 = keys[i] ^ d; 223 | 224 | hash(&k2,sizeof(k2),0,&h2); 225 | 226 | B[i] = A[i] ^ h2; 227 | } 228 | 229 | double dworst,davg; 230 | 231 | TestDistributionFast(B,dworst,davg); 232 | 233 | avg += davg; 234 | worst = (dworst > worst) ? 
dworst : worst; 235 | } 236 | 237 | avg /= double(diffs.size()); 238 | } 239 | 240 | //----------------------------------------------------------------------------- 241 | // Simpler differential-distribution test - for all 1-bit differentials, 242 | // generate random key pairs and run full distribution/collision tests on the 243 | // hash differentials 244 | 245 | template < typename keytype, typename hashtype > 246 | bool DiffDistTest2 ( pfHash hash ) 247 | { 248 | Rand r(857374); 249 | 250 | int keybits = sizeof(keytype) * 8; 251 | const int keycount = 256*256*32; 252 | keytype k; 253 | 254 | std::vector hashes(keycount); 255 | hashtype h1,h2; 256 | 257 | bool result = true; 258 | 259 | for(int keybit = 0; keybit < keybits; keybit++) 260 | { 261 | printf("Testing bit %d\n",keybit); 262 | 263 | for(int i = 0; i < keycount; i++) 264 | { 265 | r.rand_p(&k,sizeof(keytype)); 266 | 267 | hash(&k,sizeof(keytype),0,&h1); 268 | flipbit(&k,sizeof(keytype),keybit); 269 | hash(&k,sizeof(keytype),0,&h2); 270 | 271 | hashes[i] = h1 ^ h2; 272 | } 273 | 274 | result &= TestHashList(hashes,true,true,true); 275 | printf("\n"); 276 | } 277 | 278 | return result; 279 | } 280 | 281 | //---------------------------------------------------------------------------- 282 | -------------------------------------------------------------------------------- /SMHasher/FarshTest.cpp: -------------------------------------------------------------------------------- 1 | #include "../farsh.c" 2 | 3 | void farsh32_test ( const void * key, int len, unsigned seed, void * out ) 4 | { 5 | farsh_n(key,len,0,1,seed,out); 6 | } 7 | 8 | void farsh64_test ( const void * key, int len, unsigned seed, void * out ) 9 | { 10 | farsh_n(key,len,0,2,seed,out); 11 | } 12 | 13 | void farsh128_test ( const void * key, int len, unsigned seed, void * out ) 14 | { 15 | farsh_n(key,len,0,4,seed,out); 16 | } 17 | 18 | void farsh256_test ( const void * key, int len, unsigned seed, void * out ) 19 | { 20 | 
farsh_n(key,len,0,8,seed,out); 21 | } 22 | -------------------------------------------------------------------------------- /SMHasher/Hashes.cpp: -------------------------------------------------------------------------------- 1 | #include "Hashes.h" 2 | 3 | #include "Random.h" 4 | 5 | 6 | //---------------------------------------------------------------------------- 7 | // fake / bad hashes 8 | 9 | void DoNothingHash ( const void *, int, uint32_t, void * ) 10 | { 11 | } 12 | 13 | //----------------------------------------------------------------------------- 14 | // One-byte-at-a-time hash based on Murmur's mix 15 | 16 | uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed ) 17 | { 18 | const uint8_t * data = (const uint8_t*)key; 19 | 20 | uint32_t h = seed; 21 | 22 | for(int i = 0; i < len; i++) 23 | { 24 | h ^= data[i]; 25 | h *= 0x5bd1e995; 26 | h ^= h >> 15; 27 | } 28 | 29 | return h; 30 | } 31 | 32 | 33 | //----------------------------------------------------------------------------- 34 | // 32-bit parts of MurmurHash3_x86_128 35 | 36 | void Murmur3c_32 ( const void * key, const int len, uint32_t seed, void * out ) 37 | { 38 | uint32_t full_out[4]; 39 | MurmurHash3_x86_128 ( key, len, seed, full_out ); 40 | *(uint32_t*)out = full_out[0]; 41 | } 42 | 43 | void Murmur3c_32a ( const void * key, const int len, uint32_t seed, void * out ) 44 | { 45 | uint32_t full_out[4]; 46 | MurmurHash3_x86_128 ( key, len, seed, full_out ); 47 | *(uint32_t*)out = full_out[1]; 48 | } 49 | 50 | void Murmur3c_32b ( const void * key, const int len, uint32_t seed, void * out ) 51 | { 52 | uint32_t full_out[4]; 53 | MurmurHash3_x86_128 ( key, len, seed, full_out ); 54 | *(uint32_t*)out = full_out[2]; 55 | } 56 | 57 | void Murmur3c_32c ( const void * key, const int len, uint32_t seed, void * out ) 58 | { 59 | uint32_t full_out[4]; 60 | MurmurHash3_x86_128 ( key, len, seed, full_out ); 61 | *(uint32_t*)out = full_out[3]; 62 | } 63 | 
-------------------------------------------------------------------------------- /SMHasher/Hashes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Types.h" 4 | #include "MurmurHash3.h" 5 | 6 | //---------- 7 | // These are _not_ hash functions (even though people tend to use crc32 as one...) 8 | 9 | void DoNothingHash ( const void * key, int len, uint32_t seed, void * out ); 10 | void crc32 ( const void * key, int len, uint32_t seed, void * out ); 11 | 12 | //---------- 13 | // 32/64/128-bit parts of SHA1 14 | 15 | void sha1_32 ( const void * key, int len, uint32_t seed, void * out ); 16 | void sha1_32a ( const void * key, int len, uint32_t seed, void * out ); 17 | void sha1_32b ( const void * key, int len, uint32_t seed, void * out ); 18 | void sha1_32c ( const void * key, int len, uint32_t seed, void * out ); 19 | void sha1_64 ( const void * key, int len, uint32_t seed, void * out ); 20 | void sha1_64a ( const void * key, int len, uint32_t seed, void * out ); 21 | void sha1_128 ( const void * key, int len, uint32_t seed, void * out ); 22 | 23 | //---------- 24 | // General purpose hashes 25 | 26 | uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed ); 27 | 28 | void farsh32_test ( const void * key, int len, unsigned seed, void * out ); 29 | void farsh64_test ( const void * key, int len, unsigned seed, void * out ); 30 | void farsh128_test ( const void * key, int len, unsigned seed, void * out ); 31 | void farsh256_test ( const void * key, int len, unsigned seed, void * out ); 32 | 33 | void uhash32_test ( const void * key, int len, unsigned seed, void * out ); 34 | 35 | void vhash64_test ( const void * key, int len, unsigned seed, void * out ); 36 | void vhash128_test ( const void * key, int len, unsigned seed, void * out ); 37 | 38 | void poly1305_test ( const void * key, int len, unsigned seed, void * out ); 39 | 40 | 
//----------------------------------------------------------------------------- 41 | // SpookyHashV2 and its 32-bit parts 42 | 43 | void SpookyHash32_test ( const void * key, int len, unsigned seed, void * out ); 44 | void SpookyHash32a_test( const void * key, int len, unsigned seed, void * out ); 45 | void SpookyHash32b_test( const void * key, int len, unsigned seed, void * out ); 46 | void SpookyHash32c_test( const void * key, int len, unsigned seed, void * out ); 47 | void SpookyHash64_test ( const void * key, int len, unsigned seed, void * out ); 48 | void SpookyHash128_test( const void * key, int len, unsigned seed, void * out ); 49 | 50 | //----------------------------------------------------------------------------- 51 | // 32-bit parts of MurmurHash3_x86_128 52 | 53 | void Murmur3c_32 ( const void * key, const int len, uint32_t seed, void * out ); 54 | void Murmur3c_32a( const void * key, const int len, uint32_t seed, void * out ); 55 | void Murmur3c_32b( const void * key, const int len, uint32_t seed, void * out ); 56 | void Murmur3c_32c( const void * key, const int len, uint32_t seed, void * out ); 57 | 58 | //----------------------------------------------------------------------------- 59 | // xxHash 60 | 61 | void XXH32_test ( const void * key, int len, unsigned seed, void * out ); 62 | void XXH64_test ( const void * key, int len, unsigned seed, void * out ); 63 | 64 | //----------------------------------------------------------------------------- 65 | // XXH32 with XXH64 backend 66 | 67 | void ModXXH32_test ( const void * key, int len, unsigned seed, void * out ); 68 | void ModXXH32a_test ( const void * key, int len, unsigned seed, void * out ); 69 | void ModXXH64_test ( const void * key, int len, unsigned seed, void * out ); 70 | 71 | //----------------------------------------------------------------------------- 72 | // ZZH & SlowZZH: my experimental x86-optimized hashes 73 | 74 | void ZZH32_test ( const void * key, int len, unsigned seed, void * out 
); 75 | void ZZH32a_test ( const void * key, int len, unsigned seed, void * out ); 76 | void ZZH32b_test ( const void * key, int len, unsigned seed, void * out ); 77 | void ZZH32c_test ( const void * key, int len, unsigned seed, void * out ); 78 | void ZZH64_test ( const void * key, int len, unsigned seed, void * out ); 79 | void ZZH64a_test ( const void * key, int len, unsigned seed, void * out ); 80 | void ZZH128_test ( const void * key, int len, unsigned seed, void * out ); 81 | 82 | void SlowZZH32_test ( const void * key, int len, unsigned seed, void * out ); 83 | void SlowZZH32a_test ( const void * key, int len, unsigned seed, void * out ); 84 | void SlowZZH32b_test ( const void * key, int len, unsigned seed, void * out ); 85 | void SlowZZH32c_test ( const void * key, int len, unsigned seed, void * out ); 86 | void SlowZZH64_test ( const void * key, int len, unsigned seed, void * out ); 87 | void SlowZZH64a_test ( const void * key, int len, unsigned seed, void * out ); 88 | void SlowZZH128_test ( const void * key, int len, unsigned seed, void * out ); 89 | 90 | //----------------------------------------------------------------------------- 91 | // WideZZH & SlowWideZZH: my experimental x64-optimized hashes 92 | 93 | void WideZZH32_test ( const void * key, int len, unsigned seed, void * out ); 94 | void WideZZH32a_test ( const void * key, int len, unsigned seed, void * out ); 95 | void WideZZH64_test ( const void * key, int len, unsigned seed, void * out ); 96 | 97 | void SlowWideZZH32_test ( const void * key, int len, unsigned seed, void * out ); 98 | void SlowWideZZH32a_test ( const void * key, int len, unsigned seed, void * out ); 99 | void SlowWideZZH64_test ( const void * key, int len, unsigned seed, void * out ); 100 | 101 | //----------------------------------------------------------------------------- 102 | // SimdZZH: my experimental SIMD-optimized hashe 103 | 104 | void SimdZZH32_test ( const void * key, int len, unsigned seed, void * out ); 105 | 
void SimdZZH32a_test ( const void * key, int len, unsigned seed, void * out ); 106 | void SimdZZH64_test ( const void * key, int len, unsigned seed, void * out ); 107 | -------------------------------------------------------------------------------- /SMHasher/KeysetTest.cpp: -------------------------------------------------------------------------------- 1 | #include "KeysetTest.h" 2 | 3 | #include "Platform.h" 4 | #include "Random.h" 5 | 6 | #include 7 | #include 8 | 9 | //----------------------------------------------------------------------------- 10 | // This should hopefully be a thorough and uambiguous test of whether a hash 11 | // is correctly implemented on a given platform 12 | 13 | bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose ) 14 | { 15 | const int hashbytes = hashbits / 8; 16 | 17 | uint8_t * key = new uint8_t[256]; 18 | uint8_t * hashes = new uint8_t[hashbytes * 256]; 19 | uint8_t * final = new uint8_t[hashbytes]; 20 | 21 | memset(key,0,256); 22 | memset(hashes,0,hashbytes*256); 23 | memset(final,0,hashbytes); 24 | 25 | // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as 26 | // the seed 27 | 28 | for(int i = 0; i < 256; i++) 29 | { 30 | key[i] = (uint8_t)i; 31 | 32 | hash(key,i,256-i,&hashes[i*hashbytes]); 33 | } 34 | 35 | // Then hash the result array 36 | 37 | hash(hashes,hashbytes*256,0,final); 38 | 39 | // The first four bytes of that hash, interpreted as a little-endian integer, is our 40 | // verification value 41 | 42 | uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24); 43 | 44 | delete [] key; 45 | delete [] hashes; 46 | delete [] final; 47 | 48 | //---------- 49 | 50 | if(expected != verification) 51 | { 52 | if(verbose) printf("Verification value 0x%08X : Failed! 
(Expected 0x%08x)\n",verification,expected); 53 | return false; 54 | } 55 | else 56 | { 57 | if(verbose) printf("Verification value 0x%08X : Passed!\n",verification); 58 | return true; 59 | } 60 | } 61 | 62 | //---------------------------------------------------------------------------- 63 | // Basic sanity checks - 64 | 65 | // A hash function should not be reading outside the bounds of the key. 66 | 67 | // Flipping a bit of a key should, with overwhelmingly high probability, 68 | // result in a different hash. 69 | 70 | // Hashing the same key twice should always produce the same result. 71 | 72 | // The memory alignment of the key should not affect the hash result. 73 | 74 | bool SanityTest ( pfHash hash, const int hashbits ) 75 | { 76 | printf("Running sanity check 1"); 77 | 78 | Rand r(883741); 79 | 80 | bool result = true; 81 | 82 | const int hashbytes = hashbits/8; 83 | const int reps = 10; 84 | const int keymax = 256; 85 | const int pad = 16; 86 | const int buflen = keymax + pad*3; 87 | 88 | uint8_t * buffer1 = new uint8_t[buflen]; 89 | uint8_t * buffer2 = new uint8_t[buflen]; 90 | 91 | uint8_t * hash1 = new uint8_t[hashbytes]; 92 | uint8_t * hash2 = new uint8_t[hashbytes]; 93 | 94 | //---------- 95 | 96 | for(int irep = 0; irep < reps; irep++) 97 | { 98 | if(irep % (reps/10) == 0) printf("."); 99 | 100 | for(int len = 4; len <= keymax; len++) 101 | { 102 | for(int offset = pad; offset < pad*2; offset++) 103 | { 104 | uint8_t * key1 = &buffer1[pad]; 105 | uint8_t * key2 = &buffer2[pad+offset]; 106 | 107 | r.rand_p(buffer1,buflen); 108 | r.rand_p(buffer2,buflen); 109 | 110 | memcpy(key2,key1,len); 111 | 112 | hash(key1,len,0,hash1); 113 | 114 | for(int bit = 0; bit < (len * 8); bit++) 115 | { 116 | // Flip a bit, hash the key -> we should get a different result. 
117 | 118 | flipbit(key2,len,bit); 119 | hash(key2,len,0,hash2); 120 | 121 | if(memcmp(hash1,hash2,hashbytes) == 0) 122 | { 123 | result = false; 124 | } 125 | 126 | // Flip it back, hash again -> we should get the original result. 127 | 128 | flipbit(key2,len,bit); 129 | hash(key2,len,0,hash2); 130 | 131 | if(memcmp(hash1,hash2,hashbytes) != 0) 132 | { 133 | result = false; 134 | } 135 | } 136 | } 137 | } 138 | } 139 | 140 | if(result == false) 141 | { 142 | printf("*********FAIL*********\n"); 143 | } 144 | else 145 | { 146 | printf("PASS\n"); 147 | } 148 | 149 | delete [] buffer1; 150 | delete [] buffer2; 151 | 152 | delete [] hash1; 153 | delete [] hash2; 154 | 155 | return result; 156 | } 157 | 158 | //---------------------------------------------------------------------------- 159 | // Appending zero bytes to a key should always cause it to produce a different 160 | // hash value 161 | 162 | void AppendedZeroesTest ( pfHash hash, const int hashbits ) 163 | { 164 | printf("Running sanity check 2"); 165 | 166 | Rand r(173994); 167 | 168 | const int hashbytes = hashbits/8; 169 | 170 | for(int rep = 0; rep < 100; rep++) 171 | { 172 | if(rep % 10 == 0) printf("."); 173 | 174 | unsigned char key[256]; 175 | 176 | memset(key,0,sizeof(key)); 177 | 178 | r.rand_p(key,32); 179 | 180 | uint32_t h1[16]; 181 | uint32_t h2[16]; 182 | 183 | memset(h1,0,hashbytes); 184 | memset(h2,0,hashbytes); 185 | 186 | for(int i = 0; i < 32; i++) 187 | { 188 | hash(key,32+i,0,h1); 189 | 190 | if(memcmp(h1,h2,hashbytes) == 0) 191 | { 192 | printf("\n*********FAIL*********\n"); 193 | return; 194 | } 195 | 196 | memcpy(h2,h1,hashbytes); 197 | } 198 | } 199 | 200 | printf("PASS\n"); 201 | } 202 | 203 | //----------------------------------------------------------------------------- 204 | // Generate all keys of up to N bytes containing two non-zero bytes 205 | 206 | void TwoBytesKeygen ( int maxlen, KeyCallback & c ) 207 | { 208 | //---------- 209 | // Compute # of keys 210 | 211 | int 
keycount = 0; 212 | 213 | for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2); 214 | 215 | keycount *= 255*255; 216 | 217 | for(int i = 2; i <= maxlen; i++) keycount += i*255; 218 | 219 | printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount); 220 | 221 | c.reserve(keycount); 222 | 223 | //---------- 224 | // Add all keys with one non-zero byte 225 | 226 | uint8_t key[256]; 227 | 228 | memset(key,0,256); 229 | 230 | for(int keylen = 2; keylen <= maxlen; keylen++) 231 | for(int byteA = 0; byteA < keylen; byteA++) 232 | { 233 | for(int valA = 1; valA <= 255; valA++) 234 | { 235 | key[byteA] = (uint8_t)valA; 236 | 237 | c(key,keylen); 238 | } 239 | 240 | key[byteA] = 0; 241 | } 242 | 243 | //---------- 244 | // Add all keys with two non-zero bytes 245 | 246 | for(int keylen = 2; keylen <= maxlen; keylen++) 247 | for(int byteA = 0; byteA < keylen-1; byteA++) 248 | for(int byteB = byteA+1; byteB < keylen; byteB++) 249 | { 250 | for(int valA = 1; valA <= 255; valA++) 251 | { 252 | key[byteA] = (uint8_t)valA; 253 | 254 | for(int valB = 1; valB <= 255; valB++) 255 | { 256 | key[byteB] = (uint8_t)valB; 257 | c(key,keylen); 258 | } 259 | 260 | key[byteB] = 0; 261 | } 262 | 263 | key[byteA] = 0; 264 | } 265 | } 266 | 267 | //----------------------------------------------------------------------------- 268 | 269 | template< typename hashtype > 270 | void DumpCollisionMap ( CollisionMap & cmap ) 271 | { 272 | typedef CollisionMap cmap_t; 273 | 274 | for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it) 275 | { 276 | const hashtype & hash = (*it).first; 277 | 278 | printf("Hash - "); 279 | printbytes(&hash,sizeof(hashtype)); 280 | printf("\n"); 281 | 282 | std::vector & keys = (*it).second; 283 | 284 | for(int i = 0; i < (int)keys.size(); i++) 285 | { 286 | ByteVec & key = keys[i]; 287 | 288 | printf("Key - "); 289 | printbytes(&key[0],(int)key.size()); 290 | printf("\n"); 291 | } 292 | printf("\n"); 293 | } 294 | 295 | } 

// test code

// Hashes the 'TwoBytes' keyset (keys of up to 20 bytes), finds colliding
// hashes, regenerates the keyset to map each colliding hash back to its keys,
// and prints the resulting map.
// NOTE(review): the template arguments on std::vector, HashCallback, HashSet,
// CollisionMap, CollisionCallback and DumpCollisionMap were lost in extraction
// and cannot be determined from this file.
void ReportCollisions ( pfHash hash )
{
  printf("Hashing keyset\n");

  std::vector hashes;

  HashCallback c(hash,hashes);

  TwoBytesKeygen(20,c);

  printf("%d hashes\n",(int)hashes.size());

  printf("Finding collisions\n");

  HashSet collisions;

  FindCollisions(hashes,collisions,1000);

  printf("%d collisions\n",(int)collisions.size());

  printf("Mapping collisions\n");

  CollisionMap cmap;

  CollisionCallback c2(hash,collisions,cmap);

  // Second pass over the same keyset, now driving the collision callback.
  TwoBytesKeygen(20,c2);

  printf("Dumping collisions\n");

  DumpCollisionMap(cmap);
}

--------------------------------------------------------------------------------
/SMHasher/MurmurHash/MurmurHash3.cpp:
--------------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
9 | 10 | #include "MurmurHash3.h" 11 | 12 | //----------------------------------------------------------------------------- 13 | // Platform-specific functions and macros 14 | 15 | // Microsoft Visual Studio 16 | 17 | #if defined(_MSC_VER) 18 | 19 | #define FORCE_INLINE __forceinline 20 | 21 | #include 22 | 23 | #define ROTL32(x,y) _rotl(x,y) 24 | #define ROTL64(x,y) _rotl64(x,y) 25 | 26 | #define BIG_CONSTANT(x) (x) 27 | 28 | // Other compilers 29 | 30 | #else // defined(_MSC_VER) 31 | 32 | #define FORCE_INLINE inline __attribute__((always_inline)) 33 | 34 | inline uint32_t rotl32 ( uint32_t x, int8_t r ) 35 | { 36 | return (x << r) | (x >> (32 - r)); 37 | } 38 | 39 | inline uint64_t rotl64 ( uint64_t x, int8_t r ) 40 | { 41 | return (x << r) | (x >> (64 - r)); 42 | } 43 | 44 | #define ROTL32(x,y) rotl32(x,y) 45 | #define ROTL64(x,y) rotl64(x,y) 46 | 47 | #define BIG_CONSTANT(x) (x##LLU) 48 | 49 | #endif // !defined(_MSC_VER) 50 | 51 | //----------------------------------------------------------------------------- 52 | // Block read - if your platform needs to do endian-swapping or can only 53 | // handle aligned reads, do the conversion here 54 | 55 | FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) 56 | { 57 | return p[i]; 58 | } 59 | 60 | FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) 61 | { 62 | return p[i]; 63 | } 64 | 65 | //----------------------------------------------------------------------------- 66 | // Finalization mix - force all bits of a hash block to avalanche 67 | 68 | FORCE_INLINE uint32_t fmix32 ( uint32_t h ) 69 | { 70 | h ^= h >> 16; 71 | h *= 0x85ebca6b; 72 | h ^= h >> 13; 73 | h *= 0xc2b2ae35; 74 | h ^= h >> 16; 75 | 76 | return h; 77 | } 78 | 79 | //---------- 80 | 81 | FORCE_INLINE uint64_t fmix64 ( uint64_t k ) 82 | { 83 | k ^= k >> 33; 84 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 85 | k ^= k >> 33; 86 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 87 | k ^= k >> 33; 88 | 89 | return k; 90 | } 91 | 92 | 
//----------------------------------------------------------------------------- 93 | 94 | void MurmurHash3_x86_32 ( const void * key, int len, 95 | uint32_t seed, void * out ) 96 | { 97 | const uint8_t * data = (const uint8_t*)key; 98 | const int nblocks = len / 4; 99 | 100 | uint32_t h1 = seed; 101 | 102 | const uint32_t c1 = 0xcc9e2d51; 103 | const uint32_t c2 = 0x1b873593; 104 | 105 | //---------- 106 | // body 107 | 108 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); 109 | 110 | for(int i = -nblocks; i; i++) 111 | { 112 | uint32_t k1 = getblock32(blocks,i); 113 | 114 | k1 *= c1; 115 | k1 = ROTL32(k1,15); 116 | k1 *= c2; 117 | 118 | h1 ^= k1; 119 | h1 = ROTL32(h1,13); 120 | h1 = h1*5+0xe6546b64; 121 | } 122 | 123 | //---------- 124 | // tail 125 | 126 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4); 127 | 128 | uint32_t k1 = 0; 129 | 130 | switch(len & 3) 131 | { 132 | case 3: k1 ^= tail[2] << 16; 133 | case 2: k1 ^= tail[1] << 8; 134 | case 1: k1 ^= tail[0]; 135 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 136 | }; 137 | 138 | //---------- 139 | // finalization 140 | 141 | h1 ^= len; 142 | 143 | h1 = fmix32(h1); 144 | 145 | *(uint32_t*)out = h1; 146 | } 147 | 148 | //----------------------------------------------------------------------------- 149 | 150 | void MurmurHash3_x86_128 ( const void * key, const int len, 151 | uint32_t seed, void * out ) 152 | { 153 | const uint8_t * data = (const uint8_t*)key; 154 | const int nblocks = len / 16; 155 | 156 | uint32_t h1 = seed; 157 | uint32_t h2 = seed; 158 | uint32_t h3 = seed; 159 | uint32_t h4 = seed; 160 | 161 | const uint32_t c1 = 0x239b961b; 162 | const uint32_t c2 = 0xab0e9789; 163 | const uint32_t c3 = 0x38b34ae5; 164 | const uint32_t c4 = 0xa1e38b93; 165 | 166 | //---------- 167 | // body 168 | 169 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); 170 | 171 | for(int i = -nblocks; i; i++) 172 | { 173 | uint32_t k1 = getblock32(blocks,i*4+0); 174 | uint32_t 
k2 = getblock32(blocks,i*4+1); 175 | uint32_t k3 = getblock32(blocks,i*4+2); 176 | uint32_t k4 = getblock32(blocks,i*4+3); 177 | 178 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 179 | 180 | h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; 181 | 182 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 183 | 184 | h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; 185 | 186 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 187 | 188 | h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; 189 | 190 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 191 | 192 | h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; 193 | } 194 | 195 | //---------- 196 | // tail 197 | 198 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 199 | 200 | uint32_t k1 = 0; 201 | uint32_t k2 = 0; 202 | uint32_t k3 = 0; 203 | uint32_t k4 = 0; 204 | 205 | switch(len & 15) 206 | { 207 | case 15: k4 ^= tail[14] << 16; 208 | case 14: k4 ^= tail[13] << 8; 209 | case 13: k4 ^= tail[12] << 0; 210 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 211 | 212 | case 12: k3 ^= tail[11] << 24; 213 | case 11: k3 ^= tail[10] << 16; 214 | case 10: k3 ^= tail[ 9] << 8; 215 | case 9: k3 ^= tail[ 8] << 0; 216 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 217 | 218 | case 8: k2 ^= tail[ 7] << 24; 219 | case 7: k2 ^= tail[ 6] << 16; 220 | case 6: k2 ^= tail[ 5] << 8; 221 | case 5: k2 ^= tail[ 4] << 0; 222 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 223 | 224 | case 4: k1 ^= tail[ 3] << 24; 225 | case 3: k1 ^= tail[ 2] << 16; 226 | case 2: k1 ^= tail[ 1] << 8; 227 | case 1: k1 ^= tail[ 0] << 0; 228 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 229 | }; 230 | 231 | //---------- 232 | // finalization 233 | 234 | h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; 235 | 236 | h1 += h2; h1 += h3; h1 += h4; 237 | h2 += h1; h3 += h1; h4 += h1; 238 | 239 | h1 = fmix32(h1); 240 | h2 = fmix32(h2); 241 | h3 = fmix32(h3); 242 | h4 = fmix32(h4); 243 | 244 | h1 += h2; h1 += h3; h1 += h4; 245 | h2 
+= h1; h3 += h1; h4 += h1; 246 | 247 | ((uint32_t*)out)[0] = h1; 248 | ((uint32_t*)out)[1] = h2; 249 | ((uint32_t*)out)[2] = h3; 250 | ((uint32_t*)out)[3] = h4; 251 | } 252 | 253 | //----------------------------------------------------------------------------- 254 | 255 | void MurmurHash3_x64_128 ( const void * key, const int len, 256 | const uint32_t seed, void * out ) 257 | { 258 | const uint8_t * data = (const uint8_t*)key; 259 | const int nblocks = len / 16; 260 | 261 | uint64_t h1 = seed; 262 | uint64_t h2 = seed; 263 | 264 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 265 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 266 | 267 | //---------- 268 | // body 269 | 270 | const uint64_t * blocks = (const uint64_t *)(data); 271 | 272 | for(int i = 0; i < nblocks; i++) 273 | { 274 | uint64_t k1 = getblock64(blocks,i*2+0); 275 | uint64_t k2 = getblock64(blocks,i*2+1); 276 | 277 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 278 | 279 | h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 280 | 281 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 282 | 283 | h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; 284 | } 285 | 286 | //---------- 287 | // tail 288 | 289 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 290 | 291 | uint64_t k1 = 0; 292 | uint64_t k2 = 0; 293 | 294 | switch(len & 15) 295 | { 296 | case 15: k2 ^= ((uint64_t)tail[14]) << 48; 297 | case 14: k2 ^= ((uint64_t)tail[13]) << 40; 298 | case 13: k2 ^= ((uint64_t)tail[12]) << 32; 299 | case 12: k2 ^= ((uint64_t)tail[11]) << 24; 300 | case 11: k2 ^= ((uint64_t)tail[10]) << 16; 301 | case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; 302 | case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; 303 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 304 | 305 | case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; 306 | case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; 307 | case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; 308 | case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; 309 | case 4: k1 ^= ((uint64_t)tail[ 3]) << 
24; 310 | case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; 311 | case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; 312 | case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; 313 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 314 | }; 315 | 316 | //---------- 317 | // finalization 318 | 319 | h1 ^= len; h2 ^= len; 320 | 321 | h1 += h2; 322 | h2 += h1; 323 | 324 | h1 = fmix64(h1); 325 | h2 = fmix64(h2); 326 | 327 | h1 += h2; 328 | h2 += h1; 329 | 330 | ((uint64_t*)out)[0] = h1; 331 | ((uint64_t*)out)[1] = h2; 332 | } 333 | 334 | //----------------------------------------------------------------------------- 335 | 336 | -------------------------------------------------------------------------------- /SMHasher/MurmurHash/MurmurHash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 
4 | 5 | #ifndef _MURMURHASH3_H_ 6 | #define _MURMURHASH3_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) && (_MSC_VER < 1600) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned int uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 30 | 31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 32 | 33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | #endif // _MURMURHASH3_H_ 38 | -------------------------------------------------------------------------------- /SMHasher/Platform.cpp: -------------------------------------------------------------------------------- 1 | #include "Platform.h" 2 | 3 | #include 4 | 5 | void testRDTSC ( void ) 6 | { 7 | int64_t temp = rdtsc(); 8 | 9 | printf("%d",(int)temp); 10 | } 11 | 12 | #if defined(_WIN32) 13 | 14 | #include 15 | 16 | static DWORD_PTR process_mask = 1, system_mask = 1; 17 | int thread_priority = THREAD_PRIORITY_NORMAL; 18 | 19 | void SetAffinity ( int cpu ) 20 | { 21 | GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask); // i don't know why, but it can't fetch the process mask as set by the "start" command 22 | thread_priority = GetThreadPriority (GetCurrentThread ()); 23 | 24 | SetThreadAffinityMask(GetCurrentThread(),cpu); 25 | SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); 26 | } 27 | 28 | void ResetAffinity() 29 | { 30 | 
SetThreadAffinityMask(GetCurrentThread(),process_mask); 31 | SetThreadPriority(GetCurrentThread(), thread_priority); 32 | } 33 | 34 | #else 35 | 36 | #include 37 | 38 | void SetAffinity ( int /*cpu*/ ) 39 | { 40 | #if !defined(__CYGWIN__) && !defined(__APPLE__) 41 | cpu_set_t mask; 42 | 43 | CPU_ZERO(&mask); 44 | 45 | CPU_SET(2,&mask); 46 | 47 | if( sched_setaffinity(0,sizeof(mask),&mask) == -1) 48 | { 49 | printf("WARNING: Could not set CPU affinity\n"); 50 | } 51 | #endif 52 | } 53 | 54 | void ResetAffinity() 55 | { 56 | // #TODO 57 | } 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /SMHasher/Platform.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Platform-specific functions and macros 3 | 4 | #pragma once 5 | 6 | void SetAffinity ( int cpu ); 7 | void ResetAffinity(); 8 | 9 | //----------------------------------------------------------------------------- 10 | // Microsoft Visual Studio 11 | 12 | #if defined(_MSC_VER) 13 | 14 | #define FORCE_INLINE __forceinline 15 | #define NEVER_INLINE __declspec(noinline) 16 | 17 | #include 18 | #include // Has to be included before intrin.h or VC complains about 'ceil' 19 | #include // for __rdtsc 20 | #include "pstdint.h" 21 | 22 | #define ROTL32(x,y) _rotl(x,y) 23 | #define ROTL64(x,y) _rotl64(x,y) 24 | #define ROTR32(x,y) _rotr(x,y) 25 | #define ROTR64(x,y) _rotr64(x,y) 26 | 27 | #pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest 28 | #pragma warning(disable : 4100) 29 | #pragma warning(disable : 4702) 30 | 31 | #define BIG_CONSTANT(x) (x) 32 | 33 | // RDTSC == Read Time Stamp Counter 34 | 35 | #define rdtsc() __rdtsc() 36 | 37 | //----------------------------------------------------------------------------- 38 | // Other compilers 39 | 40 | #else // defined(_MSC_VER) 41 | 42 | #include 43 | 
44 | #define FORCE_INLINE inline __attribute__((always_inline)) 45 | #define NEVER_INLINE __attribute__((noinline)) 46 | 47 | inline uint32_t rotl32 ( uint32_t x, int8_t r ) 48 | { 49 | return (x << r) | (x >> (32 - r)); 50 | } 51 | 52 | inline uint64_t rotl64 ( uint64_t x, int8_t r ) 53 | { 54 | return (x << r) | (x >> (64 - r)); 55 | } 56 | 57 | inline uint32_t rotr32 ( uint32_t x, int8_t r ) 58 | { 59 | return (x >> r) | (x << (32 - r)); 60 | } 61 | 62 | inline uint64_t rotr64 ( uint64_t x, int8_t r ) 63 | { 64 | return (x >> r) | (x << (64 - r)); 65 | } 66 | 67 | #define ROTL32(x,y) rotl32(x,y) 68 | #define ROTL64(x,y) rotl64(x,y) 69 | #define ROTR32(x,y) rotr32(x,y) 70 | #define ROTR64(x,y) rotr64(x,y) 71 | 72 | #define BIG_CONSTANT(x) (x##LLU) 73 | 74 | __inline__ unsigned long long int rdtsc() 75 | { 76 | #ifdef __x86_64__ 77 | unsigned int a, d; 78 | __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d)); 79 | return (unsigned long)a | ((unsigned long long)d << 32); 80 | #elif defined(__i386__) 81 | unsigned long long int x; 82 | __asm__ volatile ("rdtsc" : "=A" (x)); 83 | return x; 84 | #else 85 | #define NO_CYCLE_COUNTER 86 | return 0; 87 | #endif 88 | } 89 | 90 | #include 91 | #define _stricmp strcasecmp 92 | 93 | #endif // !defined(_MSC_VER) 94 | 95 | //----------------------------------------------------------------------------- 96 | -------------------------------------------------------------------------------- /SMHasher/Poly1305Test.cpp: -------------------------------------------------------------------------------- 1 | #include "poly1305/poly1305.c" 2 | 3 | const unsigned int poly1305_key[POLY1305_KEYLEN/4] = { 4 | // 16-byte AES key k 5 | 0xb8fe6c39,0x23a44bbe,0x7c01812c,0xf721ad1c, 6 | // r[3], r[7], r[11], r[15] are required to have their top four bits clear, and r[4], r[8], r[12] are required to have their bottom two bits clear 7 | 0xed46de8, 0x39097d8, 0x240a4a4, 0x7b3671c}; 8 | 9 | void poly1305_test ( const void * key, int len, unsigned seed, void 
* out ) 10 | { 11 | poly1305_auth ((unsigned char*)out, (const unsigned char*)key, len, (const unsigned char*)poly1305_key); 12 | } 13 | -------------------------------------------------------------------------------- /SMHasher/Random.cpp: -------------------------------------------------------------------------------- 1 | #include "Random.h" 2 | 3 | Rand g_rand1(1); 4 | Rand g_rand2(2); 5 | Rand g_rand3(3); 6 | Rand g_rand4(4); 7 | 8 | //----------------------------------------------------------------------------- 9 | -------------------------------------------------------------------------------- /SMHasher/Random.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Types.h" 4 | 5 | //----------------------------------------------------------------------------- 6 | // Xorshift RNG based on code by George Marsaglia 7 | // http://en.wikipedia.org/wiki/Xorshift 8 | 9 | struct Rand 10 | { 11 | uint32_t x; 12 | uint32_t y; 13 | uint32_t z; 14 | uint32_t w; 15 | 16 | Rand() 17 | { 18 | reseed(uint32_t(0)); 19 | } 20 | 21 | Rand( uint32_t seed ) 22 | { 23 | reseed(seed); 24 | } 25 | 26 | void reseed ( uint32_t seed ) 27 | { 28 | x = 0x498b3bc5 ^ seed; 29 | y = 0; 30 | z = 0; 31 | w = 0; 32 | 33 | for(int i = 0; i < 10; i++) mix(); 34 | } 35 | 36 | void reseed ( uint64_t seed ) 37 | { 38 | x = 0x498b3bc5 ^ (uint32_t)(seed >> 0); 39 | y = 0x5a05089a ^ (uint32_t)(seed >> 32); 40 | z = 0; 41 | w = 0; 42 | 43 | for(int i = 0; i < 10; i++) mix(); 44 | } 45 | 46 | //----------------------------------------------------------------------------- 47 | 48 | void mix ( void ) 49 | { 50 | uint32_t t = x ^ (x << 11); 51 | x = y; y = z; z = w; 52 | w = w ^ (w >> 19) ^ t ^ (t >> 8); 53 | } 54 | 55 | uint32_t rand_u32 ( void ) 56 | { 57 | mix(); 58 | 59 | return x; 60 | } 61 | 62 | uint64_t rand_u64 ( void ) 63 | { 64 | mix(); 65 | 66 | uint64_t a = x; 67 | uint64_t b = y; 68 | 69 | return (a << 32) | b; 70 | } 71 | 72 | 
void rand_p ( void * blob, int bytes ) 73 | { 74 | uint32_t * blocks = reinterpret_cast(blob); 75 | 76 | while(bytes >= 4) 77 | { 78 | blocks[0] = rand_u32(); 79 | blocks++; 80 | bytes -= 4; 81 | } 82 | 83 | uint8_t * tail = reinterpret_cast(blocks); 84 | 85 | for(int i = 0; i < bytes; i++) 86 | { 87 | tail[i] = (uint8_t)rand_u32(); 88 | } 89 | } 90 | }; 91 | 92 | //----------------------------------------------------------------------------- 93 | 94 | extern Rand g_rand1; 95 | 96 | inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); } 97 | inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); } 98 | 99 | inline void rand_p ( void * blob, int bytes ) 100 | { 101 | uint32_t * blocks = (uint32_t*)blob; 102 | 103 | while(bytes >= 4) 104 | { 105 | *blocks++ = rand_u32(); 106 | bytes -= 4; 107 | } 108 | 109 | uint8_t * tail = (uint8_t*)blocks; 110 | 111 | for(int i = 0; i < bytes; i++) 112 | { 113 | tail[i] = (uint8_t)rand_u32(); 114 | } 115 | } 116 | 117 | //----------------------------------------------------------------------------- 118 | -------------------------------------------------------------------------------- /SMHasher/SHA1/sha1.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | SHA-1 in C 3 | By Steve Reid 4 | 100% Public Domain 5 | 6 | ----------------- 7 | Modified 7/98 8 | By James H. Brown 9 | Still 100% Public Domain 10 | 11 | Corrected a problem which generated improper hash values on 16 bit machines 12 | Routine SHA1Update changed from 13 | void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int 14 | len) 15 | to 16 | void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned 17 | long len) 18 | 19 | The 'len' parameter was declared an int which works fine on 32 bit machines. 20 | However, on 16 bit machines an int is too small for the shifts being done 21 | against 22 | it. 
This caused the hash function to generate incorrect values if len was 23 | greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update(). 24 | 25 | Since the file IO in main() reads 16K at a time, any file 8K or larger would 26 | be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million 27 | "a"s). 28 | 29 | I also changed the declaration of variables i & j in SHA1Update to 30 | unsigned long from unsigned int for the same reason. 31 | 32 | These changes should make no difference to any 32 bit implementations since 33 | an 34 | int and a long are the same size in those environments. 35 | 36 | -- 37 | I also corrected a few compiler warnings generated by Borland C. 38 | 1. Added #include for exit() prototype 39 | 2. Removed unused variable 'j' in SHA1Final 40 | 3. Changed exit(0) to return(0) at end of main. 41 | 42 | ALL changes I made can be located by searching for comments containing 'JHB' 43 | ----------------- 44 | Modified 8/98 45 | By Steve Reid 46 | Still 100% public domain 47 | 48 | 1- Removed #include and used return() instead of exit() 49 | 2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall) 50 | 3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net 51 | 52 | ----------------- 53 | Modified 4/01 54 | By Saul Kravitz 55 | Still 100% PD 56 | Modified to run on Compaq Alpha hardware. 
57 | 58 | ----------------- 59 | Modified 07/2002 60 | By Ralph Giles 61 | Still 100% public domain 62 | modified for use with stdint types, autoconf 63 | code cleanup, removed attribution comments 64 | switched SHA1Final() argument order for consistency 65 | use SHA1_ prefix for public api 66 | move public api to sha1.h 67 | */ 68 | 69 | /* 70 | Test Vectors (from FIPS PUB 180-1) 71 | "abc" 72 | A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D 73 | "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" 74 | 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 75 | A million repetitions of "a" 76 | 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F 77 | */ 78 | 79 | #include 80 | #include 81 | #include 82 | 83 | #include "sha1.h" 84 | 85 | #if defined(_MSC_VER) 86 | #pragma warning(disable : 4267) 87 | #pragma warning(disable : 4996) 88 | #pragma warning(disable : 4100) 89 | #endif 90 | 91 | void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]); 92 | 93 | #define rol ROTL32 94 | 95 | /* blk0() and blk() perform the initial expand. */ 96 | /* I got the idea of expanding during the round function from SSLeay */ 97 | /* FIXME: can we do this in an endian-proof way? 
*/ 98 | 99 | #ifdef WORDS_BIGENDIAN 100 | #define blk0(i) block->l[i] 101 | #else 102 | #define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF)) 103 | #endif 104 | #define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1)) 105 | 106 | /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ 107 | #define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); 108 | #define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); 109 | #define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); 110 | #define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); 111 | #define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); 112 | 113 | 114 | /* Hash a single 512-bit block. This is the core of the algorithm. */ 115 | void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]) 116 | { 117 | uint32_t a, b, c, d, e; 118 | typedef union { 119 | uint8_t c[64]; 120 | uint32_t l[16]; 121 | } CHAR64LONG16; 122 | CHAR64LONG16* block; 123 | 124 | block = (CHAR64LONG16*)buffer; 125 | 126 | /* Copy context->state[] to working vars */ 127 | a = state[0]; 128 | b = state[1]; 129 | c = state[2]; 130 | d = state[3]; 131 | e = state[4]; 132 | 133 | /* 4 rounds of 20 operations each. Loop unrolled. 
*/ 134 | R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); 135 | R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); 136 | R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); 137 | R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); 138 | R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); 139 | R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); 140 | R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); 141 | R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); 142 | R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); 143 | R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); 144 | R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); 145 | R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); 146 | R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); 147 | R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); 148 | R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); 149 | R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); 150 | R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); 151 | R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); 152 | R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); 153 | R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); 154 | 155 | /* Add the working vars back into context.state[] */ 156 | state[0] += a; 157 | state[1] += b; 158 | state[2] += c; 159 | state[3] += d; 160 | state[4] += e; 161 | 162 | /* Wipe variables */ 163 | a = b = c = d = e = 0; 164 | } 165 | 166 | 167 | /* SHA1Init - Initialize new context */ 168 | void SHA1_Init(SHA1_CTX* context) 169 | { 170 | /* SHA1 initialization constants */ 171 | context->state[0] = 0x67452301; 172 | 
context->state[1] = 0xEFCDAB89; 173 | context->state[2] = 0x98BADCFE; 174 | context->state[3] = 0x10325476; 175 | context->state[4] = 0xC3D2E1F0; 176 | context->count[0] = 0; 177 | context->count[1] = 0; 178 | } 179 | 180 | 181 | /* Run your data through this. */ 182 | void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len) 183 | { 184 | size_t i, j; 185 | 186 | j = (context->count[0] >> 3) & 63; 187 | if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++; 188 | 189 | context->count[1] += (len >> 29); 190 | 191 | if ((j + len) > 63) 192 | { 193 | memcpy(&context->buffer[j], data, (i = 64-j)); 194 | SHA1_Transform(context->state, context->buffer); 195 | 196 | for ( ; i + 63 < len; i += 64) 197 | { 198 | SHA1_Transform(context->state, data + i); 199 | } 200 | 201 | j = 0; 202 | } 203 | else i = 0; 204 | memcpy(&context->buffer[j], &data[i], len - i); 205 | } 206 | 207 | 208 | /* Add padding and return the message digest. */ 209 | void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]) 210 | { 211 | uint32_t i; 212 | uint8_t finalcount[8]; 213 | 214 | for (i = 0; i < 8; i++) { 215 | finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] 216 | >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ 217 | } 218 | SHA1_Update(context, (uint8_t *)"\200", 1); 219 | while ((context->count[0] & 504) != 448) { 220 | SHA1_Update(context, (uint8_t *)"\0", 1); 221 | } 222 | SHA1_Update(context, finalcount, 8); /* Should cause a SHA1_Transform() */ 223 | for (i = 0; i < SHA1_DIGEST_SIZE; i++) { 224 | digest[i] = (uint8_t) 225 | ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); 226 | } 227 | 228 | /* Wipe variables */ 229 | i = 0; 230 | memset(context->buffer, 0, 64); 231 | memset(context->state, 0, 20); 232 | memset(context->count, 0, 8); 233 | memset(finalcount, 0, 8); /* SWR */ 234 | } 235 | 236 | //----------------------------------------------------------------------------- 237 | // self test 238 | 239 | //#define TEST 240 | 241 | #ifdef TEST 242 | 243 | static char *test_data[] = { 244 | "abc", 245 | "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 246 | "A million repetitions of 'a'"}; 247 | static char *test_results[] = { 248 | "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D", 249 | "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1", 250 | "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"}; 251 | 252 | 253 | void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output) 254 | { 255 | int i,j; 256 | char *c = output; 257 | 258 | for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) { 259 | for (j = 0; j < 4; j++) { 260 | sprintf(c,"%02X", digest[i*4+j]); 261 | c += 2; 262 | } 263 | sprintf(c, " "); 264 | c += 1; 265 | } 266 | *(c - 1) = '\0'; 267 | } 268 | 269 | int main(int argc, char** argv) 270 | { 271 | int k; 272 | SHA1_CTX context; 273 | uint8_t digest[20]; 274 | char output[80]; 275 | 276 | fprintf(stdout, "verifying SHA-1 implementation... 
"); 277 | 278 | for (k = 0; k < 2; k++){ 279 | SHA1_Init(&context); 280 | SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k])); 281 | SHA1_Final(&context, digest); 282 | digest_to_hex(digest, output); 283 | 284 | if (strcmp(output, test_results[k])) { 285 | fprintf(stdout, "FAIL\n"); 286 | fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]); 287 | fprintf(stderr,"\t%s returned\n", output); 288 | fprintf(stderr,"\t%s is correct\n", test_results[k]); 289 | return (1); 290 | } 291 | } 292 | /* million 'a' vector we feed separately */ 293 | SHA1_Init(&context); 294 | for (k = 0; k < 1000000; k++) 295 | SHA1_Update(&context, (uint8_t*)"a", 1); 296 | SHA1_Final(&context, digest); 297 | digest_to_hex(digest, output); 298 | if (strcmp(output, test_results[2])) { 299 | fprintf(stdout, "FAIL\n"); 300 | fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]); 301 | fprintf(stderr,"\t%s returned\n", output); 302 | fprintf(stderr,"\t%s is correct\n", test_results[2]); 303 | return (1); 304 | } 305 | 306 | /* success */ 307 | fprintf(stdout, "ok\n"); 308 | return(0); 309 | } 310 | #endif /* TEST */ 311 | -------------------------------------------------------------------------------- /SMHasher/SHA1/sha1.h: -------------------------------------------------------------------------------- 1 | /* public api for steve reid's public domain SHA-1 implementation */ 2 | /* this file is in the public domain */ 3 | 4 | #pragma once 5 | 6 | #include "../Platform.h" 7 | 8 | struct SHA1_CTX 9 | { 10 | uint32_t state[5]; 11 | uint32_t count[2]; 12 | uint8_t buffer[64]; 13 | }; 14 | 15 | #define SHA1_DIGEST_SIZE 20 16 | 17 | void SHA1_Init(SHA1_CTX* context); 18 | void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len); 19 | void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]); 20 | -------------------------------------------------------------------------------- /SMHasher/SpeedTest.cpp: 
-------------------------------------------------------------------------------- 1 | #include "SpeedTest.h" 2 | 3 | #include "Random.h" 4 | 5 | #include // for printf 6 | #include // for memset 7 | #include // for sqrt 8 | #include // for sort 9 | 10 | //----------------------------------------------------------------------------- 11 | // We view our timing values as a series of random variables V that has been 12 | // contaminated with occasional outliers due to cache misses, thread 13 | // preemption, etcetera. To filter out the outliers, we search for the largest 14 | // subset of V such that all its values are within three standard deviations 15 | // of the mean. 16 | 17 | double CalcMean ( std::vector & v ) 18 | { 19 | double mean = 0; 20 | 21 | for(int i = 0; i < (int)v.size(); i++) 22 | { 23 | mean += v[i]; 24 | } 25 | 26 | mean /= double(v.size()); 27 | 28 | return mean; 29 | } 30 | 31 | double CalcMean ( std::vector & v, int a, int b ) 32 | { 33 | double mean = 0; 34 | 35 | for(int i = a; i <= b; i++) 36 | { 37 | mean += v[i]; 38 | } 39 | 40 | mean /= (b-a+1); 41 | 42 | return mean; 43 | } 44 | 45 | double CalcStdv ( std::vector & v, int a, int b ) 46 | { 47 | double mean = CalcMean(v,a,b); 48 | 49 | double stdv = 0; 50 | 51 | for(int i = a; i <= b; i++) 52 | { 53 | double x = v[i] - mean; 54 | 55 | stdv += x*x; 56 | } 57 | 58 | stdv = sqrt(stdv / (b-a+1)); 59 | 60 | return stdv; 61 | } 62 | 63 | // Return true if the largest value in v[0,len) is more than three 64 | // standard deviations from the mean 65 | 66 | bool ContainsOutlier ( std::vector & v, size_t len ) 67 | { 68 | double mean = 0; 69 | 70 | for(size_t i = 0; i < len; i++) 71 | { 72 | mean += v[i]; 73 | } 74 | 75 | mean /= double(len); 76 | 77 | double stdv = 0; 78 | 79 | for(size_t i = 0; i < len; i++) 80 | { 81 | double x = v[i] - mean; 82 | stdv += x*x; 83 | } 84 | 85 | stdv = sqrt(stdv / double(len)); 86 | 87 | double cutoff = mean + stdv*3; 88 | 89 | return v[len-1] > cutoff; 90 | } 91 | 
92 | // Do a binary search to find the largest subset of v that does not contain 93 | // outliers. 94 | 95 | void FilterOutliers ( std::vector & v ) 96 | { 97 | std::sort(v.begin(),v.end()); 98 | 99 | size_t len = 0; 100 | 101 | for(size_t x = 0x40000000; x; x = x >> 1 ) 102 | { 103 | if((len | x) >= v.size()) continue; 104 | 105 | if(!ContainsOutlier(v,len | x)) 106 | { 107 | len |= x; 108 | } 109 | } 110 | 111 | v.resize(len); 112 | } 113 | 114 | // Iteratively tighten the set to find a subset that does not contain 115 | // outliers. I'm not positive this works correctly in all cases. 116 | 117 | void FilterOutliers2 ( std::vector & v ) 118 | { 119 | std::sort(v.begin(),v.end()); 120 | 121 | int a = 0; 122 | int b = (int)(v.size() - 1); 123 | 124 | for(int i = 0; i < 10; i++) 125 | { 126 | //printf("%d %d\n",a,b); 127 | 128 | double mean = CalcMean(v,a,b); 129 | double stdv = CalcStdv(v,a,b); 130 | 131 | double cutA = mean - stdv*3; 132 | double cutB = mean + stdv*3; 133 | 134 | while((a < b) && (v[a] < cutA)) a++; 135 | while((b > a) && (v[b] > cutB)) b--; 136 | } 137 | 138 | std::vector v2; 139 | 140 | v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1); 141 | 142 | v.swap(v2); 143 | } 144 | 145 | //----------------------------------------------------------------------------- 146 | // We really want the rdtsc() calls to bracket the function call as tightly 147 | // as possible, but that's hard to do portably. We'll try and get as close as 148 | // possible by marking the function as NEVER_INLINE (to keep the optimizer from 149 | // moving it) and marking the timing variables as "volatile register". 
150 | 151 | NEVER_INLINE int64_t timehash ( pfHash hash, int hashsize, const void * key, int len, int seed, const int repeats, bool measure_throughput ) 152 | { 153 | volatile register int64_t begin,end; 154 | 155 | uint32_t temp[16]; 156 | 157 | begin = rdtsc(); 158 | 159 | if (measure_throughput) { 160 | for(int i = 0; i < repeats; i++) 161 | { 162 | hash(key,len,seed,temp); 163 | } 164 | } else { // measure back-to-back latency 165 | switch (hashsize) 166 | { 167 | case 32: for(int i = 0; i < repeats; i++) 168 | { 169 | hash(key,len,seed,temp); 170 | seed = temp[0]; // ensure that new seed depends on ALL bits of hash result 171 | } 172 | break; 173 | 174 | case 64: for(int i = 0; i < repeats; i++) 175 | { 176 | hash(key,len,seed,temp); 177 | seed = (sizeof(size_t) == 4? temp[0] + temp[1] 178 | : (*(uint64_t*)temp >> 1)); 179 | } 180 | break; 181 | 182 | case 128: for(int i = 0; i < repeats; i++) 183 | { 184 | hash(key,len,seed,temp); 185 | seed = temp[0] + temp[1] + temp[2] + temp[3]; 186 | } 187 | break; 188 | 189 | case 256: for(int i = 0; i < repeats; i++) 190 | { 191 | hash(key,len,seed,temp); 192 | seed = temp[0]; 193 | for (int j=1; j < 256/32; j++) 194 | seed += temp[j]; 195 | } 196 | break; 197 | 198 | case 512: for(int i = 0; i < repeats; i++) 199 | { 200 | hash(key,len,seed,temp); 201 | seed = temp[0]; 202 | for (int j=1; j < 512/32; j++) 203 | seed += temp[j]; 204 | } 205 | break; 206 | 207 | } 208 | } 209 | 210 | end = rdtsc(); 211 | 212 | return end-begin; 213 | } 214 | 215 | //----------------------------------------------------------------------------- 216 | 217 | double SpeedTest ( pfHash hash, int hashsize, uint32_t seed, const int trials, const int repeats, const int blocksize, const int align, bool measure_throughput ) 218 | { 219 | Rand r(seed); 220 | 221 | uint8_t * buf = new uint8_t[blocksize + 512]; 222 | 223 | uint64_t t1 = reinterpret_cast(buf); 224 | 225 | t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00); 226 | t1 += align; 227 | 228 
| uint8_t * block = reinterpret_cast(t1); 229 | 230 | r.rand_p(block,blocksize); 231 | 232 | //---------- 233 | 234 | std::vector times; 235 | times.reserve(trials); 236 | 237 | for(int itrial = 0; itrial < trials; itrial++) 238 | { 239 | r.rand_p(block,blocksize); 240 | 241 | double t = (double)timehash(hash,hashsize,block,blocksize,itrial,repeats,measure_throughput); 242 | 243 | if(t > 0) times.push_back(t); 244 | } 245 | 246 | //---------- 247 | 248 | std::sort(times.begin(),times.end()); 249 | 250 | FilterOutliers(times); 251 | 252 | delete [] buf; 253 | 254 | return CalcMean(times)/repeats; 255 | } 256 | 257 | //----------------------------------------------------------------------------- 258 | // 256k blocks seem to give the best results. 259 | 260 | void BulkSpeedTest ( pfHash hash, int hashsize, uint32_t seed ) 261 | { 262 | const int trials = 2999; 263 | const int repeats = 1; 264 | const int blocksize = 256 * 1024; 265 | const bool measure_throughput = true; 266 | 267 | printf("Bulk speed test - %d-byte keys\n",blocksize); 268 | 269 | for(int align = 0; align < 8; align++) 270 | { 271 | double cycles = SpeedTest(hash,hashsize,seed,trials,repeats,blocksize,align,measure_throughput); 272 | 273 | double bestbpc = double(blocksize)/cycles; 274 | 275 | double bestbps = (bestbpc * 3000000000.0 / 1048576.0); 276 | printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps); 277 | } 278 | } 279 | 280 | //----------------------------------------------------------------------------- 281 | 282 | void TinySpeedTest ( pfHash hash, int hashsize, int max_keysize, uint32_t seed, bool verbose ) 283 | { 284 | const int trials = 1000; 285 | const int repeats = 1000; 286 | std::vector cycles_latency(max_keysize+1); 287 | std::vector cycles_throughput(max_keysize+1); 288 | 289 | printf("Small key speed test"); 290 | 291 | for (int i=0; i<10; i++) 292 | { 293 | if(verbose) printf("."); 294 | 295 | for(int keysize = 0; keysize <= max_keysize; 
keysize++) 296 | { 297 | double cycles; 298 | 299 | cycles = SpeedTest(hash,hashsize,seed,trials,repeats,keysize,0,false); 300 | if (i==0 || cycles < cycles_latency[keysize]) 301 | cycles_latency[keysize] = cycles; 302 | 303 | cycles = SpeedTest(hash,hashsize,seed,trials,repeats,keysize,0,true); 304 | if (i==0 || cycles < cycles_throughput[keysize]) 305 | cycles_throughput[keysize] = cycles; 306 | } 307 | } 308 | printf("\n"); 309 | 310 | for(int keysize = 0; keysize <= max_keysize; keysize++) 311 | { 312 | printf("%4d-byte keys - latency %8.2f cycles/hash, throughput %8.2f cycles/hash\n", 313 | keysize, cycles_latency[keysize], cycles_throughput[keysize]); 314 | } 315 | } 316 | 317 | //----------------------------------------------------------------------------- 318 | -------------------------------------------------------------------------------- /SMHasher/SpeedTest.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Types.h" 4 | 5 | void BulkSpeedTest ( pfHash hash, int hashsize, uint32_t seed ); 6 | void TinySpeedTest ( pfHash hash, int hashsize, int max_keysize, uint32_t seed, bool verbose ); 7 | 8 | //----------------------------------------------------------------------------- 9 | -------------------------------------------------------------------------------- /SMHasher/SpookyHash/SpookyV2.cpp: -------------------------------------------------------------------------------- 1 | // Spooky Hash 2 | // A 128-bit noncryptographic hash, for checksums and table lookup 3 | // By Bob Jenkins. Public domain. 
4 | // Oct 31 2010: published framework, disclaimer ShortHash isn't right 5 | // Nov 7 2010: disabled ShortHash 6 | // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again 7 | // April 10 2012: buffer overflow on platforms without unaligned reads 8 | // July 12 2012: was passing out variables in final to in/out in short 9 | // July 30 2012: I reintroduced the buffer overflow 10 | // August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash 11 | 12 | #include 13 | #include "SpookyV2.h" 14 | 15 | #define ALLOW_UNALIGNED_READS 1 16 | 17 | // 18 | // short hash ... it could be used on any message, 19 | // but it's used by Spooky just for short messages. 20 | // 21 | void SpookyHash::Short( 22 | const void *message, 23 | size_t length, 24 | uint64 *hash1, 25 | uint64 *hash2) 26 | { 27 | uint64 buf[2*sc_numVars]; 28 | union 29 | { 30 | const uint8 *p8; 31 | uint32 *p32; 32 | uint64 *p64; 33 | size_t i; 34 | } u; 35 | 36 | u.p8 = (const uint8 *)message; 37 | 38 | if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) 39 | { 40 | memcpy(buf, message, length); 41 | u.p64 = buf; 42 | } 43 | 44 | size_t remainder = length%32; 45 | uint64 a=*hash1; 46 | uint64 b=*hash2; 47 | uint64 c=sc_const; 48 | uint64 d=sc_const; 49 | 50 | if (length > 15) 51 | { 52 | const uint64 *end = u.p64 + (length/32)*4; 53 | 54 | // handle all complete sets of 32 bytes 55 | for (; u.p64 < end; u.p64 += 4) 56 | { 57 | c += u.p64[0]; 58 | d += u.p64[1]; 59 | ShortMix(a,b,c,d); 60 | a += u.p64[2]; 61 | b += u.p64[3]; 62 | } 63 | 64 | //Handle the case of 16+ remaining bytes. 
65 | if (remainder >= 16) 66 | { 67 | c += u.p64[0]; 68 | d += u.p64[1]; 69 | ShortMix(a,b,c,d); 70 | u.p64 += 2; 71 | remainder -= 16; 72 | } 73 | } 74 | 75 | // Handle the last 0..15 bytes, and its length 76 | d += ((uint64)length) << 56; 77 | switch (remainder) 78 | { 79 | case 15: 80 | d += ((uint64)u.p8[14]) << 48; 81 | case 14: 82 | d += ((uint64)u.p8[13]) << 40; 83 | case 13: 84 | d += ((uint64)u.p8[12]) << 32; 85 | case 12: 86 | d += u.p32[2]; 87 | c += u.p64[0]; 88 | break; 89 | case 11: 90 | d += ((uint64)u.p8[10]) << 16; 91 | case 10: 92 | d += ((uint64)u.p8[9]) << 8; 93 | case 9: 94 | d += (uint64)u.p8[8]; 95 | case 8: 96 | c += u.p64[0]; 97 | break; 98 | case 7: 99 | c += ((uint64)u.p8[6]) << 48; 100 | case 6: 101 | c += ((uint64)u.p8[5]) << 40; 102 | case 5: 103 | c += ((uint64)u.p8[4]) << 32; 104 | case 4: 105 | c += u.p32[0]; 106 | break; 107 | case 3: 108 | c += ((uint64)u.p8[2]) << 16; 109 | case 2: 110 | c += ((uint64)u.p8[1]) << 8; 111 | case 1: 112 | c += (uint64)u.p8[0]; 113 | break; 114 | case 0: 115 | c += sc_const; 116 | d += sc_const; 117 | } 118 | ShortEnd(a,b,c,d); 119 | *hash1 = a; 120 | *hash2 = b; 121 | } 122 | 123 | 124 | 125 | 126 | // do the whole hash in one call 127 | void SpookyHash::Hash128( 128 | const void *message, 129 | size_t length, 130 | uint64 *hash1, 131 | uint64 *hash2) 132 | { 133 | if (length < sc_bufSize) 134 | { 135 | Short(message, length, hash1, hash2); 136 | return; 137 | } 138 | 139 | uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; 140 | uint64 buf[sc_numVars]; 141 | uint64 *end; 142 | union 143 | { 144 | const uint8 *p8; 145 | uint64 *p64; 146 | size_t i; 147 | } u; 148 | size_t remainder; 149 | 150 | h0=h3=h6=h9 = *hash1; 151 | h1=h4=h7=h10 = *hash2; 152 | h2=h5=h8=h11 = sc_const; 153 | 154 | u.p8 = (const uint8 *)message; 155 | end = u.p64 + (length/sc_blockSize)*sc_numVars; 156 | 157 | // handle all whole sc_blockSize blocks of bytes 158 | if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) 159 | { 160 | while 
(u.p64 < end) 161 | { 162 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 163 | u.p64 += sc_numVars; 164 | } 165 | } 166 | else 167 | { 168 | while (u.p64 < end) 169 | { 170 | memcpy(buf, u.p64, sc_blockSize); 171 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 172 | u.p64 += sc_numVars; 173 | } 174 | } 175 | 176 | // handle the last partial block of sc_blockSize bytes 177 | remainder = (length - ((const uint8 *)end-(const uint8 *)message)); 178 | memcpy(buf, end, remainder); 179 | memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); 180 | ((uint8 *)buf)[sc_blockSize-1] = remainder; 181 | 182 | // do some final mixing 183 | End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 184 | *hash1 = h0; 185 | *hash2 = h1; 186 | } 187 | 188 | 189 | 190 | // init spooky state 191 | void SpookyHash::Init(uint64 seed1, uint64 seed2) 192 | { 193 | m_length = 0; 194 | m_remainder = 0; 195 | m_state[0] = seed1; 196 | m_state[1] = seed2; 197 | } 198 | 199 | 200 | // add a message fragment to the state 201 | void SpookyHash::Update(const void *message, size_t length) 202 | { 203 | uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; 204 | size_t newLength = length + m_remainder; 205 | uint8 remainder; 206 | union 207 | { 208 | const uint8 *p8; 209 | uint64 *p64; 210 | size_t i; 211 | } u; 212 | const uint64 *end; 213 | 214 | // Is this message fragment too short? If it is, stuff it away. 
215 | if (newLength < sc_bufSize) 216 | { 217 | memcpy(&((uint8 *)m_data)[m_remainder], message, length); 218 | m_length = length + m_length; 219 | m_remainder = (uint8)newLength; 220 | return; 221 | } 222 | 223 | // init the variables 224 | if (m_length < sc_bufSize) 225 | { 226 | h0=h3=h6=h9 = m_state[0]; 227 | h1=h4=h7=h10 = m_state[1]; 228 | h2=h5=h8=h11 = sc_const; 229 | } 230 | else 231 | { 232 | h0 = m_state[0]; 233 | h1 = m_state[1]; 234 | h2 = m_state[2]; 235 | h3 = m_state[3]; 236 | h4 = m_state[4]; 237 | h5 = m_state[5]; 238 | h6 = m_state[6]; 239 | h7 = m_state[7]; 240 | h8 = m_state[8]; 241 | h9 = m_state[9]; 242 | h10 = m_state[10]; 243 | h11 = m_state[11]; 244 | } 245 | m_length = length + m_length; 246 | 247 | // if we've got anything stuffed away, use it now 248 | if (m_remainder) 249 | { 250 | uint8 prefix = sc_bufSize-m_remainder; 251 | memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); 252 | u.p64 = m_data; 253 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 254 | Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 255 | u.p8 = ((const uint8 *)message) + prefix; 256 | length -= prefix; 257 | } 258 | else 259 | { 260 | u.p8 = (const uint8 *)message; 261 | } 262 | 263 | // handle all whole blocks of sc_blockSize bytes 264 | end = u.p64 + (length/sc_blockSize)*sc_numVars; 265 | remainder = (uint8)(length-((const uint8 *)end-u.p8)); 266 | if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) 267 | { 268 | while (u.p64 < end) 269 | { 270 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 271 | u.p64 += sc_numVars; 272 | } 273 | } 274 | else 275 | { 276 | while (u.p64 < end) 277 | { 278 | memcpy(m_data, u.p8, sc_blockSize); 279 | Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 280 | u.p64 += sc_numVars; 281 | } 282 | } 283 | 284 | // stuff away the last few bytes 285 | m_remainder = remainder; 286 | memcpy(m_data, end, remainder); 287 | 288 | // stuff away the variables 289 | m_state[0] = h0; 290 | m_state[1] = h1; 291 | 
m_state[2] = h2; 292 | m_state[3] = h3; 293 | m_state[4] = h4; 294 | m_state[5] = h5; 295 | m_state[6] = h6; 296 | m_state[7] = h7; 297 | m_state[8] = h8; 298 | m_state[9] = h9; 299 | m_state[10] = h10; 300 | m_state[11] = h11; 301 | } 302 | 303 | 304 | // report the hash for the concatenation of all message fragments so far 305 | void SpookyHash::Final(uint64 *hash1, uint64 *hash2) 306 | { 307 | // init the variables 308 | if (m_length < sc_bufSize) 309 | { 310 | *hash1 = m_state[0]; 311 | *hash2 = m_state[1]; 312 | Short( m_data, m_length, hash1, hash2); 313 | return; 314 | } 315 | 316 | const uint64 *data = (const uint64 *)m_data; 317 | uint8 remainder = m_remainder; 318 | 319 | uint64 h0 = m_state[0]; 320 | uint64 h1 = m_state[1]; 321 | uint64 h2 = m_state[2]; 322 | uint64 h3 = m_state[3]; 323 | uint64 h4 = m_state[4]; 324 | uint64 h5 = m_state[5]; 325 | uint64 h6 = m_state[6]; 326 | uint64 h7 = m_state[7]; 327 | uint64 h8 = m_state[8]; 328 | uint64 h9 = m_state[9]; 329 | uint64 h10 = m_state[10]; 330 | uint64 h11 = m_state[11]; 331 | 332 | if (remainder >= sc_blockSize) 333 | { 334 | // m_data can contain two blocks; handle any whole first block 335 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 336 | data += sc_numVars; 337 | remainder -= sc_blockSize; 338 | } 339 | 340 | // mix in the last partial block, and the length mod sc_blockSize 341 | memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); 342 | 343 | ((uint8 *)data)[sc_blockSize-1] = remainder; 344 | 345 | // do some final mixing 346 | End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 347 | 348 | *hash1 = h0; 349 | *hash2 = h1; 350 | } 351 | 352 | -------------------------------------------------------------------------------- /SMHasher/SpookyHash/SpookyV2.h: -------------------------------------------------------------------------------- 1 | // 2 | // SpookyHash: a 128-bit noncryptographic hash function 3 | // By Bob Jenkins, public domain 4 | // Oct 31 2010: alpha, framework + 
SpookyHash::Mix appears right 5 | // Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right 6 | // Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas 7 | // Feb 2 2012: production, same bits as beta 8 | // Feb 5 2012: adjusted definitions of uint* to be more portable 9 | // Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough. 10 | // August 5 2012: SpookyV2 (different results) 11 | // 12 | // Up to 3 bytes/cycle for long messages. Reasonably fast for short messages. 13 | // All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit. 14 | // 15 | // This was developed for and tested on 64-bit x86-compatible processors. 16 | // It assumes the processor is little-endian. There is a macro 17 | // controlling whether unaligned reads are allowed (by default they are). 18 | // This should be an equally good hash on big-endian machines, but it will 19 | // compute different results on them than on little-endian machines. 20 | // 21 | // Google's CityHash has similar specs to SpookyHash, and CityHash is faster 22 | // on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders 23 | // of magnitude slower. CRCs are two or more times slower, but unlike 24 | // SpookyHash, they have nice math for combining the CRCs of pieces to form 25 | // the CRCs of wholes. There are also cryptographic hashes, but those are even 26 | // slower than MD5. 
27 | // 28 | 29 | #include 30 | 31 | #ifdef _MSC_VER 32 | # define INLINE __forceinline 33 | typedef unsigned __int64 uint64; 34 | typedef unsigned __int32 uint32; 35 | typedef unsigned __int16 uint16; 36 | typedef unsigned __int8 uint8; 37 | #else 38 | # include 39 | # define INLINE inline 40 | typedef uint64_t uint64; 41 | typedef uint32_t uint32; 42 | typedef uint16_t uint16; 43 | typedef uint8_t uint8; 44 | #endif 45 | 46 | 47 | class SpookyHash 48 | { 49 | public: 50 | // 51 | // SpookyHash: hash a single message in one call, produce 128-bit output 52 | // 53 | static void Hash128( 54 | const void *message, // message to hash 55 | size_t length, // length of message in bytes 56 | uint64 *hash1, // in/out: in seed 1, out hash value 1 57 | uint64 *hash2); // in/out: in seed 2, out hash value 2 58 | 59 | // 60 | // Hash64: hash a single message in one call, return 64-bit output 61 | // 62 | static uint64 Hash64( 63 | const void *message, // message to hash 64 | size_t length, // length of message in bytes 65 | uint64 seed) // seed 66 | { 67 | uint64 hash1 = seed; 68 | Hash128(message, length, &hash1, &seed); 69 | return hash1; 70 | } 71 | 72 | // 73 | // Hash32: hash a single message in one call, produce 32-bit output 74 | // 75 | static uint32 Hash32( 76 | const void *message, // message to hash 77 | size_t length, // length of message in bytes 78 | uint32 seed) // seed 79 | { 80 | uint64 hash1 = seed, hash2 = seed; 81 | Hash128(message, length, &hash1, &hash2); 82 | return (uint32)hash1; 83 | } 84 | 85 | // 86 | // Init: initialize the context of a SpookyHash 87 | // 88 | void Init( 89 | uint64 seed1, // any 64-bit value will do, including 0 90 | uint64 seed2); // different seeds produce independent hashes 91 | 92 | // 93 | // Update: add a piece of a message to a SpookyHash state 94 | // 95 | void Update( 96 | const void *message, // message fragment 97 | size_t length); // length of message fragment in bytes 98 | 99 | 100 | // 101 | // Final: compute the hash 
for the current SpookyHash state 102 | // 103 | // This does not modify the state; you can keep updating it afterward 104 | // 105 | // The result is the same as if SpookyHash() had been called with 106 | // all the pieces concatenated into one message. 107 | // 108 | void Final( 109 | uint64 *hash1, // out only: first 64 bits of hash value. 110 | uint64 *hash2); // out only: second 64 bits of hash value. 111 | 112 | // 113 | // left rotate a 64-bit value by k bytes 114 | // 115 | static INLINE uint64 Rot64(uint64 x, int k) 116 | { 117 | return (x << k) | (x >> (64 - k)); 118 | } 119 | 120 | // 121 | // This is used if the input is 96 bytes long or longer. 122 | // 123 | // The internal state is fully overwritten every 96 bytes. 124 | // Every input bit appears to cause at least 128 bits of entropy 125 | // before 96 other bytes are combined, when run forward or backward 126 | // For every input bit, 127 | // Two inputs differing in just that input bit 128 | // Where "differ" means xor or subtraction 129 | // And the base value is random 130 | // When run forward or backwards one Mix 131 | // I tried 3 pairs of each; they all differed by at least 212 bits. 
132 | // 133 | static INLINE void Mix( 134 | const uint64 *data, 135 | uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3, 136 | uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7, 137 | uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11) 138 | { 139 | s0 += data[0]; s2 ^= s10; s11 ^= s0; s0 = Rot64(s0,11); s11 += s1; 140 | s1 += data[1]; s3 ^= s11; s0 ^= s1; s1 = Rot64(s1,32); s0 += s2; 141 | s2 += data[2]; s4 ^= s0; s1 ^= s2; s2 = Rot64(s2,43); s1 += s3; 142 | s3 += data[3]; s5 ^= s1; s2 ^= s3; s3 = Rot64(s3,31); s2 += s4; 143 | s4 += data[4]; s6 ^= s2; s3 ^= s4; s4 = Rot64(s4,17); s3 += s5; 144 | s5 += data[5]; s7 ^= s3; s4 ^= s5; s5 = Rot64(s5,28); s4 += s6; 145 | s6 += data[6]; s8 ^= s4; s5 ^= s6; s6 = Rot64(s6,39); s5 += s7; 146 | s7 += data[7]; s9 ^= s5; s6 ^= s7; s7 = Rot64(s7,57); s6 += s8; 147 | s8 += data[8]; s10 ^= s6; s7 ^= s8; s8 = Rot64(s8,55); s7 += s9; 148 | s9 += data[9]; s11 ^= s7; s8 ^= s9; s9 = Rot64(s9,54); s8 += s10; 149 | s10 += data[10]; s0 ^= s8; s9 ^= s10; s10 = Rot64(s10,22); s9 += s11; 150 | s11 += data[11]; s1 ^= s9; s10 ^= s11; s11 = Rot64(s11,46); s10 += s0; 151 | } 152 | 153 | // 154 | // Mix all 12 inputs together so that h0, h1 are a hash of them all. 155 | // 156 | // For two inputs differing in just the input bits 157 | // Where "differ" means xor or subtraction 158 | // And the base value is random, or a counting value starting at that bit 159 | // The final result will have each bit of h0, h1 flip 160 | // For every input bit, 161 | // with probability 50 +- .3% 162 | // For every pair of input bits, 163 | // with probability 50 +- 3% 164 | // 165 | // This does not rely on the last Mix() call having already mixed some. 166 | // Two iterations was almost good enough for a 64-bit result, but a 167 | // 128-bit result is reported, so End() does three iterations. 
168 | // 169 | static INLINE void EndPartial( 170 | uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, 171 | uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 172 | uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) 173 | { 174 | h11+= h1; h2 ^= h11; h1 = Rot64(h1,44); 175 | h0 += h2; h3 ^= h0; h2 = Rot64(h2,15); 176 | h1 += h3; h4 ^= h1; h3 = Rot64(h3,34); 177 | h2 += h4; h5 ^= h2; h4 = Rot64(h4,21); 178 | h3 += h5; h6 ^= h3; h5 = Rot64(h5,38); 179 | h4 += h6; h7 ^= h4; h6 = Rot64(h6,33); 180 | h5 += h7; h8 ^= h5; h7 = Rot64(h7,10); 181 | h6 += h8; h9 ^= h6; h8 = Rot64(h8,13); 182 | h7 += h9; h10^= h7; h9 = Rot64(h9,38); 183 | h8 += h10; h11^= h8; h10= Rot64(h10,53); 184 | h9 += h11; h0 ^= h9; h11= Rot64(h11,42); 185 | h10+= h0; h1 ^= h10; h0 = Rot64(h0,54); 186 | } 187 | 188 | static INLINE void End( 189 | const uint64 *data, 190 | uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, 191 | uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 192 | uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) 193 | { 194 | h0 += data[0]; h1 += data[1]; h2 += data[2]; h3 += data[3]; 195 | h4 += data[4]; h5 += data[5]; h6 += data[6]; h7 += data[7]; 196 | h8 += data[8]; h9 += data[9]; h10 += data[10]; h11 += data[11]; 197 | EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 198 | EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 199 | EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 200 | } 201 | 202 | // 203 | // The goal is for each bit of the input to expand into 128 bits of 204 | // apparent entropy before it is fully overwritten. 
205 | // n trials both set and cleared at least m bits of h0 h1 h2 h3 206 | // n: 2 m: 29 207 | // n: 3 m: 46 208 | // n: 4 m: 57 209 | // n: 5 m: 107 210 | // n: 6 m: 146 211 | // n: 7 m: 152 212 | // when run forwards or backwards 213 | // for all 1-bit and 2-bit diffs 214 | // with diffs defined by either xor or subtraction 215 | // with a base of all zeros plus a counter, or plus another bit, or random 216 | // 217 | static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) 218 | { 219 | h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; 220 | h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; 221 | h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; 222 | h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; 223 | h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; 224 | h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; 225 | h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; 226 | h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; 227 | h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; 228 | h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; 229 | h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; 230 | h1 = Rot64(h1,36); h1 += h2; h3 ^= h1; 231 | } 232 | 233 | // 234 | // Mix all 4 inputs together so that h0, h1 are a hash of them all. 
235 | // 236 | // For two inputs differing in just the input bits 237 | // Where "differ" means xor or subtraction 238 | // And the base value is random, or a counting value starting at that bit 239 | // The final result will have each bit of h0, h1 flip 240 | // For every input bit, 241 | // with probability 50 +- .3% (it is probably better than that) 242 | // For every pair of input bits, 243 | // with probability 50 +- .75% (the worst case is approximately that) 244 | // 245 | static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) 246 | { 247 | h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; 248 | h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; 249 | h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; 250 | h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; 251 | h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; 252 | h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; 253 | h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; 254 | h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; 255 | h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; 256 | h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; 257 | h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; 258 | } 259 | 260 | private: 261 | 262 | // 263 | // Short is used for messages under 192 bytes in length 264 | // Short has a low startup cost, the normal mode is good for long 265 | // keys, the cost crossover is at about 192 bytes. The two modes were 266 | // held to the same quality bar. 
267 | // 268 | static void Short( 269 | const void *message, // message (array of bytes, not necessarily aligned) 270 | size_t length, // length of message (in bytes) 271 | uint64 *hash1, // in/out: in the seed, out the hash value 272 | uint64 *hash2); // in/out: in the seed, out the hash value 273 | 274 | // number of uint64's in internal state 275 | static const size_t sc_numVars = 12; 276 | 277 | // size of the internal state 278 | static const size_t sc_blockSize = sc_numVars*8; 279 | 280 | // size of buffer of unhashed data, in bytes 281 | static const size_t sc_bufSize = 2*sc_blockSize; 282 | 283 | // 284 | // sc_const: a constant which: 285 | // * is not zero 286 | // * is odd 287 | // * is a not-very-regular mix of 1's and 0's 288 | // * does not need any other special mathematical properties 289 | // 290 | static const uint64 sc_const = 0xdeadbeefdeadbeefLL; 291 | 292 | uint64 m_data[2*sc_numVars]; // unhashed data, for partial messages 293 | uint64 m_state[sc_numVars]; // internal state of the hash 294 | size_t m_length; // total length of the input so far 295 | uint8 m_remainder; // length of unhashed data stashed in m_data 296 | }; 297 | 298 | 299 | 300 | -------------------------------------------------------------------------------- /SMHasher/SpookyHashTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "SpookyHash/SpookyV2.cpp" 3 | 4 | void SpookyHash32_test ( const void * key, int len, unsigned seed, void * out ) 5 | { 6 | *(uint32_t*)out = SpookyHash::Hash32(key,len,seed); 7 | } 8 | 9 | void SpookyHash32a_test ( const void * key, int len, unsigned seed, void * out ) 10 | { 11 | uint64_t seed_out[2]; 12 | seed_out[0] = seed; 13 | seed_out[1] = 0; 14 | SpookyHash::Hash128(key,len,seed_out,seed_out+1); 15 | *(uint32_t*)out = seed_out[0] >> 32; 16 | } 17 | 18 | void SpookyHash32b_test ( const void * key, int len, unsigned seed, void * out ) 19 | { 20 | uint64_t seed_out[2]; 21 | 
seed_out[0] = seed; 22 | seed_out[1] = 0; 23 | SpookyHash::Hash128(key,len,seed_out,seed_out+1); 24 | *(uint32_t*)out = seed_out[1]; 25 | } 26 | 27 | void SpookyHash32c_test ( const void * key, int len, unsigned seed, void * out ) 28 | { 29 | uint64_t seed_out[2]; 30 | seed_out[0] = seed; 31 | seed_out[1] = 0; 32 | SpookyHash::Hash128(key,len,seed_out,seed_out+1); 33 | *(uint32_t*)out = seed_out[1] >> 32; 34 | } 35 | 36 | void SpookyHash64_test ( const void * key, int len, unsigned seed, void * out ) 37 | { 38 | *(uint64_t*)out = SpookyHash::Hash64(key,len,seed); 39 | } 40 | 41 | void SpookyHash128_test ( const void * key, int len, unsigned seed, void * out ) 42 | { 43 | uint64_t *seed_out = (uint64_t*) out; 44 | seed_out[0] = seed; 45 | seed_out[1] = 0; 46 | SpookyHash::Hash128(key,len,seed_out,seed_out+1); 47 | } 48 | -------------------------------------------------------------------------------- /SMHasher/Stats.cpp: -------------------------------------------------------------------------------- 1 | #include "Stats.h" 2 | 3 | //----------------------------------------------------------------------------- 4 | 5 | double chooseK ( int n, int k ) 6 | { 7 | if(k > (n - k)) k = n - k; 8 | 9 | double c = 1; 10 | 11 | for(int i = 0; i < k; i++) 12 | { 13 | double t = double(n-i) / double(i+1); 14 | 15 | c *= t; 16 | } 17 | 18 | return c; 19 | } 20 | 21 | double chooseUpToK ( int n, int k ) 22 | { 23 | double c = 0; 24 | 25 | for(int i = 1; i <= k; i++) 26 | { 27 | c += chooseK(n,i); 28 | } 29 | 30 | return c; 31 | } 32 | 33 | //----------------------------------------------------------------------------- 34 | // Distribution "score" 35 | // TODO - big writeup of what this score means 36 | 37 | // Basically, we're computing a constant that says "The test distribution is as 38 | // uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of 39 | // the bins. 
This makes for a nice uniform way to rate a distribution that isn't 40 | // dependent on the number of bins or the number of keys 41 | 42 | // (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up 43 | // as distribution weaknesses) 44 | 45 | double calcScore ( const int * bins, const int bincount, const int keycount ) 46 | { 47 | double n = bincount; 48 | double k = keycount; 49 | 50 | // compute rms value 51 | 52 | double r = 0; 53 | 54 | for(int i = 0; i < bincount; i++) 55 | { 56 | double b = bins[i]; 57 | 58 | r += b*b; 59 | } 60 | 61 | r = sqrt(r / n); 62 | 63 | // compute fill factor 64 | 65 | double f = (k*k - 1) / (n*r*r - k); 66 | 67 | // rescale to (0,1) with 0 = good, 1 = bad 68 | 69 | return 1 - (f / n); 70 | } 71 | 72 | 73 | //---------------------------------------------------------------------------- 74 | 75 | void plot ( double n ) 76 | { 77 | double n2 = n * 1; 78 | 79 | if(n2 < 0) n2 = 0; 80 | 81 | n2 *= 100; 82 | 83 | if(n2 > 64) n2 = 64; 84 | 85 | int n3 = (int)n2; 86 | 87 | if(n3 == 0) 88 | printf("."); 89 | else 90 | { 91 | char x = '0' + char(n3); 92 | 93 | if(x > '9') x = 'X'; 94 | 95 | printf("%c",x); 96 | } 97 | } 98 | 99 | //----------------------------------------------------------------------------- 100 | -------------------------------------------------------------------------------- /SMHasher/Stats.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Types.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include // for std::sort 9 | #include // for memset 10 | #include // for printf 11 | 12 | double calcScore ( const int * bins, const int bincount, const int ballcount ); 13 | 14 | void plot ( double n ); 15 | 16 | inline double ExpectedCollisions ( double balls, double bins ) 17 | { 18 | return balls - bins + bins * pow(1 - 1/bins,balls); 19 | } 20 | 21 | double chooseK ( int b, int k ); 22 | double chooseUpToK ( int n, int k ); 23 | 24 | 
//----------------------------------------------------------------------------- 25 | 26 | inline uint32_t f3mix ( uint32_t k ) 27 | { 28 | k ^= k >> 16; 29 | k *= 0x85ebca6b; 30 | k ^= k >> 13; 31 | k *= 0xc2b2ae35; 32 | k ^= k >> 16; 33 | 34 | return k; 35 | } 36 | 37 | //----------------------------------------------------------------------------- 38 | // Sort the hash list, count the total number of collisions and return 39 | // the first N collisions for further processing 40 | 41 | template< typename hashtype > 42 | int FindCollisions ( std::vector & hashes, 43 | HashSet & collisions, 44 | int maxCollisions ) 45 | { 46 | int collcount = 0; 47 | 48 | std::sort(hashes.begin(),hashes.end()); 49 | 50 | for(size_t i = 1; i < hashes.size(); i++) 51 | { 52 | if(hashes[i] == hashes[i-1]) 53 | { 54 | collcount++; 55 | 56 | if((int)collisions.size() < maxCollisions) 57 | { 58 | collisions.insert(hashes[i]); 59 | } 60 | } 61 | } 62 | 63 | return collcount; 64 | } 65 | 66 | //----------------------------------------------------------------------------- 67 | 68 | template < class keytype, typename hashtype > 69 | int PrintCollisions ( hashfunc hash, std::vector & keys ) 70 | { 71 | int collcount = 0; 72 | 73 | typedef std::map htab; 74 | htab tab; 75 | 76 | for(size_t i = 1; i < keys.size(); i++) 77 | { 78 | keytype & k1 = keys[i]; 79 | 80 | hashtype h = hash(&k1,sizeof(keytype),0); 81 | 82 | typename htab::iterator it = tab.find(h); 83 | 84 | if(it != tab.end()) 85 | { 86 | keytype & k2 = (*it).second; 87 | 88 | printf("A: "); 89 | printbits(&k1,sizeof(keytype)); 90 | printf("B: "); 91 | printbits(&k2,sizeof(keytype)); 92 | } 93 | else 94 | { 95 | tab.insert( std::make_pair(h,k1) ); 96 | } 97 | } 98 | 99 | return collcount; 100 | } 101 | 102 | //---------------------------------------------------------------------------- 103 | // Measure the distribution "score" for each possible N-bit span up to 20 bits 104 | 105 | template< typename hashtype > 106 | double 
TestDistribution ( std::vector & hashes, bool drawDiagram ) 107 | { 108 | printf("Testing distribution - "); 109 | 110 | if(drawDiagram) printf("\n"); 111 | 112 | const int hashbits = sizeof(hashtype) * 8; 113 | 114 | int maxwidth = 20; 115 | 116 | // We need at least 5 keys per bin to reliably test distribution biases 117 | // down to 1%, so don't bother to test sparser distributions than that 118 | 119 | while(double(hashes.size()) / double(1 << maxwidth) < 5.0) 120 | { 121 | maxwidth--; 122 | } 123 | 124 | std::vector bins; 125 | bins.resize(1 << maxwidth); 126 | 127 | double worst = 0; 128 | int worstStart = -1; 129 | int worstWidth = -1; 130 | 131 | for(int start = 0; start < hashbits; start++) 132 | { 133 | int width = maxwidth; 134 | int bincount = (1 << width); 135 | 136 | memset(&bins[0],0,sizeof(int)*bincount); 137 | 138 | for(size_t j = 0; j < hashes.size(); j++) 139 | { 140 | hashtype & hash = hashes[j]; 141 | 142 | uint32_t index = window(&hash,sizeof(hash),start,width); 143 | 144 | bins[index]++; 145 | } 146 | 147 | // Test the distribution, then fold the bins in half, 148 | // repeat until we're down to 256 bins 149 | 150 | if(drawDiagram) printf("["); 151 | 152 | while(bincount >= 256) 153 | { 154 | double n = calcScore(&bins[0],bincount,(int)hashes.size()); 155 | 156 | if(drawDiagram) plot(n); 157 | 158 | if(n > worst) 159 | { 160 | worst = n; 161 | worstStart = start; 162 | worstWidth = width; 163 | } 164 | 165 | width--; 166 | bincount /= 2; 167 | 168 | if(width < 8) break; 169 | 170 | for(int i = 0; i < bincount; i++) 171 | { 172 | bins[i] += bins[i+bincount]; 173 | } 174 | } 175 | 176 | if(drawDiagram) printf("]\n"); 177 | } 178 | 179 | double pct = worst * 100.0; 180 | 181 | printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct); 182 | if(pct >= 1.0) printf(" !!!!! 
"); 183 | printf("\n"); 184 | 185 | return worst; 186 | } 187 | 188 | //---------------------------------------------------------------------------- 189 | 190 | template < typename hashtype > 191 | bool TestHashList ( std::vector & hashes, std::vector & collisions, bool testDist, bool drawDiagram ) 192 | { 193 | bool result = true; 194 | 195 | { 196 | size_t count = hashes.size(); 197 | 198 | double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1)); 199 | 200 | printf("Testing collisions - Expected %8.2f, ",expected); 201 | 202 | double collcount = 0; 203 | 204 | HashSet collisions; 205 | 206 | collcount = FindCollisions(hashes,collisions,1000); 207 | 208 | printf("actual %8.2f (%5.2fx)",collcount, collcount / expected); 209 | 210 | if(sizeof(hashtype) == sizeof(uint32_t)) 211 | { 212 | // 2x expected collisions = fail 213 | 214 | // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead 215 | // of a scale factor, otherwise we fail erroneously if there are a small expected number 216 | // of collisions 217 | 218 | if(double(collcount) / double(expected) > 2.0) 219 | { 220 | printf(" !!!!! "); 221 | result = false; 222 | } 223 | } 224 | else 225 | { 226 | // For all hashes larger than 32 bits, _any_ collisions are a failure. 227 | 228 | if(collcount > 0) 229 | { 230 | printf(" !!!!! 
"); 231 | result = false; 232 | } 233 | } 234 | 235 | printf("\n"); 236 | } 237 | 238 | //---------- 239 | 240 | if(testDist) 241 | { 242 | TestDistribution(hashes,drawDiagram); 243 | } 244 | 245 | return result; 246 | } 247 | 248 | //---------- 249 | 250 | template < typename hashtype > 251 | bool TestHashList ( std::vector & hashes, bool /*testColl*/, bool testDist, bool drawDiagram ) 252 | { 253 | std::vector collisions; 254 | 255 | return TestHashList(hashes,collisions,testDist,drawDiagram); 256 | } 257 | 258 | //----------------------------------------------------------------------------- 259 | 260 | template < class keytype, typename hashtype > 261 | bool TestKeyList ( hashfunc hash, std::vector & keys, bool testColl, bool testDist, bool drawDiagram ) 262 | { 263 | int keycount = (int)keys.size(); 264 | 265 | std::vector hashes; 266 | 267 | hashes.resize(keycount); 268 | 269 | printf("Hashing"); 270 | 271 | for(int i = 0; i < keycount; i++) 272 | { 273 | if(i % (keycount / 10) == 0) printf("."); 274 | 275 | keytype & k = keys[i]; 276 | 277 | hash(&k,sizeof(k),0,&hashes[i]); 278 | } 279 | 280 | printf("\n"); 281 | 282 | bool result = TestHashList(hashes,testColl,testDist,drawDiagram); 283 | 284 | printf("\n"); 285 | 286 | return result; 287 | } 288 | 289 | //----------------------------------------------------------------------------- 290 | // Bytepair test - generate 16-bit indices from all possible non-overlapping 291 | // 8-bit sections of the hash value, check distribution on all of them. 292 | 293 | // This is a very good test for catching weak intercorrelations between bits - 294 | // much harder to pass than the normal distribution test. 
However, it doesn't 295 | // really model the normal usage of hash functions in hash table lookup, so 296 | // I'm not sure it's that useful (and hash functions that fail this test but 297 | // pass the normal distribution test still work well in practice) 298 | 299 | template < typename hashtype > 300 | double TestDistributionBytepairs ( std::vector & hashes, bool drawDiagram ) 301 | { 302 | const int nbytes = sizeof(hashtype); 303 | const int hashbits = nbytes * 8; 304 | 305 | const int nbins = 65536; 306 | 307 | std::vector bins(nbins,0); 308 | 309 | double worst = 0; 310 | 311 | for(int a = 0; a < hashbits; a++) 312 | { 313 | if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n"); 314 | 315 | if(drawDiagram) printf("["); 316 | 317 | for(int b = 0; b < hashbits; b++) 318 | { 319 | if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" "); 320 | 321 | bins.clear(); 322 | bins.resize(nbins,0); 323 | 324 | for(size_t i = 0; i < hashes.size(); i++) 325 | { 326 | hashtype & hash = hashes[i]; 327 | 328 | uint32_t pa = window(&hash,sizeof(hash),a,8); 329 | uint32_t pb = window(&hash,sizeof(hash),b,8); 330 | 331 | bins[pa | (pb << 8)]++; 332 | } 333 | 334 | double s = calcScore(bins,bins.size(),hashes.size()); 335 | 336 | if(drawDiagram) plot(s); 337 | 338 | if(s > worst) 339 | { 340 | worst = s; 341 | } 342 | } 343 | 344 | if(drawDiagram) printf("]\n"); 345 | } 346 | 347 | return worst; 348 | } 349 | 350 | //----------------------------------------------------------------------------- 351 | // Simplified test - only check 64k distributions, and only on byte boundaries 352 | 353 | template < typename hashtype > 354 | void TestDistributionFast ( std::vector & hashes, double & dworst, double & davg ) 355 | { 356 | const int hashbits = sizeof(hashtype) * 8; 357 | const int nbins = 65536; 358 | 359 | std::vector bins(nbins,0); 360 | 361 | dworst = -1.0e90; 362 | davg = 0; 363 | 364 | for(int start = 0; start < hashbits; start += 8) 365 | { 366 | bins.clear(); 367 | 
bins.resize(nbins,0); 368 | 369 | for(size_t j = 0; j < hashes.size(); j++) 370 | { 371 | hashtype & hash = hashes[j]; 372 | 373 | uint32_t index = window(&hash,sizeof(hash),start,16); 374 | 375 | bins[index]++; 376 | } 377 | 378 | double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size()); 379 | 380 | davg += n; 381 | 382 | if(n > dworst) dworst = n; 383 | } 384 | 385 | davg /= double(hashbits/8); 386 | } 387 | 388 | //----------------------------------------------------------------------------- 389 | -------------------------------------------------------------------------------- /SMHasher/Types.cpp: -------------------------------------------------------------------------------- 1 | #include "Types.h" 2 | 3 | #include "Random.h" 4 | 5 | #include 6 | 7 | uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed ); 8 | 9 | //----------------------------------------------------------------------------- 10 | 11 | #if defined(_MSC_VER) 12 | #pragma optimize( "", off ) 13 | #endif 14 | 15 | void blackhole ( uint32_t ) 16 | { 17 | } 18 | 19 | uint32_t whitehole ( void ) 20 | { 21 | return 0; 22 | } 23 | 24 | #if defined(_MSC_VER) 25 | #pragma optimize( "", on ) 26 | #endif 27 | 28 | uint32_t g_verify = 1; 29 | 30 | void MixVCode ( const void * blob, int len ) 31 | { 32 | g_verify = MurmurOAAT(blob,len,g_verify); 33 | } 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | bool isprime ( uint32_t x ) 38 | { 39 | uint32_t p[] = 40 | { 41 | 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101, 42 | 103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197, 43 | 199,211,223,227,229,233,239,241,251 44 | }; 45 | 46 | for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++) 47 | { 48 | if((x % p[i]) == 0) 49 | { 50 | return false; 51 | } 52 | } 53 | 54 | for(int i = 257; i < 65536; i += 2) 55 | { 56 | if((x % i) == 0) 57 | { 58 | return false; 59 | } 60 | } 61 | 62 | return 
true; 63 | } 64 | 65 | void GenerateMixingConstants ( void ) 66 | { 67 | Rand r(8350147); 68 | 69 | int count = 0; 70 | 71 | int trials = 0; 72 | int bitfail = 0; 73 | int popfail = 0; 74 | int matchfail = 0; 75 | int primefail = 0; 76 | 77 | //for(uint32_t x = 1; x; x++) 78 | while(count < 100) 79 | { 80 | //if(x % 100000000 == 0) printf("."); 81 | 82 | trials++; 83 | uint32_t b = r.rand_u32(); 84 | //uint32_t b = x; 85 | 86 | //---------- 87 | // must have between 14 and 18 set bits 88 | 89 | if(popcount(b) < 16) { b = 0; popfail++; } 90 | if(popcount(b) > 16) { b = 0; popfail++; } 91 | 92 | if(b == 0) continue; 93 | 94 | //---------- 95 | // must have 3-5 bits set per 8-bit window 96 | 97 | for(int i = 0; i < 32; i++) 98 | { 99 | uint32_t c = ROTL32(b,i) & 0xFF; 100 | 101 | if(popcount(c) < 3) { b = 0; bitfail++; break; } 102 | if(popcount(c) > 5) { b = 0; bitfail++; break; } 103 | } 104 | 105 | if(b == 0) continue; 106 | 107 | //---------- 108 | // all 8-bit windows must be different 109 | 110 | uint8_t match[256]; 111 | 112 | memset(match,0,256); 113 | 114 | for(int i = 0; i < 32; i++) 115 | { 116 | uint32_t c = ROTL32(b,i) & 0xFF; 117 | 118 | if(match[c]) { b = 0; matchfail++; break; } 119 | 120 | match[c] = 1; 121 | } 122 | 123 | if(b == 0) continue; 124 | 125 | //---------- 126 | // must be prime 127 | 128 | if(!isprime(b)) 129 | { 130 | b = 0; 131 | primefail++; 132 | } 133 | 134 | if(b == 0) continue; 135 | 136 | //---------- 137 | 138 | if(b) 139 | { 140 | printf("0x%08x : 0x%08x\n",b,~b); 141 | count++; 142 | } 143 | } 144 | 145 | printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count); 146 | } 147 | 148 | //----------------------------------------------------------------------------- 149 | -------------------------------------------------------------------------------- /SMHasher/Types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Platform.h" 4 | #include 
"Bitvec.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | //----------------------------------------------------------------------------- 12 | // If the optimizer detects that a value in a speed test is constant or unused, 13 | // the optimizer may remove references to it or otherwise create code that 14 | // would not occur in a real-world application. To prevent the optimizer from 15 | // doing this we declare two trivial functions that either sink or source data, 16 | // and bar the compiler from optimizing them. 17 | 18 | void blackhole ( uint32_t x ); 19 | uint32_t whitehole ( void ); 20 | 21 | //----------------------------------------------------------------------------- 22 | // We want to verify that every test produces the same result on every platform 23 | // To do this, we hash the results of every test to produce an overall 24 | // verification value for the whole test suite. If two runs produce the same 25 | // verification value, then every test in both run produced the same results 26 | 27 | extern uint32_t g_verify; 28 | 29 | // Mix the given blob of data into the verification code 30 | 31 | void MixVCode ( const void * blob, int len ); 32 | 33 | 34 | //----------------------------------------------------------------------------- 35 | 36 | typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out ); 37 | 38 | struct ByteVec : public std::vector 39 | { 40 | ByteVec ( const void * key, int len ) 41 | { 42 | resize(len); 43 | memcpy(&front(),key,len); 44 | } 45 | }; 46 | 47 | template< typename hashtype, typename keytype > 48 | struct CollisionMap : public std::map< hashtype, std::vector > 49 | { 50 | }; 51 | 52 | template< typename hashtype > 53 | struct HashSet : public std::set 54 | { 55 | }; 56 | 57 | //----------------------------------------------------------------------------- 58 | 59 | template < class T > 60 | class hashfunc 61 | { 62 | public: 63 | 64 | hashfunc ( pfHash h ) : m_hash(h) 65 | 
{ 66 | } 67 | 68 | inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out ) 69 | { 70 | m_hash(key,len,seed,out); 71 | } 72 | 73 | inline operator pfHash ( void ) const 74 | { 75 | return m_hash; 76 | } 77 | 78 | inline T operator () ( const void * key, const int len, const uint32_t seed ) 79 | { 80 | T result; 81 | 82 | m_hash(key,len,seed,(uint32_t*)&result); 83 | 84 | return result; 85 | } 86 | 87 | pfHash m_hash; 88 | }; 89 | 90 | //----------------------------------------------------------------------------- 91 | // Key-processing callback objects. Simplifies keyset testing a bit. 92 | 93 | struct KeyCallback 94 | { 95 | KeyCallback() : m_count(0) 96 | { 97 | } 98 | 99 | virtual ~KeyCallback() 100 | { 101 | } 102 | 103 | virtual void operator() ( const void * key, int len ) 104 | { 105 | m_count++; 106 | } 107 | 108 | virtual void reserve ( int keycount ) 109 | { 110 | }; 111 | 112 | int m_count; 113 | }; 114 | 115 | //---------- 116 | 117 | template 118 | struct HashCallback : public KeyCallback 119 | { 120 | typedef std::vector hashvec; 121 | 122 | HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash) 123 | { 124 | m_hashes.clear(); 125 | } 126 | 127 | virtual void operator () ( const void * key, int len ) 128 | { 129 | size_t newsize = m_hashes.size() + 1; 130 | 131 | m_hashes.resize(newsize); 132 | 133 | m_pfHash(key,len,0,&m_hashes.back()); 134 | } 135 | 136 | virtual void reserve ( int keycount ) 137 | { 138 | m_hashes.reserve(keycount); 139 | } 140 | 141 | hashvec & m_hashes; 142 | pfHash m_pfHash; 143 | 144 | //---------- 145 | 146 | private: 147 | 148 | HashCallback & operator = ( const HashCallback & ); 149 | }; 150 | 151 | //---------- 152 | 153 | template 154 | struct CollisionCallback : public KeyCallback 155 | { 156 | typedef HashSet hashset; 157 | typedef CollisionMap collmap; 158 | 159 | CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) 160 | : 
m_pfHash(hash), 161 | m_collisions(collisions), 162 | m_collmap(cmap) 163 | { 164 | } 165 | 166 | virtual void operator () ( const void * key, int len ) 167 | { 168 | hashtype h; 169 | 170 | m_pfHash(key,len,0,&h); 171 | 172 | if(m_collisions.count(h)) 173 | { 174 | m_collmap[h].push_back( ByteVec(key,len) ); 175 | } 176 | } 177 | 178 | //---------- 179 | 180 | pfHash m_pfHash; 181 | hashset & m_collisions; 182 | collmap & m_collmap; 183 | 184 | private: 185 | 186 | CollisionCallback & operator = ( const CollisionCallback & c ); 187 | }; 188 | 189 | //----------------------------------------------------------------------------- 190 | 191 | template < int _bits > 192 | class Blob 193 | { 194 | public: 195 | 196 | Blob() 197 | { 198 | for(size_t i = 0; i < sizeof(bytes); i++) 199 | { 200 | bytes[i] = 0; 201 | } 202 | } 203 | 204 | Blob ( int x ) 205 | { 206 | for(size_t i = 0; i < sizeof(bytes); i++) 207 | { 208 | bytes[i] = 0; 209 | } 210 | 211 | *(int*)bytes = x; 212 | } 213 | 214 | Blob ( const Blob & k ) 215 | { 216 | for(size_t i = 0; i < sizeof(bytes); i++) 217 | { 218 | bytes[i] = k.bytes[i]; 219 | } 220 | } 221 | 222 | Blob & operator = ( const Blob & k ) 223 | { 224 | for(size_t i = 0; i < sizeof(bytes); i++) 225 | { 226 | bytes[i] = k.bytes[i]; 227 | } 228 | 229 | return *this; 230 | } 231 | 232 | Blob ( uint64_t a, uint64_t b ) 233 | { 234 | uint64_t t[2] = {a,b}; 235 | set(&t,16); 236 | } 237 | 238 | void set ( const void * blob, size_t len ) 239 | { 240 | const uint8_t * k = (const uint8_t*)blob; 241 | 242 | len = len > sizeof(bytes) ? 
sizeof(bytes) : len; 243 | 244 | for(size_t i = 0; i < len; i++) 245 | { 246 | bytes[i] = k[i]; 247 | } 248 | 249 | for(size_t i = len; i < sizeof(bytes); i++) 250 | { 251 | bytes[i] = 0; 252 | } 253 | } 254 | 255 | uint8_t & operator [] ( int i ) 256 | { 257 | return bytes[i]; 258 | } 259 | 260 | const uint8_t & operator [] ( int i ) const 261 | { 262 | return bytes[i]; 263 | } 264 | 265 | //---------- 266 | // boolean operations 267 | 268 | bool operator < ( const Blob & k ) const 269 | { 270 | for(size_t i = 0; i < sizeof(bytes); i++) 271 | { 272 | if(bytes[i] < k.bytes[i]) return true; 273 | if(bytes[i] > k.bytes[i]) return false; 274 | } 275 | 276 | return false; 277 | } 278 | 279 | bool operator == ( const Blob & k ) const 280 | { 281 | for(size_t i = 0; i < sizeof(bytes); i++) 282 | { 283 | if(bytes[i] != k.bytes[i]) return false; 284 | } 285 | 286 | return true; 287 | } 288 | 289 | bool operator != ( const Blob & k ) const 290 | { 291 | return !(*this == k); 292 | } 293 | 294 | //---------- 295 | // bitwise operations 296 | 297 | Blob operator ^ ( const Blob & k ) const 298 | { 299 | Blob t; 300 | 301 | for(size_t i = 0; i < sizeof(bytes); i++) 302 | { 303 | t.bytes[i] = bytes[i] ^ k.bytes[i]; 304 | } 305 | 306 | return t; 307 | } 308 | 309 | Blob & operator ^= ( const Blob & k ) 310 | { 311 | for(size_t i = 0; i < sizeof(bytes); i++) 312 | { 313 | bytes[i] ^= k.bytes[i]; 314 | } 315 | 316 | return *this; 317 | } 318 | 319 | int operator & ( int x ) 320 | { 321 | return (*(int*)bytes) & x; 322 | } 323 | 324 | Blob & operator &= ( const Blob & k ) 325 | { 326 | for(size_t i = 0; i < sizeof(bytes); i++) 327 | { 328 | bytes[i] &= k.bytes[i]; 329 | } 330 | } 331 | 332 | Blob operator << ( int c ) 333 | { 334 | Blob t = *this; 335 | 336 | lshift(&t.bytes[0],sizeof(bytes),c); 337 | 338 | return t; 339 | } 340 | 341 | Blob operator >> ( int c ) 342 | { 343 | Blob t = *this; 344 | 345 | rshift(&t.bytes[0],sizeof(bytes),c); 346 | 347 | return t; 348 | } 349 | 350 | 
Blob & operator <<= ( int c ) 351 | { 352 | lshift(&bytes[0],sizeof(bytes),c); 353 | 354 | return *this; 355 | } 356 | 357 | Blob & operator >>= ( int c ) 358 | { 359 | rshift(&bytes[0],sizeof(bytes),c); 360 | 361 | return *this; 362 | } 363 | 364 | //---------- 365 | 366 | private: 367 | 368 | uint8_t bytes[(_bits+7)/8]; 369 | }; 370 | 371 | typedef Blob<128> uint128_t; 372 | typedef Blob<256> uint256_t; 373 | 374 | //----------------------------------------------------------------------------- 375 | -------------------------------------------------------------------------------- /SMHasher/UHashTest.cpp: -------------------------------------------------------------------------------- 1 | #include "UMAC/umac.c" 2 | #include "UMAC/rijndael-alg-fst.c" 3 | 4 | char uhash_key[] = "123456789abcdef"; 5 | uhash_ctx_t ahc = uhash_alloc(uhash_key); 6 | 7 | void uhash32_test ( const void * key, int len, unsigned seed, void * out ) 8 | { 9 | uhash_reset (ahc); 10 | uhash_update (ahc, (char*)key, len); 11 | uhash_final (ahc, (char*)out); 12 | } 13 | -------------------------------------------------------------------------------- /SMHasher/UMAC/rijndael-alg-fst.h: -------------------------------------------------------------------------------- 1 | /** 2 | * rijndael-alg-fst.h 3 | * 4 | * @version 3.0 (December 2000) 5 | * 6 | * Optimised ANSI C code for the Rijndael cipher (now AES) 7 | * 8 | * @author Vincent Rijmen 9 | * @author Antoon Bosselaers 10 | * @author Paulo Barreto 11 | * 12 | * This code is hereby placed in the public domain. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 15 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 21 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 22 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 23 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 24 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | #ifndef __RIJNDAEL_ALG_FST_H 27 | #define __RIJNDAEL_ALG_FST_H 28 | 29 | #define MAXKC (256/32) 30 | #define MAXKB (256/8) 31 | #define MAXNR 14 32 | 33 | typedef unsigned char u8; 34 | typedef unsigned short u16; 35 | typedef unsigned int u32; 36 | 37 | int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits); 38 | int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits); 39 | void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]); 40 | void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]); 41 | 42 | #ifdef INTERMEDIATE_VALUE_KAT 43 | void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds); 44 | void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds); 45 | #endif /* INTERMEDIATE_VALUE_KAT */ 46 | 47 | #endif /* __RIJNDAEL_ALG_FST_H */ 48 | -------------------------------------------------------------------------------- /SMHasher/UMAC/umac.h: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------------- 2 | * 3 | * umac.h -- C Implementation UMAC Message Authentication 4 | * 5 | * Version 0.90 of draft-krovetz-umac-03.txt -- 2004 October 6 | * 7 | * For a full description of UMAC message authentication see the UMAC 8 | * 
world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac 9 | * Please report bugs and suggestions to the UMAC webpage. 10 | * 11 | * Copyright (c) 1999-2004 Ted Krovetz 12 | * 13 | * Permission to use, copy, modify, and distribute this software and 14 | * its documentation for any purpose and with or without fee, is hereby 15 | * granted provided that the above copyright notice appears in all copies 16 | * and in supporting documentation, and that the name of the copyright 17 | * holder not be used in advertising or publicity pertaining to 18 | * distribution of the software without specific, written prior permission. 19 | * 20 | * Comments should be directed to Ted Krovetz (tdk@acm.org) 21 | * 22 | * ---------------------------------------------------------------------- */ 23 | 24 | /* ////////////////////// IMPORTANT NOTES ///////////////////////////////// 25 | * 26 | * 1) This version does not work properly on messages larger than 16MB 27 | * 28 | * 2) If you set the switch to use SSE2, then all data must be 16-byte 29 | * aligned 30 | * 31 | * 3) When calling the function umac(), it is assumed that msg is in 32 | * a writable buffer of length divisible by 32 bytes. The message itself 33 | * does not have to fill the entire buffer, but bytes beyond msg may be 34 | * zeroed. 35 | * 36 | * 4) Two free AES implementations are supported by this implementation of 37 | * UMAC. Paulo Barreto's version is in the public domain and can be found 38 | * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for 39 | * "Barreto"). The only two files needed are rijndael-alg-fst.c and 40 | * rijndael-alg-fst.h. 41 | * Brian Gladman's version is distributed with GNU Public license 42 | * and can be found at http://fp.gladman.plus.com/AES/index.htm. It 43 | * includes a fast IA-32 assembly version. 
44 | * 45 | /////////////////////////////////////////////////////////////////////// */ 46 | 47 | 48 | #ifdef __cplusplus 49 | extern "C" { 50 | #endif 51 | 52 | typedef struct umac_ctx *umac_ctx_t; 53 | 54 | umac_ctx_t umac_new(char key[]); 55 | /* Dynamically allocate a umac_ctx struct, initialize variables, 56 | * generate subkeys from key. 57 | */ 58 | 59 | int umac_reset(umac_ctx_t ctx); 60 | /* Reset a umac_ctx to begin authenticating a new message */ 61 | 62 | int umac_update(umac_ctx_t ctx, char *input, long len); 63 | /* Incorporate len bytes pointed to by input into context ctx */ 64 | 65 | int umac_final(umac_ctx_t ctx, char tag[], char nonce[8]); 66 | /* Incorporate any pending data and the ctr value, and return tag. 67 | * This function returns error code if ctr < 0. 68 | */ 69 | 70 | int umac_delete(umac_ctx_t ctx); 71 | /* Deallocate the context structure */ 72 | 73 | int umac(umac_ctx_t ctx, char *input, 74 | long len, char tag[], 75 | char nonce[8]); 76 | /* All-in-one implementation of the functions Reset, Update and Final */ 77 | 78 | 79 | /* uhash.h */ 80 | 81 | 82 | typedef struct uhash_ctx *uhash_ctx_t; 83 | /* The uhash_ctx structure is defined by the implementation of the */ 84 | /* UHASH functions. */ 85 | 86 | uhash_ctx_t uhash_alloc(char key[16]); 87 | /* Dynamically allocate a uhash_ctx struct and generate subkeys using */ 88 | /* the kdf and kdf_key passed in. If kdf_key_len is 0 then RC6 is */ 89 | /* used to generate key with a fixed key. If kdf_key_len > 0 but kdf */ 90 | /* is NULL then the first 16 bytes pointed at by kdf_key is used as a */ 91 | /* key for an RC6 based KDF. 
*/ 92 | 93 | int uhash_free(uhash_ctx_t ctx); 94 | 95 | int uhash_set_params(uhash_ctx_t ctx, 96 | void *params); 97 | 98 | int uhash_reset(uhash_ctx_t ctx); 99 | 100 | int uhash_update(uhash_ctx_t ctx, 101 | char *input, 102 | long len); 103 | 104 | int uhash_final(uhash_ctx_t ctx, 105 | char ouput[]); 106 | 107 | int uhash(uhash_ctx_t ctx, 108 | char *input, 109 | long len, 110 | char output[]); 111 | 112 | #ifdef __cplusplus 113 | } 114 | #endif 115 | -------------------------------------------------------------------------------- /SMHasher/VHashTest.cpp: -------------------------------------------------------------------------------- 1 | #include "VMAC/vmac.c" 2 | 3 | unsigned char vhash_key1[] = "0123456789abcdef"; 4 | unsigned char vhash_key2[] = "fedcba9876543210"; 5 | 6 | void vhash64_test ( const void * key, int len, unsigned seed, void * out ) 7 | { 8 | static vmac_ctx_t ctx[1]; static int inited=0; 9 | if (!inited) 10 | inited=1, vmac_set_key(vhash_key1, ctx); 11 | 12 | *(uint64_t*) out = vhash((unsigned char*)key, len, NULL, ctx); 13 | } 14 | 15 | void vhash128_test ( const void * key, int len, unsigned seed, void * out ) 16 | { 17 | static vmac_ctx_t ctx[2]; static int inited=0; 18 | if (!inited) 19 | inited=1, vmac_set_key(vhash_key1, ctx), vmac_set_key(vhash_key2, ctx+1); 20 | 21 | ((uint64_t*)out)[0] = vhash((unsigned char*)key, len, NULL, ctx); 22 | ((uint64_t*)out)[1] = vhash((unsigned char*)key, len, NULL, ctx+1); 23 | } 24 | -------------------------------------------------------------------------------- /SMHasher/VMAC/vmac.h: -------------------------------------------------------------------------------- 1 | #ifndef HEADER_VMAC_H 2 | #define HEADER_VMAC_H 3 | 4 | /* -------------------------------------------------------------------------- 5 | * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. 6 | * This implementation is herby placed in the public domain. 7 | * The authors offers no warranty. Use at your own risk. 
8 | * Please send bug reports to the authors. 9 | * Last modified: 17 APR 08, 1700 PDT 10 | * ----------------------------------------------------------------------- */ 11 | 12 | /* -------------------------------------------------------------------------- 13 | * User definable settings. 14 | * ----------------------------------------------------------------------- */ 15 | #ifndef VMAC_TAG_LEN 16 | #define VMAC_TAG_LEN 64 /* Must be 64 or 128 - 64 sufficient for most */ 17 | #endif 18 | #ifndef VMAC_KEY_LEN 19 | #define VMAC_KEY_LEN 128 /* Must be 128, 192 or 256 */ 20 | #endif 21 | #ifndef VMAC_NHBYTES 22 | #define VMAC_NHBYTES 128 /* Must 2^i for any 3 < i < 13. Standard = 128 */ 23 | #endif 24 | #define VMAC_PREFER_BIG_ENDIAN 0 /* Prefer non-x86 */ 25 | 26 | #define VMAC_USE_OPENSSL 0 /* Set to non-zero to use OpenSSL's AES */ 27 | #define VMAC_CACHE_NONCES 1 /* Set to non-zero to cause caching */ 28 | /* of consecutive nonces on 64-bit tags */ 29 | 30 | #ifndef VMAC_RUN_TESTS 31 | #define VMAC_RUN_TESTS 0 /* Set to non-zero to check vectors and speed */ 32 | #endif 33 | #define VMAC_HZ (4600e6) /* Set to hz of host machine to get speed */ 34 | #ifndef VMAC_HASH_ONLY 35 | #define VMAC_HASH_ONLY 0 /* Set to non-zero to time hash only (not-mac) */ 36 | #endif 37 | /* Speeds of cpus I have access to 38 | #define hz (2400e6) glyme Core 2 "Conroe" 39 | #define hz (2000e6) jupiter G5 40 | #define hz (1592e6) titan 41 | #define hz (2793e6) athena/gaia 42 | #define hz (1250e6) isis G4 43 | #define hz (2160e6) imac Core 2 "Merom" 44 | #define hz (266e6) ppc/arm 45 | #define hz (400e6) mips 46 | */ 47 | 48 | /* -------------------------------------------------------------------------- 49 | * This implementation uses uint32_t and uint64_t as names for unsigned 32- 50 | * and 64-bit integer types. These are defined in C99 stdint.h. The 51 | * following may need adaptation if you are not running a C99 or 52 | * Microsoft C environment. 
53 | * ----------------------------------------------------------------------- */ 54 | #ifndef VMAC_USE_STDINT 55 | #define VMAC_USE_STDINT 1 /* Set to zero if system has no stdint.h */ 56 | #endif 57 | 58 | #if VMAC_USE_STDINT && !_MSC_VER /* Try stdint.h if non-Microsoft */ 59 | #ifdef __cplusplus 60 | #define __STDC_CONSTANT_MACROS 61 | #endif 62 | #include 63 | #ifndef UINT64_C 64 | #define UINT64_C(v) v ## ULL 65 | #endif 66 | #elif (_MSC_VER) /* Microsoft C does not have stdint.h */ 67 | typedef unsigned __int32 uint32_t; 68 | typedef unsigned __int64 uint64_t; 69 | #ifndef UINT64_C 70 | #define UINT64_C(v) v ## UI64 71 | #endif 72 | #else /* Guess sensibly - may need adaptation */ 73 | typedef unsigned int uint32_t; 74 | typedef unsigned long long uint64_t; 75 | #ifndef UINT64_C 76 | #define UINT64_C(v) v ## ULL 77 | #endif 78 | #endif 79 | 80 | /* -------------------------------------------------------------------------- 81 | * This implementation supports two free AES implementations: OpenSSL's and 82 | * Paulo Barreto's. To use OpenSSL's, you will need to include the OpenSSL 83 | * crypto library (eg, gcc -lcrypto foo.c). For Barreto's, you will need 84 | * to compile rijndael-alg-fst.c, last seen at http://www.iaik.tu-graz.ac.at/ 85 | * research/krypto/AES/old/~rijmen/rijndael/rijndael-fst-3.0.zip and 86 | * http://homes.esat.kuleuven.be/~rijmen/rijndael/rijndael-fst-3.0.zip. 87 | * To use a different implementation, use these definitions as a model. 
88 | * ----------------------------------------------------------------------- */ 89 | #if VMAC_USE_LIB_TOM_CRYPT 90 | 91 | #define LTC_NO_CIPHERS 92 | #define LTC_RIJNDAEL 93 | #define ENCRYPT_ONLY 94 | #include "crypt/crypt_argchk.c" 95 | #include "ciphers/aes/aes.c" 96 | 97 | typedef symmetric_key aes_int_key; 98 | 99 | #define aes_encryption(in,out,int_key) \ 100 | aes_enc_ecb_encrypt((unsigned char *)(in),(unsigned char *)(out),(int_key)) 101 | #define aes_key_setup(key,int_key) \ 102 | aes_enc_setup((key),VMAC_KEY_LEN/CHAR_BIT,0,(int_key)) 103 | 104 | #elif VMAC_USE_OPENSSL 105 | 106 | #include 107 | typedef AES_KEY aes_int_key; 108 | 109 | #define aes_encryption(in,out,int_key) \ 110 | AES_encrypt((unsigned char *)(in),(unsigned char *)(out),(int_key)) 111 | #define aes_key_setup(key,int_key) \ 112 | AES_set_encrypt_key((key),VMAC_KEY_LEN,(int_key)) 113 | 114 | #else 115 | 116 | #include "rijndael-alg-fst.h" 117 | typedef u32 aes_int_key[4*(VMAC_KEY_LEN/32+7)]; 118 | 119 | #define aes_encryption(in,out,int_key) \ 120 | rijndaelEncrypt((u32 *)(int_key), \ 121 | ((VMAC_KEY_LEN/32)+6), \ 122 | (u8 *)(in), (u8 *)(out)) 123 | #define aes_key_setup(user_key,int_key) \ 124 | rijndaelKeySetupEnc((u32 *)(int_key), \ 125 | (u8 *)(user_key), \ 126 | VMAC_KEY_LEN) 127 | #endif 128 | 129 | /* --------------------------------------------------------------------- */ 130 | 131 | typedef struct { 132 | uint64_t nhkey [(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; 133 | uint64_t polykey[2*VMAC_TAG_LEN/64]; 134 | uint64_t l3key [2*VMAC_TAG_LEN/64]; 135 | uint64_t polytmp[2*VMAC_TAG_LEN/64]; 136 | aes_int_key cipher_key; 137 | #if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES) 138 | uint64_t cached_nonce[2]; 139 | uint64_t cached_aes[2]; 140 | #endif 141 | int first_block_processed; 142 | } vmac_ctx_t; 143 | 144 | /* --------------------------------------------------------------------- */ 145 | #ifdef __cplusplus 146 | extern "C" { 147 | #endif 148 | /* 
-------------------------------------------------------------------------- 149 | * <<<<< USAGE NOTES >>>>> 150 | * 151 | * Given msg m (mbytes in length) and nonce buffer n 152 | * this function returns a tag as its output. The tag is returned as 153 | * a number. When VMAC_TAG_LEN == 64, the 'return'ed integer is the tag, 154 | * and *tagl is meaningless. When VMAC_TAG_LEN == 128 the tag is the 155 | * number y * 2^64 + *tagl where y is the function's return value. 156 | * If you want to consider tags to be strings, then you must do so with 157 | * an agreed upon endian orientation for interoperability, and convert 158 | * the results appropriately. VHASH hashes m without creating any tag. 159 | * Consecutive substrings forming a prefix of a message may be passed 160 | * to vhash_update, with vhash or vmac being called with the remainder 161 | * to produce the output. 162 | * 163 | * Requirements: 164 | * - The first bit of the nonce buffer n must be 0. An i-byte nonce is made 165 | * by setting the first 16-i bytes of n to zero and the final i bytes to the nonce. 166 | * - vhash_update MUST have mbytes be a positive multiple of VMAC_NHBYTES 167 | * 168 | * The following requirements were removed by the changes made by Bulat Ziganshin: 169 | * - On 32-bit architectures with SSE2 instructions, ctx and m MUST 170 | * begin on 16-byte memory boundaries. 171 | * - m MUST be your message followed by zeroes to the nearest 16-byte 172 | * boundary. If m is a length multiple of 16 bytes, then it is already 173 | * at a 16-byte boundary and needs no padding. mbytes should be your 174 | * message length without any padding.
175 | * ----------------------------------------------------------------------- */ 176 | 177 | #define vmac_update vhash_update 178 | /* Absorbs a message prefix; mbytes must be a positive multiple of VMAC_NHBYTES (see usage notes above). */ 179 | void vhash_update(unsigned char m[], 180 | unsigned int mbytes, 181 | vmac_ctx_t *ctx); 182 | /* Returns the tag of m under nonce n; with VMAC_TAG_LEN == 128 the tag is return_value * 2^64 + *tagl. */ 183 | uint64_t vmac(unsigned char m[], 184 | unsigned int mbytes, 185 | unsigned char n[16], 186 | uint64_t *tagl, 187 | vmac_ctx_t *ctx); 188 | /* Hashes m without creating a tag (no nonce is involved). */ 189 | uint64_t vhash(unsigned char m[], 190 | unsigned int mbytes, 191 | uint64_t *tagl, 192 | vmac_ctx_t *ctx); 193 | 194 | /* -------------------------------------------------------------------------- 195 | * When passed a VMAC_KEY_LEN bit user_key, this function initializes ctx. 196 | * ----------------------------------------------------------------------- */ 197 | 198 | void vmac_set_key(unsigned char user_key[], vmac_ctx_t *ctx); 199 | 200 | /* -------------------------------------------------------------------------- 201 | * This function aborts the current hash and resets ctx, ready for a new message. 202 | * ----------------------------------------------------------------------- */ 203 | 204 | void vhash_abort(vmac_ctx_t *ctx); 205 | 206 | /* --------------------------------------------------------------------- */ 207 | 208 | #ifdef __cplusplus 209 | } 210 | #endif 211 | 212 | #endif /* HEADER_AES_H */ 213 | -------------------------------------------------------------------------------- /SMHasher/compile.cmd: -------------------------------------------------------------------------------- 1 | g++ -O3 -funroll-loops -s -static -m64 -msse4 -IMurmurHash -IUMAC crc.cpp sha1.cpp MurmurHash/MurmurHash3.cpp SpookyHashTest.cpp xxHashTest.cpp Poly1305Test.cpp VHashTest.cpp UHashTest.cpp FarshTest.cpp Hashes.cpp AvalancheTest.cpp Bitslice.cpp Bitvec.cpp DifferentialTest.cpp KeysetTest.cpp main.cpp Platform.cpp Random.cpp SpeedTest.cpp Stats.cpp Types.cpp 2 | -------------------------------------------------------------------------------- /SMHasher/crc.cpp:
-------------------------------------------------------------------------------- 1 | #include "Platform.h" 2 | 3 | /* 4 | * This file is derived from crc32.c from the zlib-1.1.3 distribution 5 | * by Jean-loup Gailly and Mark Adler. 6 | */ 7 | 8 | /* crc32.c -- compute the CRC-32 of a data stream 9 | * Copyright (C) 1995-1998 Mark Adler 10 | * For conditions of distribution and use, see copyright notice in zlib.h 11 | */ 12 | 13 | 14 | /* ======================================================================== 15 | * Table of CRC-32's of all single-byte values (made by make_crc_table) 16 | */ 17 | static const uint32_t crc_table[256] = { 18 | 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, 19 | 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, 20 | 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, 21 | 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, 22 | 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, 23 | 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, 24 | 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, 25 | 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, 26 | 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, 27 | 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, 28 | 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, 29 | 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, 30 | 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, 31 | 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, 32 | 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, 33 | 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, 34 | 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, 35 | 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, 36 | 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, 37 | 0xfbd44c65L, 
0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, 38 | 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, 39 | 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, 40 | 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, 41 | 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, 42 | 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, 43 | 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, 44 | 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, 45 | 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, 46 | 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, 47 | 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, 48 | 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, 49 | 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, 50 | 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, 51 | 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, 52 | 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, 53 | 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, 54 | 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, 55 | 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, 56 | 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, 57 | 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, 58 | 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, 59 | 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, 60 | 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, 61 | 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, 62 | 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, 63 | 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, 64 | 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, 65 | 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 
0x30b5ffe9L, 66 | 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, 67 | 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, 68 | 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, 69 | 0x2d02ef8dL 70 | }; 71 | 72 | /* ========================================================================= */ 73 | 74 | #define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8); 75 | #define DO2(buf) DO1(buf); DO1(buf); 76 | #define DO4(buf) DO2(buf); DO2(buf); 77 | #define DO8(buf) DO4(buf); DO4(buf); 78 | 79 | /* ========================================================================= */ 80 | /* Computes the table-driven CRC-32 of key[0..len) starting from seed and stores the 32-bit result in *out; a non-positive len hashes no bytes. */ 81 | void crc32 ( const void * key, int len, uint32_t seed, void * out ) 82 | { 83 | const uint8_t * buf = (const uint8_t*)key; /* input is only read, keep it const */ 84 | uint32_t crc = seed ^ 0xffffffffL; 85 | 86 | while (len >= 8) 87 | { 88 | DO8(buf); 89 | len -= 8; 90 | } 91 | 92 | while (len-- > 0) /* "> 0" stops a negative len from running off the buffer */ 93 | { 94 | DO1(buf); 95 | } 96 | 97 | crc ^= 0xffffffffL; 98 | 99 | *(uint32_t*)out = crc; 100 | } 101 | -------------------------------------------------------------------------------- /SMHasher/poly1305/poly1305.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Public Domain poly1305 from Andrew Moon 3 | * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna 4 | */ 5 | 6 | /* $OpenBSD: poly1305.c,v 1.3 2013/12/19 22:57:13 djm Exp $ */ 7 | 8 | #include 9 | #include 10 | 11 | #include "poly1305.h" 12 | 13 | #define mul32x32_64(a,b) ((uint64_t)(a) * (b)) 14 | 15 | #define U8TO32_LE(p) \ 16 | (((uint32_t)((p)[0])) | \ 17 | ((uint32_t)((p)[1]) << 8) | \ 18 | ((uint32_t)((p)[2]) << 16) | \ 19 | ((uint32_t)((p)[3]) << 24)) 20 | 21 | #define U32TO8_LE(p, v) \ 22 | do { \ 23 | (p)[0] = (uint8_t)((v)); \ 24 | (p)[1] = (uint8_t)((v) >> 8); \ 25 | (p)[2] = (uint8_t)((v) >> 16); \ 26 | (p)[3] = (uint8_t)((v) >> 24); \ 27 | } while (0) 28 | 29 | void 30 | poly1305_auth(unsigned char out[POLY1305_TAGLEN], const unsigned char *m,
size_t inlen, const unsigned char key[POLY1305_KEYLEN]) { 31 | uint32_t t0,t1,t2,t3; 32 | uint32_t h0,h1,h2,h3,h4; 33 | uint32_t r0,r1,r2,r3,r4; 34 | uint32_t s1,s2,s3,s4; 35 | uint32_t b, nb; 36 | size_t j; 37 | uint64_t t[5]; 38 | uint64_t f0,f1,f2,f3; 39 | uint32_t g0,g1,g2,g3,g4; 40 | uint64_t c; 41 | unsigned char mp[16]; 42 | 43 | /* clamp key */ 44 | t0 = U8TO32_LE(key+0); 45 | t1 = U8TO32_LE(key+4); 46 | t2 = U8TO32_LE(key+8); 47 | t3 = U8TO32_LE(key+12); 48 | 49 | /* precompute multipliers */ 50 | r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; 51 | r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; 52 | r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; 53 | r3 = t2 & 0x3f03fff; t3 >>= 8; 54 | r4 = t3 & 0x00fffff; 55 | 56 | s1 = r1 * 5; 57 | s2 = r2 * 5; 58 | s3 = r3 * 5; 59 | s4 = r4 * 5; 60 | 61 | /* init state */ 62 | h0 = 0; 63 | h1 = 0; 64 | h2 = 0; 65 | h3 = 0; 66 | h4 = 0; 67 | 68 | /* full blocks */ 69 | if (inlen < 16) goto poly1305_donna_atmost15bytes; 70 | poly1305_donna_16bytes: 71 | m += 16; 72 | inlen -= 16; 73 | 74 | t0 = U8TO32_LE(m-16); 75 | t1 = U8TO32_LE(m-12); 76 | t2 = U8TO32_LE(m-8); 77 | t3 = U8TO32_LE(m-4); 78 | 79 | h0 += t0 & 0x3ffffff; 80 | h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; 81 | h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; 82 | h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; 83 | h4 += (t3 >> 8) | (1 << 24); 84 | 85 | 86 | poly1305_donna_mul: 87 | t[0] = mul32x32_64(h0,r0) + mul32x32_64(h1,s4) + mul32x32_64(h2,s3) + mul32x32_64(h3,s2) + mul32x32_64(h4,s1); 88 | t[1] = mul32x32_64(h0,r1) + mul32x32_64(h1,r0) + mul32x32_64(h2,s4) + mul32x32_64(h3,s3) + mul32x32_64(h4,s2); 89 | t[2] = mul32x32_64(h0,r2) + mul32x32_64(h1,r1) + mul32x32_64(h2,r0) + mul32x32_64(h3,s4) + mul32x32_64(h4,s3); 90 | t[3] = mul32x32_64(h0,r3) + mul32x32_64(h1,r2) + mul32x32_64(h2,r1) + mul32x32_64(h3,r0) + mul32x32_64(h4,s4); 91 | t[4] = mul32x32_64(h0,r4) + mul32x32_64(h1,r3) + mul32x32_64(h2,r2) + mul32x32_64(h3,r1) + 
mul32x32_64(h4,r0); 92 | 93 | h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >> 26); 94 | t[1] += c; h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26); 95 | t[2] += b; h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26); 96 | t[3] += b; h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26); 97 | t[4] += b; h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26); 98 | h0 += b * 5; 99 | 100 | if (inlen >= 16) goto poly1305_donna_16bytes; 101 | 102 | /* final bytes */ 103 | poly1305_donna_atmost15bytes: 104 | if (!inlen) goto poly1305_donna_finish; 105 | 106 | for (j = 0; j < inlen; j++) mp[j] = m[j]; 107 | mp[j++] = 1; 108 | for (; j < 16; j++) mp[j] = 0; 109 | inlen = 0; 110 | 111 | t0 = U8TO32_LE(mp+0); 112 | t1 = U8TO32_LE(mp+4); 113 | t2 = U8TO32_LE(mp+8); 114 | t3 = U8TO32_LE(mp+12); 115 | 116 | h0 += t0 & 0x3ffffff; 117 | h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; 118 | h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; 119 | h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; 120 | h4 += (t3 >> 8); 121 | 122 | goto poly1305_donna_mul; 123 | 124 | poly1305_donna_finish: 125 | b = h0 >> 26; h0 = h0 & 0x3ffffff; 126 | h1 += b; b = h1 >> 26; h1 = h1 & 0x3ffffff; 127 | h2 += b; b = h2 >> 26; h2 = h2 & 0x3ffffff; 128 | h3 += b; b = h3 >> 26; h3 = h3 & 0x3ffffff; 129 | h4 += b; b = h4 >> 26; h4 = h4 & 0x3ffffff; 130 | h0 += b * 5; b = h0 >> 26; h0 = h0 & 0x3ffffff; 131 | h1 += b; 132 | 133 | g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; 134 | g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; 135 | g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; 136 | g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; 137 | g4 = h4 + b - (1 << 26); 138 | 139 | b = (g4 >> 31) - 1; 140 | nb = ~b; 141 | h0 = (h0 & nb) | (g0 & b); 142 | h1 = (h1 & nb) | (g1 & b); 143 | h2 = (h2 & nb) | (g2 & b); 144 | h3 = (h3 & nb) | (g3 & b); 145 | h4 = (h4 & nb) | (g4 & b); 146 | 147 | f0 = ((h0 ) | (h1 << 26)) + (uint64_t)U8TO32_LE(&key[16]); 148 | f1 = ((h1 >> 6) | (h2 << 20)) 
+ (uint64_t)U8TO32_LE(&key[20]); 149 | f2 = ((h2 >> 12) | (h3 << 14)) + (uint64_t)U8TO32_LE(&key[24]); 150 | f3 = ((h3 >> 18) | (h4 << 8)) + (uint64_t)U8TO32_LE(&key[28]); 151 | 152 | U32TO8_LE(&out[ 0], f0); f1 += (f0 >> 32); 153 | U32TO8_LE(&out[ 4], f1); f2 += (f1 >> 32); 154 | U32TO8_LE(&out[ 8], f2); f3 += (f2 >> 32); 155 | U32TO8_LE(&out[12], f3); 156 | } 157 | -------------------------------------------------------------------------------- /SMHasher/poly1305/poly1305.h: -------------------------------------------------------------------------------- 1 | /* $OpenBSD: poly1305.h,v 1.4 2014/05/02 03:27:54 djm Exp $ */ 2 | 3 | /* 4 | * Public Domain poly1305 from Andrew Moon 5 | * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna 6 | */ 7 | 8 | #ifndef POLY1305_H 9 | #define POLY1305_H 10 | 11 | #define POLY1305_KEYLEN 32 12 | #define POLY1305_TAGLEN 16 13 | 14 | void poly1305_auth(unsigned char out[POLY1305_TAGLEN], const unsigned char *m, size_t inlen, const unsigned char key[POLY1305_KEYLEN]); 15 | 16 | #endif /* POLY1305_H */ 17 | -------------------------------------------------------------------------------- /SMHasher/sha1.cpp: -------------------------------------------------------------------------------- 1 | // Test 32/64/128-bit parts of SHA1 2 | 3 | #include "Hashes.h" 4 | #include "SHA1/sha1.cpp" 5 | // Hashes the 4 little-endian seed bytes followed by key[0..len) with SHA-1, then copies `count` 32-bit words of the digest, starting at word `start`, into out. 6 | void sha1 ( const void * key, int len, uint32_t seed, void * out, int start, int count ) 7 | { 8 | SHA1_CTX context; 9 | 10 | uint8_t digest[20], seed8[] = {(uint8_t)seed, (uint8_t)(seed>>8), (uint8_t)(seed>>16), (uint8_t)(seed>>24)}; // explicit casts: implicit narrowing in a braced initializer is ill-formed in C++11 11 | 12 | SHA1_Init(&context); 13 | SHA1_Update(&context, seed8, 4); 14 | SHA1_Update(&context, (uint8_t*)key, len); 15 | SHA1_Final(&context, digest); 16 | 17 | memcpy(out, &digest[start*4], count*4); 18 | } 19 | 20 | void sha1_32 ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,0,1);} 21 | void sha1_32a ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,1,1);} 22 | void
sha1_32b ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,2,1);} 23 | void sha1_32c ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,3,1);} 24 | void sha1_64 ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,0,2);} 25 | void sha1_64a ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,2,2);} 26 | void sha1_128 ( const void * key, int len, uint32_t seed, void * out ) {sha1(key,len,seed,out,0,4);} 27 | -------------------------------------------------------------------------------- /asm-listings/make-listings.cmd: -------------------------------------------------------------------------------- 1 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m32 -Wa,-adhlns=gcc-x86.lst 2 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m32 -msse2 -DSSE2 -Wa,-adhlns=gcc-x86-sse2.lst 3 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m32 -mavx2 -DAVX2 -Wa,-adhlns=gcc-x86-avx2.lst 4 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m64 -Wa,-adhlns=gcc-x64-nosimd.lst 5 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m64 -msse2 -DSSE2 -Wa,-adhlns=gcc-x64.lst 6 | gcc -O3 -funroll-loops -s -static -c ../farsh.c -m64 -mavx2 -DAVX2 -Wa,-adhlns=gcc-x64-avx2.lst 7 | -------------------------------------------------------------------------------- /benchmark/CpuID.h: -------------------------------------------------------------------------------- 1 | // Single-file micro-library implementing CPUID-based CPU feature test 2 | // as well as providing CPU name and highest SIMD version strings. 3 | // You can find usage examples at the end of file. 
4 | // (c) Bulat Ziganshin & Unknown author 5 | // Placed into public domain 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #if defined(_MSC_VER) 13 | # include 14 | #endif 15 | 16 | enum CPUIDInfoType 17 | { 18 | RequestLastID = 0, FeatureSupport = 1, NewestFeatureSupport = 7, 19 | RequestLastExtendedID = 0x80000000, BrandNameFirst = 0x80000002, BrandNameLast = 0x80000004 20 | }; 21 | // Raw CPUID register values (CPUInfo) overlaid with named bit-fields; the fields of each register must total exactly 32 bits to stay aligned with CPUInfo[]. 22 | struct CpuidFeatures 23 | { 24 | union 25 | { 26 | uint32_t CPUInfo[5*4]; 27 | struct 28 | { 29 | // FeatureSupport: EAX 30 | unsigned SteppingID :4;//4 0-3 31 | unsigned Model :4;//8 4-7 32 | unsigned Family :4;//12 8-11 33 | unsigned TypeItl :2;//14 12-13 34 | unsigned Reserved11 :2;//16 14-15 35 | unsigned ExtendedModel :4;//20 16-19 36 | unsigned ExtendedFamily :8;//28 20-27 37 | unsigned Reserved12 :4;//32 28-31 38 | // FeatureSupport: EBX 39 | unsigned BrandIndex :8;//8 0-7 40 | unsigned QwordCFLUSH :8;//16 8-15 41 | unsigned LogicProcCount :8;//24 16-23 42 | unsigned ApicID :8;//32 24-31 43 | // FeatureSupport: ECX 44 | unsigned SSE3 :1;//1 0 45 | unsigned PCLMULQDQ :1;//2 1 46 | unsigned DTES64 :1;//3 2 47 | unsigned MWAIT :1;//4 3 48 | unsigned CPLDebug :1;//5 4 49 | unsigned VMExt :1;//6 5 50 | unsigned SafeModeExt :1;//7 6 51 | unsigned IntelSpeedStep :1;//8 7 52 | unsigned ThermalMonitor :1;//9 8 53 | unsigned SupplSSE3 :1;//10 9 54 | unsigned L1CtxID :1;//11 10 55 | unsigned SDBG :1;//12 11 56 | unsigned FMA3 :1;//13 12 57 | unsigned CMPXCHG16B :1;//14 13 58 | unsigned xTPR :1;//15 14 59 | unsigned MSRDebug :1;//16 15 60 | unsigned Reserved32 :1;//17 16 61 | unsigned ProcContextID :1;//18 17 62 | unsigned DirectCacheAcc :1;//19 18 63 | unsigned SSE41 :1;//20 19 64 | unsigned SSE42 :1;//21 20 65 | unsigned x2APIC :1;//22 21 66 | unsigned MOVBE :1;//23 22 67 | unsigned POPCNT :1;//24 23 68 | unsigned TSC_DEADLINE :1;//25 24 69 | unsigned AES_NI :1;//26 25 70 | unsigned XSAVE :1;//27 26 71 | unsigned OSXSAVE :1;//28 27 72 | unsigned AVX :1;//29
28 73 | unsigned F16C :1;//30 29 74 | unsigned RDRND :1;//31 30 75 | unsigned HYPERVISOR :1;//32 31 76 | // FeatureSupport: EDX 77 | unsigned FPU :1;//1 0 78 | unsigned VME :1;//2 1 79 | unsigned DE :1;//3 2 80 | unsigned PSE :1;//4 3 81 | unsigned TSC :1;//5 4 82 | unsigned MSR :1;//6 5 83 | unsigned PAE :1;//7 6 84 | unsigned MCE :1;//8 7 85 | unsigned Cx8 :1;//9 8 86 | unsigned APIC :1;//10 9 87 | unsigned Reserved41 :1;//11 10 88 | unsigned SEP :1;//12 11 89 | unsigned MTTR :1;//13 12 90 | unsigned PGE :1;//14 13 91 | unsigned MCA :1;//15 14 92 | unsigned CMOV :1;//16 15 93 | unsigned PAT :1;//17 16 94 | unsigned PSE36 :1;//18 17 95 | unsigned PSN :1;//19 18 96 | unsigned CFLUSH :1;//20 19 97 | unsigned Reserved42 :1;//21 20 98 | unsigned DS :1;//22 21 99 | unsigned ACPI :1;//23 22 100 | unsigned MMX :1;//24 23 101 | unsigned FXSR :1;//25 24 102 | unsigned SSE :1;//26 25 103 | unsigned SSE2 :1;//27 26 104 | unsigned SS :1;//28 27 105 | unsigned HTT :1;//29 28 106 | unsigned TM :1;//30 29 107 | unsigned IA64 :1;//31 30 108 | unsigned PBE :1;//32 31 109 | 110 | // NewestFeatureSupport: EAX 111 | unsigned Reserved51 :32;//32 0-31 112 | // NewestFeatureSupport: EBX 113 | unsigned FSGSBASE :1;//1 0 114 | unsigned TSC_ADJUST :1;//2 1 115 | unsigned SGX :1;//3 2 116 | unsigned BMI1 :1;//4 3 117 | unsigned HLE :1;//5 4 118 | unsigned AVX2 :1;//6 5 119 | unsigned Reserved61 :1;//7 6 120 | unsigned SMEP :1;//8 7 121 | unsigned BMI2 :1;//9 8 122 | unsigned ERMS :1;//10 9 123 | unsigned INVPCID :1;//11 10 124 | unsigned RTM :1;//12 11 125 | unsigned PQM :1;//13 12 126 | unsigned FPU_CS_DS_depr :1;//14 13 127 | unsigned MPX :1;//15 14 128 | unsigned PQE :1;//16 15 129 | unsigned AVX512F :1;//17 16 130 | unsigned AVX512DQ :1;//18 17 131 | unsigned RDSEED :1;//19 18 132 | unsigned ADX :1;//20 19 133 | unsigned SMAP :1;//21 20 134 | unsigned AVX512IFMA :1;//22 21 135 | unsigned PCOMMIT :1;//23 22 136 | unsigned CLFLUSHOPT :1;//24 23 137 | unsigned CLWB :1;//25 24 138 | 
unsigned ProcessorTrace :1;//26 25 139 | unsigned AVX512PF :1;//27 26 140 | unsigned AVX512ER :1;//28 27 141 | unsigned AVX512CD :1;//29 28 142 | unsigned SHA :1;//30 29 143 | unsigned AVX512BW :1;//31 30 144 | unsigned AVX512VL :1;//32 31 145 | // NewestFeatureSupport: ECX 146 | unsigned PREFETCHWT1 :1;//1 0 147 | unsigned AVX512VBMI :1;//2 1 148 | unsigned Reserved71 :30;//3 2-31 149 | // NewestFeatureSupport: EDX 150 | unsigned Reserved81 :32;//32 0-31 151 | 152 | char IDString[3*4*4]; 153 | char HighestSupportedSimdString[48]; // round up the entire structure size to 128 bytes 154 | }; 155 | }; 156 | }; 157 | 158 | // Executes CPUID for leaf `eax` / sub-leaf `ecx` and stores EAX, EBX, ECX, EDX into abcd[0..3]. `static inline` so a definition exists in every translation unit when the header is compiled as C99 or later. 159 | static inline void run_cpuid (uint32_t eax /*function_id*/, uint32_t ecx /*subfunction_id*/, uint32_t* abcd /*results*/) 160 | { 161 | #if defined(_MSC_VER) 162 | __cpuidex((int*)abcd, (int)eax, (int)ecx); /* the intrinsic is declared with int parameters */ 163 | #else 164 | uint32_t ebx, edx; 165 | # if defined( __i386__ ) && defined ( __PIC__ ) 166 | /* in case of PIC under 32-bit EBX cannot be clobbered */ 167 | __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx), 168 | # else 169 | __asm__ ( "cpuid" : "=b" (ebx), /* EBX is output-only: "=b" avoids reading the uninitialized variable */ 170 | # endif 171 | "+a" (eax), "+c" (ecx), "=d" (edx) ); 172 | abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx; 173 | #endif 174 | } 175 | 176 | 177 | static inline void GetCpuidFeatures (struct CpuidFeatures *featureStruct) 178 | { 179 | uint32_t cpuInfo[4] = {0}; uint32_t i; 180 | memset (featureStruct, 0, sizeof(struct CpuidFeatures)); 181 | 182 | // Calling run_cpuid with 0 as the function_id argument 183 | // gets the number of the highest valid function ID.
184 | run_cpuid (RequestLastID, 0, cpuInfo); 185 | 186 | // Request bit fieds describing features supported by the CPU 187 | if (cpuInfo[0] >= FeatureSupport) 188 | run_cpuid (FeatureSupport, 0, featureStruct->CPUInfo); 189 | 190 | if (cpuInfo[0] >= NewestFeatureSupport) 191 | run_cpuid (NewestFeatureSupport, 0, featureStruct->CPUInfo + 4); 192 | 193 | // Compute HighestSupportedSimdString from bit fields 194 | strcpy (featureStruct->HighestSupportedSimdString, 195 | featureStruct->AVX512VBMI?"AVX-512 VBMI" : 196 | featureStruct->AVX512BW? "AVX-512 BW" : 197 | featureStruct->AVX512DQ? "AVX-512 DQ" : 198 | featureStruct->AVX512F? "AVX-512F" : 199 | featureStruct->AVX2? "AVX2" : 200 | featureStruct->AVX? "AVX" : 201 | featureStruct->AES_NI? "AES-NI" : 202 | featureStruct->SSE42? "SSE 4.2" : 203 | featureStruct->SSE41? "SSE 4.1" : 204 | featureStruct->SupplSSE3? "Supplemental SSE3" : 205 | featureStruct->SSE3? "SSE3" : 206 | featureStruct->SSE2? "SSE2" : 207 | featureStruct->SSE? "SSE" : 208 | featureStruct->MMX? "MMX" : "no MMX"); 209 | 210 | 211 | // Calling __cpuid with 0x80000000 as the function_id argument 212 | // gets the number of the highest valid extended ID. 
213 | run_cpuid (RequestLastExtendedID, 0, cpuInfo); 214 | 215 | // Interpret CPU brand string if reported 216 | if (cpuInfo[0] >= BrandNameLast) { 217 | for (i=BrandNameFirst; i<=BrandNameLast; i++) 218 | run_cpuid (i, 0, featureStruct->CPUInfo + 4*(i+2-BrandNameFirst)); 219 | } else { 220 | strcpy (featureStruct->IDString, "Ancient CPU"); 221 | } 222 | } 223 | 224 | 225 | #ifdef CPUID_MAIN 226 | // Compile with "gcc -x c CPUID.h -DCPUID_MAIN -s -static -oCpuID" 227 | #include 228 | int main() 229 | { 230 | struct CpuidFeatures features; 231 | GetCpuidFeatures(&features); 232 | 233 | // Display CPU name and highest supported SIMD level 234 | printf("%s: %s\n", features.IDString, features.HighestSupportedSimdString); 235 | 236 | // Another possible usage: 237 | // if (features.AVX2) run_AVX2_specific_code(); 238 | } 239 | #endif 240 | -------------------------------------------------------------------------------- /benchmark/compile-CpuID.cmd: -------------------------------------------------------------------------------- 1 | gcc -x c CpuID.h -DCPUID_MAIN -s -static -oCpuID 2 | -------------------------------------------------------------------------------- /benchmark/compile-other.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | set options=%* main.cpp 3 | set options_ms=-MP -Gy -GL -GR- -nologo %options% user32.lib shell32.lib ole32.lib advapi32.lib -link -LARGEADDRESSAWARE 4 | set options_ms_cl=-O2 -GL -Gy -EHsc %options_ms% 5 | set options_ms_icl=-w -O3 -Qipo -Qunroll64 %options_ms% 6 | set options_ms_x86=-MACHINE:x86 -SUBSYSTEM:CONSOLE,5.01 7 | set options_ms_x64=-MACHINE:x64 -SUBSYSTEM:CONSOLE,5.02 8 | set options_gcc4=-O3 -msse2 -funroll-loops -std=c++11 -s -static -lstdc++ %options% 9 | 10 | gcc -m32 %options_gcc4% -ofarsh32.exe 11 | gcc -m64 %options_gcc4% -ofarsh64.exe 12 | 13 | call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86 14 | cl -Fefarsh32m.exe -arch:SSE2 
%options_ms_cl% %options_ms_x86% 15 | 16 | call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64 17 | cl -Fefarsh64m.exe %options_ms_cl% %options_ms_x64% 18 | 19 | call "C:\Program Files (x86)\Intel\Composer XE 2013 SP1\bin\ipsxe-comp-vars.bat" ia32 20 | icl -Fefarsh32i.exe -arch:SSE2 %options_ms_icl% %options_ms_x86% 21 | iccpatch.exe farsh32i.exe >nul 22 | 23 | call "C:\Program Files (x86)\Intel\Composer XE 2013 SP1\bin\ipsxe-comp-vars.bat" intel64 24 | icl -Fefarsh64i.exe %options_ms_icl% %options_ms_x64% 25 | iccpatch.exe farsh64i.exe >nul 26 | 27 | del *.exe.bak *.obj *.res >nul 2>nul 28 | -------------------------------------------------------------------------------- /benchmark/compile.cmd: -------------------------------------------------------------------------------- 1 | gcc -O3 -funroll-loops -s -static -m32 -march=pentium3 main.cpp -oaligned-farsh-x86 -DFARSH_ALIGNED_INPUT 2 | gcc -O3 -funroll-loops -s -static -m32 -msse2 -DFARSH_SSE2 main.cpp -oaligned-farsh-x86-sse2 -DFARSH_ALIGNED_INPUT 3 | gcc -O3 -funroll-loops -s -static -m32 -mavx2 -DFARSH_AVX2 main.cpp -oaligned-farsh-x86-avx2 -DFARSH_ALIGNED_INPUT 4 | gcc -O3 -funroll-loops -s -static -m64 main.cpp -oaligned-farsh-x64-nosimd -DFARSH_ALIGNED_INPUT 5 | gcc -O3 -funroll-loops -s -static -m64 -msse2 -DFARSH_SSE2 main.cpp -oaligned-farsh-x64 -DFARSH_ALIGNED_INPUT 6 | gcc -O3 -funroll-loops -s -static -m64 -mavx2 -DFARSH_AVX2 main.cpp -oaligned-farsh-x64-avx2 -DFARSH_ALIGNED_INPUT 7 | gcc -O3 -funroll-loops -s -static -m32 -march=pentium3 main.cpp -ofarsh-x86 8 | gcc -O3 -funroll-loops -s -static -m32 -msse2 -DFARSH_SSE2 main.cpp -ofarsh-x86-sse2 9 | gcc -O3 -funroll-loops -s -static -m32 -mavx2 -DFARSH_AVX2 main.cpp -ofarsh-x86-avx2 10 | gcc -O3 -funroll-loops -s -static -m64 main.cpp -ofarsh-x64-nosimd 11 | gcc -O3 -funroll-loops -s -static -m64 -msse2 -DFARSH_SSE2 main.cpp -ofarsh-x64 12 | gcc -O3 -funroll-loops -s -static -m64 -mavx2 -DFARSH_AVX2 main.cpp 
-ofarsh-x64-avx2 13 | -------------------------------------------------------------------------------- /benchmark/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "timer.h" 4 | #if defined(FARSH_AVX2) || defined(FARSH_SSE2) 5 | #include "CpuID.h" 6 | #endif 7 | 8 | #include "../farsh.c" 9 | 10 | #if __GNUC__ 11 | #define ALIGN(n) __attribute__ ((aligned(n))) 12 | #elif _MSC_VER 13 | #define ALIGN(n) __declspec(align(n)) 14 | #else 15 | #define ALIGN(n) 16 | #endif 17 | 18 | int main (int argc, char **argv) 19 | { 20 | bool print_table = (argc > 1); // if any cmdline parameter was given 21 | bool x64 = (sizeof(void*) == 8); // check for 64-bit platform 22 | 23 | #ifdef FARSH_AVX2 24 | char simdext[] = "-avx2"; 25 | struct CpuidFeatures features; GetCpuidFeatures(&features); 26 | if (! features.AVX2) {if (!print_table) printf("AVX2 not found!\n"); return 1;} 27 | #elif defined(FARSH_SSE2) 28 | const char *simdext = x64? "":"-sse2"; 29 | struct CpuidFeatures features; GetCpuidFeatures(&features); 30 | if (! features.SSE2) {if (!print_table) printf("SSE2 not found!\n"); return 1;} 31 | #else 32 | const char *simdext = x64? "-nosimd":""; 33 | #endif 34 | 35 | #ifdef FARSH_ALIGNED_INPUT 36 | bool ALIGNED_INPUT = true; 37 | #else 38 | bool ALIGNED_INPUT = false; 39 | #endif 40 | 41 | // Choose the display format for results 42 | int format = argc==1? 0 : 43 | strcmp(argv[1],"1")==0? 1 : 44 | strcmp(argv[1],"2")==0? 2 : -1; 45 | if (format <= 0) { 46 | printf("FARSH 0.2 Benchmark. See https://github.com/Bulat-Ziganshin/FARSH\n" 47 | " Usage: farsh [1|2] - choose display format\n"); 48 | if (format < 0) return 3; 49 | } 50 | 51 | char progname[100]; 52 | sprintf (progname, "%sfarsh-%s%s", ALIGNED_INPUT? "aligned-":"", 53 | x64? 
"x64":"x86", 54 | simdext); 55 | 56 | // CHECK THE ZEROES HASHING 57 | const size_t ZEROES = 64*1024; 58 | ALIGN(64) static char zero[ZEROES] = {0}; 59 | for (int i=0; i<=ZEROES; i++) 60 | { 61 | //uint32_t h = farsh (zero, i); 62 | //printf("%5d %08x\n", i, h); 63 | //printf("%4d %08x %08x %08x %08x :: ", minbytes, (UINT)(h), (UINT)(h>>32), sum1, sum2); 64 | } 65 | 66 | 67 | // PREPARE TEST DATA. DATASIZE+FARSH_BASE_KEY_SIZE should be less than the L1 cache size, otherwise speed may be limited by memory reads 68 | const size_t DATASIZE = 12*1024; 69 | ALIGN(64) static char data_array[DATASIZE+1]; 70 | char *data = ALIGNED_INPUT? data_array : data_array + 1; 71 | for (int i=0; i> ((i%16)+8)); 73 | 74 | 75 | #ifndef FARSH_ALIGNED_INPUT 76 | // CHECK FOR POSSIBLE DATA ALIGNMENT PROBLEMS 77 | for (int i=0; i<=64; i++) 78 | { 79 | uint32_t h = farsh (data+i, DATASIZE+1-i, 0); 80 | if (h==42) break; // anti-optimization trick 81 | 82 | char out[32*4]; 83 | for (int j=1; j<=32; j++) 84 | farsh_n (data+i, DATASIZE+1-i, 0, j, 0, out); 85 | } 86 | #endif 87 | 88 | 89 | // BENCHMARK 90 | const uint64_t DATASET = uint64_t(100)<<30; 91 | if (format > 0) printf("%-24s |", progname); 92 | else printf("Hashing %d GiB:", int(DATASET>>30)); 93 | const int EXTRA_LOOPS = (100<<20) / DATASIZE; // These extra loops are required to enable the SIMD engine and switch CPU core to the maximum frequency 94 | Timer t; 95 | uint32_t h = 0; 96 | 97 | 98 | for (int i=0; i < EXTRA_LOOPS+DATASET/DATASIZE; i++) 99 | { 100 | if (i == EXTRA_LOOPS) 101 | t.Start(); 102 | 103 | h = farsh (data, DATASIZE, h); 104 | 105 | if (i == 0 && h != 0xd300ddd8) { // check hash correctness 106 | printf("\nWrong hash value: 0x%08X !!!\n", h); 107 | return 2; 108 | } 109 | } 110 | t.Stop(); double speed = DATASET / t.Elapsed(); 111 | if (print_table) printf("%8.3lf GB/s =%7.3lf GiB/s", speed/1e9, speed/(1<<30)); 112 | else printf(" %.3lf milliseconds =%7.3lf GB/s =%7.3lf GiB/s\n", t.Elapsed()*1000, speed/1e9, 
speed/(1<<30)); 113 | double t1 = t.Elapsed(); 114 | 115 | 116 | const uint32_t *keys = FARSH_KEYS; 117 | if (t.Elapsed() == 1e42) data++, keys++; // anti-optimization trick 118 | 119 | if (format==0) printf("Internal loop: "); 120 | t.Start(); 121 | for (int i=0; i < DATASET/FARSH_BASE_KEY_SIZE; i++) 122 | { 123 | uint64_t h = farsh_full_block ((uint32_t*)data, keys); 124 | if (h==42) data[0] = i; // anti-optimization trick 125 | } 126 | t.Stop(); speed = DATASET / t.Elapsed(); 127 | if (print_table) printf(" |%8.3lf GB/s =%7.3lf GiB/s", speed/1e9, speed/(1<<30)); 128 | else printf(" %.3lf milliseconds =%7.3lf GB/s =%7.3lf GiB/s\n", t.Elapsed()*1000, speed/1e9, speed/(1<<30)); 129 | 130 | t1 -= t.Elapsed(); 131 | speed = DATASET / t1; 132 | if (format==2) printf(" |%9.3lf GB/s =%8.3lf GiB/s", speed/1e9, speed/(1<<30)); 133 | else if (format==0) printf("External loop: %.3lf milliseconds = %.3lf GB/s = %.3lf GiB/s", t1*1000, speed/1e9, speed/(1<<30)); 134 | printf("\n"); 135 | 136 | return 0; 137 | } 138 | -------------------------------------------------------------------------------- /benchmark/runme.cmd: -------------------------------------------------------------------------------- 1 | for %%e in (*.exe) do @start /b /wait /realtime %%e 1 2 | -------------------------------------------------------------------------------- /benchmark/timer.h: -------------------------------------------------------------------------------- 1 | /* Measures the time required to execute XXX() as follows: 2 | 3 | Timer t; 4 | t.Start(); 5 | XXX(); 6 | t.Stop(); 7 | double seconds = t.Elapsed(); 8 | */ 9 | 10 | #pragma once 11 | 12 | 13 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 14 | 15 | #include <windows.h> 16 | 17 | struct Timer 18 | { 19 | Timer() 20 | { 21 | // Initialize the resolution of the timer 22 | if (!QueryPerformanceFrequency(&m_freq)) 23 | { 24 | printf("QueryPerformanceFrequency failed!\n"); 25 | } 26 | 27 | // Calculate the 
overhead of the timer in ticks 28 | QueryPerformanceCounter(&m_start); 29 | QueryPerformanceCounter(&m_stop); 30 | m_overhead = m_stop.QuadPart - m_start.QuadPart; 31 | } 32 | 33 | void Start() 34 | { 35 | QueryPerformanceCounter(&m_start); 36 | } 37 | 38 | void Stop() 39 | { 40 | QueryPerformanceCounter(&m_stop); 41 | } 42 | 43 | // Returns elapsed time in seconds 44 | double Elapsed() 45 | { 46 | return (m_stop.QuadPart - m_start.QuadPart - m_overhead) / double(m_freq.QuadPart); 47 | } 48 | 49 | private: 50 | 51 | LARGE_INTEGER m_start; 52 | LARGE_INTEGER m_stop; 53 | LARGE_INTEGER m_freq; 54 | LONGLONG m_overhead; 55 | }; 56 | 57 | 58 | #else // this should handle any Unix system 59 | 60 | #include <sys/time.h> 61 | 62 | struct Timer 63 | { 64 | Timer() 65 | { 66 | // Calculate the timer overhead 67 | overhead = 0; 68 | Start(); 69 | Stop(); 70 | overhead = Elapsed(); 71 | } 72 | 73 | void Start() 74 | { 75 | gettimeofday (&timerStart, NULL); 76 | } 77 | 78 | void Stop() 79 | { 80 | gettimeofday (&timerStop, NULL); 81 | } 82 | 83 | // Returns elapsed time in seconds 84 | double Elapsed() 85 | { 86 | struct timeval timerElapsed; 87 | timersub (&timerStop, &timerStart, &timerElapsed); 88 | return (timerElapsed.tv_sec + timerElapsed.tv_usec/1e6 - overhead); 89 | } 90 | 91 | private: 92 | 93 | struct timeval timerStart, timerStop; 94 | double overhead; 95 | }; 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /farsh.c: -------------------------------------------------------------------------------- 1 | #include "farsh.h" 2 | 3 | #include <stddef.h> /* for size_t */ 4 | #include <stdint.h> /* for uint32_t & uint64_t */ 5 | #include <stdlib.h> /* for abort() */ 6 | #include <string.h> /* for memcpy() */ 7 | 8 | #if __GNUC__ 9 | #include <x86intrin.h> 10 | #define ALIGN(n) __attribute__ ((aligned(n))) 11 | #elif _MSC_VER 12 | #include <intrin.h> 13 | #define ALIGN(n) __declspec(align(n)) 14 | #else 15 | #define ALIGN(n) 16 | #endif 17 | 18 | #define STRIPE FARSH_BASE_KEY_SIZE 19 | #define 
STRIPE_ELEMENTS (STRIPE/sizeof(uint32_t)) /* should be power of 2 due to use of 'x % STRIPE_ELEMENTS' below */ 20 | #define EXTRA_ELEMENTS (((FARSH_MAX_HASHES-1) * FARSH_EXTRA_KEY_SIZE) / sizeof(uint32_t)) 21 | 22 | ALIGN(64) static const uint32_t FARSH_KEYS [STRIPE_ELEMENTS + EXTRA_ELEMENTS] = { /* STRIPE bytes of key material plus extra keys for hashes up to 1024 bits long */ 23 | 0xb8fe6c39,0x23a44bbe,0x7c01812c,0xf721ad1c,0xded46de9,0x839097db,0x7240a4a4,0xb7b3671f,0xcb79e64e,0xccc0e578,0x825ad07d,0xccff7221,0xb8084674,0xf743248e,0xe03590e6,0x813a264c,0x3c2852bb,0x91c300cb,0x88d0658b,0x1b532ea3,0x71644897,0xa20df94e,0x3819ef46,0xa9deacd8,0xa8fa763f,0xe39c343f,0xf9dcbbc7,0xc70b4f1d,0x8a51e04b,0xcdb45931,0xc89f7ec9,0xd9787364,0x4f6a0752,0xa79b079c,0x8fc49499,0x8ec9b7a9,0x33c92249,0x4eb6404f,0xfb2afb4e,0xa4814255,0x2f0e1b98,0xace93b24,0x188850cd,0x6c5c74a7,0x66fa4404,0xeac5ac83,0x34d3ebc3,0xc581a0ff,0xfa1363eb,0x170ddd51,0xb7f0da49,0xd3165526,0x29d4689e,0x2b16be58,0x7d47a1fc,0x8ff8b8d1,0x7ad031ce,0x45cb3a8f,0x95160428,0xafd7fbca,0xbb4b407e,0x995274a4,0xeb9a2d93,0x3be78908,0xed475f6c,0x919cd8f2,0xd3861e5a,0x6e31390c,0xfe6a3a49,0xdcad0914,0x06508beb,0xa88399f3,0xb058112f,0xe8b0fa79,0x29b4da06,0xedc253fb,0xc3e96dad,0x6e372b83,0x4f78b153,0xfffa6e86,0x21beeeec,0x01caea02,0x1267e50d,0x11e6092f,0xe819d298,0x832f80dd,0x0c4e2477,0xbc7886eb,0x01506637,0x8ba89668,0x6d11e7a0,0xfc12fd15,0x86a54c19,0x593ce3dd,0xd2b13fe5,0x8e772b53,0xae4a60cc,0x647a3b1b,0x547786e0,0x3ec4378e,0x8d7acf89,0xca36f947,0x0e89d5ef,0xaada6a3c,0x6da4a109,0x9ac6e11c,0x686691ef,0xa357bd2b,0xd16f1b9a,0x38c70303,0x7d4622b3,0x2968fa8f,0x8ca5bcb9,0xfcd61005,0x228b5e96,0x2c9dcc19,0x57cf243c,0x3c53f9c1,0x0cc7952c,0x686de4f0,0x93a747b5,0x4e87a510,0x975e91ae,0x4c10b98e,0x8a7f068c,0x346b19ab,0x353ca625,0xf20a50e0,0xce9921f6,0xdf66e014,0x0a11ef4b,0x8bc84ddf,0x84d25d22,0xc823936d,0x94741ec3,0x88278a60,0xb8649331,0x7a707a10,0x7292cad6,0xa7c644c2,0xbd156bfa,0x646c9578,0xb7f4dfd5,0x9f8277a7,0x7013924e,0xad674cc3,0x2cae9d
05,0x912a9a22,0xf67c53fa,0x8d7e22a9,0x59ae372b,0x850199f3,0x63a2102c,0xd6ff1261,0x56738ee1,0xaa95145b,0xfdd12832,0x5b684deb,0x0784de94,0xaa62390e,0xbb7ccf19,0x0fefd572,0x565b41ca,0x2206d202,0x2d608479,0x4c0fcd3d,0xd36d3be3,0x155a9a65,0x10f9e732,0xac9b0f1e,0x1f72a03b,0xea9440ae,0x5b674b4f,0x31a827d1,0xecca954f,0x3d2cd61e,0x768d3da4,0x93745ac1,0x1d5d58cb,0x4b86f3b6,0x2aba923a,0x0e65814c,0x8ae063d9,0xcd6969b0,0x36641585,0x742af59d,0x613a1316,0x338ea471,0x47861af3,0x30479dc3,0x1270a481,0x08771069,0xe3c4f0d2,0x0229874c,0x5a8a3bc1,0xe30d9733,0xd05be5a2,0xe2af31ba,0x222049f9,0x9f923b6a,0x033f64ec,0xe528b62b,0x8201efbd,0x2107d877,0xd8312ef1,0xa5679f99,0x1730b51b,0x752616d2,0x05305909,0x0dca440b,0x2093cdd9,0x6409ab50,0xba5c8ecc,0x8d4708ea,0x429f0917,0xb762fab0,0x5161ea75,0x45eba0eb,0xb6f34b41,0x52047123,0xe4181523,0x8d74e90a,0x54fa401c,0xddda0cc7,0x63df182a,0xc6403ef6,0x348ec6e8,0xb9ff57f5,0xf652b8bd,0x0f86b0f3,0xfb3a088a,0x4dc71533,0x7b3617d2,0xa34e87eb,0xba2a9bdd,0xe3381306,0x14bad6bb,0xc96dc7c2,0x333b54b6,0x9be47cfa,0x1dcf9299,0xe7ea5f99,0xb38feacd,0xc3cfe2f7,0x5b87e822,0x39c5ab56,0x18f4a18f,0x2d484d9c,0x4163d519,0x79769e98,0xf58a67f0,0x40590c02,0x319671c0,0x266b133a,0xaf81b287,0x6a31f737, 24 | 
0xe3bc0197,0x55079913,0x9f72c696,0x363e00c8,0x53153947,0xebfd127f,0x00f60519,0x46a6b62a,0x93b83380,0x3fe29324,0xdfc67091,0x0f62386d,0xdc375e79,0x8fea3f3e,0xdf8463d0,0x3702fa7b,0x3954435e,0x87caa648,0xa9158bee,0x08f30c25,0x66b82936,0xe7fc3feb,0x183c5450,0xd7ef4345,0x798c7963,0xc02cf557,0x098553d1,0xfa4312aa,0xe29ef883,0x7caf128d,0x74b3a07d,0xc8efdf5b,0x8db23782,0x2c409f4a,0xdae469da,0x4d3e1b3f,0x2e7b9a58,0xc83e3753,0xcefd96a6,0x44ddb068,0x5faed141,0xdee7d0f1,0xc223dbb4,0x7bfbe104,0x114d6e1d,0x52039cd5,0x307c0a9c,0xa6289c12,0x20ee8b3e,0x03724b0b,0xba68ae4a,0x93c5f2a1,0x9af27bb2,0x480f0eba,0xc14c6bbe,0xe7331f87,0xf0104df4,0x22c05363,0xb7e6d08a,0x6f15c449,0x4b9ee2cd,0x6b2c78ae,0x25ed2673,0xb6256596,0x99ad4803,0x654f8f10,0xe89eca64,0xd9a506df,0x530dc5fa,0xfe75be5c,0xa543833d,0xf739fd45,0x1605b488,0xe50f614a,0xe930df83,0x4540195d,0xf2da0f32,0x6b04f79c,0xe3c73c99,0xb3a5265c,0x5a1be07d,0xbda13d2a,0xeddc281c,0xe9d9a39a,0xde9beff1,0x573c1747,0x40be5b3e,0x3756e968,0x968077b6,0x6525a28f,0x747d0735,0x8a0ec11d,0x49c03af5,0xf3def45b,0xc3c9214d,0x9ea2e76d,0xfad3a715,0xcaa7ad89,0xde828e4c,0xa5769bd5,0x467cdb5a,0xd5f2cacb,0x68ebd182,0x8d40341a,0x21556887,0x000a5f6f,0x5ad8a473,0xafe7e886,0x98997d39,0x945ad218,0x46be0c93,0x93a5bd3a,0x3ffa4a8c,0xd834d936,0x2f022a2a,0x20791c6b,0x5db51516,0x8defeed2,0x9dee28a5,0x5188eba7,0xab4f8c67,0x48ceac96,0x2a11e16f,0xc1593b6d 25 | }; 26 | 27 | /* Internal: hash exactly STRIPE bytes */ 28 | static uint64_t farsh_full_block (const uint32_t *data, const uint32_t *key) 29 | { 30 | #ifdef FARSH_AVX2 31 | __m256i sum = _mm256_setzero_si256(); __m128i sum128; int i; 32 | const __m256i *xdata = (const __m256i *) data; 33 | const __m256i *xkey = (const __m256i *) key; 34 | 35 | for (i=0; i < STRIPE/sizeof(__m256i); i++) 36 | { 37 | __m256i d = _mm256_loadu_si256 (xdata+i); 38 | __m256i k = _mm256_loadu_si256 (xkey+i); 39 | __m256i dk = _mm256_add_epi32(d,k); // uint32 dk[8] = {d0+k0, d1+k1 .. 
d7+k7} 40 | __m256i res = _mm256_mul_epu32 (dk, _mm256_shuffle_epi32 (dk,0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, dk4*dk5, dk6*dk7} 41 | sum = _mm256_add_epi64(sum,res); 42 | } 43 | sum = _mm256_add_epi64 (sum, _mm256_shuffle_epi32(sum,3*4+2)); // return sum of four 64-bit values in the sum 44 | sum128 = _mm_add_epi64 (_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum,1)); 45 | return *(uint64_t*) &sum128; 46 | #elif defined(FARSH_SSE2) 47 | __m128i sum = _mm_setzero_si128(); int i; 48 | const __m128i *xdata = (const __m128i *) data; 49 | const __m128i *xkey = (const __m128i *) key; 50 | 51 | for (i=0; i < STRIPE/sizeof(__m128i); i++) 52 | { 53 | #ifdef FARSH_ALIGNED_INPUT 54 | __m128i d = _mm_load_si128 (xdata+i); 55 | #else 56 | __m128i d = _mm_loadu_si128 (xdata+i); 57 | #endif 58 | __m128i k = _mm_load_si128 (xkey+i); 59 | __m128i dk = _mm_add_epi32(d,k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} 60 | __m128i res = _mm_mul_epu32 (dk, _mm_shuffle_epi32 (dk,0x31)); // uint64 res[2] = {dk0*dk1,dk2*dk3} 61 | sum = _mm_add_epi64(sum,res); 62 | } 63 | sum = _mm_add_epi64 (sum, _mm_shuffle_epi32(sum,3*4+2)); // return sum of two 64-bit values in the sum 64 | return *(uint64_t*) &sum; 65 | #else 66 | uint64_t sum = 0; int i; 67 | for (i=0; i < STRIPE_ELEMENTS; i+=2) 68 | sum += (data[i] + key[i]) * (uint64_t)(data[i+1] + key[i+1]); 69 | return sum; 70 | #endif 71 | } 72 | 73 | /* Internal: hash less than STRIPE bytes, with careful handling of partial uint32_t pair at the end of buffer */ 74 | static uint64_t farsh_partial_block (const uint32_t *data, size_t bytes, const uint32_t *key) 75 | { 76 | uint64_t sum = 0; int i; 77 | size_t elements = (bytes/sizeof(uint32_t)) & (~1); 78 | 79 | uint32_t extra_data[2] = {0}; 80 | size_t extra_bytes = bytes - elements*sizeof(uint32_t); 81 | memcpy (extra_data, data+elements, extra_bytes); 82 | 83 | for (i=0; i < elements; i+=2) 84 | sum += (data[i] + key[i]) * (uint64_t)(data[i+1] + key[i+1]); 85 | if (extra_bytes) 
86 | sum += (extra_data[0] + key[i]) * (uint64_t)(extra_data[1] + key[i+1]); 87 | return sum; 88 | } 89 | 90 | /* ////////////////////////////////////////////////////////////////////////// */ 91 | /* Hash mixing code, including all constants, was borrowed from xxHash64 */ 92 | 93 | /* Internal: fold the hash `h` of the current block into the running hashsum `sum` and return the updated sum */ 94 | static uint64_t farsh_combine (uint64_t sum, uint64_t h) 95 | { 96 | uint64_t PRIME64_1 = 11400714785074694791ULL; 97 | uint64_t PRIME64_2 = 14029467366897019727ULL; 98 | uint64_t PRIME64_4 = 9650029242287828579ULL; 99 | h *= PRIME64_2; 100 | h += h >> 31; 101 | h *= PRIME64_1; 102 | sum ^= h; 103 | sum = (sum+(sum>>27)) * PRIME64_1 + PRIME64_4; 104 | return sum; 105 | } 106 | 107 | /* Internal: mix the 64-bit running hashsum and fold it into the final 32-bit hash value */ 108 | static uint32_t farsh_final (uint64_t sum) 109 | { 110 | uint64_t PRIME64_2 = 14029467366897019727ULL; 111 | uint64_t PRIME64_3 = 1609587929392839161ULL; 112 | sum ^= sum >> 33; 113 | sum *= PRIME64_2; 114 | sum ^= sum >> 29; 115 | sum *= PRIME64_3; 116 | return (uint32_t)sum ^ (uint32_t)(sum >> 32); 117 | } 118 | /* End of hash mixing code borrowed from xxHash64 */ 119 | /* ////////////////////////////////////////////////////////////////////////// */ 120 | 121 | 122 | /* Public API functions documented in farsh.h */ 123 | 124 | uint32_t farsh_keyed (const void *data, size_t bytes, const void *key, uint64_t seed) 125 | { 126 | uint64_t sum = seed; 127 | const char *ptr = (const char*) data; 128 | const uint32_t *key_ptr = (const uint32_t*) key; 129 | while (bytes >= STRIPE) 130 | { 131 | size_t chunk = STRIPE; 132 | uint64_t h = farsh_full_block ((const uint32_t*)ptr, key_ptr); 133 | sum = farsh_combine (sum, h); 134 | ptr += chunk; bytes -= chunk; 135 | } 136 | if (bytes) 137 | { 138 | size_t chunk = bytes; 139 | uint64_t h = farsh_partial_block ((const uint32_t*)ptr, chunk, key_ptr); 140 | sum = farsh_combine (sum, h); 141 | ptr += chunk; bytes -= chunk; 142 | } 143 | 
return farsh_final(sum) ^ key_ptr[bytes%STRIPE_ELEMENTS]; /* meant to ensure that zeroes at the end of data affect the hash value. NOTE(review): `bytes` is always 0 at this point (both branches above subtract the consumed chunk), so this always XORs key_ptr[0]; switching to the original length would change existing hash values - confirm before changing */ 144 | } 145 | 146 | void farsh_keyed_n (const void *data, size_t bytes, const void *key, int n, uint64_t seed, void *hash) 147 | { 148 | int i; uint32_t *hash_ptr = (uint32_t*)hash; 149 | for (i=0; i < n; i++) 150 | hash_ptr[i] = farsh_keyed (data, bytes, (const char*)key + i*FARSH_EXTRA_KEY_SIZE, seed); 151 | } 152 | 153 | void farsh_n (const void *data, size_t bytes, int k, int n, uint64_t seed, void *hash) 154 | { 155 | if (k < 0 || n > FARSH_MAX_HASHES - k) abort(); /* FARSH_KEYS contains only material for the hashes 0..FARSH_MAX_HASHES-1; a negative k would read before the table, and the check is written as n > MAX-k so that k+n cannot overflow */ 156 | farsh_keyed_n (data, bytes, (const char*)FARSH_KEYS + k*FARSH_EXTRA_KEY_SIZE, n, seed, hash); 157 | } 158 | 159 | uint32_t farsh (const void *data, size_t bytes, uint64_t seed) 160 | { 161 | return farsh_keyed (data, bytes, FARSH_KEYS, seed); 162 | } 163 | 164 | #undef EXTRA_ELEMENTS 165 | #undef STRIPE 166 | #undef STRIPE_ELEMENTS 167 | #undef ALIGN 168 | -------------------------------------------------------------------------------- /farsh.h: -------------------------------------------------------------------------------- 1 | #include <stddef.h> /* for size_t */ 2 | #include <stdint.h> /* for uint32_t & uint64_t */ 3 | 4 | /* Return 32-bit hash of the buffer */ 5 | uint32_t farsh (const void *data, size_t bytes, uint64_t seed); 6 | 7 | /* Compute `n` 32-bit hashes starting with the hash number `k`, storing results to the `hash` buffer. 8 | It's `n` times slower than computation of single 32-bit hash. 9 | Hash computed by the `farsh` function has number 0. The function aborts if `k < 0` or `k+n > 32`. */ 10 | void farsh_n (const void *data, size_t bytes, int k, int n, uint64_t seed, void *hash); 11 | 12 | /* Compute 32-bit hash using `key`, which should be 1024 bytes long and aligned to a 16-byte boundary. 
*/ 13 | uint32_t farsh_keyed (const void *data, size_t bytes, const void *key, uint64_t seed); 14 | 15 | /* Compute `n` 32-bit hashes using `key`, storing results to the `hash` buffer. 16 | `key` should be `1024+16*(n-1)` bytes long and aligned to a 16-byte boundary. */ 17 | void farsh_keyed_n (const void *data, size_t bytes, const void *key, int n, uint64_t seed, void *hash); 18 | 19 | /* Hash functions accept a 64-bit `seed` that can be used to "personalize" the hash value. Use seed==0 if you don't need that feature. 20 | Seeding may have lower quality than in xxHash & co. since the seed value is mixed with block hashes rather than with raw data. */ 21 | 22 | /* Symbolic names for the above-mentioned constants */ 23 | #define FARSH_MAX_HASHES 32 /* number of 32-bit hashes supported by the built-in key */ 24 | #define FARSH_BASE_KEY_SIZE 1024 /* size of user-supplied key required to compute 32-bit hash with index 0 */ 25 | #define FARSH_EXTRA_KEY_SIZE 16 /* extra bytes required to compute 32-bit hash with every next index */ 26 | #define FARSH_BASE_KEY_ALIGNMENT 16 /* user-supplied key should be aligned to this size, otherwise SSE2 code may fail. For maximum speed, it's recommended to align the key to 64 bytes. */ 27 | --------------------------------------------------------------------------------