├── Handle.cpp ├── Handle.h ├── README.txt ├── ReviewOfInterfaces.txt ├── SpookyV2.cpp ├── SpookyV2.h ├── X.h ├── city.cc ├── city.h ├── city_hash.h ├── config.h ├── dates.cpp ├── debug_hash.h ├── endian.h ├── example.cpp ├── fnv1a.h ├── hash_adaptors.h ├── hash_append.h ├── hash_functors.cpp ├── hash_functors.h ├── hash_test.cpp ├── hash_test.h ├── jenkins1.h ├── m16.cpp ├── murmur2A.h ├── n3876.h ├── pairii.cpp ├── requirements.txt ├── sha2.c ├── sha2.h ├── sha256.h ├── sherlock.cpp ├── sherlock.txt ├── siphash.cpp ├── siphash.h ├── sizes.cpp ├── spooky.h ├── test.cpp ├── test2.cpp ├── test3.cpp ├── test4.cpp ├── xx_hash.h ├── xxhash.c └── xxhash.h /Handle.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------ Handle.cpp ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #include "hash_append.h" 13 | #include "Handle.h" 14 | #include <unordered_set> 15 | 16 | // namespace acme is used to demonstrate example code. It is not proposed. 
17 | 18 | namespace acme 19 | { 20 | 21 | class Handle::CheshireCat 22 | { 23 | std::unordered_set data_; 24 | public: 25 | CheshireCat(int data1, int data2) 26 | { 27 | data_.insert(data1); 28 | data_.insert(data2); 29 | } 30 | 31 | friend 32 | void 33 | hash_append(Handle::type_erased_hasher& h, CheshireCat const& c); 34 | }; 35 | 36 | void 37 | hash_append(Handle::type_erased_hasher& h, Handle::CheshireCat const& c) 38 | { 39 | using xstd::hash_append; 40 | hash_append(h, c.data_); 41 | } 42 | 43 | Handle::Handle() 44 | : smile(new CheshireCat(1, 2)) 45 | { 46 | } 47 | 48 | Handle::Handle(int data1, int data2) 49 | : smile(new CheshireCat(data1, data2)) 50 | { 51 | } 52 | 53 | Handle::~Handle() 54 | { 55 | delete smile; 56 | } 57 | 58 | } // acme 59 | -------------------------------------------------------------------------------- /Handle.h: -------------------------------------------------------------------------------- 1 | //------------------------------- Handle.h ------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef HANDLE_H 13 | #define HANDLE_H 14 | 15 | #include "hash_adaptors.h" 16 | 17 | // namespace acme is used to demonstrate example code. It is not proposed. 18 | 19 | namespace acme 20 | { 21 | 22 | class Handle 23 | { 24 | struct CheshireCat; // Not defined here 25 | CheshireCat* smile; // Handle 26 | 27 | public: 28 | Handle(); // Default Constructor 29 | ~Handle(); // Destructor 30 | Handle(Handle const&) = delete; 31 | Handle& operator=(Handle const&) = delete; 32 | Handle(int data1, int data2); 33 | // Other operations... 
34 | 35 | // Hash support 36 | using type_erased_hasher = acme::type_erased_hasher; 37 | 38 | friend 39 | void 40 | hash_append(type_erased_hasher&, CheshireCat const&); 41 | 42 | template 43 | friend 44 | void 45 | hash_append(Hasher& h, Handle const& x) 46 | { 47 | using xstd::hash_append; 48 | if (x.smile == nullptr) 49 | hash_append(h, nullptr); 50 | else 51 | { 52 | type_erased_hasher temp(std::move(h)); 53 | hash_append(temp, *x.smile); 54 | h = std::move(*temp.target()); 55 | } 56 | } 57 | }; 58 | 59 | } // acme 60 | 61 | #endif // HANDLE_H 62 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | This is a collection of software used to provide an example 2 | implementation of the proposal currently at: 3 | 4 | http://htmlpreview.github.io/?https://github.com/HowardHinnant/papers/blob/master/hashing.html 5 | 6 | And to demonstrate example software surrounding / using this proposal. 7 | 8 | Though there are a lot of files here, only one file contains an implementation 9 | of proposed software: 10 | 11 | hash_append.h 12 | 13 | Everything in namespace xstd, except for those things in xstd::detail, is 14 | proposed. Nothing else is. The only file containing anything in namespace 15 | xstd is hash_append.h. 16 | 17 | Currently hash_append.h is missing hash_append overloads for many std::containers 18 | and other objects. They should be added. 19 | -------------------------------------------------------------------------------- /ReviewOfInterfaces.txt: -------------------------------------------------------------------------------- 1 | Review of interfaces: 2 | 3 | Z. Programmers who want to hash things so they can put them into unordered 4 | containers: 5 | 6 | std::unordered_set> set; 7 | 8 | Y. Programmers who want to seed their hash algorithms to harden them against 9 | attacks. 
10 | 11 | std::unordered_set> set; 12 | std::unordered_set> set{nb, randomly_seeded_hash{seed}}; 13 | 14 | A. Authors of hashable types X - The author of hash_append(h, const X&). 15 | 16 | For each type X a programmer authors, and wants to hash, either: 17 | 18 | 1. 19 | 20 | namespace std { 21 | template <> struct is_uniquely_represented 22 | : public true_type {}; // or whatever conditional 23 | } 24 | 25 | and/or 2. 26 | 27 | template 28 | void 29 | hash_append(HashAlgorithm& h, const X& x) 30 | { 31 | using std::hash_append; 32 | hash_append(h, x.m1, x.m2, x.m3); 33 | } 34 | 35 | B. Authors of hash functors such as randomly_seeded_hash. 36 | 37 | // A hasher that can be seeded, or defaults to a random seed 38 | template 39 | class randomly_seeded_hash 40 | { 41 | private: 42 | static std::mutex mut_s; 43 | static std::mt19937_64 rand_s; 44 | 45 | std::size_t seed0_; 46 | std::size_t seed1_; 47 | public: 48 | using result_type = typename Hasher::result_type; 49 | 50 | randomly_seeded_hash() 51 | { 52 | std::lock_guard _(mut_s); 53 | seed0_ = rand_s(); 54 | seed1_ = rand_s(); 55 | } 56 | 57 | explicit randomly_seeded_hash(std::size_t seed0, std::size_t seed1 = 0) 58 | : seed0_(seed0) 59 | , seed1_(seed1) 60 | {} 61 | 62 | template 63 | result_type 64 | operator()(T const& t) const noexcept 65 | { 66 | Hasher h(seed0_, seed1_); 67 | using std::hash_append; 68 | hash_append(h, t); 69 | return static_cast(h); 70 | } 71 | }; 72 | 73 | C. Authors of hash algorithm adaptors. 74 | 75 | class MyHashAlgorithm 76 | { 77 | public: 78 | static constexpr std::endian endian = std::endian::native; 79 | 80 | using result_type = std::size_t; 81 | 82 | MyHashAlgorithm() = default; 83 | explicit MyHashAlgorithm(std::uint64_t k0, std::uint64_t k1 = 0) noexcept; 84 | 85 | void 86 | operator()(void const* key, std::size_t len) noexcept; 87 | 88 | explicit 89 | operator std::size_t() noexcept; 90 | }; 91 | 92 | D. The committee. 
They will reject an overly complicated solution (and have done so in the past). 93 | 94 | Everything the vendors have to implement, plus concepts for HashAlgorithm, 95 | is_uniquely_represented, Hasher (i.e. for uhash replacements). 96 | 97 | E. The vendors. They will reject an overly complicated solution (and have done so in the past). 98 | 99 | namespace std 100 | { 101 | 102 | enum class endian 103 | { 104 | native = , 105 | little = , 106 | big = 107 | }; 108 | 109 | template struct is_uniquely_represented; 110 | Specializations for all hashable scalars, pair, tuple, array, c-arrays 111 | and cv-qualified types. 112 | 113 | template 114 | struct is_contiguously_hashable 115 | : public integral_constant{} && 116 | (sizeof(T) == 1 || 117 | HashAlgorithm::endian == endian::native)> 118 | {}; 119 | 120 | template 121 | struct is_contiguously_hashable 122 | : public std::integral_constant{} && 123 | (sizeof(T) == 1 || 124 | HashAlgorithm::endian == endian::native)> 125 | {}; 126 | 127 | template 128 | inline 129 | std::enable_if_t 130 | < 131 | is_contiguously_hashable{} 132 | > 133 | hash_append(Hasher& h, T const& t) noexcept 134 | { 135 | h(std::addressof(t), sizeof(t)); 136 | } 137 | 138 | template > 139 | void 140 | hash_append(HashAlgorithm& h, const T& t); 141 | for all hashable scalar types for which 142 | is_contiguously_hashable{} is false. 143 | Also for nullptr_t, c-arrays, type_info, error_code, pair, tuple, 144 | bitset, unique_ptr, shared_ptr, duration, time_point, type_index, 145 | basic_string, array, deque, forward_list, vector, vector, map, 146 | multimap, set, multiset, complex, valarray, thread::id, and a variadic 147 | version. 
148 | 149 | template > 150 | struct uhash 151 | { 152 | using result_type = typename HashAlgorithm::result_type; 153 | 154 | template 155 | result_type 156 | operator()(T const& t) const noexcept 157 | { 158 | HashAlgorithm h; 159 | hash_append(h, t); 160 | return static_cast(h); 161 | } 162 | }; 163 | 164 | The unspecified default HashAlgorithm. 165 | 166 | } // std 167 | -------------------------------------------------------------------------------- /SpookyV2.cpp: -------------------------------------------------------------------------------- 1 | // Spooky Hash 2 | // A 128-bit noncryptographic hash, for checksums and table lookup 3 | // By Bob Jenkins. Public domain. 4 | // Oct 31 2010: published framework, disclaimer ShortHash isn't right 5 | // Nov 7 2010: disabled ShortHash 6 | // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again 7 | // April 10 2012: buffer overflow on platforms without unaligned reads 8 | // July 12 2012: was passing out variables in final to in/out in short 9 | // July 30 2012: I reintroduced the buffer overflow 10 | // August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash 11 | 12 | #include 13 | #include "SpookyV2.h" 14 | 15 | #define ALLOW_UNALIGNED_READS 1 16 | 17 | // 18 | // short hash ... it could be used on any message, 19 | // but it's used by Spooky just for short messages. 
20 | // 21 | void SpookyHash::Short( 22 | const void *message, 23 | size_t length, 24 | uint64 *hash1, 25 | uint64 *hash2) 26 | { 27 | uint64 buf[2*sc_numVars]; 28 | union 29 | { 30 | const uint8 *p8; 31 | uint32 *p32; 32 | uint64 *p64; 33 | size_t i; 34 | } u; 35 | 36 | u.p8 = (const uint8 *)message; 37 | 38 | if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) 39 | { 40 | memcpy(buf, message, length); 41 | u.p64 = buf; 42 | } 43 | 44 | size_t remainder = length%32; 45 | uint64 a=*hash1; 46 | uint64 b=*hash2; 47 | uint64 c=sc_const; 48 | uint64 d=sc_const; 49 | 50 | if (length > 15) 51 | { 52 | const uint64 *end = u.p64 + (length/32)*4; 53 | 54 | // handle all complete sets of 32 bytes 55 | for (; u.p64 < end; u.p64 += 4) 56 | { 57 | c += u.p64[0]; 58 | d += u.p64[1]; 59 | ShortMix(a,b,c,d); 60 | a += u.p64[2]; 61 | b += u.p64[3]; 62 | } 63 | 64 | //Handle the case of 16+ remaining bytes. 65 | if (remainder >= 16) 66 | { 67 | c += u.p64[0]; 68 | d += u.p64[1]; 69 | ShortMix(a,b,c,d); 70 | u.p64 += 2; 71 | remainder -= 16; 72 | } 73 | } 74 | 75 | // Handle the last 0..15 bytes, and its length 76 | d += ((uint64)length) << 56; 77 | switch (remainder) 78 | { 79 | case 15: 80 | d += ((uint64)u.p8[14]) << 48; 81 | case 14: 82 | d += ((uint64)u.p8[13]) << 40; 83 | case 13: 84 | d += ((uint64)u.p8[12]) << 32; 85 | case 12: 86 | d += u.p32[2]; 87 | c += u.p64[0]; 88 | break; 89 | case 11: 90 | d += ((uint64)u.p8[10]) << 16; 91 | case 10: 92 | d += ((uint64)u.p8[9]) << 8; 93 | case 9: 94 | d += (uint64)u.p8[8]; 95 | case 8: 96 | c += u.p64[0]; 97 | break; 98 | case 7: 99 | c += ((uint64)u.p8[6]) << 48; 100 | case 6: 101 | c += ((uint64)u.p8[5]) << 40; 102 | case 5: 103 | c += ((uint64)u.p8[4]) << 32; 104 | case 4: 105 | c += u.p32[0]; 106 | break; 107 | case 3: 108 | c += ((uint64)u.p8[2]) << 16; 109 | case 2: 110 | c += ((uint64)u.p8[1]) << 8; 111 | case 1: 112 | c += (uint64)u.p8[0]; 113 | break; 114 | case 0: 115 | c += sc_const; 116 | d += sc_const; 117 | } 118 | 
ShortEnd(a,b,c,d); 119 | *hash1 = a; 120 | *hash2 = b; 121 | } 122 | 123 | 124 | 125 | 126 | // do the whole hash in one call 127 | void SpookyHash::Hash128( 128 | const void *message, 129 | size_t length, 130 | uint64 *hash1, 131 | uint64 *hash2) 132 | { 133 | if (length < sc_bufSize) 134 | { 135 | Short(message, length, hash1, hash2); 136 | return; 137 | } 138 | 139 | uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; 140 | uint64 buf[sc_numVars]; 141 | uint64 *end; 142 | union 143 | { 144 | const uint8 *p8; 145 | uint64 *p64; 146 | size_t i; 147 | } u; 148 | size_t remainder; 149 | 150 | h0=h3=h6=h9 = *hash1; 151 | h1=h4=h7=h10 = *hash2; 152 | h2=h5=h8=h11 = sc_const; 153 | 154 | u.p8 = (const uint8 *)message; 155 | end = u.p64 + (length/sc_blockSize)*sc_numVars; 156 | 157 | // handle all whole sc_blockSize blocks of bytes 158 | if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) 159 | { 160 | while (u.p64 < end) 161 | { 162 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 163 | u.p64 += sc_numVars; 164 | } 165 | } 166 | else 167 | { 168 | while (u.p64 < end) 169 | { 170 | memcpy(buf, u.p64, sc_blockSize); 171 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 172 | u.p64 += sc_numVars; 173 | } 174 | } 175 | 176 | // handle the last partial block of sc_blockSize bytes 177 | remainder = (length - ((const uint8 *)end-(const uint8 *)message)); 178 | memcpy(buf, end, remainder); 179 | memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); 180 | ((uint8 *)buf)[sc_blockSize-1] = remainder; 181 | 182 | // do some final mixing 183 | End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 184 | *hash1 = h0; 185 | *hash2 = h1; 186 | } 187 | 188 | 189 | 190 | // init spooky state 191 | void SpookyHash::Init(uint64 seed1, uint64 seed2) 192 | { 193 | m_length = 0; 194 | m_remainder = 0; 195 | m_state[0] = seed1; 196 | m_state[1] = seed2; 197 | } 198 | 199 | 200 | // add a message fragment to the state 201 | void SpookyHash::Update(const void *message, size_t length) 202 | { 203 | 
uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; 204 | size_t newLength = length + m_remainder; 205 | uint8 remainder; 206 | union 207 | { 208 | const uint8 *p8; 209 | uint64 *p64; 210 | size_t i; 211 | } u; 212 | const uint64 *end; 213 | 214 | // Is this message fragment too short? If it is, stuff it away. 215 | if (newLength < sc_bufSize) 216 | { 217 | memcpy(&((uint8 *)m_data)[m_remainder], message, length); 218 | m_length = length + m_length; 219 | m_remainder = (uint8)newLength; 220 | return; 221 | } 222 | 223 | // init the variables 224 | if (m_length < sc_bufSize) 225 | { 226 | h0=h3=h6=h9 = m_state[0]; 227 | h1=h4=h7=h10 = m_state[1]; 228 | h2=h5=h8=h11 = sc_const; 229 | } 230 | else 231 | { 232 | h0 = m_state[0]; 233 | h1 = m_state[1]; 234 | h2 = m_state[2]; 235 | h3 = m_state[3]; 236 | h4 = m_state[4]; 237 | h5 = m_state[5]; 238 | h6 = m_state[6]; 239 | h7 = m_state[7]; 240 | h8 = m_state[8]; 241 | h9 = m_state[9]; 242 | h10 = m_state[10]; 243 | h11 = m_state[11]; 244 | } 245 | m_length = length + m_length; 246 | 247 | // if we've got anything stuffed away, use it now 248 | if (m_remainder) 249 | { 250 | uint8 prefix = sc_bufSize-m_remainder; 251 | memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); 252 | u.p64 = m_data; 253 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 254 | Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 255 | u.p8 = ((const uint8 *)message) + prefix; 256 | length -= prefix; 257 | } 258 | else 259 | { 260 | u.p8 = (const uint8 *)message; 261 | } 262 | 263 | // handle all whole blocks of sc_blockSize bytes 264 | end = u.p64 + (length/sc_blockSize)*sc_numVars; 265 | remainder = (uint8)(length-((const uint8 *)end-u.p8)); 266 | if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) 267 | { 268 | while (u.p64 < end) 269 | { 270 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 271 | u.p64 += sc_numVars; 272 | } 273 | } 274 | else 275 | { 276 | while (u.p64 < end) 277 | { 278 | memcpy(m_data, u.p8, sc_blockSize); 279 | 
Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 280 | u.p64 += sc_numVars; 281 | } 282 | } 283 | 284 | // stuff away the last few bytes 285 | m_remainder = remainder; 286 | memcpy(m_data, end, remainder); 287 | 288 | // stuff away the variables 289 | m_state[0] = h0; 290 | m_state[1] = h1; 291 | m_state[2] = h2; 292 | m_state[3] = h3; 293 | m_state[4] = h4; 294 | m_state[5] = h5; 295 | m_state[6] = h6; 296 | m_state[7] = h7; 297 | m_state[8] = h8; 298 | m_state[9] = h9; 299 | m_state[10] = h10; 300 | m_state[11] = h11; 301 | } 302 | 303 | 304 | // report the hash for the concatenation of all message fragments so far 305 | void SpookyHash::Final(uint64 *hash1, uint64 *hash2) 306 | { 307 | // init the variables 308 | if (m_length < sc_bufSize) 309 | { 310 | *hash1 = m_state[0]; 311 | *hash2 = m_state[1]; 312 | Short( m_data, m_length, hash1, hash2); 313 | return; 314 | } 315 | 316 | const uint64 *data = (const uint64 *)m_data; 317 | uint8 remainder = m_remainder; 318 | 319 | uint64 h0 = m_state[0]; 320 | uint64 h1 = m_state[1]; 321 | uint64 h2 = m_state[2]; 322 | uint64 h3 = m_state[3]; 323 | uint64 h4 = m_state[4]; 324 | uint64 h5 = m_state[5]; 325 | uint64 h6 = m_state[6]; 326 | uint64 h7 = m_state[7]; 327 | uint64 h8 = m_state[8]; 328 | uint64 h9 = m_state[9]; 329 | uint64 h10 = m_state[10]; 330 | uint64 h11 = m_state[11]; 331 | 332 | if (remainder >= sc_blockSize) 333 | { 334 | // m_data can contain two blocks; handle any whole first block 335 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 336 | data += sc_numVars; 337 | remainder -= sc_blockSize; 338 | } 339 | 340 | // mix in the last partial block, and the length mod sc_blockSize 341 | memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); 342 | 343 | ((uint8 *)data)[sc_blockSize-1] = remainder; 344 | 345 | // do some final mixing 346 | End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 347 | 348 | *hash1 = h0; 349 | *hash2 = h1; 350 | } 351 | 
-------------------------------------------------------------------------------- /SpookyV2.h: -------------------------------------------------------------------------------- 1 | // 2 | // SpookyHash: a 128-bit noncryptographic hash function 3 | // By Bob Jenkins, public domain 4 | // Oct 31 2010: alpha, framework + SpookyHash::Mix appears right 5 | // Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right 6 | // Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas 7 | // Feb 2 2012: production, same bits as beta 8 | // Feb 5 2012: adjusted definitions of uint* to be more portable 9 | // Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough. 10 | // August 5 2012: SpookyV2 (different results) 11 | // 12 | // Up to 3 bytes/cycle for long messages. Reasonably fast for short messages. 13 | // All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit. 14 | // 15 | // This was developed for and tested on 64-bit x86-compatible processors. 16 | // It assumes the processor is little-endian. There is a macro 17 | // controlling whether unaligned reads are allowed (by default they are). 18 | // This should be an equally good hash on big-endian machines, but it will 19 | // compute different results on them than on little-endian machines. 20 | // 21 | // Google's CityHash has similar specs to SpookyHash, and CityHash is faster 22 | // on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders 23 | // of magnitude slower. CRCs are two or more times slower, but unlike 24 | // SpookyHash, they have nice math for combining the CRCs of pieces to form 25 | // the CRCs of wholes. There are also cryptographic hashes, but those are even 26 | // slower than MD5. 
27 | // 28 | 29 | #ifndef SPOOKYV2_H 30 | #define SPOOKYV2_H 31 | 32 | #include 33 | 34 | #ifdef _MSC_VER 35 | # define INLINE __forceinline 36 | typedef unsigned __int64 uint64; 37 | typedef unsigned __int32 uint32; 38 | typedef unsigned __int16 uint16; 39 | typedef unsigned __int8 uint8; 40 | #else 41 | # include 42 | # define INLINE inline 43 | typedef uint64_t uint64; 44 | typedef uint32_t uint32; 45 | typedef uint16_t uint16; 46 | typedef uint8_t uint8; 47 | #endif 48 | 49 | 50 | class SpookyHash 51 | { 52 | public: 53 | // 54 | // SpookyHash: hash a single message in one call, produce 128-bit output 55 | // 56 | static void Hash128( 57 | const void *message, // message to hash 58 | size_t length, // length of message in bytes 59 | uint64 *hash1, // in/out: in seed 1, out hash value 1 60 | uint64 *hash2); // in/out: in seed 2, out hash value 2 61 | 62 | // 63 | // Hash64: hash a single message in one call, return 64-bit output 64 | // 65 | static uint64 Hash64( 66 | const void *message, // message to hash 67 | size_t length, // length of message in bytes 68 | uint64 seed) // seed 69 | { 70 | uint64 hash1 = seed; 71 | Hash128(message, length, &hash1, &seed); 72 | return hash1; 73 | } 74 | 75 | // 76 | // Hash32: hash a single message in one call, produce 32-bit output 77 | // 78 | static uint32 Hash32( 79 | const void *message, // message to hash 80 | size_t length, // length of message in bytes 81 | uint32 seed) // seed 82 | { 83 | uint64 hash1 = seed, hash2 = seed; 84 | Hash128(message, length, &hash1, &hash2); 85 | return (uint32)hash1; 86 | } 87 | 88 | // 89 | // Init: initialize the context of a SpookyHash 90 | // 91 | void Init( 92 | uint64 seed1, // any 64-bit value will do, including 0 93 | uint64 seed2); // different seeds produce independent hashes 94 | 95 | // 96 | // Update: add a piece of a message to a SpookyHash state 97 | // 98 | void Update( 99 | const void *message, // message fragment 100 | size_t length); // length of message fragment in 
bytes 101 | 102 | 103 | // 104 | // Final: compute the hash for the current SpookyHash state 105 | // 106 | // This does not modify the state; you can keep updating it afterward 107 | // 108 | // The result is the same as if Hash128() had been called with 109 | // all the pieces concatenated into one message. 110 | // 111 | void Final( 112 | uint64 *hash1, // out only: first 64 bits of hash value. 113 | uint64 *hash2); // out only: second 64 bits of hash value. 114 | 115 | // 116 | // left rotate a 64-bit value by k bits (k must satisfy 0 < k < 64; k == 0 would shift right by 64) 117 | // 118 | static INLINE uint64 Rot64(uint64 x, int k) 119 | { 120 | return (x << k) | (x >> (64 - k)); 121 | } 122 | 123 | // 124 | // This is used if the input is 192 bytes (sc_bufSize) long or longer; shorter inputs go through Short(). 125 | // 126 | // The internal state is fully overwritten every 96 bytes. 127 | // Every input bit appears to cause at least 128 bits of entropy 128 | // before 96 other bytes are combined, when run forward or backward 129 | // For every input bit, 130 | // Two inputs differing in just that input bit 131 | // Where "differ" means xor or subtraction 132 | // And the base value is random 133 | // When run forward or backwards one Mix 134 | // I tried 3 pairs of each; they all differed by at least 212 bits. 
135 | // 136 | static INLINE void Mix( 137 | const uint64 *data, 138 | uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3, 139 | uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7, 140 | uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11) 141 | { 142 | s0 += data[0]; s2 ^= s10; s11 ^= s0; s0 = Rot64(s0,11); s11 += s1; 143 | s1 += data[1]; s3 ^= s11; s0 ^= s1; s1 = Rot64(s1,32); s0 += s2; 144 | s2 += data[2]; s4 ^= s0; s1 ^= s2; s2 = Rot64(s2,43); s1 += s3; 145 | s3 += data[3]; s5 ^= s1; s2 ^= s3; s3 = Rot64(s3,31); s2 += s4; 146 | s4 += data[4]; s6 ^= s2; s3 ^= s4; s4 = Rot64(s4,17); s3 += s5; 147 | s5 += data[5]; s7 ^= s3; s4 ^= s5; s5 = Rot64(s5,28); s4 += s6; 148 | s6 += data[6]; s8 ^= s4; s5 ^= s6; s6 = Rot64(s6,39); s5 += s7; 149 | s7 += data[7]; s9 ^= s5; s6 ^= s7; s7 = Rot64(s7,57); s6 += s8; 150 | s8 += data[8]; s10 ^= s6; s7 ^= s8; s8 = Rot64(s8,55); s7 += s9; 151 | s9 += data[9]; s11 ^= s7; s8 ^= s9; s9 = Rot64(s9,54); s8 += s10; 152 | s10 += data[10]; s0 ^= s8; s9 ^= s10; s10 = Rot64(s10,22); s9 += s11; 153 | s11 += data[11]; s1 ^= s9; s10 ^= s11; s11 = Rot64(s11,46); s10 += s0; 154 | } 155 | 156 | // 157 | // Mix all 12 inputs together so that h0, h1 are a hash of them all. 158 | // 159 | // For two inputs differing in just the input bits 160 | // Where "differ" means xor or subtraction 161 | // And the base value is random, or a counting value starting at that bit 162 | // The final result will have each bit of h0, h1 flip 163 | // For every input bit, 164 | // with probability 50 +- .3% 165 | // For every pair of input bits, 166 | // with probability 50 +- 3% 167 | // 168 | // This does not rely on the last Mix() call having already mixed some. 169 | // Two iterations was almost good enough for a 64-bit result, but a 170 | // 128-bit result is reported, so End() does three iterations. 
171 | // 172 | static INLINE void EndPartial( 173 | uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, 174 | uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 175 | uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) 176 | { 177 | h11+= h1; h2 ^= h11; h1 = Rot64(h1,44); 178 | h0 += h2; h3 ^= h0; h2 = Rot64(h2,15); 179 | h1 += h3; h4 ^= h1; h3 = Rot64(h3,34); 180 | h2 += h4; h5 ^= h2; h4 = Rot64(h4,21); 181 | h3 += h5; h6 ^= h3; h5 = Rot64(h5,38); 182 | h4 += h6; h7 ^= h4; h6 = Rot64(h6,33); 183 | h5 += h7; h8 ^= h5; h7 = Rot64(h7,10); 184 | h6 += h8; h9 ^= h6; h8 = Rot64(h8,13); 185 | h7 += h9; h10^= h7; h9 = Rot64(h9,38); 186 | h8 += h10; h11^= h8; h10= Rot64(h10,53); 187 | h9 += h11; h0 ^= h9; h11= Rot64(h11,42); 188 | h10+= h0; h1 ^= h10; h0 = Rot64(h0,54); 189 | } 190 | 191 | static INLINE void End( 192 | const uint64 *data, 193 | uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, 194 | uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 195 | uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) 196 | { 197 | h0 += data[0]; h1 += data[1]; h2 += data[2]; h3 += data[3]; 198 | h4 += data[4]; h5 += data[5]; h6 += data[6]; h7 += data[7]; 199 | h8 += data[8]; h9 += data[9]; h10 += data[10]; h11 += data[11]; 200 | EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 201 | EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 202 | EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 203 | } 204 | 205 | // 206 | // The goal is for each bit of the input to expand into 128 bits of 207 | // apparent entropy before it is fully overwritten. 
208 | // n trials both set and cleared at least m bits of h0 h1 h2 h3 209 | // n: 2 m: 29 210 | // n: 3 m: 46 211 | // n: 4 m: 57 212 | // n: 5 m: 107 213 | // n: 6 m: 146 214 | // n: 7 m: 152 215 | // when run forwards or backwards 216 | // for all 1-bit and 2-bit diffs 217 | // with diffs defined by either xor or subtraction 218 | // with a base of all zeros plus a counter, or plus another bit, or random 219 | // 220 | static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) 221 | { 222 | h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; 223 | h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; 224 | h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; 225 | h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; 226 | h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; 227 | h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; 228 | h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; 229 | h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; 230 | h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; 231 | h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; 232 | h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; 233 | h1 = Rot64(h1,36); h1 += h2; h3 ^= h1; 234 | } 235 | 236 | // 237 | // Mix all 4 inputs together so that h0, h1 are a hash of them all. 
238 | // 239 | // For two inputs differing in just the input bits 240 | // Where "differ" means xor or subtraction 241 | // And the base value is random, or a counting value starting at that bit 242 | // The final result will have each bit of h0, h1 flip 243 | // For every input bit, 244 | // with probability 50 +- .3% (it is probably better than that) 245 | // For every pair of input bits, 246 | // with probability 50 +- .75% (the worst case is approximately that) 247 | // 248 | static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) 249 | { 250 | h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; 251 | h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; 252 | h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; 253 | h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; 254 | h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; 255 | h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; 256 | h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; 257 | h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; 258 | h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; 259 | h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; 260 | h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; 261 | } 262 | 263 | private: 264 | 265 | // 266 | // Short is used for messages under 192 bytes in length 267 | // Short has a low startup cost, the normal mode is good for long 268 | // keys, the cost crossover is at about 192 bytes. The two modes were 269 | // held to the same quality bar. 
270 | // 271 | static void Short( 272 | const void *message, // message (array of bytes, not necessarily aligned) 273 | size_t length, // length of message (in bytes) 274 | uint64 *hash1, // in/out: in the seed, out the hash value 275 | uint64 *hash2); // in/out: in the seed, out the hash value 276 | 277 | // number of uint64's in internal state 278 | static const size_t sc_numVars = 12; 279 | 280 | // size of the internal state 281 | static const size_t sc_blockSize = sc_numVars*8; 282 | 283 | // size of buffer of unhashed data, in bytes 284 | static const size_t sc_bufSize = 2*sc_blockSize; 285 | 286 | // 287 | // sc_const: a constant which: 288 | // * is not zero 289 | // * is odd 290 | // * is a not-very-regular mix of 1's and 0's 291 | // * does not need any other special mathematical properties 292 | // 293 | static const uint64 sc_const = 0xdeadbeefdeadbeefLL; 294 | 295 | uint64 m_data[2*sc_numVars]; // unhashed data, for partial messages 296 | uint64 m_state[sc_numVars]; // internal state of the hash 297 | size_t m_length; // total length of the input so far 298 | uint8 m_remainder; // length of unhashed data stashed in m_data 299 | }; 300 | 301 | #endif // SPOOKYV2_H 302 | -------------------------------------------------------------------------------- /X.h: -------------------------------------------------------------------------------- 1 | //----------------------------------- X.h -------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef X_H 13 | #define X_H 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "hash_append.h" 21 | #include "n3876.h" 22 | 23 | // #include "../llvm/include/llvm/ADT/Hashing.h" 24 | 25 | namespace mine 26 | { 27 | 28 | class X 29 | { 30 | // std::tuple date_; 31 | // std::vector> data_; 32 | std::tuple date_; 33 | std::vector> data_; 34 | 35 | public: 36 | X(); 37 | // ... 38 | friend bool operator==(X const& x, X const& y) 39 | { 40 | return std::tie(x.date_, x.data_) == std::tie(y.date_, y.data_); 41 | } 42 | 43 | friend bool operator< (X const& x, X const& y) 44 | { 45 | return std::tie(x.date_, x.data_) < std::tie(y.date_, y.data_); 46 | } 47 | 48 | friend struct std::hash; 49 | 50 | // friend 51 | // llvm::hash_code 52 | // hash_value(X const& x) 53 | // { 54 | // using llvm::hash_value; 55 | // return llvm::hash_combine 56 | // ( 57 | // hash_value(std::get<0>(x.date_)), 58 | // hash_value(std::get<1>(x.date_)), 59 | // hash_value(std::get<2>(x.date_)), 60 | // llvm::hash_combine_range(x.data_.begin(), x.data_.end()) 61 | // ); 62 | // } 63 | 64 | template 65 | friend 66 | void 67 | hash_append(Hasher& h, X const& x) 68 | { 69 | using xstd::hash_append; 70 | hash_append(h, x.date_); 71 | } 72 | }; 73 | 74 | std::mt19937_64 eng; 75 | 76 | X::X() 77 | { 78 | std::uniform_int_distribution yeardata(1914, 2014); 79 | std::uniform_int_distribution monthdata(1, 12); 80 | std::uniform_int_distribution daydata(1, 28); 81 | std::uniform_int_distribution veclen(0, 100); 82 | std::uniform_int_distribution int1data(1, 10); 83 | std::uniform_int_distribution int2data(-3, 3); 84 | std::get<0>(date_) = yeardata(eng); 85 | std::get<1>(date_) = monthdata(eng); 86 | std::get<2>(date_) = daydata(eng); 87 | data_.resize(veclen(eng)); 88 | for (auto& p : data_) 89 | { 90 | p.first = int1data(eng); 91 | p.second = int2data(eng); 92 | } 93 | } 94 | 95 
| } // mine 96 | 97 | namespace std 98 | { 99 | 100 | template <> 101 | struct hash 102 | { 103 | size_t 104 | operator()(mine::X const& x) const noexcept 105 | { 106 | using namespace n3876; 107 | size_t h = hash::type>{}(get<0>(x.date_)); 108 | hash_combine(h, get<1>(x.date_), get<2>(x.date_)); 109 | for (auto const& p : x.data_) 110 | hash_combine(h, p.first, p.second); 111 | return h; 112 | } 113 | }; 114 | 115 | } // std 116 | 117 | #endif // X_H 118 | -------------------------------------------------------------------------------- /city.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Google, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala 22 | // 23 | // This file provides CityHash64() and related functions. 
24 | // 25 | // It's probably possible to create even faster hash functions by 26 | // writing a program that systematically explores some of the space of 27 | // possible hash functions, by using SIMD instructions, or by 28 | // compromising on hash quality. 29 | 30 | #include "config.h" 31 | #include "city.h" 32 | 33 | #include 34 | #include // for memcpy and memset 35 | 36 | using namespace std; 37 | 38 | static uint64 UNALIGNED_LOAD64(const char *p) { 39 | uint64 result; 40 | memcpy(&result, p, sizeof(result)); 41 | return result; 42 | } 43 | 44 | static uint32 UNALIGNED_LOAD32(const char *p) { 45 | uint32 result; 46 | memcpy(&result, p, sizeof(result)); 47 | return result; 48 | } 49 | 50 | #ifdef _MSC_VER 51 | 52 | #include 53 | #define bswap_32(x) _byteswap_ulong(x) 54 | #define bswap_64(x) _byteswap_uint64(x) 55 | 56 | #elif defined(__APPLE__) 57 | 58 | // Mac OS X / Darwin features 59 | #include 60 | #define bswap_32(x) OSSwapInt32(x) 61 | #define bswap_64(x) OSSwapInt64(x) 62 | 63 | #elif defined(__NetBSD__) 64 | 65 | #include 66 | #include 67 | #if defined(__BSWAP_RENAME) && !defined(__bswap_32) 68 | #define bswap_32(x) bswap32(x) 69 | #define bswap_64(x) bswap64(x) 70 | #endif 71 | 72 | #else 73 | 74 | #include 75 | 76 | #endif 77 | 78 | #ifdef WORDS_BIGENDIAN 79 | #define uint32_in_expected_order(x) (bswap_32(x)) 80 | #define uint64_in_expected_order(x) (bswap_64(x)) 81 | #else 82 | #define uint32_in_expected_order(x) (x) 83 | #define uint64_in_expected_order(x) (x) 84 | #endif 85 | 86 | #if !defined(LIKELY) 87 | #if HAVE_BUILTIN_EXPECT 88 | #define LIKELY(x) (__builtin_expect(!!(x), 1)) 89 | #else 90 | #define LIKELY(x) (x) 91 | #endif 92 | #endif 93 | 94 | static uint64 Fetch64(const char *p) { 95 | return uint64_in_expected_order(UNALIGNED_LOAD64(p)); 96 | } 97 | 98 | static uint32 Fetch32(const char *p) { 99 | return uint32_in_expected_order(UNALIGNED_LOAD32(p)); 100 | } 101 | 102 | // Some primes between 2^63 and 2^64 for various uses. 
103 | static const uint64 k0 = 0xc3a5c85c97cb3127ULL; 104 | static const uint64 k1 = 0xb492b66fbe98f273ULL; 105 | static const uint64 k2 = 0x9ae16a3b2f90404fULL; 106 | 107 | // Magic numbers for 32-bit hashing. Copied from Murmur3. 108 | static const uint32_t c1 = 0xcc9e2d51; 109 | static const uint32_t c2 = 0x1b873593; 110 | 111 | // A 32-bit to 32-bit integer hash copied from Murmur3. 112 | static uint32 fmix(uint32 h) 113 | { 114 | h ^= h >> 16; 115 | h *= 0x85ebca6b; 116 | h ^= h >> 13; 117 | h *= 0xc2b2ae35; 118 | h ^= h >> 16; 119 | return h; 120 | } 121 | 122 | static uint32 Rotate32(uint32 val, int shift) { 123 | // Avoid shifting by 32: doing so yields an undefined result. 124 | return shift == 0 ? val : ((val >> shift) | (val << (32 - shift))); 125 | } 126 | 127 | #undef PERMUTE3 128 | #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0) 129 | 130 | static uint32 Mur(uint32 a, uint32 h) { 131 | // Helper from Murmur3 for combining two 32-bit values. 132 | a *= c1; 133 | a = Rotate32(a, 17); 134 | a *= c2; 135 | h ^= a; 136 | h = Rotate32(h, 19); 137 | return h * 5 + 0xe6546b64; 138 | } 139 | 140 | static uint32 Hash32Len13to24(const char *s, size_t len) { 141 | uint32 a = Fetch32(s - 4 + (len >> 1)); 142 | uint32 b = Fetch32(s + 4); 143 | uint32 c = Fetch32(s + len - 8); 144 | uint32 d = Fetch32(s + (len >> 1)); 145 | uint32 e = Fetch32(s); 146 | uint32 f = Fetch32(s + len - 4); 147 | uint32 h = len; 148 | 149 | return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); 150 | } 151 | 152 | static uint32 Hash32Len0to4(const char *s, size_t len) { 153 | uint32 b = 0; 154 | uint32 c = 9; 155 | for (int i = 0; i < len; i++) { 156 | signed char v = s[i]; 157 | b = b * c1 + v; 158 | c ^= b; 159 | } 160 | return fmix(Mur(b, Mur(len, c))); 161 | } 162 | 163 | static uint32 Hash32Len5to12(const char *s, size_t len) { 164 | uint32 a = len, b = len * 5, c = 9, d = b; 165 | a += Fetch32(s); 166 | b += Fetch32(s + len - 4); 167 | c += 
Fetch32(s + ((len >> 1) & 4)); 168 | return fmix(Mur(c, Mur(b, Mur(a, d)))); 169 | } 170 | 171 | uint32 CityHash32(const char *s, size_t len) { 172 | if (len <= 24) { 173 | return len <= 12 ? 174 | (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) : 175 | Hash32Len13to24(s, len); 176 | } 177 | 178 | // len > 24 179 | uint32 h = len, g = c1 * len, f = g; 180 | uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2; 181 | uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2; 182 | uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2; 183 | uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2; 184 | uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2; 185 | h ^= a0; 186 | h = Rotate32(h, 19); 187 | h = h * 5 + 0xe6546b64; 188 | h ^= a2; 189 | h = Rotate32(h, 19); 190 | h = h * 5 + 0xe6546b64; 191 | g ^= a1; 192 | g = Rotate32(g, 19); 193 | g = g * 5 + 0xe6546b64; 194 | g ^= a3; 195 | g = Rotate32(g, 19); 196 | g = g * 5 + 0xe6546b64; 197 | f += a4; 198 | f = Rotate32(f, 19); 199 | f = f * 5 + 0xe6546b64; 200 | size_t iters = (len - 1) / 20; 201 | do { 202 | uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2; 203 | uint32 a1 = Fetch32(s + 4); 204 | uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2; 205 | uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2; 206 | uint32 a4 = Fetch32(s + 16); 207 | h ^= a0; 208 | h = Rotate32(h, 18); 209 | h = h * 5 + 0xe6546b64; 210 | f += a1; 211 | f = Rotate32(f, 19); 212 | f = f * c1; 213 | g += a2; 214 | g = Rotate32(g, 18); 215 | g = g * 5 + 0xe6546b64; 216 | h ^= a3 + a1; 217 | h = Rotate32(h, 19); 218 | h = h * 5 + 0xe6546b64; 219 | g ^= a4; 220 | g = bswap_32(g) * 5; 221 | h += a4 * 5; 222 | h = bswap_32(h); 223 | f += a0; 224 | PERMUTE3(f, h, g); 225 | s += 20; 226 | } while (--iters != 0); 227 | g = Rotate32(g, 11) * c1; 228 | g = Rotate32(g, 17) * c1; 229 | f = Rotate32(f, 11) * c1; 230 | f = Rotate32(f, 17) * c1; 231 | h = Rotate32(h + g, 19); 232 | h = h * 5 + 0xe6546b64; 233 | h = Rotate32(h, 
17) * c1; 234 | h = Rotate32(h + f, 19); 235 | h = h * 5 + 0xe6546b64; 236 | h = Rotate32(h, 17) * c1; 237 | return h; 238 | } 239 | 240 | // Bitwise right rotate. Normally this will compile to a single 241 | // instruction, especially if the shift is a manifest constant. 242 | static uint64 Rotate(uint64 val, int shift) { 243 | // Avoid shifting by 64: doing so yields an undefined result. 244 | return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); 245 | } 246 | 247 | static uint64 ShiftMix(uint64 val) { 248 | return val ^ (val >> 47); 249 | } 250 | 251 | static uint64 HashLen16(uint64 u, uint64 v) { 252 | return Hash128to64(uint128(u, v)); 253 | } 254 | 255 | static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { 256 | // Murmur-inspired hashing. 257 | uint64 a = (u ^ v) * mul; 258 | a ^= (a >> 47); 259 | uint64 b = (v ^ a) * mul; 260 | b ^= (b >> 47); 261 | b *= mul; 262 | return b; 263 | } 264 | 265 | static uint64 HashLen0to16(const char *s, size_t len) { 266 | if (len >= 8) { 267 | uint64 mul = k2 + len * 2; 268 | uint64 a = Fetch64(s) + k2; 269 | uint64 b = Fetch64(s + len - 8); 270 | uint64 c = Rotate(b, 37) * mul + a; 271 | uint64 d = (Rotate(a, 25) + b) * mul; 272 | return HashLen16(c, d, mul); 273 | } 274 | if (len >= 4) { 275 | uint64 mul = k2 + len * 2; 276 | uint64 a = Fetch32(s); 277 | return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); 278 | } 279 | if (len > 0) { 280 | uint8 a = s[0]; 281 | uint8 b = s[len >> 1]; 282 | uint8 c = s[len - 1]; 283 | uint32 y = static_cast(a) + (static_cast(b) << 8); 284 | uint32 z = len + (static_cast(c) << 2); 285 | return ShiftMix(y * k2 ^ z * k0) * k2; 286 | } 287 | return k2; 288 | } 289 | 290 | // This probably works well for 16-byte strings as well, but it may be overkill 291 | // in that case. 
292 | static uint64 HashLen17to32(const char *s, size_t len) { 293 | uint64 mul = k2 + len * 2; 294 | uint64 a = Fetch64(s) * k1; 295 | uint64 b = Fetch64(s + 8); 296 | uint64 c = Fetch64(s + len - 8) * mul; 297 | uint64 d = Fetch64(s + len - 16) * k2; 298 | return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, 299 | a + Rotate(b + k2, 18) + c, mul); 300 | } 301 | 302 | // Return a 16-byte hash for 48 bytes. Quick and dirty. 303 | // Callers do best to use "random-looking" values for a and b. 304 | static pair WeakHashLen32WithSeeds( 305 | uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { 306 | a += w; 307 | b = Rotate(b + a + z, 21); 308 | uint64 c = a; 309 | a += x; 310 | a += y; 311 | b += Rotate(a, 44); 312 | return make_pair(a + z, b + c); 313 | } 314 | 315 | // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 316 | static pair WeakHashLen32WithSeeds( 317 | const char* s, uint64 a, uint64 b) { 318 | return WeakHashLen32WithSeeds(Fetch64(s), 319 | Fetch64(s + 8), 320 | Fetch64(s + 16), 321 | Fetch64(s + 24), 322 | a, 323 | b); 324 | } 325 | 326 | // Return an 8-byte hash for 33 to 64 bytes. 
327 | static uint64 HashLen33to64(const char *s, size_t len) { 328 | uint64 mul = k2 + len * 2; 329 | uint64 a = Fetch64(s) * k2; 330 | uint64 b = Fetch64(s + 8); 331 | uint64 c = Fetch64(s + len - 24); 332 | uint64 d = Fetch64(s + len - 32); 333 | uint64 e = Fetch64(s + 16) * k2; 334 | uint64 f = Fetch64(s + 24) * 9; 335 | uint64 g = Fetch64(s + len - 8); 336 | uint64 h = Fetch64(s + len - 16) * mul; 337 | uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; 338 | uint64 v = ((a + g) ^ d) + f + 1; 339 | uint64 w = bswap_64((u + v) * mul) + h; 340 | uint64 x = Rotate(e + f, 42) + c; 341 | uint64 y = (bswap_64((v + w) * mul) + g) * mul; 342 | uint64 z = e + f + c; 343 | a = bswap_64((x + z) * mul + y) + b; 344 | b = ShiftMix((z + a) * mul + d + h) * mul; 345 | return b + x; 346 | } 347 | 348 | uint64 CityHash64(const char *s, size_t len) { 349 | if (len <= 32) { 350 | if (len <= 16) { 351 | return HashLen0to16(s, len); 352 | } else { 353 | return HashLen17to32(s, len); 354 | } 355 | } else if (len <= 64) { 356 | return HashLen33to64(s, len); 357 | } 358 | 359 | // For strings over 64 bytes we hash the end first, and then as we 360 | // loop we keep 56 bytes of state: v, w, x, y, and z. 361 | uint64 x = Fetch64(s + len - 40); 362 | uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); 363 | uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); 364 | pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); 365 | pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); 366 | x = x * k1 + Fetch64(s); 367 | 368 | // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
369 | len = (len - 1) & ~static_cast(63); 370 | do { 371 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; 372 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; 373 | x ^= w.second; 374 | y += v.first + Fetch64(s + 40); 375 | z = Rotate(z + w.first, 33) * k1; 376 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 377 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); 378 | std::swap(z, x); 379 | s += 64; 380 | len -= 64; 381 | } while (len != 0); 382 | return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, 383 | HashLen16(v.second, w.second) + x); 384 | } 385 | 386 | uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { 387 | return CityHash64WithSeeds(s, len, k2, seed); 388 | } 389 | 390 | uint64 CityHash64WithSeeds(const char *s, size_t len, 391 | uint64 seed0, uint64 seed1) { 392 | return HashLen16(CityHash64(s, len) - seed0, seed1); 393 | } 394 | 395 | // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings 396 | // of any length representable in signed long. Based on City and Murmur. 397 | static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { 398 | uint64 a = Uint128Low64(seed); 399 | uint64 b = Uint128High64(seed); 400 | uint64 c = 0; 401 | uint64 d = 0; 402 | signed long l = len - 16; 403 | if (l <= 0) { // len <= 16 404 | a = ShiftMix(a * k1) * k1; 405 | c = b * k1 + HashLen0to16(s, len); 406 | d = ShiftMix(a + (len >= 8 ? 
Fetch64(s) : c)); 407 | } else { // len > 16 408 | c = HashLen16(Fetch64(s + len - 8) + k1, a); 409 | d = HashLen16(b + len, c + Fetch64(s + len - 16)); 410 | a += d; 411 | do { 412 | a ^= ShiftMix(Fetch64(s) * k1) * k1; 413 | a *= k1; 414 | b ^= a; 415 | c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; 416 | c *= k1; 417 | d ^= c; 418 | s += 16; 419 | l -= 16; 420 | } while (l > 0); 421 | } 422 | a = HashLen16(a, c); 423 | b = HashLen16(d, b); 424 | return uint128(a ^ b, HashLen16(b, a)); 425 | } 426 | 427 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { 428 | if (len < 128) { 429 | return CityMurmur(s, len, seed); 430 | } 431 | 432 | // We expect len >= 128 to be the common case. Keep 56 bytes of state: 433 | // v, w, x, y, and z. 434 | pair v, w; 435 | uint64 x = Uint128Low64(seed); 436 | uint64 y = Uint128High64(seed); 437 | uint64 z = len * k1; 438 | v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); 439 | v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); 440 | w.first = Rotate(y + z, 35) * k1 + x; 441 | w.second = Rotate(x + Fetch64(s + 88), 53) * k1; 442 | 443 | // This is the same inner loop as CityHash64(), manually unrolled. 
444 | do { 445 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; 446 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; 447 | x ^= w.second; 448 | y += v.first + Fetch64(s + 40); 449 | z = Rotate(z + w.first, 33) * k1; 450 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 451 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); 452 | std::swap(z, x); 453 | s += 64; 454 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; 455 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; 456 | x ^= w.second; 457 | y += v.first + Fetch64(s + 40); 458 | z = Rotate(z + w.first, 33) * k1; 459 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 460 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); 461 | std::swap(z, x); 462 | s += 64; 463 | len -= 128; 464 | } while (LIKELY(len >= 128)); 465 | x += Rotate(v.first + z, 49) * k0; 466 | y = y * k0 + Rotate(w.second, 37); 467 | z = z * k0 + Rotate(w.first, 27); 468 | w.first *= 9; 469 | v.first *= k0; 470 | // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. 471 | for (size_t tail_done = 0; tail_done < len; ) { 472 | tail_done += 32; 473 | y = Rotate(x + y, 42) * k0 + v.second; 474 | w.first += Fetch64(s + len - tail_done + 16); 475 | x = x * k0 + w.first; 476 | z += w.second + Fetch64(s + len - tail_done); 477 | w.second += v.first; 478 | v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); 479 | v.first *= k0; 480 | } 481 | // At this point our 56 bytes of state should contain more than 482 | // enough information for a strong 128-bit hash. We use two 483 | // different 56-byte-to-8-byte hashes to get a 16-byte final result. 484 | x = HashLen16(x, v.first); 485 | y = HashLen16(y + z, w.first); 486 | return uint128(HashLen16(x + v.second, w.second) + y, 487 | HashLen16(x + w.second, y + v.second)); 488 | } 489 | 490 | uint128 CityHash128(const char *s, size_t len) { 491 | return len >= 16 ? 
492 | CityHash128WithSeed(s + 16, len - 16, 493 | uint128(Fetch64(s), Fetch64(s + 8) + k0)) : 494 | CityHash128WithSeed(s, len, uint128(k0, k1)); 495 | } 496 | 497 | #ifdef __SSE4_2__ 498 | #include 499 | #include 500 | 501 | // Requires len >= 240. 502 | static void CityHashCrc256Long(const char *s, size_t len, 503 | uint32 seed, uint64 *result) { 504 | uint64 a = Fetch64(s + 56) + k0; 505 | uint64 b = Fetch64(s + 96) + k0; 506 | uint64 c = result[0] = HashLen16(b, len); 507 | uint64 d = result[1] = Fetch64(s + 120) * k0 + len; 508 | uint64 e = Fetch64(s + 184) + seed; 509 | uint64 f = 0; 510 | uint64 g = 0; 511 | uint64 h = c + d; 512 | uint64 x = seed; 513 | uint64 y = 0; 514 | uint64 z = 0; 515 | 516 | // 240 bytes of input per iter. 517 | size_t iters = len / 240; 518 | len -= iters * 240; 519 | do { 520 | #undef CHUNK 521 | #define CHUNK(r) \ 522 | PERMUTE3(x, z, y); \ 523 | b += Fetch64(s); \ 524 | c += Fetch64(s + 8); \ 525 | d += Fetch64(s + 16); \ 526 | e += Fetch64(s + 24); \ 527 | f += Fetch64(s + 32); \ 528 | a += b; \ 529 | h += f; \ 530 | b += c; \ 531 | f += d; \ 532 | g += e; \ 533 | e += z; \ 534 | g += x; \ 535 | z = _mm_crc32_u64(z, b + g); \ 536 | y = _mm_crc32_u64(y, e + h); \ 537 | x = _mm_crc32_u64(x, f + a); \ 538 | e = Rotate(e, r); \ 539 | c += e; \ 540 | s += 40 541 | 542 | CHUNK(0); PERMUTE3(a, h, c); 543 | CHUNK(33); PERMUTE3(a, h, f); 544 | CHUNK(0); PERMUTE3(b, h, f); 545 | CHUNK(42); PERMUTE3(b, h, d); 546 | CHUNK(0); PERMUTE3(b, h, e); 547 | CHUNK(33); PERMUTE3(a, h, e); 548 | } while (--iters > 0); 549 | 550 | while (len >= 40) { 551 | CHUNK(29); 552 | e ^= Rotate(a, 20); 553 | h += Rotate(b, 30); 554 | g ^= Rotate(c, 40); 555 | f += Rotate(d, 34); 556 | PERMUTE3(c, h, g); 557 | len -= 40; 558 | } 559 | if (len > 0) { 560 | s = s + len - 40; 561 | CHUNK(33); 562 | e ^= Rotate(a, 43); 563 | h += Rotate(b, 42); 564 | g ^= Rotate(c, 41); 565 | f += Rotate(d, 40); 566 | } 567 | result[0] ^= h; 568 | result[1] ^= g; 569 | g += h; 570 | 
a = HashLen16(a, g + z); 571 | x += y << 32; 572 | b += x; 573 | c = HashLen16(c, z) + h; 574 | d = HashLen16(d, e + result[0]); 575 | g += e; 576 | h += HashLen16(x, f); 577 | e = HashLen16(a, d) + g; 578 | z = HashLen16(b, c) + a; 579 | y = HashLen16(g, h) + c; 580 | result[0] = e + z + y + x; 581 | a = ShiftMix((a + y) * k0) * k0 + b; 582 | result[1] += a + result[0]; 583 | a = ShiftMix(a * k0) * k0 + c; 584 | result[2] = a + result[1]; 585 | a = ShiftMix((a + e) * k0) * k0; 586 | result[3] = a + result[2]; 587 | } 588 | 589 | // Requires len < 240. 590 | static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) { 591 | char buf[240]; 592 | memcpy(buf, s, len); 593 | memset(buf + len, 0, 240 - len); 594 | CityHashCrc256Long(buf, 240, ~static_cast(len), result); 595 | } 596 | 597 | void CityHashCrc256(const char *s, size_t len, uint64 *result) { 598 | if (LIKELY(len >= 240)) { 599 | CityHashCrc256Long(s, len, 0, result); 600 | } else { 601 | CityHashCrc256Short(s, len, result); 602 | } 603 | } 604 | 605 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) { 606 | if (len <= 900) { 607 | return CityHash128WithSeed(s, len, seed); 608 | } else { 609 | uint64 result[4]; 610 | CityHashCrc256(s, len, result); 611 | uint64 u = Uint128High64(seed) + result[0]; 612 | uint64 v = Uint128Low64(seed) + result[1]; 613 | return uint128(HashLen16(u, v + result[2]), 614 | HashLen16(Rotate(v, 32), u * k0 + result[3])); 615 | } 616 | } 617 | 618 | uint128 CityHashCrc128(const char *s, size_t len) { 619 | if (len <= 900) { 620 | return CityHash128(s, len); 621 | } else { 622 | uint64 result[4]; 623 | CityHashCrc256(s, len, result); 624 | return uint128(result[2], result[3]); 625 | } 626 | } 627 | 628 | #endif 629 | -------------------------------------------------------------------------------- /city.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Google, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala 22 | // 23 | // http://code.google.com/p/cityhash/ 24 | // 25 | // This file provides a few functions for hashing strings. All of them are 26 | // high-quality functions in the sense that they pass standard tests such 27 | // as Austin Appleby's SMHasher. They are also fast. 28 | // 29 | // For 64-bit x86 code, on short strings, we don't know of anything faster than 30 | // CityHash64 that is of comparable quality. We believe our nearest competitor 31 | // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash 32 | // tables and most other hashing (excluding cryptography). 33 | // 34 | // For 64-bit x86 code, on long strings, the picture is more complicated. 
35 | // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., 36 | // CityHashCrc128 appears to be faster than all competitors of comparable 37 | // quality. CityHash128 is also good but not quite as fast. We believe our 38 | // nearest competitor is Bob Jenkins' Spooky. We don't have great data for 39 | // other 64-bit CPUs, but for long strings we know that Spooky is slightly 40 | // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. 41 | // Note that CityHashCrc128 is declared in citycrc.h. 42 | // 43 | // For 32-bit x86 code, we don't know of anything faster than CityHash32 that 44 | // is of comparable quality. We believe our nearest competitor is Murmur3A. 45 | // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) 46 | // 47 | // Functions in the CityHash family are not suitable for cryptography. 48 | // 49 | // Please see CityHash's README file for more details on our performance 50 | // measurements and so on. 51 | // 52 | // WARNING: This code has been only lightly tested on big-endian platforms! 53 | // It is known to work well on little-endian platforms that have a small penalty 54 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 55 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads; 56 | // bug reports are welcome. 57 | // 58 | // By the way, for some hash functions, given strings a and b, the hash 59 | // of a+b is easily derived from the hashes of a and b. This property 60 | // doesn't hold for any hash functions in this file. 61 | 62 | #ifndef CITY_HASH_H_ 63 | #define CITY_HASH_H_ 64 | 65 | #include // for size_t. 
66 | #include 67 | #include 68 | 69 | typedef uint8_t uint8; 70 | typedef uint32_t uint32; 71 | typedef uint64_t uint64; 72 | typedef std::pair uint128; 73 | 74 | inline uint64 Uint128Low64(const uint128& x) { return x.first; } 75 | inline uint64 Uint128High64(const uint128& x) { return x.second; } 76 | 77 | // Hash function for a byte array. 78 | uint64 CityHash64(const char *buf, size_t len); 79 | 80 | // Hash function for a byte array. For convenience, a 64-bit seed is also 81 | // hashed into the result. 82 | uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); 83 | 84 | // Hash function for a byte array. For convenience, two seeds are also 85 | // hashed into the result. 86 | uint64 CityHash64WithSeeds(const char *buf, size_t len, 87 | uint64 seed0, uint64 seed1); 88 | 89 | // Hash function for a byte array. 90 | uint128 CityHash128(const char *s, size_t len); 91 | 92 | // Hash function for a byte array. For convenience, a 128-bit seed is also 93 | // hashed into the result. 94 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); 95 | 96 | // Hash function for a byte array. Most useful in 32-bit binaries. 97 | uint32 CityHash32(const char *buf, size_t len); 98 | 99 | // Hash 128 input bits down to 64 bits of output. 100 | // This is intended to be a reasonably good hash function. 101 | inline uint64 Hash128to64(const uint128& x) { 102 | // Murmur-inspired hashing. 
103 | const uint64 kMul = 0x9ddfea08eb382d69ULL; 104 | uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 105 | a ^= (a >> 47); 106 | uint64 b = (Uint128High64(x) ^ a) * kMul; 107 | b ^= (b >> 47); 108 | b *= kMul; 109 | return b; 110 | } 111 | 112 | #endif // CITY_HASH_H_ 113 | -------------------------------------------------------------------------------- /city_hash.h: -------------------------------------------------------------------------------- 1 | //------------------------------ city_hash.h ----------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef CITY_HASH_H 13 | #define CITY_HASH_H 14 | 15 | #include 16 | #include 17 | #include "city.h" 18 | 19 | // namespace acme is used to demonstrate example code. It is not proposed. 20 | 21 | namespace acme 22 | { 23 | 24 | class city 25 | { 26 | std::vector buf_; 27 | public: 28 | using result_type = std::size_t; 29 | 30 | void 31 | operator()(void const* key, std::size_t len) noexcept 32 | { 33 | char const* p = static_cast(key); 34 | char const* const e = p + len; 35 | for (; p < e; ++p) 36 | buf_.push_back(*p); 37 | } 38 | 39 | explicit 40 | operator std::size_t() noexcept 41 | { 42 | return CityHash64(buf_.data(), buf_.size()); 43 | } 44 | }; 45 | 46 | } // acme 47 | 48 | #endif // CITY_HASH_H 49 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | /* config.h. Generated from config.h.in by configure. */ 2 | /* config.h.in. Generated from configure.ac by autoheader. 
*/ 3 | 4 | /* Define if building universal (internal helper macro) */ 5 | /* #undef AC_APPLE_UNIVERSAL_BUILD */ 6 | 7 | /* Define to 1 if the compiler supports __builtin_expect. */ 8 | #define HAVE_BUILTIN_EXPECT 1 9 | 10 | /* Define to 1 if you have the header file. */ 11 | #define HAVE_DLFCN_H 1 12 | 13 | /* Define to 1 if you have the header file. */ 14 | #define HAVE_INTTYPES_H 1 15 | 16 | /* Define to 1 if you have the header file. */ 17 | #define HAVE_MEMORY_H 1 18 | 19 | /* Define to 1 if you have the header file. */ 20 | #define HAVE_STDINT_H 1 21 | 22 | /* Define to 1 if you have the header file. */ 23 | #define HAVE_STDLIB_H 1 24 | 25 | /* Define to 1 if you have the header file. */ 26 | #define HAVE_STRINGS_H 1 27 | 28 | /* Define to 1 if you have the header file. */ 29 | #define HAVE_STRING_H 1 30 | 31 | /* Define to 1 if you have the header file. */ 32 | #define HAVE_SYS_STAT_H 1 33 | 34 | /* Define to 1 if you have the header file. */ 35 | #define HAVE_SYS_TYPES_H 1 36 | 37 | /* Define to 1 if you have the header file. */ 38 | #define HAVE_UNISTD_H 1 39 | 40 | /* Define to the sub-directory in which libtool stores uninstalled libraries. 41 | */ 42 | #define LT_OBJDIR ".libs/" 43 | 44 | /* Define to the address where bug reports for this package should be sent. */ 45 | #define PACKAGE_BUGREPORT "cityhash-discuss@googlegroups.com" 46 | 47 | /* Define to the full name of this package. */ 48 | #define PACKAGE_NAME "CityHash" 49 | 50 | /* Define to the full name and version of this package. */ 51 | #define PACKAGE_STRING "CityHash 1.1.1" 52 | 53 | /* Define to the one symbol short name of this package. */ 54 | #define PACKAGE_TARNAME "cityhash" 55 | 56 | /* Define to the home page for this package. */ 57 | #define PACKAGE_URL "" 58 | 59 | /* Define to the version of this package. */ 60 | #define PACKAGE_VERSION "1.1.1" 61 | 62 | /* Define to 1 if you have the ANSI C header files. 
*/ 63 | #define STDC_HEADERS 1 64 | 65 | /* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most 66 | significant byte first (like Motorola and SPARC, unlike Intel). */ 67 | #if defined AC_APPLE_UNIVERSAL_BUILD 68 | # if defined __BIG_ENDIAN__ 69 | # define WORDS_BIGENDIAN 1 70 | # endif 71 | #else 72 | # ifndef WORDS_BIGENDIAN 73 | /* # undef WORDS_BIGENDIAN */ 74 | # endif 75 | #endif 76 | 77 | /* Define for Solaris 2.5.1 so the uint32_t typedef from , 78 | , or is not used. If the typedef were allowed, the 79 | #define below would cause a syntax error. */ 80 | /* #undef _UINT32_T */ 81 | 82 | /* Define for Solaris 2.5.1 so the uint64_t typedef from , 83 | , or is not used. If the typedef were allowed, the 84 | #define below would cause a syntax error. */ 85 | /* #undef _UINT64_T */ 86 | 87 | /* Define for Solaris 2.5.1 so the uint8_t typedef from , 88 | , or is not used. If the typedef were allowed, the 89 | #define below would cause a syntax error. */ 90 | /* #undef _UINT8_T */ 91 | 92 | /* Define to `__inline__' or `__inline' if that's what the C compiler 93 | calls it, or to nothing if 'inline' is not supported under any name. */ 94 | #ifndef __cplusplus 95 | /* #undef inline */ 96 | #endif 97 | 98 | /* Define to `unsigned int' if does not define. */ 99 | /* #undef size_t */ 100 | 101 | /* Define to `int' if does not define. */ 102 | /* #undef ssize_t */ 103 | 104 | /* Define to the type of an unsigned integer type of width exactly 32 bits if 105 | such a type exists and the standard includes do not define it. */ 106 | /* #undef uint32_t */ 107 | 108 | /* Define to the type of an unsigned integer type of width exactly 64 bits if 109 | such a type exists and the standard includes do not define it. */ 110 | /* #undef uint64_t */ 111 | 112 | /* Define to the type of an unsigned integer type of width exactly 8 bits if 113 | such a type exists and the standard includes do not define it. 
*/ 114 | /* #undef uint8_t */ 115 | -------------------------------------------------------------------------------- /dates.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------- dates.cpp ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | // The proposal 13 | #include "hash_append.h" 14 | 15 | // Example Hashers 16 | #include "siphash.h" 17 | #include "fnv1a.h" 18 | #include "spooky.h" 19 | #include "city_hash.h" 20 | 21 | #include "hash_test.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "../llvm/include/llvm/ADT/Hashing.h" 34 | 35 | template 36 | inline 37 | constexpr 38 | bool 39 | is_leap(Int y) noexcept 40 | { 41 | return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); 42 | } 43 | 44 | inline 45 | constexpr 46 | unsigned 47 | last_day_of_month_common_year(unsigned m) noexcept 48 | { 49 | constexpr unsigned char a[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; 50 | return a[m-1]; 51 | } 52 | 53 | template 54 | inline 55 | constexpr 56 | unsigned 57 | last_day_of_month(Int y, unsigned m) noexcept 58 | { 59 | return m != 2 || !is_leap(y) ? 
last_day_of_month_common_year(m) : 29u; 60 | } 61 | 62 | int 63 | main() 64 | { 65 | typedef std::chrono::duration secs; 66 | std::vector hashes; 67 | // xstd::uhash h; 68 | auto t0 = std::chrono::high_resolution_clock::now(); 69 | for (short y = 1914; y < 2014; ++y) 70 | { 71 | for (unsigned char m = 1; m <= 12; ++m) 72 | { 73 | unsigned char l = last_day_of_month(y, m); 74 | for (unsigned char d = 1; d <= l; ++d) 75 | { 76 | auto date = std::make_tuple(y, m, d); 77 | #if 1 78 | // hashes.push_back(llvm::hash_combine(llvm::hash_value(std::get<0>(date)), 79 | // llvm::hash_value(std::get<1>(date)), 80 | // llvm::hash_value(std::get<2>(date)))); 81 | #elif 0 82 | hashes.push_back(h(date)); 83 | #endif 84 | } 85 | } 86 | } 87 | auto t1 = std::chrono::high_resolution_clock::now(); 88 | std::cout << secs(t1-t0).count() << " s\n"; 89 | std::cout << test2(hashes) << '\n'; 90 | std::cout << test4(hashes) << '\n'; 91 | std::cout << test5(hashes) << '\n'; 92 | } 93 | 94 | // llvm 95 | // 0.00127408 s 96 | // 0 97 | // 0.00214465 98 | // 7 99 | 100 | // siphash 101 | // 0.00152026 s 102 | // 0 103 | // 0.0021629 104 | // 7 105 | 106 | // fnv1a 107 | // 0.00068684 s 108 | // 0 109 | // -0.0923842 110 | // 4 111 | 112 | // spooky 113 | // 0.00157705 s 114 | // 0 115 | // -0.00161533 116 | // 7 117 | -------------------------------------------------------------------------------- /debug_hash.h: -------------------------------------------------------------------------------- 1 | //----------------------------- debug_hash.h ----------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
//
//------------------------------------------------------------------------------

#ifndef DEBUG_HASH_H
#define DEBUG_HASH_H

#include <cstddef>
#include <iomanip>
#include <iostream>
#include <vector>

// namespace acme is used to demonstrate example code.  It is not proposed.

namespace acme
{

// A diagnostic stand-in for a hash algorithm.  It does not mix the bytes it is
// given; it records them, and prints them to std::cout when the "hash" is
// requested.  Useful for inspecting exactly what hash_append feeds a hasher.
class debugHasher
{
    std::vector<unsigned char> buf_;  // every byte received so far, in order
public:
    using result_type = std::size_t;

    // Append the len bytes starting at key to the recorded message.
    void
    operator()(void const* key, std::size_t len) noexcept
    {
        unsigned char const* const first =
            static_cast<unsigned char const*>(key);
        buf_.insert(buf_.end(), first, first + len);
    }

    // Print the recorded bytes to std::cout as two-digit hex values, 16 per
    // row, followed by a final newline.  Returns the number of bytes recorded.
    explicit
    operator std::size_t() noexcept
    {
        // Remember the caller's formatting so that the hex base and the '0'
        // fill character used for the dump do not leak out of this function.
        std::ios_base::fmtflags const saved_flags = std::cout.flags();
        char const saved_fill = std::cout.fill('0');
        std::cout << std::hex;
        unsigned int column = 0;
        for (unsigned char c : buf_)
        {
            // Widen first: streaming an unsigned char would print a character.
            std::cout << std::setw(2) << static_cast<unsigned>(c) << ' ';
            if (++column == 16)
            {
                std::cout << '\n';
                column = 0;
            }
        }
        std::cout << '\n';
        std::cout.flags(saved_flags);
        std::cout.fill(saved_fill);
        return buf_.size();
    }
};

}  // acme

#endif  // DEBUG_HASH_H
// -----------------------------------------------------------------------------
// /endian.h:
// -----------------------------------------------------------------------------
//-------------------------------- endian.h ------------------------------------
//
// This software is in the public domain.  The only restriction on its use is
// that no one can remove it from the public domain by claiming ownership of it,
// including the original authors.
//
// There is no warranty of correctness on the software contained herein.  Use
// at your own risk.
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef ENDIAN_H 13 | #define ENDIAN_H 14 | 15 | namespace xstd 16 | { 17 | 18 | // endian provides answers to the following questions: 19 | // 1. Is this system big or little endian? 20 | // 2. Is the "desired endian" of some class or function the same as the 21 | // native endian? 22 | enum class endian 23 | { 24 | native = __BYTE_ORDER__, 25 | little = __ORDER_LITTLE_ENDIAN__, 26 | big = __ORDER_BIG_ENDIAN__ 27 | }; 28 | 29 | static_assert(endian::native == endian::little || 30 | endian::native == endian::big, 31 | "endian::native shall be one of endian::little or endian::big"); 32 | 33 | static_assert(endian::big != endian::little, 34 | "endian::big and endian::little shall have different values"); 35 | 36 | } // xstd 37 | 38 | #endif // ENDIAN_H 39 | -------------------------------------------------------------------------------- /example.cpp: -------------------------------------------------------------------------------- 1 | #include "hash_append.h" 2 | #include 3 | 4 | class MyHashAlgorithm 5 | { 6 | public: 7 | static constexpr xstd::endian endian = xstd::endian::native; 8 | 9 | using result_type = std::size_t; 10 | 11 | void 12 | operator()(void const* s, std::size_t length) noexcept 13 | { 14 | if (length >= 4 && length <= 8) 15 | return hash_4to8_bytes(s, length); 16 | if (length > 8 && length <= 16) 17 | return hash_9to16_bytes(s, length); 18 | if (length > 16 && length <= 32) 19 | return hash_17to32_bytes(s, length); 20 | if (length > 32) 21 | return hash_33to64_bytes(s, length); 22 | if (length != 0) 23 | return hash_1to3_bytes(s, length); 24 | } 25 | 26 | explicit 27 | operator std::size_t() noexcept; 28 | 29 | private: 30 | void hash_1to3_bytes(void const* s, std::size_t length) noexcept; 31 | void hash_4to8_bytes(void const* s, std::size_t length) noexcept; 32 | void hash_9to16_bytes(void const* s, std::size_t length) noexcept; 33 | void 
hash_17to32_bytes(void const* s, std::size_t length) noexcept; 34 | void hash_33to64_bytes(void const* s, std::size_t length) noexcept; 35 | }; 36 | 37 | std::size_t 38 | test(int i) 39 | { 40 | return xstd::uhash{}(i); 41 | } 42 | -------------------------------------------------------------------------------- /fnv1a.h: -------------------------------------------------------------------------------- 1 | //-------------------------------- fnv1a.h ------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef FNV1A_H 13 | #define FNV1A_H 14 | 15 | #include "endian.h" 16 | #include 17 | 18 | // namespace acme is used to demonstrate example code. It is not proposed. 
19 | 20 | namespace acme 21 | { 22 | 23 | class fnv1a 24 | { 25 | std::size_t state_ = 14695981039346656037u; 26 | public: 27 | 28 | static constexpr xstd::endian endian = xstd::endian::native; 29 | using result_type = std::size_t; 30 | 31 | void 32 | operator()(void const* key, std::size_t len) noexcept 33 | { 34 | unsigned char const* p = static_cast(key); 35 | unsigned char const* const e = p + len; 36 | for (; p < e; ++p) 37 | state_ = (state_ ^ *p) * 1099511628211u; 38 | } 39 | 40 | explicit 41 | operator std::size_t() noexcept 42 | { 43 | return state_; 44 | } 45 | }; 46 | 47 | } // acme 48 | 49 | #endif // FNV1A_H 50 | -------------------------------------------------------------------------------- /hash_adaptors.h: -------------------------------------------------------------------------------- 1 | //---------------------------- hash_adaptors.h --------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef HASH_ADAPTORS_H 13 | #define HASH_ADAPTORS_H 14 | 15 | #include 16 | 17 | // namespace acme is used to demonstrate example code. It is not proposed. 
18 | 19 | namespace acme 20 | { 21 | 22 | // Adapt another hasher to append the total length of the message 23 | // (not including the length of the length) to the message 24 | template 25 | class postfix_length 26 | { 27 | std::size_t length_ = 0; 28 | Hasher hasher_; 29 | 30 | public: 31 | using result_type = typename Hasher::result_type; 32 | 33 | postfix_length() = default; 34 | explicit postfix_length(Hasher const& hasher) 35 | : hasher_(hasher) 36 | {} 37 | 38 | void 39 | operator()(void const* key, std::size_t len) noexcept 40 | { 41 | length_ += len; 42 | hasher_(key, len); 43 | } 44 | 45 | explicit 46 | operator result_type() noexcept 47 | { 48 | using xstd::hash_append; 49 | hash_append(hasher_, length_); 50 | return static_cast(hasher_); 51 | } 52 | }; 53 | 54 | template 55 | class type_erased_hasher 56 | { 57 | public: 58 | using result_type = ResultType; 59 | 60 | private: 61 | using function = std::function; 62 | 63 | function hasher_; 64 | result_type (*convert_)(function&); 65 | 66 | public: 67 | template {} && 71 | std::is_same::result_type, 72 | result_type>{} 73 | > 74 | > 75 | explicit 76 | type_erased_hasher(Hasher&& h) 77 | : hasher_(std::forward(h)) 78 | , convert_(convert>) 79 | { 80 | } 81 | 82 | void 83 | operator()(void const* key, std::size_t len) 84 | { 85 | hasher_(key, len); 86 | } 87 | 88 | explicit 89 | operator result_type() noexcept 90 | { 91 | return convert_(hasher_); 92 | } 93 | 94 | template 95 | T* 96 | target() noexcept 97 | { 98 | return hasher_.target(); 99 | } 100 | 101 | private: 102 | template 103 | static 104 | result_type 105 | convert(function& f) noexcept 106 | { 107 | return static_cast(*f.target());; 108 | } 109 | }; 110 | 111 | } // acme 112 | 113 | #endif // HASH_ADAPTORS_H 114 | -------------------------------------------------------------------------------- /hash_append.h: -------------------------------------------------------------------------------- 1 | //----------------------------- hash_append.h 
---------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef HASH_APPEND 13 | #define HASH_APPEND 14 | 15 | #include "endian.h" 16 | #include 17 | #include // memmove 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "siphash.h" // the current default hasher 30 | 31 | // Everything in namespace xstd, excluding those items in xstd::detail, 32 | // is proposed. 33 | 34 | // C++14 is assumed below because std::index_sequence_for makes hash_append 35 | // for tuple just so easy. So in for a penny, in for a pound... 36 | 37 | namespace xstd 38 | { 39 | 40 | namespace detail 41 | { 42 | 43 | // Standards-worthy utilities, but not for this proposal... 
44 | 45 | template struct static_and; 46 | 47 | template 48 | struct static_and 49 | : public std::integral_constant{}> 50 | { 51 | }; 52 | 53 | template <> 54 | struct static_and<> 55 | : public std::true_type 56 | { 57 | }; 58 | 59 | template struct static_sum; 60 | 61 | template 62 | struct static_sum 63 | : public std::integral_constant{}> 64 | { 65 | }; 66 | 67 | template <> 68 | struct static_sum<> 69 | : public std::integral_constant 70 | { 71 | }; 72 | 73 | template 74 | constexpr 75 | inline 76 | void 77 | reverse_bytes(T& t) 78 | { 79 | unsigned char* bytes = static_cast(std::memmove(std::addressof(t), 80 | std::addressof(t), 81 | sizeof(T))); 82 | for (unsigned i = 0; i < sizeof(T)/2; ++i) 83 | std::swap(bytes[i], bytes[sizeof(T)-1-i]); 84 | } 85 | 86 | template 87 | constexpr 88 | inline 89 | void 90 | maybe_reverse_bytes(T&, std::false_type) 91 | { 92 | } 93 | 94 | template 95 | constexpr 96 | inline 97 | void 98 | maybe_reverse_bytes(T& t, std::true_type) 99 | { 100 | reverse_bytes(t); 101 | } 102 | 103 | template 104 | constexpr 105 | inline 106 | void 107 | maybe_reverse_bytes(T& t, Hasher&) 108 | { 109 | maybe_reverse_bytes(t, std::integral_constant{}); 111 | } 112 | 113 | } // detail 114 | 115 | // is_uniquely_represented 116 | 117 | // A type T is contiguously hashable if for all combinations of two values of 118 | // a type, say x and y, if x == y, then it must also be true that 119 | // memcmp(addressof(x), addressof(y), sizeof(T)) == 0. I.e. if x == y, 120 | // then x and y have the same bit pattern representation. 
121 | 122 | template 123 | struct is_uniquely_represented 124 | : public std::integral_constant{} || 125 | std::is_enum {} || 126 | std::is_pointer{}> 127 | {}; 128 | 129 | template 130 | struct is_uniquely_represented 131 | : public is_uniquely_represented 132 | {}; 133 | 134 | template 135 | struct is_uniquely_represented 136 | : public is_uniquely_represented 137 | {}; 138 | 139 | template 140 | struct is_uniquely_represented 141 | : public is_uniquely_represented 142 | {}; 143 | 144 | // is_uniquely_represented> 145 | 146 | template 147 | struct is_uniquely_represented> 148 | : public std::integral_constant{} && 149 | is_uniquely_represented{} && 150 | sizeof(T) + sizeof(U) == sizeof(std::pair)> 151 | { 152 | }; 153 | 154 | // is_uniquely_represented> 155 | 156 | template 157 | struct is_uniquely_represented> 158 | : public std::integral_constant{}...>{} && 160 | detail::static_sum{} == sizeof(std::tuple)> 161 | { 162 | }; 163 | 164 | // is_uniquely_represented 165 | 166 | template 167 | struct is_uniquely_represented 168 | : public is_uniquely_represented 169 | { 170 | }; 171 | 172 | // is_uniquely_represented> 173 | 174 | template 175 | struct is_uniquely_represented> 176 | : public std::integral_constant{} && 177 | sizeof(T)*N == sizeof(std::array)> 178 | { 179 | }; 180 | 181 | template 182 | struct is_contiguously_hashable 183 | : public std::integral_constant{} && 184 | (sizeof(T) == 1 || 185 | HashAlgorithm::endian == endian::native)> 186 | {}; 187 | 188 | template 189 | struct is_contiguously_hashable 190 | : public std::integral_constant{} && 191 | (sizeof(T) == 1 || 192 | HashAlgorithm::endian == endian::native)> 193 | {}; 194 | 195 | // template 196 | // void 197 | // hash_append(Hasher& h, T const& t); 198 | // 199 | // Each type to be hashed must either be contiguously hashable, or overload 200 | // hash_append to expose its hashable bits to a Hasher. 
201 | 202 | // scalars 203 | 204 | template 205 | inline 206 | std::enable_if_t 207 | < 208 | is_contiguously_hashable{} 209 | > 210 | hash_append(Hasher& h, T const& t) noexcept 211 | { 212 | h(std::addressof(t), sizeof(t)); 213 | } 214 | 215 | template 216 | inline 217 | std::enable_if_t 218 | < 219 | !is_contiguously_hashable{} && 220 | (std::is_integral{} || std::is_pointer{} || std::is_enum{}) 221 | > 222 | hash_append(Hasher& h, T t) noexcept 223 | { 224 | detail::reverse_bytes(t); 225 | h(std::addressof(t), sizeof(t)); 226 | } 227 | 228 | template 229 | inline 230 | std::enable_if_t 231 | < 232 | std::is_floating_point{} 233 | > 234 | hash_append(Hasher& h, T t) noexcept 235 | { 236 | if (t == 0) 237 | t = 0; 238 | detail::maybe_reverse_bytes(t, h); 239 | h(&t, sizeof(t)); 240 | } 241 | 242 | template 243 | inline 244 | void 245 | hash_append(Hasher& h, std::nullptr_t) noexcept 246 | { 247 | void const* p = nullptr; 248 | detail::maybe_reverse_bytes(p, h); 249 | h(&p, sizeof(p)); 250 | } 251 | 252 | // Forward declarations for ADL purposes 253 | 254 | template 255 | std::enable_if_t 256 | < 257 | !is_contiguously_hashable{} 258 | > 259 | hash_append(Hasher& h, T (&a)[N]) noexcept; 260 | 261 | template 262 | std::enable_if_t 263 | < 264 | !is_contiguously_hashable{} 265 | > 266 | hash_append(Hasher& h, std::basic_string const& s) noexcept; 267 | 268 | template 269 | std::enable_if_t 270 | < 271 | is_contiguously_hashable{} 272 | > 273 | hash_append(Hasher& h, std::basic_string const& s) noexcept; 274 | 275 | template 276 | std::enable_if_t 277 | < 278 | !is_contiguously_hashable, Hasher>{} 279 | > 280 | hash_append (Hasher& h, std::pair const& p) noexcept; 281 | 282 | template 283 | std::enable_if_t 284 | < 285 | !is_contiguously_hashable{} 286 | > 287 | hash_append(Hasher& h, std::vector const& v) noexcept; 288 | 289 | template 290 | std::enable_if_t 291 | < 292 | is_contiguously_hashable{} 293 | > 294 | hash_append(Hasher& h, std::vector const& v) noexcept; 
295 | 296 | template 297 | std::enable_if_t 298 | < 299 | !is_contiguously_hashable, Hasher>{} 300 | > 301 | hash_append(Hasher& h, std::array const& a) noexcept; 302 | 303 | template 304 | std::enable_if_t 305 | < 306 | !is_contiguously_hashable, Hasher>{} 307 | > 308 | hash_append(Hasher& h, std::tuple const& t) noexcept; 309 | 310 | template 311 | void 312 | hash_append(Hasher& h, std::unordered_map const& m); 313 | 314 | template 315 | void 316 | hash_append(Hasher& h, std::unordered_set const& s); 317 | 318 | template 319 | void 320 | hash_append (Hasher& h, T0 const& t0, T1 const& t1, T const& ...t) noexcept; 321 | 322 | // c-array 323 | 324 | template 325 | std::enable_if_t 326 | < 327 | !is_contiguously_hashable{} 328 | > 329 | hash_append(Hasher& h, T (&a)[N]) noexcept 330 | { 331 | for (auto const& t : a) 332 | hash_append(h, t); 333 | } 334 | 335 | // basic_string 336 | 337 | template 338 | inline 339 | std::enable_if_t 340 | < 341 | !is_contiguously_hashable{} 342 | > 343 | hash_append(Hasher& h, std::basic_string const& s) noexcept 344 | { 345 | for (auto c : s) 346 | hash_append(h, c); 347 | hash_append(h, s.size()); 348 | } 349 | 350 | template 351 | inline 352 | std::enable_if_t 353 | < 354 | is_contiguously_hashable{} 355 | > 356 | hash_append(Hasher& h, std::basic_string const& s) noexcept 357 | { 358 | h(s.data(), s.size()*sizeof(CharT)); 359 | hash_append(h, s.size()); 360 | } 361 | 362 | // pair 363 | 364 | template 365 | inline 366 | std::enable_if_t 367 | < 368 | !is_contiguously_hashable, Hasher>{} 369 | > 370 | hash_append (Hasher& h, std::pair const& p) noexcept 371 | { 372 | hash_append (h, p.first, p.second); 373 | } 374 | 375 | // vector 376 | 377 | template 378 | inline 379 | std::enable_if_t 380 | < 381 | !is_contiguously_hashable{} 382 | > 383 | hash_append(Hasher& h, std::vector const& v) noexcept 384 | { 385 | for (auto const& t : v) 386 | hash_append(h, t); 387 | hash_append(h, v.size()); 388 | } 389 | 390 | template 391 | inline 
392 | std::enable_if_t 393 | < 394 | is_contiguously_hashable{} 395 | > 396 | hash_append(Hasher& h, std::vector const& v) noexcept 397 | { 398 | h(v.data(), v.size()*sizeof(T)); 399 | hash_append(h, v.size()); 400 | } 401 | 402 | // array 403 | 404 | template 405 | std::enable_if_t 406 | < 407 | !is_contiguously_hashable, Hasher>{} 408 | > 409 | hash_append(Hasher& h, std::array const& a) noexcept 410 | { 411 | for (auto const& t : a) 412 | hash_append(h, t); 413 | } 414 | 415 | // tuple 416 | 417 | namespace detail 418 | { 419 | 420 | inline 421 | void 422 | for_each_item(...) noexcept 423 | { 424 | } 425 | 426 | template 427 | inline 428 | int 429 | hash_one(Hasher& h, T const& t) noexcept 430 | { 431 | hash_append(h, t); 432 | return 0; 433 | } 434 | 435 | template 436 | inline 437 | void 438 | tuple_hash(Hasher& h, std::tuple const& t, std::index_sequence) noexcept 439 | { 440 | for_each_item(hash_one(h, std::get(t))...); 441 | } 442 | 443 | } // detail 444 | 445 | template 446 | inline 447 | std::enable_if_t 448 | < 449 | !is_contiguously_hashable, Hasher>{} 450 | > 451 | hash_append(Hasher& h, std::tuple const& t) noexcept 452 | { 453 | detail::tuple_hash(h, t, std::index_sequence_for{}); 454 | } 455 | 456 | // variadic 457 | 458 | template 459 | inline 460 | void 461 | hash_append (Hasher& h, T0 const& t0, T1 const& t1, T const& ...t) noexcept 462 | { 463 | hash_append(h, t0); 464 | hash_append(h, t1, t...); 465 | } 466 | 467 | // error_code 468 | 469 | template 470 | inline 471 | void 472 | hash_append(HashAlgorithm& h, std::error_code const& ec) 473 | { 474 | hash_append(h, ec.value(), &ec.category()); 475 | } 476 | 477 | // uhash 478 | 479 | template 480 | struct uhash 481 | { 482 | using result_type = typename Hasher::result_type; 483 | 484 | template 485 | result_type 486 | operator()(T const& t) const noexcept 487 | { 488 | Hasher h; 489 | hash_append(h, t); 490 | return static_cast(h); 491 | } 492 | }; 493 | 494 | } // xstd 495 | 496 | #endif // 
HASH_APPEND 497 | -------------------------------------------------------------------------------- /hash_functors.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------- hash_functors.cpp -------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #include "hash_functors.h" 13 | 14 | #include 15 | 16 | // namespace acme is used to demonstrate example code. It is not proposed. 17 | 18 | namespace acme 19 | { 20 | 21 | namespace 22 | { 23 | 24 | std::tuple 25 | init_seeds() 26 | { 27 | std::mt19937_64 eng{std::random_device{}()}; 28 | return std::tuple{eng(), eng()}; 29 | } 30 | 31 | } // unnamed 32 | 33 | std::tuple 34 | get_process_seed() 35 | { 36 | static std::tuple seeds = init_seeds(); 37 | return seeds; 38 | } 39 | 40 | } // acme 41 | -------------------------------------------------------------------------------- /hash_functors.h: -------------------------------------------------------------------------------- 1 | //---------------------------- hash_functors.h --------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef HASH_FUNCTORS_H 13 | #define HASH_FUNCTORS_H 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "siphash.h" 20 | #include "hash_append.h" 21 | 22 | // namespace acme is used to demonstrate example code. It is not proposed. 23 | 24 | namespace acme 25 | { 26 | 27 | // A hasher that can be seeded with up to two seeds 28 | template 29 | class seeded_hash 30 | { 31 | private: 32 | std::size_t seed0_; 33 | std::size_t seed1_; 34 | public: 35 | using result_type = typename Hasher::result_type; 36 | 37 | explicit seeded_hash(std::size_t seed0 = 0, std::size_t seed1 = 0) 38 | : seed0_(seed0) 39 | , seed1_(seed1) 40 | {} 41 | 42 | template 43 | result_type 44 | operator()(T const& t) const noexcept 45 | { 46 | Hasher h(seed0_, seed1_); 47 | using xstd::hash_append; 48 | hash_append(h, t); 49 | return static_cast(h); 50 | } 51 | }; 52 | 53 | // A hasher that can be seeded, or defaults to a random seed 54 | template 55 | class randomly_seeded_hash 56 | { 57 | private: 58 | static std::mutex mut_s; 59 | static std::mt19937_64 rand_s; 60 | 61 | std::size_t seed0_; 62 | std::size_t seed1_; 63 | public: 64 | using result_type = typename Hasher::result_type; 65 | 66 | randomly_seeded_hash() 67 | { 68 | std::lock_guard _(mut_s); 69 | seed0_ = rand_s(); 70 | seed1_ = rand_s(); 71 | } 72 | 73 | explicit randomly_seeded_hash(std::size_t seed0, std::size_t seed1 = 0) 74 | : seed0_(seed0) 75 | , seed1_(seed1) 76 | {} 77 | 78 | template 79 | result_type 80 | operator()(T const& t) const noexcept 81 | { 82 | Hasher h(seed0_, seed1_); 83 | using xstd::hash_append; 84 | hash_append(h, t); 85 | return static_cast(h); 86 | } 87 | }; 88 | 89 | template 90 | std::mutex 91 | randomly_seeded_hash::mut_s; 92 | 93 | template 94 | std::mt19937_64 95 | randomly_seeded_hash::rand_s{std::random_device{}()}; 96 | 97 | // A hasher that can be seeded, or defaults to a 
random seed set per process 98 | 99 | std::tuple 100 | get_process_seed(); 101 | 102 | template 103 | class process_seeded_hash 104 | { 105 | public: 106 | using result_type = typename Hasher::result_type; 107 | 108 | template 109 | result_type 110 | operator()(T const& t) const noexcept 111 | { 112 | std::uint64_t seed0; 113 | std::uint64_t seed1; 114 | std::tie(seed0, seed1) = get_process_seed(); 115 | Hasher h(seed0, seed1); 116 | using xstd::hash_append; 117 | hash_append(h, t); 118 | return static_cast(h); 119 | } 120 | }; 121 | 122 | // A hasher that prepends salt 123 | template 124 | class salted_hash 125 | { 126 | private: 127 | std::size_t salt_; 128 | public: 129 | using result_type = typename Hasher::result_type; 130 | 131 | explicit salted_hash(std::size_t salt) 132 | : salt_(salt) 133 | {} 134 | 135 | template 136 | result_type 137 | operator()(T const& t) const noexcept 138 | { 139 | Hasher h; 140 | using xstd::hash_append; 141 | hash_append(h, salt_, t); 142 | return static_cast(h); 143 | } 144 | }; 145 | 146 | // A hasher that adds padding 147 | template 148 | class padded_hash 149 | { 150 | private: 151 | std::string prefix_; 152 | std::string postfix_; 153 | public: 154 | using result_type = typename Hasher::result_type; 155 | 156 | padded_hash(std::string const& prefix, std::string const& postfix) 157 | : prefix_(prefix) 158 | , postfix_(postfix) 159 | {} 160 | 161 | template 162 | result_type 163 | operator()(T const& t) const noexcept 164 | { 165 | Hasher h; 166 | using xstd::hash_append; 167 | hash_append(h, prefix_ + " GG ", t, " RR " + postfix_); 168 | return static_cast(h); 169 | } 170 | }; 171 | 172 | } // acme 173 | 174 | #endif // HASH_FUNCTORS_H 175 | -------------------------------------------------------------------------------- /hash_test.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------- hash_test.cpp ---------------------------------- 2 | // 3 | // This software 
is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #include "hash_test.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | template 22 | inline 23 | T 24 | sqr(T t) 25 | { 26 | return t*t; 27 | } 28 | 29 | // what fraction can be hashed without collision? 30 | float 31 | test2 (std::vector const& hashes) 32 | { 33 | std::set s(hashes.begin(), hashes.end()); 34 | return 1 - static_cast(s.size()) / hashes.size(); 35 | } 36 | 37 | float 38 | test1(std::vector const& hashes) 39 | { 40 | const unsigned nbits = CHAR_BIT * sizeof(std::size_t); 41 | const unsigned rows = nbits / 4; 42 | unsigned counts[rows][16] = {0}; 43 | for (auto h : hashes) 44 | { 45 | std::size_t mask = 0xF; 46 | for (unsigned i = 0; i < rows; ++i, mask <<= 4) 47 | counts[i][(h & mask) >> 4*i] += 1; 48 | } 49 | float mean_rows[rows] = {0}; 50 | float mean_cols[16] = {0}; 51 | for (unsigned i = 0; i < rows; ++i) 52 | { 53 | for (unsigned j = 0; j < 16; ++j) 54 | { 55 | mean_rows[i] += counts[i][j]; 56 | mean_cols[j] += counts[i][j]; 57 | } 58 | } 59 | for (unsigned i = 0; i < rows; ++i) 60 | mean_rows[i] /= 16; 61 | for (unsigned j = 0; j < 16; ++j) 62 | mean_cols[j] /= rows; 63 | // for (unsigned i = 0; i < rows; ++i) 64 | // { 65 | // for (unsigned j = 0; j < 16; ++j) 66 | // std::cout << counts[i][j] << ' '; 67 | // std::cout << '\n'; 68 | // } 69 | // std::cout << '\n'; 70 | // for (unsigned i = 0; i < rows; ++i) 71 | // std::cout << mean_rows[i] << ' '; 72 | // std::cout << '\n'; 73 | // for (unsigned j = 0; j < 16; ++j) 74 | // std::cout << mean_cols[j] << ' '; 75 | // std::cout << '\n'; 76 | 
std::pair dev[rows][16]; 77 | // std::cout << std::fixed; 78 | // std::cout << std::setprecision(2); 79 | for (unsigned i = 0; i < rows; ++i) 80 | { 81 | for (unsigned j = 0; j < 16; ++j) 82 | { 83 | dev[i][j].first = std::abs(counts[i][j] - mean_rows[i]) / mean_rows[i]; 84 | dev[i][j].second = std::abs(counts[i][j] - mean_cols[j]) / mean_cols[j]; 85 | } 86 | } 87 | float max_err = 0; 88 | for (unsigned i = 0; i < rows; ++i) 89 | { 90 | for (unsigned j = 0; j < 16; ++j) 91 | { 92 | // std::cout << '{' << dev[i][j].first << ", " << dev[i][j].second << "} "; 93 | if (max_err < dev[i][j].first) 94 | max_err = dev[i][j].first; 95 | if (max_err < dev[i][j].second) 96 | max_err = dev[i][j].second; 97 | } 98 | // std::cout << '\n'; 99 | } 100 | // std::cout << max_err << '\n'; 101 | return max_err; 102 | } 103 | 104 | template 105 | std::uint32_t 106 | window (T* blob, int start, int count ) 107 | { 108 | std::size_t const len = sizeof(T); 109 | static_assert((len & 3) == 0, ""); 110 | if(count == 0) 111 | return 0; 112 | int const nbits = len * CHAR_BIT; 113 | start %= nbits; 114 | int ndwords = len / 4; 115 | std::uint32_t const* k = static_cast(static_cast(blob)); 116 | int c = start & (32-1); 117 | int d = start / 32; 118 | if(c == 0) 119 | return (k[d] & ((1 << count) - 1)); 120 | int ia = (d + 1) % ndwords; 121 | int ib = (d + 0) % ndwords; 122 | std::uint32_t a = k[ia]; 123 | std::uint32_t b = k[ib]; 124 | std::uint32_t t = (a << (32-c)) | (b >> c); 125 | t &= ((1 << count)-1); 126 | return t; 127 | } 128 | 129 | double 130 | calcScore (const int* bins, const std::size_t bincount, const double k) 131 | { 132 | double const n = bincount; 133 | // compute rms^2 value 134 | double rms_sq = 0; 135 | for(std::size_t i = 0; i < bincount; ++i) 136 | rms_sq += sqr(bins[i]);; 137 | rms_sq /= n; 138 | // compute fill factor 139 | double const f = (sqr(k) - 1) / (n*rms_sq - k); 140 | // rescale to (0,1) with 0 = good, 1 = bad 141 | return 1 - (f / n); 142 | } 143 | 144 | 
namespace detail 145 | { 146 | 147 | inline 148 | char 149 | score2ascii (double n) 150 | { 151 | char c = static_cast(n); 152 | if (c == 0) 153 | c = '.'; 154 | else if (c > 9) 155 | c = 'X'; 156 | else 157 | c += '0'; 158 | return c; 159 | } 160 | 161 | inline 162 | double 163 | clip (double n, double min, double max) 164 | { 165 | if (n < min) 166 | return min; 167 | if (n > max) 168 | return max; 169 | return n; 170 | } 171 | 172 | } // detail 173 | 174 | float 175 | test3 (std::vector const& hashes) 176 | { 177 | int maxwidth = 20; 178 | // We need at least 5 keys per bin to reliably test distribution biases 179 | // down to 1%, so don't bother to test sparser distributions than that 180 | while (static_cast(hashes.size()) / (1 << maxwidth) < 5.0) 181 | maxwidth--; 182 | double worst = 0; 183 | int worstStart = -1; 184 | int worstWidth = -1; 185 | std::vector bins (1 << maxwidth); 186 | int const hashbits = sizeof(std::size_t) * CHAR_BIT; 187 | for (int start = 0; start < hashbits; ++start) 188 | { 189 | int width = maxwidth; 190 | bins.assign (1 << width, 0); 191 | for (std::size_t j = 0; j < hashes.size(); ++j) 192 | ++bins[window(&hashes[j], start, width)]; 193 | // Test the distribution, then fold the bins in half, 194 | // repeat until we're down to 256 bins 195 | while (bins.size() >= 256) 196 | { 197 | double score = calcScore(bins.data(), bins.size(), hashes.size()); 198 | if (score > worst) 199 | { 200 | worst = score; 201 | worstStart = start; 202 | worstWidth = width; 203 | } 204 | if (--width < 8) 205 | break; 206 | for (std::size_t i = 0, j = bins.size() / 2; j < bins.size(); ++i, ++j) 207 | bins[i] += bins[j]; 208 | bins.resize(bins.size() / 2); 209 | } 210 | } 211 | return worst; 212 | } 213 | 214 | float 215 | test4(std::vector const& hashes, double lf) 216 | { 217 | assert(lf > 0); 218 | std::vector b(static_cast(hashes.size() / lf), 0); 219 | for (auto x : hashes) 220 | b[x % b.size()]++; 221 | double c = 0; 222 | for (auto x : b) 223 | c += 
x*(x+1)/2.; 224 | if (hashes.size() != 0) 225 | c /= hashes.size(); 226 | return c / (lf/2 + 1) - 1; 227 | } 228 | 229 | float 230 | test5(std::vector const& hashes, double lf) 231 | { 232 | assert(lf > 0); 233 | std::vector b(static_cast(hashes.size() / lf), 0); 234 | for (auto x : hashes) 235 | b[x % b.size()]++; 236 | std::size_t m = 0; 237 | for (auto x : b) 238 | m = std::max(m, x); 239 | return m; 240 | } 241 | -------------------------------------------------------------------------------- /hash_test.h: -------------------------------------------------------------------------------- 1 | //------------------------------ hash_test.h ----------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef HASH_TEST_H 13 | #define HASH_TEST_H 14 | 15 | #include 16 | 17 | float test1(std::vector const& hashes); 18 | float test2(std::vector const& hashes); 19 | float test3(std::vector const& hashes); 20 | float test4(std::vector const& hashess, double lf = 1.0); 21 | float test5(std::vector const& hashess, double lf = 1.0); 22 | 23 | #endif // HASH_TEST_H 24 | -------------------------------------------------------------------------------- /jenkins1.h: -------------------------------------------------------------------------------- 1 | //------------------------------- jenkins1.h ----------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 
6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef JENKINS1_H 13 | #define JENKINS1_H 14 | 15 | #include "endian.h" 16 | #include 17 | 18 | // namespace acme is used to demonstrate example code. It is not proposed. 19 | 20 | namespace acme 21 | { 22 | 23 | class jenkins1 24 | { 25 | std::size_t state_ = 0; 26 | 27 | public: 28 | static constexpr xstd::endian endian = xstd::endian::native; 29 | using result_type = std::size_t; 30 | 31 | void 32 | operator()(void const* key, std::size_t len) noexcept 33 | { 34 | unsigned char const* p = static_cast(key); 35 | unsigned char const* const e = p + len; 36 | for (; p < e; ++p) 37 | { 38 | state_ += *p; 39 | state_ += state_ << 10; 40 | state_ ^= state_ >> 6; 41 | } 42 | } 43 | 44 | explicit 45 | operator std::size_t() noexcept 46 | { 47 | state_ += state_ << 3; 48 | state_ ^= state_ >> 11; 49 | state_ += state_ << 15; 50 | return state_; 51 | } 52 | }; 53 | 54 | } // acme 55 | 56 | #endif // JENKINS1_H 57 | -------------------------------------------------------------------------------- /m16.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------- m16.cpp ------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | // The proposal 13 | #include "hash_append.h" 14 | 15 | // Example Hashers 16 | #include "siphash.h" 17 | #include "fnv1a.h" 18 | #include "city_hash.h" 19 | 20 | #include "hash_test.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "../llvm/include/llvm/ADT/Hashing.h" 32 | 33 | int 34 | main() 35 | { 36 | #if 0 37 | xstd::uhash h; 38 | #else 39 | std::hash h; 40 | #endif 41 | std::vector hashes; 42 | hashes.reserve(1048576); 43 | typedef std::chrono::duration secs; 44 | auto t0 = std::chrono::high_resolution_clock::now(); 45 | unsigned long long x = 0; 46 | for (unsigned i = 0; i < hashes.capacity(); ++i, x += 16) 47 | #if 1 48 | hashes.push_back(h(x)); 49 | #else 50 | hashes.push_back(llvm::hash_value(x)); 51 | #endif 52 | auto t1 = std::chrono::high_resolution_clock::now(); 53 | std::cout << secs(t1-t0).count() << " s\n"; 54 | std::cout << test4(hashes) << '\n'; 55 | std::cout << test5(hashes) << '\n'; 56 | } 57 | 58 | // llvm / N3333 59 | // 0.00994927 s 60 | // -0.000307719 61 | // 8 62 | // 63 | // xstd::uhash 64 | // 0.0331587 s 65 | // 0.000118891 66 | // 9 67 | // 68 | // xstd::uhash 69 | // 0.0142183 s 70 | // -0.161947 71 | // 3 72 | // 73 | // std::hash 74 | // 0.00767278 s 75 | // 4.66667 76 | // 16 77 | -------------------------------------------------------------------------------- /murmur2A.h: -------------------------------------------------------------------------------- 1 | //------------------------------- murmur2A.h ----------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. 
// Use
// at your own risk.
//
//------------------------------------------------------------------------------

#ifndef MURMUR2A_H
#define MURMUR2A_H

#include <cstddef>

// namespace acme is used to demonstrate example code. It is not proposed.

namespace acme
{

// Incremental MurmurHash2A.
//
// Bytes are supplied through operator()(p, len) in any number of calls.
// Bytes that do not yet fill a 4-byte block are kept in m_tail (m_count of
// them) and completed by the next call, so the result does not depend on how
// the input is split across calls.  The explicit conversion to result_type
// finalises the hash; it modifies the state and is meant to be used once.
class MurmurHash2A
{
    unsigned int m_hash;    // running hash value
    unsigned int m_tail;    // pending bytes, first byte in the low 8 bits
    unsigned int m_count;   // number of pending bytes in m_tail (0..3)
    unsigned int m_size;    // total number of bytes consumed

public:
    using result_type = unsigned int;

    MurmurHash2A ( unsigned int seed = 0 ) noexcept
        : m_hash{seed}
        , m_tail{0}
        , m_count{0}
        , m_size{0}
    {
    }

    // Consumes the len bytes starting at p.
    void
    operator() ( const void* p, std::size_t len ) noexcept
    {
        const unsigned char* data = static_cast<const unsigned char*>(p);
        // The length counter is 32 bits wide by design; make the narrowing
        // explicit.
        m_size += static_cast<unsigned int>(len);

        // First complete a block left unfinished by an earlier call.
        MixTail(data,len);

        while(len >= 4)
        {
            unsigned int k = load32(data);

            mmix(m_hash,k);

            data += 4;
            len -= 4;
        }

        // Stash what is left (fewer than 4 bytes) for the next call.
        MixTail(data,len);
    }

    // Mixes in the pending tail and the total length, applies the final
    // avalanche steps and returns the hash.
    explicit
    operator result_type () noexcept
    {
        mmix(m_hash,m_tail);
        mmix(m_hash,m_size);

        m_hash ^= m_hash >> 13;
        m_hash *= m;
        m_hash ^= m_hash >> 15;

        return m_hash;
    }

private:

    static const unsigned int m = 0x5bd1e995;
    static const int r = 24;

    // Assembles 4 bytes into a word, first byte lowest.  This is the same
    // layout MixTail builds in m_tail, and unlike dereferencing a cast
    // pointer it has no alignment or aliasing requirements.
    static
    unsigned int
    load32(const unsigned char* p) noexcept
    {
        return  static_cast<unsigned int>(p[0])
             | (static_cast<unsigned int>(p[1]) << 8)
             | (static_cast<unsigned int>(p[2]) << 16)
             | (static_cast<unsigned int>(p[3]) << 24);
    }

    // Mixes the block k into the hash h.  k is scrambled in place.
    static
    void
    mmix(unsigned int& h, unsigned int& k) noexcept
    {
        k *= m;
        k ^= k >> r;
        k *= m;
        h *= m;
        h ^= k;
    }

    // Moves bytes from data into m_tail while a block is pending or fewer
    // than 4 bytes remain, mixing m_tail into the hash whenever it fills up.
    // data and len are advanced past the bytes consumed.
    void
    MixTail ( const unsigned char * & data, std::size_t & len ) noexcept
    {
        while( len && ((len<4) || m_count) )
        {
            // Widen to unsigned before shifting: a byte >= 0x80 promoted to
            // int and shifted left by 24 would overflow.
            m_tail |= static_cast<unsigned int>(*data++) << (m_count * 8);

            m_count++;
            len--;

            if(m_count == 4)
            {
                mmix(m_hash,m_tail);
                m_tail = 0;
                m_count = 0;
            }
        }
    }
};

}  // acme

#endif  // MURMUR2A_H

--------------------------------------------------------------------------------
/n3876.h:
--------------------------------------------------------------------------------
//-------------------------------- n3876.h -------------------------------------
//
// This software is in the public domain. The only restriction on its use is
// that no one can remove it from the public domain by claiming ownership of it,
// including the original authors.
//
// There is no warranty of correctness on the software contained herein. Use
// at your own risk.
//
//------------------------------------------------------------------------------

#ifndef N3876_H
#define N3876_H

#include <functional>

// This is an implementation of N3876 found at:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n3876.pdf

namespace n3876
{

// End of the recursion: nothing left to combine, seed is unchanged.
inline
void
hash_combine (std::size_t&)
{
}

// Folds std::hash<T>{}(val) into seed.
template <class T>
inline
void
hash_combine (std::size_t& seed, const T& val)
{
    seed ^= std::hash<T>{}(val) + 0x9e3779b9 + (seed<<6) + (seed>>2);
}

// Folds val and then every further argument into seed, left to right.
template <class T, class... Types>
inline
void
hash_combine (std::size_t& seed, const T& val, const Types&... args)
{
    hash_combine(seed, val);
    hash_combine(seed, args...);
}

// Returns the combined hash of all arguments, starting from a seed of 0.
template <class... Types>
inline
std::size_t
hash_val (const Types&... args)
{
    std::size_t seed = 0;
    hash_combine(seed, args...);
    return seed;
}

}  // n3876

#endif  // N3876_H

--------------------------------------------------------------------------------
/pairii.cpp:
--------------------------------------------------------------------------------
//------------------------------- pairii.cpp -----------------------------------
//
// This software is in the public domain.
The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | // The proposal 13 | #include "hash_append.h" 14 | 15 | // Example Hashers 16 | #include "siphash.h" 17 | #include "fnv1a.h" 18 | #include "city_hash.h" 19 | #include "spooky.h" 20 | 21 | #include "hash_test.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "n3876.h" 34 | #include "../llvm/include/llvm/ADT/Hashing.h" 35 | 36 | int 37 | main() 38 | { 39 | #if 0 40 | xstd::uhash h; 41 | #elif 1 42 | std::mt19937_64 eng; 43 | #else 44 | std::hash h; 45 | #endif 46 | std::vector hashes; 47 | hashes.reserve(1048576); 48 | typedef std::chrono::duration secs; 49 | auto t0 = std::chrono::high_resolution_clock::now(); 50 | for (unsigned i = 0; i < 1024; ++i) 51 | for (unsigned j = 0; j < 1024; ++j) 52 | #if 0 53 | hashes.push_back(h(std::make_pair(i, j))); 54 | #elif 0 55 | hashes.push_back(n3876::hash_val(i, j)); 56 | #elif 1 57 | hashes.push_back(eng()); 58 | #else 59 | hashes.push_back(llvm::hash_value(std::make_pair(i, j))); 60 | #endif 61 | auto t1 = std::chrono::high_resolution_clock::now(); 62 | std::cout << secs(t1-t0).count() << " s\n"; 63 | std::cout << test4(hashes) << '\n'; 64 | std::cout << test5(hashes) << '\n'; 65 | } 66 | 67 | // llvm / N3333 68 | // 0.00886668 s 69 | // -0.000547409 70 | // 9 71 | // 72 | // xstd::uhash 73 | // 0.0332836 s 74 | // 0.000781377 75 | // 9 76 | // 77 | // xstd::uhash 78 | // 0.011603 s 79 | // -0.0115 80 | // 4 81 | // 82 | // n3876::hash_val(i, j) 83 | // 0.00748404 s 84 | // 4.75246 85 | // 33 86 | // 87 | // xstd::uhash 88 | // 0.0265555 s 89 | // 
-8.90096e-06 90 | // 9 91 | // 92 | // random hash 93 | // 0.0128204 s 94 | // 0.000203451 95 | // 8 96 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | HashAlgorithm (hash algorithm) Requirements: 2 | 3 | Has nested type result_type (std::size_t when used with std::unordered_*). 4 | 5 | Constructible (default and/or seeded) // initialization 6 | Effects: Initializes the state of the HashAlgorithm. After default 7 | constructing two HashAlgorithm's, h1 and h2, h1 and h2 shall 8 | have the same state. If two HashAlgorithm's h1 and h2 are 9 | constructed with the same seeds, h1 and h2 shall have 10 | the same state. 11 | 12 | HashAlgorithm(HashAlgorithm const& h) 13 | Effects: After construction *this and h have the same state. However 14 | subsequent updates to *this will not affect the state of h, 15 | and vice-versa. 16 | 17 | HashAlgorithm& operator=(HashAlgorithm const& h) 18 | Effects: After the assignment, *this and h have the same state. 19 | Returns: *this. However subsequent updates to *this will not affect 20 | the state of h, and vice-versa. 21 | 22 | void operator()(void const* key, std::size_t len) ; // update operation 23 | Requires: if len > 0, key points to len contiguous bytes to 24 | be consumed by the HashAlgorithm. The finalize operation 25 | has not been called on this object since construction, 26 | or since *this was assigned to. 27 | 28 | Effects: Updates the state of the hasher using the len bytes 29 | referred to by {key, len} pair. 30 | 31 | If for two keys {k1, len1} and {k2, len2}, both len1 and 32 | len2 == 0, then the two keys are considered equivalent. If 33 | len1 != len2, the two keys are considered not equivalent. 34 | If len1 == len2 and len1 > 0, and if memcmp(k1, k2, len1) 35 | == 0, the two keys are equivalent, else they are not 36 | equivalent. If two instances of HashAlgorithm (e.g. 
h1 and h2) 37 | have the same state prior to an update operation, and given 38 | two equivalent keys {k1, len} and {k2, len}, then after 39 | h1(k1, len) and h2(k2, len), then h1 and h2 shall have 40 | the same updated state. 41 | 42 | The HashAlgorithm does not access this memory range after the 43 | update operation returns. [Note: If len == 0, then key may 44 | be nullptr. If len == 0, it is unspecified if the state of 45 | the HashAlgorithm is changed during the update. -- end note] 46 | 47 | explicit operator result_type(); // finalize operation 48 | Requires: This operation has not been called on this object 49 | since construction or since *this was assigned to. 50 | Effects: Converts the state of the HashAlgorithm to a result_type. Two 51 | instances of the same type of HashAlgorithm, with the same state, 52 | shall return the same value. It is unspecified if this 53 | operation changes the state of the HashAlgorithm. 54 | Returns: The converted state. 55 | 56 | Hash_functor Requirements: 57 | 58 | using result_type = typename HashAlgorithm::result_type; 59 | 60 | Constructible (default and/or seeded) // initialization 61 | Effects: Initializes the state of the Hash_functor. A Hash_functor 62 | may be stateless or have state. If not stateless, different 63 | default constructions, and different seeded constructions 64 | (even with the same seeds), are not required to initialize 65 | the Hash_functor to the same state. 66 | 67 | Hash_functor(Hash_functor const& hf) 68 | Effects: After construction *this and hf have the same state. 69 | 70 | Hash_functor& operator=(Hash_functor const& hf) 71 | Effects: After the assignment, *this and hf have the same state. 72 | Returns: *this. 73 | 74 | template 75 | result_type 76 | operator()(T const& t) const; 77 | Requires: HashAlgorithm shall be constructible as specified by a concrete 78 | Hash_functor type. 79 | Effects: Constructs a HashAlgorithm h with automatic storage. 
Each concrete 80 | Hash_functor type shall specify how h is constructed. 81 | However h shall be constructed to the same state for every 82 | invocation of (*this)(t). Updates the state of the HashAlgorithm 83 | in an unspecified manner, except that there shall be 84 | exactly one call to: 85 | 86 | using std::hash_append; 87 | hash_append(h, t); 88 | 89 | at some time during the update operation. Furthermore, 90 | subsequent calls shall update the local h with exactly 91 | the same state every time, except as changed by different 92 | values for t, unless there is an intervening assignment to 93 | *this between calls to this operator. 94 | 95 | Returns: static_cast<result_type>(h). 96 | [Note: For the same value of t, the same value is returned 97 | on subsequent calls unless there is an intervening 98 | assignment to *this between calls to this operator. -- end 99 | note] 100 | 101 | template <class HashAlgorithm> 102 | void 103 | hash_append(HashAlgorithm& h, X const& x); 104 | 105 | Requires: hash_append shall be declared in the same namespace in which X 106 | is declared. HashAlgorithm shall meet the HashAlgorithm requirements. If for 107 | two values of X, x1 and x2, and given two values of type HashAlgorithm, 108 | h1 and h2, where h1 and h2 have the same state, then if x1 == x2 109 | then after hash_append(h1, x1) and hash_append(h2, x2), h1 and 110 | h2 shall have the same updated state. 111 | 112 | Effects: Updates the state of h with the value of x. Different values of 113 | x should update h to different states. This may be done by 114 | calling h(void*, size_t), and / or by calling hash_append 115 | (unqualified) on h and sub-objects of x, or by calling 116 | hash_append on h and some value computed from the value of x. 117 | 118 | is_contiguously_hashable: 119 | 120 | A type T is contiguously hashable if for all combinations of two values of a 121 | type, say x and y, if x == y, then it must also be true that 122 | memcmp(addressof(x), addressof(y), sizeof(T)) == 0. 
123 | 124 | template struct is_contiguously_hashable derives from true_type 125 | if T is contiguously hashable, else it derives from false_type. 126 | -------------------------------------------------------------------------------- /sha2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * FILE: sha2.h 3 | * AUTHOR: Aaron D. Gifford - http://www.aarongifford.com/ 4 | * 5 | * Copyright (c) 2000-2001, Aaron D. Gifford 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the copyright holder nor the names of contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND 21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 | * SUCH DAMAGE. 31 | * 32 | * $Id: sha2.h,v 1.1 2001/11/08 00:02:01 adg Exp adg $ 33 | */ 34 | 35 | #ifndef __SHA2_H__ 36 | #define __SHA2_H__ 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | 43 | /* 44 | * Import u_intXX_t size_t type definitions from system headers. You 45 | * may need to change this, or define these things yourself in this 46 | * file. 47 | */ 48 | #include 49 | 50 | #ifdef SHA2_USE_INTTYPES_H 51 | 52 | #include 53 | 54 | #endif /* SHA2_USE_INTTYPES_H */ 55 | 56 | 57 | /*** SHA-256/384/512 Various Length Definitions ***********************/ 58 | #define SHA256_BLOCK_LENGTH 64 59 | #define SHA256_DIGEST_LENGTH 32 60 | #define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) 61 | #define SHA384_BLOCK_LENGTH 128 62 | #define SHA384_DIGEST_LENGTH 48 63 | #define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) 64 | #define SHA512_BLOCK_LENGTH 128 65 | #define SHA512_DIGEST_LENGTH 64 66 | #define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) 67 | 68 | 69 | /*** SHA-256/384/512 Context Structures *******************************/ 70 | /* NOTE: If your architecture does not define either u_intXX_t types or 71 | * uintXX_t (from inttypes.h), you may need to define things by hand 72 | * for your system: 73 | */ 74 | #if 0 75 | typedef unsigned char u_int8_t; /* 1-byte (8-bits) */ 76 | typedef unsigned int u_int32_t; /* 4-bytes (32-bits) */ 77 | typedef unsigned long long 
u_int64_t; /* 8-bytes (64-bits) */ 78 | #endif 79 | /* 80 | * Most BSD systems already define u_intXX_t types, as does Linux. 81 | * Some systems, however, like Compaq's Tru64 Unix instead can use 82 | * uintXX_t types defined by very recent ANSI C standards and included 83 | * in the file: 84 | * 85 | * #include 86 | * 87 | * If you choose to use then please define: 88 | * 89 | * #define SHA2_USE_INTTYPES_H 90 | * 91 | * Or on the command line during compile: 92 | * 93 | * cc -DSHA2_USE_INTTYPES_H ... 94 | */ 95 | #ifdef SHA2_USE_INTTYPES_H 96 | 97 | typedef struct _SHA256_CTX { 98 | uint32_t state[8]; 99 | uint64_t bitcount; 100 | uint8_t buffer[SHA256_BLOCK_LENGTH]; 101 | } SHA256_CTX; 102 | typedef struct _SHA512_CTX { 103 | uint64_t state[8]; 104 | uint64_t bitcount[2]; 105 | uint8_t buffer[SHA512_BLOCK_LENGTH]; 106 | } SHA512_CTX; 107 | 108 | #else /* SHA2_USE_INTTYPES_H */ 109 | 110 | typedef struct _SHA256_CTX { 111 | u_int32_t state[8]; 112 | u_int64_t bitcount; 113 | u_int8_t buffer[SHA256_BLOCK_LENGTH]; 114 | } SHA256_CTX; 115 | typedef struct _SHA512_CTX { 116 | u_int64_t state[8]; 117 | u_int64_t bitcount[2]; 118 | u_int8_t buffer[SHA512_BLOCK_LENGTH]; 119 | } SHA512_CTX; 120 | 121 | #endif /* SHA2_USE_INTTYPES_H */ 122 | 123 | typedef SHA512_CTX SHA384_CTX; 124 | 125 | 126 | /*** SHA-256/384/512 Function Prototypes ******************************/ 127 | #ifndef NOPROTO 128 | #ifdef SHA2_USE_INTTYPES_H 129 | 130 | void SHA256_Init(SHA256_CTX *); 131 | void SHA256_Update(SHA256_CTX*, const uint8_t*, size_t); 132 | void SHA256_Final(uint8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); 133 | char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); 134 | char* SHA256_Data(const uint8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); 135 | 136 | void SHA384_Init(SHA384_CTX*); 137 | void SHA384_Update(SHA384_CTX*, const uint8_t*, size_t); 138 | void SHA384_Final(uint8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*); 139 | char* SHA384_End(SHA384_CTX*, 
char[SHA384_DIGEST_STRING_LENGTH]); 140 | char* SHA384_Data(const uint8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]); 141 | 142 | void SHA512_Init(SHA512_CTX*); 143 | void SHA512_Update(SHA512_CTX*, const uint8_t*, size_t); 144 | void SHA512_Final(uint8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*); 145 | char* SHA512_End(SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]); 146 | char* SHA512_Data(const uint8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); 147 | 148 | #else /* SHA2_USE_INTTYPES_H */ 149 | 150 | void SHA256_Init(SHA256_CTX *); 151 | void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t); 152 | void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); 153 | char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); 154 | char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); 155 | 156 | void SHA384_Init(SHA384_CTX*); 157 | void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t); 158 | void SHA384_Final(u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*); 159 | char* SHA384_End(SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH]); 160 | char* SHA384_Data(const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]); 161 | 162 | void SHA512_Init(SHA512_CTX*); 163 | void SHA512_Update(SHA512_CTX*, const u_int8_t*, size_t); 164 | void SHA512_Final(u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*); 165 | char* SHA512_End(SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]); 166 | char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); 167 | 168 | #endif /* SHA2_USE_INTTYPES_H */ 169 | 170 | #else /* NOPROTO */ 171 | 172 | void SHA256_Init(); 173 | void SHA256_Update(); 174 | void SHA256_Final(); 175 | char* SHA256_End(); 176 | char* SHA256_Data(); 177 | 178 | void SHA384_Init(); 179 | void SHA384_Update(); 180 | void SHA384_Final(); 181 | char* SHA384_End(); 182 | char* SHA384_Data(); 183 | 184 | void SHA512_Init(); 185 | void SHA512_Update(); 186 | void SHA512_Final(); 187 | char* SHA512_End(); 188 | char* SHA512_Data(); 189 
| 190 | #endif /* NOPROTO */ 191 | 192 | #ifdef __cplusplus 193 | } 194 | #endif /* __cplusplus */ 195 | 196 | #endif /* __SHA2_H__ */ 197 | 198 | -------------------------------------------------------------------------------- /sha256.h: -------------------------------------------------------------------------------- 1 | //------------------------------- sha256.h ------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef SHA256_H 13 | #define SHA256_H 14 | 15 | #include "endian.h" 16 | #include 17 | #include 18 | #include 19 | #include "sha2.h" 20 | 21 | // namespace acme is used to demonstrate example code. It is not proposed. 22 | 23 | namespace acme 24 | { 25 | 26 | class sha256 27 | { 28 | SHA256_CTX state_; 29 | public: 30 | static constexpr xstd::endian endian = xstd::endian::big; 31 | using result_type = std::array; 32 | 33 | sha256() noexcept 34 | { 35 | SHA256_Init(&state_); 36 | } 37 | 38 | void 39 | operator()(void const* key, std::size_t len) noexcept 40 | { 41 | SHA256_Update(&state_, static_cast(key), len); 42 | } 43 | 44 | explicit 45 | operator result_type() noexcept 46 | { 47 | result_type r; 48 | SHA256_Final(r.data(), &state_); 49 | return r; 50 | } 51 | }; 52 | 53 | } // acme 54 | 55 | #endif // SHA256_H 56 | -------------------------------------------------------------------------------- /sherlock.cpp: -------------------------------------------------------------------------------- 1 | //----------------------------- sherlock.cpp ----------------------------------- 2 | // 3 | // This software is in the public domain. 
The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | // The proposal 13 | #include "hash_append.h" 14 | 15 | // Example Hashers 16 | #include "siphash.h" 17 | #include "fnv1a.h" 18 | #include "city_hash.h" 19 | 20 | #include "hash_test.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "../llvm/include/llvm/ADT/Hashing.h" 34 | 35 | int 36 | main() 37 | { 38 | std::ifstream infile("sherlock.txt"); 39 | std::istream_iterator first(infile), eof; 40 | std::set s; 41 | for (; first != eof; ++first) 42 | { 43 | std::string str = *first; 44 | str.erase(std::remove_if(str.begin(), str.end(), [](char c) 45 | { 46 | return !std::isalpha(c); 47 | }), str.end()); 48 | if (!str.empty()) 49 | s.insert(str); 50 | } 51 | double avg = 0; 52 | for (auto const& str : s) 53 | avg += str.size(); 54 | avg /= s.size(); 55 | std::cout << "avg = " << avg << '\n'; 56 | #if 1 57 | xstd::uhash h; 58 | #endif 59 | std::vector hashes; 60 | hashes.reserve(s.size()); 61 | typedef std::chrono::duration secs; 62 | auto t0 = std::chrono::high_resolution_clock::now(); 63 | for (auto const& str : s) 64 | #if 1 65 | hashes.push_back(h(str)); 66 | #else 67 | hashes.push_back(llvm::hash_value(str)); 68 | #endif 69 | auto t1 = std::chrono::high_resolution_clock::now(); 70 | std::cout << secs(t1-t0).count() << " s\n"; 71 | std::cout << test4(hashes) << '\n'; 72 | std::cout << test5(hashes) << '\n'; 73 | } 74 | 75 | // avg = 7.08971 76 | 77 | // llvm / N3333 78 | // 0.000532726 s 79 | // -0.00304038 80 | // 7 81 | // 82 | // xstd::uhash 83 | // 0.000772055 s 84 | // -0.00225346 
85 | // 7 86 | // 87 | // xstd::uhash 88 | // avg = 7.08971 89 | // 0.000400899 s 90 | // 0.00311192 91 | // 6 92 | -------------------------------------------------------------------------------- /siphash.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------- siphash.h ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | // Derived from: 11 | // 12 | // SipHash reference C implementation 13 | // 14 | // Written in 2012 by Jean-Philippe Aumasson 15 | // Daniel J. Bernstein 16 | // 17 | // To the extent possible under law, the author(s) have dedicated all copyright 18 | // and related and neighboring rights to this software to the public domain 19 | // worldwide. This software is distributed without any warranty. 20 | // 21 | // You should have received a copy of the CC0 Public Domain Dedication along 22 | // with this software. If not, see 23 | // . 24 | // 25 | //------------------------------------------------------------------------------ 26 | 27 | #include "siphash.h" 28 | #include 29 | 30 | // namespace acme is used to demonstrate example code. It is not proposed. 
31 | 32 | namespace acme 33 | { 34 | 35 | namespace 36 | { 37 | 38 | typedef std::uint64_t u64; 39 | typedef std::uint32_t u32; 40 | typedef std::uint8_t u8; 41 | 42 | inline 43 | u64 44 | rotl(u64 x, u64 b) 45 | { 46 | return (x << b) | (x >> (64 - b)); 47 | } 48 | 49 | inline 50 | u64 51 | u8to64_le(const u8* p) 52 | { 53 | #ifdef __LITTLE_ENDIAN__ 54 | return *static_cast(static_cast(p)); 55 | #else 56 | return static_cast(p[7]) << 56 | static_cast(p[6]) << 48 | 57 | static_cast(p[5]) << 40 | static_cast(p[4]) << 32 | 58 | static_cast(p[3]) << 24 | static_cast(p[2]) << 16 | 59 | static_cast(p[1]) << 8 | static_cast(p[0]); 60 | #endif 61 | } 62 | 63 | inline 64 | void 65 | sipround(u64& v0, u64& v1, u64& v2, u64& v3) 66 | { 67 | v0 += v1; 68 | v1 = rotl(v1, 13); 69 | v1 ^= v0; 70 | v0 = rotl(v0, 32); 71 | v2 += v3; 72 | v3 = rotl(v3, 16); 73 | v3 ^= v2; 74 | v0 += v3; 75 | v3 = rotl(v3, 21); 76 | v3 ^= v0; 77 | v2 += v1; 78 | v1 = rotl(v1, 17); 79 | v1 ^= v2; 80 | v2 = rotl(v2, 32); 81 | } 82 | 83 | } // unnamed 84 | 85 | siphash::siphash(std::uint64_t k0, std::uint64_t k1) noexcept 86 | { 87 | v3_ ^= k1; 88 | v2_ ^= k0; 89 | v1_ ^= k1; 90 | v0_ ^= k0; 91 | } 92 | 93 | void 94 | siphash::operator()(void const* key, std::size_t inlen) noexcept 95 | { 96 | u8 const* in = static_cast(key); 97 | total_length_ += inlen; 98 | if (bufsize_ + inlen < 8) 99 | { 100 | std::copy(in, in+inlen, buf_ + bufsize_); 101 | bufsize_ += inlen; 102 | return; 103 | } 104 | if (bufsize_ > 0) 105 | { 106 | auto t = 8 - bufsize_; 107 | std::copy(in, in+t, buf_ + bufsize_); 108 | u64 m = u8to64_le( buf_ ); 109 | v3_ ^= m; 110 | sipround(v0_, v1_, v2_, v3_); 111 | sipround(v0_, v1_, v2_, v3_); 112 | v0_ ^= m; 113 | in += t; 114 | inlen -= t; 115 | } 116 | bufsize_ = inlen & 7; 117 | u8 const* const end = in + (inlen - bufsize_); 118 | for ( ; in != end; in += 8 ) 119 | { 120 | u64 m = u8to64_le( in ); 121 | v3_ ^= m; 122 | sipround(v0_, v1_, v2_, v3_); 123 | sipround(v0_, v1_, v2_, v3_); 
124 | v0_ ^= m; 125 | } 126 | std::copy(end, end + bufsize_, buf_); 127 | } 128 | 129 | siphash::operator std::size_t() noexcept 130 | { 131 | std::size_t b = static_cast(total_length_) << 56; 132 | switch(bufsize_) 133 | { 134 | case 7: 135 | b |= static_cast(buf_[6]) << 48; 136 | case 6: 137 | b |= static_cast(buf_[5]) << 40; 138 | case 5: 139 | b |= static_cast(buf_[4]) << 32; 140 | case 4: 141 | b |= static_cast(buf_[3]) << 24; 142 | case 3: 143 | b |= static_cast(buf_[2]) << 16; 144 | case 2: 145 | b |= static_cast(buf_[1]) << 8; 146 | case 1: 147 | b |= static_cast(buf_[0]); 148 | case 0: 149 | break; 150 | } 151 | v3_ ^= b; 152 | sipround(v0_, v1_, v2_, v3_); 153 | sipround(v0_, v1_, v2_, v3_); 154 | v0_ ^= b; 155 | v2_ ^= 0xff; 156 | sipround(v0_, v1_, v2_, v3_); 157 | sipround(v0_, v1_, v2_, v3_); 158 | sipround(v0_, v1_, v2_, v3_); 159 | sipround(v0_, v1_, v2_, v3_); 160 | b = v0_ ^ v1_ ^ v2_ ^ v3_; 161 | return b; 162 | } 163 | 164 | } // acme 165 | -------------------------------------------------------------------------------- /siphash.h: -------------------------------------------------------------------------------- 1 | //------------------------------- siphash.h ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | // Derived from: 11 | // 12 | // SipHash reference C implementation 13 | // 14 | // Written in 2012 by Jean-Philippe Aumasson 15 | // Daniel J. Bernstein 16 | // 17 | // To the extent possible under law, the author(s) have dedicated all copyright 18 | // and related and neighboring rights to this software to the public domain 19 | // worldwide. This software is distributed without any warranty. 
20 | // 21 | // You should have received a copy of the CC0 Public Domain Dedication along 22 | // with this software. If not, see 23 | // . 24 | // 25 | //------------------------------------------------------------------------------ 26 | 27 | #ifndef SIPHASH_H 28 | #define SIPHASH_H 29 | 30 | #include "endian.h" 31 | #include 32 | #include 33 | 34 | // namespace acme is used to demonstrate example code. It is not proposed. 35 | 36 | namespace acme 37 | { 38 | 39 | class siphash 40 | { 41 | std::uint64_t v0_ = 0x736f6d6570736575ULL; 42 | std::uint64_t v1_ = 0x646f72616e646f6dULL; 43 | std::uint64_t v2_ = 0x6c7967656e657261ULL; 44 | std::uint64_t v3_ = 0x7465646279746573ULL; 45 | unsigned char buf_[8]; 46 | unsigned bufsize_ = 0; 47 | unsigned total_length_ = 0; 48 | public: 49 | static constexpr xstd::endian endian = xstd::endian::native; 50 | using result_type = std::size_t; 51 | 52 | siphash() = default; 53 | explicit siphash(std::uint64_t k0, std::uint64_t k1 = 0) noexcept; 54 | 55 | void 56 | operator()(void const* key, std::size_t len) noexcept; 57 | 58 | explicit 59 | operator std::size_t() noexcept; 60 | }; 61 | 62 | } // acme 63 | 64 | #endif // SIPHASH_H 65 | -------------------------------------------------------------------------------- /sizes.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------- sizes.cpp ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | // The proposal 13 | #include "hash_append.h" 14 | 15 | // Example Hashers 16 | #include "siphash.h" 17 | #include "fnv1a.h" 18 | #include "spooky.h" 19 | 20 | #include 21 | int 22 | main() 23 | { 24 | std::cout << sizeof(acme::siphash) << '\n'; 25 | std::cout << sizeof(acme::spooky) << '\n'; 26 | std::cout << sizeof(acme::fnv1a) << '\n'; 27 | } 28 | 29 | // 48 30 | // 304 31 | // 8 32 | -------------------------------------------------------------------------------- /spooky.h: -------------------------------------------------------------------------------- 1 | //------------------------------- spooky.h ------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef SPOOKY_H 13 | #define SPOOKY_H 14 | 15 | #include "endian.h" 16 | #include 17 | #include 18 | #include "SpookyV2.h" 19 | 20 | // namespace acme is used to demonstrate example code. It is not proposed. 
21 | 22 | namespace acme 23 | { 24 | 25 | class spooky 26 | { 27 | SpookyHash state_; 28 | public: 29 | static constexpr xstd::endian endian = xstd::endian::native; 30 | using result_type = std::size_t; 31 | 32 | spooky(std::size_t seed1 = 1, std::size_t seed2 = 2) noexcept 33 | { 34 | state_.Init(seed1, seed2); 35 | } 36 | 37 | void 38 | operator()(void const* key, std::size_t len) noexcept 39 | { 40 | state_.Update(key, len); 41 | } 42 | 43 | explicit 44 | operator result_type() noexcept 45 | { 46 | std::uint64_t h1, h2; 47 | state_.Final(&h1, &h2); 48 | return h1; 49 | } 50 | }; 51 | 52 | } // acme 53 | 54 | #endif // SPOOKY_H 55 | -------------------------------------------------------------------------------- /test.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------- test.cpp ------------------------------------- 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | // The proposal 13 | #include "hash_append.h" 14 | 15 | // Example Hashers 16 | #include "spooky.h" 17 | #include "fnv1a.h" 18 | #include "jenkins1.h" 19 | #include "murmur2A.h" 20 | #include "spooky.h" 21 | #include "siphash.h" 22 | #include "hash_functors.h" 23 | #include "hash_adaptors.h" 24 | #include "X.h" 25 | #include "hash_test.h" 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | int 32 | main() 33 | { 34 | typedef std::chrono::duration secs; 35 | std::set x; 36 | while (x.size() < 1000000) 37 | x.insert(mine::X{}); 38 | std::vector hashes; 39 | hashes.reserve(x.size()); 40 | { 41 | auto t0 = std::chrono::high_resolution_clock::now(); 42 | for (auto const& i : x) 43 | hashes.push_back(std::hash{}(i)); 44 | auto t1 = std::chrono::high_resolution_clock::now(); 45 | std::cout << "std::hash " << '\n' << std::hex; 46 | std::cout << secs(t1-t0).count() << " s\n"; 47 | std::cout << test1(hashes) << '\n'; 48 | std::cout << test2(hashes) << '\n'; 49 | std::cout << test3(hashes) << '\n'; 50 | std::cout << test4(hashes) << '\n'; 51 | std::cout << test5(hashes) << '\n'; 52 | } 53 | std::cout << '\n'; 54 | hashes.clear(); 55 | { 56 | auto t0 = std::chrono::high_resolution_clock::now(); 57 | for (auto const& i : x) 58 | hashes.push_back(hash_value(i)); 59 | auto t1 = std::chrono::high_resolution_clock::now(); 60 | std::cout << "llvm::hash_value " << '\n' << std::hex; 61 | std::cout << secs(t1-t0).count() << " s\n"; 62 | std::cout << test1(hashes) << '\n'; 63 | std::cout << test2(hashes) << '\n'; 64 | std::cout << test3(hashes) << '\n'; 65 | std::cout << test4(hashes) << '\n'; 66 | std::cout << test5(hashes) << '\n'; 67 | } 68 | std::cout << '\n'; 69 | hashes.clear(); 70 | { 71 | auto t0 = std::chrono::high_resolution_clock::now(); 72 | xstd::uhash h; 73 | for (auto const& i : x) 74 | hashes.push_back(h(i)); 75 | auto t1 = 
std::chrono::high_resolution_clock::now(); 76 | std::cout << "fnv1a " << '\n' << std::hex; 77 | std::cout << secs(t1-t0).count() << " s\n"; 78 | std::cout << test1(hashes) << '\n'; 79 | std::cout << test2(hashes) << '\n'; 80 | std::cout << test3(hashes) << '\n'; 81 | std::cout << test4(hashes) << '\n'; 82 | std::cout << test5(hashes) << '\n'; 83 | } 84 | std::cout << '\n'; 85 | hashes.clear(); 86 | { 87 | auto t0 = std::chrono::high_resolution_clock::now(); 88 | xstd::uhash h; 89 | for (auto const& i : x) 90 | hashes.push_back(h(i)); 91 | auto t1 = std::chrono::high_resolution_clock::now(); 92 | std::cout << "jenkins1 " << '\n' << std::hex; 93 | std::cout << secs(t1-t0).count() << " s\n"; 94 | std::cout << test1(hashes) << '\n'; 95 | std::cout << test2(hashes) << '\n'; 96 | std::cout << test3(hashes) << '\n'; 97 | std::cout << test4(hashes) << '\n'; 98 | std::cout << test5(hashes) << '\n'; 99 | } 100 | std::cout << '\n'; 101 | hashes.clear(); 102 | { 103 | auto t0 = std::chrono::high_resolution_clock::now(); 104 | xstd::uhash h; 105 | for (auto const& i : x) 106 | hashes.push_back(h(i)); 107 | auto t1 = std::chrono::high_resolution_clock::now(); 108 | std::cout << "MurmurHash2A " << '\n' << std::hex; 109 | std::cout << secs(t1-t0).count() << " s\n"; 110 | std::cout << test1(hashes) << '\n'; 111 | std::cout << test2(hashes) << '\n'; 112 | std::cout << test3(hashes) << '\n'; 113 | std::cout << test4(hashes) << '\n'; 114 | std::cout << test5(hashes) << '\n'; 115 | } 116 | std::cout << '\n'; 117 | hashes.clear(); 118 | { 119 | auto t0 = std::chrono::high_resolution_clock::now(); 120 | xstd::uhash h; 121 | for (auto const& i : x) 122 | hashes.push_back(h(i)); 123 | auto t1 = std::chrono::high_resolution_clock::now(); 124 | std::cout << "spooky " << '\n' << std::hex; 125 | std::cout << secs(t1-t0).count() << " s\n"; 126 | std::cout << test1(hashes) << '\n'; 127 | std::cout << test2(hashes) << '\n'; 128 | std::cout << test3(hashes) << '\n'; 129 | std::cout << 
test4(hashes) << '\n'; 130 | std::cout << test5(hashes) << '\n'; 131 | } 132 | std::cout << '\n'; 133 | hashes.clear(); 134 | { 135 | auto t0 = std::chrono::high_resolution_clock::now(); 136 | xstd::uhash h; 137 | for (auto const& i : x) 138 | hashes.push_back(h(i)); 139 | auto t1 = std::chrono::high_resolution_clock::now(); 140 | std::cout << "siphash " << '\n' << std::hex; 141 | std::cout << secs(t1-t0).count() << " s\n"; 142 | std::cout << test1(hashes) << '\n'; 143 | std::cout << test2(hashes) << '\n'; 144 | std::cout << test3(hashes) << '\n'; 145 | std::cout << test4(hashes) << '\n'; 146 | std::cout << test5(hashes) << '\n'; 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /test2.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------- test2.cpp ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #include "X.h" 13 | #include "fnv1a.h" 14 | #include 15 | 16 | std::size_t 17 | hash_contiguous(int (&data)[3]) 18 | { 19 | acme::fnv1a h; 20 | h(data, sizeof(data)); 21 | return static_cast(h); 22 | } 23 | 24 | std::size_t 25 | hash_discontiguous(int data1, int data2, int data3) 26 | { 27 | acme::fnv1a h; 28 | h(&data1, sizeof(data1)); 29 | h(&data2, sizeof(data2)); 30 | h(&data3, sizeof(data3)); 31 | return static_cast(h); 32 | } 33 | 34 | int 35 | main() 36 | { 37 | int data[] = {5, 3, 8}; 38 | assert((hash_contiguous(data) == hash_discontiguous(5, 3, 8))); 39 | } 40 | -------------------------------------------------------------------------------- /test3.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------- test3.cpp ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 
9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #include "hash_append.h" 13 | 14 | #include 15 | #include 16 | 17 | struct X 18 | { 19 | static constexpr xstd::endian desired_endian = xstd::endian::big; 20 | }; 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #if 1 28 | 29 | #ifdef htonl 30 | #undef htonl 31 | #undef htons 32 | #undef ntohl 33 | #undef ntohs 34 | #endif 35 | 36 | template 37 | constexpr 38 | inline 39 | T 40 | reverse_bytes(T t) 41 | { 42 | unsigned char* bytes = static_cast 43 | (std::memmove(std::addressof(t), std::addressof(t), sizeof(T))); 44 | for (unsigned i = 0; i < sizeof(T)/2; ++i) 45 | std::swap(bytes[i], bytes[sizeof(T)-1-i]); 46 | return t; 47 | } 48 | 49 | template 50 | constexpr 51 | inline 52 | T 53 | reverse_bytes_if(T t, std::true_type) 54 | { 55 | return reverse_bytes(t); 56 | } 57 | 58 | template 59 | constexpr 60 | inline 61 | T 62 | reverse_bytes_if(T t, std::false_type) 63 | { 64 | return t; 65 | } 66 | 67 | std::uint32_t 68 | constexpr 69 | inline 70 | htonl(std::uint32_t x) 71 | { 72 | return reverse_bytes_if(x, std::integral_constant{}); 74 | } 75 | 76 | std::uint16_t 77 | constexpr 78 | inline 79 | htons(std::uint16_t x) 80 | { 81 | return reverse_bytes_if(x, std::integral_constant{}); 83 | } 84 | 85 | std::uint32_t 86 | constexpr 87 | inline 88 | ntohl(std::uint32_t x) 89 | { 90 | return reverse_bytes_if(x, std::integral_constant{}); 92 | } 93 | 94 | std::uint16_t 95 | constexpr 96 | inline 97 | ntohs(std::uint16_t x) 98 | { 99 | return reverse_bytes_if(x, std::integral_constant{}); 101 | } 102 | 103 | template {}> 105 | > 106 | constexpr 107 | inline 108 | T 109 | hton(T t) 110 | { 111 | return reverse_bytes_if(t, std::integral_constant{}); 113 | } 114 | 115 | template {}> 117 | > 118 | constexpr 119 | inline 120 | T 121 | ntoh(T t) 122 | { 123 | return reverse_bytes_if(t, std::integral_constant{}); 125 | } 126 | 127 | #endif 128 | 129 | int 130 
| test(int x) 131 | { 132 | return htonl(x); 133 | } 134 | 135 | // int 136 | // main() 137 | // { 138 | // using namespace xstd; 139 | // if (endian::native == endian::little) 140 | // std::cout << "little endian\n"; 141 | // else if (endian::native == endian::big) 142 | // std::cout << "little endian\n"; 143 | // else 144 | // assert(false); 145 | // if (X::desired_endian != endian::native) 146 | // std::cout << "X needs work\n"; 147 | // } 148 | -------------------------------------------------------------------------------- /test4.cpp: -------------------------------------------------------------------------------- 1 | #include "xx_hash.h" 2 | #include "hash_append.h" 3 | #include 4 | 5 | struct S 6 | { 7 | char c; 8 | int x; 9 | }; 10 | 11 | template 12 | void 13 | hash_append(HashAlgorithm& h, const S& s) 14 | { 15 | using xstd::hash_append; 16 | hash_append(h, s.c, s.x); 17 | } 18 | 19 | int 20 | main() 21 | { 22 | xstd::uhash my_hash; 23 | S s[10] = {'a', 1}; 24 | std::cout << my_hash(s) << '\n'; 25 | } 26 | -------------------------------------------------------------------------------- /xx_hash.h: -------------------------------------------------------------------------------- 1 | //------------------------------- xx_hash.h ------------------------------------ 2 | // 3 | // This software is in the public domain. The only restriction on its use is 4 | // that no one can remove it from the public domain by claiming ownership of it, 5 | // including the original authors. 6 | // 7 | // There is no warranty of correctness on the software contained herein. Use 8 | // at your own risk. 9 | // 10 | //------------------------------------------------------------------------------ 11 | 12 | #ifndef XX_HASH_H 13 | #define XX_HASH_H 14 | 15 | #include "endian.h" 16 | #include "xxhash.h" 17 | #include 18 | #include 19 | #include 20 | 21 | // namespace acme is used to demonstrate example code. It is not proposed. 
22 | 23 | namespace acme 24 | { 25 | 26 | namespace detail 27 | { 28 | 29 | template 30 | class xx_hash_imp; 31 | 32 | template <> 33 | class xx_hash_imp<32> 34 | { 35 | XXH32_state_t state_; 36 | public: 37 | xx_hash_imp(unsigned seed = 0) noexcept 38 | { 39 | XXH32_reset(&state_, seed); 40 | } 41 | 42 | void 43 | update(void const* key, std::size_t len) noexcept 44 | { 45 | XXH32_update(&state_, key, len); 46 | } 47 | 48 | unsigned 49 | digest() noexcept 50 | { 51 | return XXH32_digest(&state_); 52 | } 53 | }; 54 | 55 | template <> 56 | class xx_hash_imp<64> 57 | { 58 | XXH64_state_t state_; 59 | public: 60 | xx_hash_imp(unsigned long long seed = 0) noexcept 61 | { 62 | XXH64_reset(&state_, seed); 63 | } 64 | 65 | void 66 | update(void const* key, std::size_t len) noexcept 67 | { 68 | XXH64_update(&state_, key, len); 69 | } 70 | 71 | unsigned long long 72 | digest() noexcept 73 | { 74 | return XXH64_digest(&state_); 75 | } 76 | }; 77 | 78 | } // detail 79 | 80 | class xx_hash 81 | : private detail::xx_hash_imp 82 | { 83 | using base = detail::xx_hash_imp; 84 | using base_result = std::conditional_t; 86 | public: 87 | // Change to native if XXH_FORCE_NATIVE_FORMAT == 1 in xxhash.c 88 | static constexpr xstd::endian endian = xstd::endian::little; 89 | using result_type = std::size_t; 90 | 91 | xx_hash(std::size_t seed = 0) noexcept 92 | : base(static_cast(seed)) 93 | { 94 | } 95 | 96 | void 97 | operator()(void const* key, std::size_t len) noexcept 98 | { 99 | this->update(key, len); 100 | } 101 | 102 | explicit 103 | operator result_type() noexcept 104 | { 105 | return static_cast(this->digest()); 106 | } 107 | }; 108 | 109 | } // acme 110 | 111 | #endif // XX_HASH_H 112 | -------------------------------------------------------------------------------- /xxhash.c: -------------------------------------------------------------------------------- 1 | /* 2 | xxHash - Fast Hash algorithm 3 | Copyright (C) 2012-2014, Yann Collet. 
4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following disclaimer 14 | in the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | You can contact the author at : 30 | - xxHash source repository : http://code.google.com/p/xxhash/ 31 | - public discussion board : https://groups.google.com/forum/#!forum/lz4c 32 | */ 33 | 34 | 35 | //************************************** 36 | // Tuning parameters 37 | //************************************** 38 | // Unaligned memory access is automatically enabled for "common" CPU, such as x86. 39 | // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. 
40 | // If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. 41 | // You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). 42 | #if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) 43 | # define XXH_USE_UNALIGNED_ACCESS 1 44 | #endif 45 | 46 | // XXH_ACCEPT_NULL_INPUT_POINTER : 47 | // If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. 48 | // When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. 49 | // This option has a very small performance cost (only measurable on small inputs). 50 | // By default, this option is disabled. To enable it, uncomment below define : 51 | // #define XXH_ACCEPT_NULL_INPUT_POINTER 1 52 | 53 | // XXH_FORCE_NATIVE_FORMAT : 54 | // By default, xxHash library provides endian-independant Hash values, based on little-endian convention. 55 | // Results are therefore identical for little-endian and big-endian CPU. 56 | // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 57 | // Should endian-independance be of no importance for your application, you may set the #define below to 1. 58 | // It will improve speed for Big-endian CPU. 59 | // This option has no impact on Little_Endian CPU. 
60 | #define XXH_FORCE_NATIVE_FORMAT 0 61 | 62 | //************************************** 63 | // Compiler Specific Options 64 | //************************************** 65 | // Disable some Visual warning messages 66 | #ifdef _MSC_VER // Visual Studio 67 | # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant 68 | #endif 69 | 70 | #ifdef _MSC_VER // Visual Studio 71 | # define FORCE_INLINE static __forceinline 72 | #else 73 | # ifdef __GNUC__ 74 | # define FORCE_INLINE static inline __attribute__((always_inline)) 75 | # else 76 | # define FORCE_INLINE static inline 77 | # endif 78 | #endif 79 | 80 | //************************************** 81 | // Includes & Memory related functions 82 | //************************************** 83 | #include "xxhash.h" 84 | // Modify the local functions below should you wish to use some other memory routines 85 | // for malloc(), free() 86 | #include 87 | static void* XXH_malloc(size_t s) { return malloc(s); } 88 | static void XXH_free (void* p) { free(p); } 89 | // for memcpy() 90 | #include 91 | static void* XXH_memcpy(void* dest, const void* src, size_t size) 92 | { 93 | return memcpy(dest,src,size); 94 | } 95 | 96 | 97 | //************************************** 98 | // Basic Types 99 | //************************************** 100 | #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 101 | # include 102 | typedef uint8_t BYTE; 103 | typedef uint16_t U16; 104 | typedef uint32_t U32; 105 | typedef int32_t S32; 106 | typedef uint64_t U64; 107 | #else 108 | typedef unsigned char BYTE; 109 | typedef unsigned short U16; 110 | typedef unsigned int U32; 111 | typedef signed int S32; 112 | typedef unsigned long long U64; 113 | #endif 114 | 115 | #if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) 116 | # define _PACKED __attribute__ ((packed)) 117 | #else 118 | # define _PACKED 119 | #endif 120 | 121 | #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) 122 | # ifdef 
__IBMC__ 123 | # pragma pack(1) 124 | # else 125 | # pragma pack(push, 1) 126 | # endif 127 | #endif 128 | 129 | typedef struct _U32_S 130 | { 131 | U32 v; 132 | } _PACKED U32_S; 133 | typedef struct _U64_S 134 | { 135 | U64 v; 136 | } _PACKED U64_S; 137 | 138 | #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) 139 | # pragma pack(pop) 140 | #endif 141 | 142 | #define A32(x) (((U32_S *)(x))->v) 143 | #define A64(x) (((U64_S *)(x))->v) 144 | 145 | 146 | //*************************************** 147 | // Compiler-specific Functions and Macros 148 | //*************************************** 149 | #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 150 | 151 | // Note : although _rotl exists for minGW (GCC under windows), performance seems poor 152 | #if defined(_MSC_VER) 153 | # define XXH_rotl32(x,r) _rotl(x,r) 154 | # define XXH_rotl64(x,r) _rotl64(x,r) 155 | #else 156 | # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) 157 | # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) 158 | #endif 159 | 160 | #if defined(_MSC_VER) // Visual Studio 161 | # define XXH_swap32 _byteswap_ulong 162 | # define XXH_swap64 _byteswap_uint64 163 | #elif GCC_VERSION >= 403 164 | # define XXH_swap32 __builtin_bswap32 165 | # define XXH_swap64 __builtin_bswap64 166 | #else 167 | static inline U32 XXH_swap32 (U32 x) 168 | { 169 | return ((x << 24) & 0xff000000 ) | 170 | ((x << 8) & 0x00ff0000 ) | 171 | ((x >> 8) & 0x0000ff00 ) | 172 | ((x >> 24) & 0x000000ff ); 173 | } 174 | static inline U64 XXH_swap64 (U64 x) 175 | { 176 | return ((x << 56) & 0xff00000000000000ULL) | 177 | ((x << 40) & 0x00ff000000000000ULL) | 178 | ((x << 24) & 0x0000ff0000000000ULL) | 179 | ((x << 8) & 0x000000ff00000000ULL) | 180 | ((x >> 8) & 0x00000000ff000000ULL) | 181 | ((x >> 24) & 0x0000000000ff0000ULL) | 182 | ((x >> 40) & 0x000000000000ff00ULL) | 183 | ((x >> 56) & 0x00000000000000ffULL); 184 | } 185 | #endif 186 | 187 | 188 | //************************************** 189 | // Constants 190 
| //************************************** 191 | #define PRIME32_1 2654435761U 192 | #define PRIME32_2 2246822519U 193 | #define PRIME32_3 3266489917U 194 | #define PRIME32_4 668265263U 195 | #define PRIME32_5 374761393U 196 | 197 | #define PRIME64_1 11400714785074694791ULL 198 | #define PRIME64_2 14029467366897019727ULL 199 | #define PRIME64_3 1609587929392839161ULL 200 | #define PRIME64_4 9650029242287828579ULL 201 | #define PRIME64_5 2870177450012600261ULL 202 | 203 | //************************************** 204 | // Architecture Macros 205 | //************************************** 206 | typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; 207 | #ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch 208 | static const int one = 1; 209 | # define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) 210 | #endif 211 | 212 | 213 | //************************************** 214 | // Macros 215 | //************************************** 216 | #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations 217 | 218 | 219 | //**************************** 220 | // Memory reads 221 | //**************************** 222 | typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; 223 | 224 | FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) 225 | { 226 | if (align==XXH_unaligned) 227 | return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); 228 | else 229 | return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr); 230 | } 231 | 232 | FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) 233 | { 234 | return XXH_readLE32_align(ptr, endian, XXH_unaligned); 235 | } 236 | 237 | FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) 238 | { 239 | if (align==XXH_unaligned) 240 | return endian==XXH_littleEndian ? 
A64(ptr) : XXH_swap64(A64(ptr)); 241 | else 242 | return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr); 243 | } 244 | 245 | FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) 246 | { 247 | return XXH_readLE64_align(ptr, endian, XXH_unaligned); 248 | } 249 | 250 | 251 | //**************************** 252 | // Simple Hash Functions 253 | //**************************** 254 | FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) 255 | { 256 | const BYTE* p = (const BYTE*)input; 257 | const BYTE* bEnd = p + len; 258 | U32 h32; 259 | #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) 260 | 261 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER 262 | if (p==NULL) 263 | { 264 | len=0; 265 | bEnd=p=(const BYTE*)(size_t)16; 266 | } 267 | #endif 268 | 269 | if (len>=16) 270 | { 271 | const BYTE* const limit = bEnd - 16; 272 | U32 v1 = seed + PRIME32_1 + PRIME32_2; 273 | U32 v2 = seed + PRIME32_2; 274 | U32 v3 = seed + 0; 275 | U32 v4 = seed - PRIME32_1; 276 | 277 | do 278 | { 279 | v1 += XXH_get32bits(p) * PRIME32_2; 280 | v1 = XXH_rotl32(v1, 13); 281 | v1 *= PRIME32_1; 282 | p+=4; 283 | v2 += XXH_get32bits(p) * PRIME32_2; 284 | v2 = XXH_rotl32(v2, 13); 285 | v2 *= PRIME32_1; 286 | p+=4; 287 | v3 += XXH_get32bits(p) * PRIME32_2; 288 | v3 = XXH_rotl32(v3, 13); 289 | v3 *= PRIME32_1; 290 | p+=4; 291 | v4 += XXH_get32bits(p) * PRIME32_2; 292 | v4 = XXH_rotl32(v4, 13); 293 | v4 *= PRIME32_1; 294 | p+=4; 295 | } 296 | while (p<=limit); 297 | 298 | h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); 299 | } 300 | else 301 | { 302 | h32 = seed + PRIME32_5; 303 | } 304 | 305 | h32 += (U32) len; 306 | 307 | while (p+4<=bEnd) 308 | { 309 | h32 += XXH_get32bits(p) * PRIME32_3; 310 | h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; 311 | p+=4; 312 | } 313 | 314 | while (p> 15; 322 | h32 *= PRIME32_2; 323 | h32 ^= h32 >> 13; 324 | h32 *= PRIME32_3; 325 | h32 ^= h32 
>> 16; 326 | 327 | return h32; 328 | } 329 | 330 | 331 | unsigned int XXH32 (const void* input, size_t len, unsigned seed) 332 | { 333 | #if 0 334 | // Simple version, good for code maintenance, but unfortunately slow for small inputs 335 | XXH32_state_t state; 336 | XXH32_reset(&state, seed); 337 | XXH32_update(&state, input, len); 338 | return XXH32_digest(&state); 339 | #else 340 | XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; 341 | 342 | # if !defined(XXH_USE_UNALIGNED_ACCESS) 343 | if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage 344 | { 345 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 346 | return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); 347 | else 348 | return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); 349 | } 350 | # endif 351 | 352 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 353 | return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); 354 | else 355 | return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); 356 | #endif 357 | } 358 | 359 | FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) 360 | { 361 | const BYTE* p = (const BYTE*)input; 362 | const BYTE* bEnd = p + len; 363 | U64 h64; 364 | #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) 365 | 366 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER 367 | if (p==NULL) 368 | { 369 | len=0; 370 | bEnd=p=(const BYTE*)(size_t)32; 371 | } 372 | #endif 373 | 374 | if (len>=32) 375 | { 376 | const BYTE* const limit = bEnd - 32; 377 | U64 v1 = seed + PRIME64_1 + PRIME64_2; 378 | U64 v2 = seed + PRIME64_2; 379 | U64 v3 = seed + 0; 380 | U64 v4 = seed - PRIME64_1; 381 | 382 | do 383 | { 384 | v1 += XXH_get64bits(p) * PRIME64_2; 385 | p+=8; 386 | v1 = XXH_rotl64(v1, 31); 387 | v1 *= PRIME64_1; 388 | v2 += XXH_get64bits(p) * PRIME64_2; 389 | 
p+=8; 390 | v2 = XXH_rotl64(v2, 31); 391 | v2 *= PRIME64_1; 392 | v3 += XXH_get64bits(p) * PRIME64_2; 393 | p+=8; 394 | v3 = XXH_rotl64(v3, 31); 395 | v3 *= PRIME64_1; 396 | v4 += XXH_get64bits(p) * PRIME64_2; 397 | p+=8; 398 | v4 = XXH_rotl64(v4, 31); 399 | v4 *= PRIME64_1; 400 | } 401 | while (p<=limit); 402 | 403 | h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); 404 | 405 | v1 *= PRIME64_2; 406 | v1 = XXH_rotl64(v1, 31); 407 | v1 *= PRIME64_1; 408 | h64 ^= v1; 409 | h64 = h64 * PRIME64_1 + PRIME64_4; 410 | 411 | v2 *= PRIME64_2; 412 | v2 = XXH_rotl64(v2, 31); 413 | v2 *= PRIME64_1; 414 | h64 ^= v2; 415 | h64 = h64 * PRIME64_1 + PRIME64_4; 416 | 417 | v3 *= PRIME64_2; 418 | v3 = XXH_rotl64(v3, 31); 419 | v3 *= PRIME64_1; 420 | h64 ^= v3; 421 | h64 = h64 * PRIME64_1 + PRIME64_4; 422 | 423 | v4 *= PRIME64_2; 424 | v4 = XXH_rotl64(v4, 31); 425 | v4 *= PRIME64_1; 426 | h64 ^= v4; 427 | h64 = h64 * PRIME64_1 + PRIME64_4; 428 | } 429 | else 430 | { 431 | h64 = seed + PRIME64_5; 432 | } 433 | 434 | h64 += (U64) len; 435 | 436 | while (p+8<=bEnd) 437 | { 438 | U64 k1 = XXH_get64bits(p); 439 | k1 *= PRIME64_2; 440 | k1 = XXH_rotl64(k1,31); 441 | k1 *= PRIME64_1; 442 | h64 ^= k1; 443 | h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; 444 | p+=8; 445 | } 446 | 447 | if (p+4<=bEnd) 448 | { 449 | h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; 450 | h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; 451 | p+=4; 452 | } 453 | 454 | while (p> 33; 462 | h64 *= PRIME64_2; 463 | h64 ^= h64 >> 29; 464 | h64 *= PRIME64_3; 465 | h64 ^= h64 >> 32; 466 | 467 | return h64; 468 | } 469 | 470 | 471 | unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) 472 | { 473 | #if 0 474 | // Simple version, good for code maintenance, but unfortunately slow for small inputs 475 | XXH64_state_t state; 476 | XXH64_reset(&state, seed); 477 | XXH64_update(&state, input, len); 478 | return XXH64_digest(&state); 479 | #else 480 | 
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; 481 | 482 | # if !defined(XXH_USE_UNALIGNED_ACCESS) 483 | if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage 484 | { 485 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 486 | return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); 487 | else 488 | return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); 489 | } 490 | # endif 491 | 492 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 493 | return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); 494 | else 495 | return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); 496 | #endif 497 | } 498 | 499 | /**************************************************** 500 | * Advanced Hash Functions 501 | ****************************************************/ 502 | 503 | /*** Allocation ***/ 504 | typedef struct 505 | { 506 | U64 total_len; 507 | U32 seed; 508 | U32 v1; 509 | U32 v2; 510 | U32 v3; 511 | U32 v4; 512 | U32 mem32[4]; /* defined as U32 for alignment */ 513 | U32 memsize; 514 | } XXH_istate32_t; 515 | 516 | typedef struct 517 | { 518 | U64 total_len; 519 | U64 seed; 520 | U64 v1; 521 | U64 v2; 522 | U64 v3; 523 | U64 v4; 524 | U64 mem64[4]; /* defined as U64 for alignment */ 525 | U32 memsize; 526 | } XXH_istate64_t; 527 | 528 | 529 | XXH32_state_t* XXH32_createState(void) 530 | { 531 | XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough 532 | return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); 533 | } 534 | XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) 535 | { 536 | XXH_free(statePtr); 537 | return XXH_OK; 538 | }; 539 | 540 | XXH64_state_t* XXH64_createState(void) 541 | { 542 | XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not 
large enough 543 | return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); 544 | } 545 | XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) 546 | { 547 | XXH_free(statePtr); 548 | return XXH_OK; 549 | }; 550 | 551 | 552 | /*** Hash feed ***/ 553 | 554 | XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed) 555 | { 556 | XXH_istate32_t* state = (XXH_istate32_t*) state_in; 557 | state->seed = seed; 558 | state->v1 = seed + PRIME32_1 + PRIME32_2; 559 | state->v2 = seed + PRIME32_2; 560 | state->v3 = seed + 0; 561 | state->v4 = seed - PRIME32_1; 562 | state->total_len = 0; 563 | state->memsize = 0; 564 | return XXH_OK; 565 | } 566 | 567 | XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) 568 | { 569 | XXH_istate64_t* state = (XXH_istate64_t*) state_in; 570 | state->seed = seed; 571 | state->v1 = seed + PRIME64_1 + PRIME64_2; 572 | state->v2 = seed + PRIME64_2; 573 | state->v3 = seed + 0; 574 | state->v4 = seed - PRIME64_1; 575 | state->total_len = 0; 576 | state->memsize = 0; 577 | return XXH_OK; 578 | } 579 | 580 | 581 | FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) 582 | { 583 | XXH_istate32_t* state = (XXH_istate32_t *) state_in; 584 | const BYTE* p = (const BYTE*)input; 585 | const BYTE* const bEnd = p + len; 586 | 587 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER 588 | if (input==NULL) return XXH_ERROR; 589 | #endif 590 | 591 | state->total_len += len; 592 | 593 | if (state->memsize + len < 16) // fill in tmp buffer 594 | { 595 | XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); 596 | state->memsize += (U32)len; 597 | return XXH_OK; 598 | } 599 | 600 | if (state->memsize) // some data left from previous update 601 | { 602 | XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); 603 | { 604 | const U32* p32 = state->mem32; 605 | state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; 606 | state->v1 = XXH_rotl32(state->v1, 
13); 607 | state->v1 *= PRIME32_1; 608 | p32++; 609 | state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; 610 | state->v2 = XXH_rotl32(state->v2, 13); 611 | state->v2 *= PRIME32_1; 612 | p32++; 613 | state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; 614 | state->v3 = XXH_rotl32(state->v3, 13); 615 | state->v3 *= PRIME32_1; 616 | p32++; 617 | state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; 618 | state->v4 = XXH_rotl32(state->v4, 13); 619 | state->v4 *= PRIME32_1; 620 | p32++; 621 | } 622 | p += 16-state->memsize; 623 | state->memsize = 0; 624 | } 625 | 626 | if (p <= bEnd-16) 627 | { 628 | const BYTE* const limit = bEnd - 16; 629 | U32 v1 = state->v1; 630 | U32 v2 = state->v2; 631 | U32 v3 = state->v3; 632 | U32 v4 = state->v4; 633 | 634 | do 635 | { 636 | v1 += XXH_readLE32(p, endian) * PRIME32_2; 637 | v1 = XXH_rotl32(v1, 13); 638 | v1 *= PRIME32_1; 639 | p+=4; 640 | v2 += XXH_readLE32(p, endian) * PRIME32_2; 641 | v2 = XXH_rotl32(v2, 13); 642 | v2 *= PRIME32_1; 643 | p+=4; 644 | v3 += XXH_readLE32(p, endian) * PRIME32_2; 645 | v3 = XXH_rotl32(v3, 13); 646 | v3 *= PRIME32_1; 647 | p+=4; 648 | v4 += XXH_readLE32(p, endian) * PRIME32_2; 649 | v4 = XXH_rotl32(v4, 13); 650 | v4 *= PRIME32_1; 651 | p+=4; 652 | } 653 | while (p<=limit); 654 | 655 | state->v1 = v1; 656 | state->v2 = v2; 657 | state->v3 = v3; 658 | state->v4 = v4; 659 | } 660 | 661 | if (p < bEnd) 662 | { 663 | XXH_memcpy(state->mem32, p, bEnd-p); 664 | state->memsize = (int)(bEnd-p); 665 | } 666 | 667 | return XXH_OK; 668 | } 669 | 670 | XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) 671 | { 672 | XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; 673 | 674 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 675 | return XXH32_update_endian(state_in, input, len, XXH_littleEndian); 676 | else 677 | return XXH32_update_endian(state_in, input, len, XXH_bigEndian); 678 | } 679 | 680 | 681 | 682 | FORCE_INLINE U32 
XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) 683 | { 684 | XXH_istate32_t* state = (XXH_istate32_t*) state_in; 685 | const BYTE * p = (const BYTE*)state->mem32; 686 | BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize; 687 | U32 h32; 688 | 689 | if (state->total_len >= 16) 690 | { 691 | h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); 692 | } 693 | else 694 | { 695 | h32 = state->seed + PRIME32_5; 696 | } 697 | 698 | h32 += (U32) state->total_len; 699 | 700 | while (p+4<=bEnd) 701 | { 702 | h32 += XXH_readLE32(p, endian) * PRIME32_3; 703 | h32 = XXH_rotl32(h32, 17) * PRIME32_4; 704 | p+=4; 705 | } 706 | 707 | while (p> 15; 715 | h32 *= PRIME32_2; 716 | h32 ^= h32 >> 13; 717 | h32 *= PRIME32_3; 718 | h32 ^= h32 >> 16; 719 | 720 | return h32; 721 | } 722 | 723 | 724 | U32 XXH32_digest (const XXH32_state_t* state_in) 725 | { 726 | XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; 727 | 728 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 729 | return XXH32_digest_endian(state_in, XXH_littleEndian); 730 | else 731 | return XXH32_digest_endian(state_in, XXH_bigEndian); 732 | } 733 | 734 | 735 | FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian) 736 | { 737 | XXH_istate64_t * state = (XXH_istate64_t *) state_in; 738 | const BYTE* p = (const BYTE*)input; 739 | const BYTE* const bEnd = p + len; 740 | 741 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER 742 | if (input==NULL) return XXH_ERROR; 743 | #endif 744 | 745 | state->total_len += len; 746 | 747 | if (state->memsize + len < 32) // fill in tmp buffer 748 | { 749 | XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); 750 | state->memsize += (U32)len; 751 | return XXH_OK; 752 | } 753 | 754 | if (state->memsize) // some data left from previous update 755 | { 756 | XXH_memcpy(((BYTE*)state->mem64) + 
state->memsize, input, 32-state->memsize); 757 | { 758 | const U64* p64 = state->mem64; 759 | state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; 760 | state->v1 = XXH_rotl64(state->v1, 31); 761 | state->v1 *= PRIME64_1; 762 | p64++; 763 | state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; 764 | state->v2 = XXH_rotl64(state->v2, 31); 765 | state->v2 *= PRIME64_1; 766 | p64++; 767 | state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; 768 | state->v3 = XXH_rotl64(state->v3, 31); 769 | state->v3 *= PRIME64_1; 770 | p64++; 771 | state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; 772 | state->v4 = XXH_rotl64(state->v4, 31); 773 | state->v4 *= PRIME64_1; 774 | p64++; 775 | } 776 | p += 32-state->memsize; 777 | state->memsize = 0; 778 | } 779 | 780 | if (p+32 <= bEnd) 781 | { 782 | const BYTE* const limit = bEnd - 32; 783 | U64 v1 = state->v1; 784 | U64 v2 = state->v2; 785 | U64 v3 = state->v3; 786 | U64 v4 = state->v4; 787 | 788 | do 789 | { 790 | v1 += XXH_readLE64(p, endian) * PRIME64_2; 791 | v1 = XXH_rotl64(v1, 31); 792 | v1 *= PRIME64_1; 793 | p+=8; 794 | v2 += XXH_readLE64(p, endian) * PRIME64_2; 795 | v2 = XXH_rotl64(v2, 31); 796 | v2 *= PRIME64_1; 797 | p+=8; 798 | v3 += XXH_readLE64(p, endian) * PRIME64_2; 799 | v3 = XXH_rotl64(v3, 31); 800 | v3 *= PRIME64_1; 801 | p+=8; 802 | v4 += XXH_readLE64(p, endian) * PRIME64_2; 803 | v4 = XXH_rotl64(v4, 31); 804 | v4 *= PRIME64_1; 805 | p+=8; 806 | } 807 | while (p<=limit); 808 | 809 | state->v1 = v1; 810 | state->v2 = v2; 811 | state->v3 = v3; 812 | state->v4 = v4; 813 | } 814 | 815 | if (p < bEnd) 816 | { 817 | XXH_memcpy(state->mem64, p, bEnd-p); 818 | state->memsize = (int)(bEnd-p); 819 | } 820 | 821 | return XXH_OK; 822 | } 823 | 824 | XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) 825 | { 826 | XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; 827 | 828 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 829 | return 
XXH64_update_endian(state_in, input, len, XXH_littleEndian); 830 | else 831 | return XXH64_update_endian(state_in, input, len, XXH_bigEndian); 832 | } 833 | 834 | 835 | 836 | FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) 837 | { 838 | XXH_istate64_t * state = (XXH_istate64_t *) state_in; 839 | const BYTE * p = (const BYTE*)state->mem64; 840 | BYTE* bEnd = (BYTE*)state->mem64 + state->memsize; 841 | U64 h64; 842 | 843 | if (state->total_len >= 32) 844 | { 845 | U64 v1 = state->v1; 846 | U64 v2 = state->v2; 847 | U64 v3 = state->v3; 848 | U64 v4 = state->v4; 849 | 850 | h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); 851 | 852 | v1 *= PRIME64_2; 853 | v1 = XXH_rotl64(v1, 31); 854 | v1 *= PRIME64_1; 855 | h64 ^= v1; 856 | h64 = h64*PRIME64_1 + PRIME64_4; 857 | 858 | v2 *= PRIME64_2; 859 | v2 = XXH_rotl64(v2, 31); 860 | v2 *= PRIME64_1; 861 | h64 ^= v2; 862 | h64 = h64*PRIME64_1 + PRIME64_4; 863 | 864 | v3 *= PRIME64_2; 865 | v3 = XXH_rotl64(v3, 31); 866 | v3 *= PRIME64_1; 867 | h64 ^= v3; 868 | h64 = h64*PRIME64_1 + PRIME64_4; 869 | 870 | v4 *= PRIME64_2; 871 | v4 = XXH_rotl64(v4, 31); 872 | v4 *= PRIME64_1; 873 | h64 ^= v4; 874 | h64 = h64*PRIME64_1 + PRIME64_4; 875 | } 876 | else 877 | { 878 | h64 = state->seed + PRIME64_5; 879 | } 880 | 881 | h64 += (U64) state->total_len; 882 | 883 | while (p+8<=bEnd) 884 | { 885 | U64 k1 = XXH_readLE64(p, endian); 886 | k1 *= PRIME64_2; 887 | k1 = XXH_rotl64(k1,31); 888 | k1 *= PRIME64_1; 889 | h64 ^= k1; 890 | h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; 891 | p+=8; 892 | } 893 | 894 | if (p+4<=bEnd) 895 | { 896 | h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; 897 | h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; 898 | p+=4; 899 | } 900 | 901 | while (p> 33; 909 | h64 *= PRIME64_2; 910 | h64 ^= h64 >> 29; 911 | h64 *= PRIME64_3; 912 | h64 ^= h64 >> 32; 913 | 914 | return h64; 915 | } 916 | 917 | 918 | unsigned long long XXH64_digest 
(const XXH64_state_t* state_in) 919 | { 920 | XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; 921 | 922 | if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) 923 | return XXH64_digest_endian(state_in, XXH_littleEndian); 924 | else 925 | return XXH64_digest_endian(state_in, XXH_bigEndian); 926 | } 927 | 928 | 929 | -------------------------------------------------------------------------------- /xxhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | xxHash - Extremely Fast Hash algorithm 3 | Header File 4 | Copyright (C) 2012-2014, Yann Collet. 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following disclaimer 15 | in the documentation and/or other materials provided with the 16 | distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | You can contact the author at : 31 | - xxHash source repository : http://code.google.com/p/xxhash/ 32 | */ 33 | 34 | /* Notice extracted from xxHash homepage : 35 | 36 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 37 | It also successfully passes all tests from the SMHasher suite. 38 | 39 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 40 | 41 | Name Speed Q.Score Author 42 | xxHash 5.4 GB/s 10 43 | CrapWow 3.2 GB/s 2 Andrew 44 | MumurHash 3a 2.7 GB/s 10 Austin Appleby 45 | SpookyHash 2.0 GB/s 10 Bob Jenkins 46 | SBox 1.4 GB/s 9 Bret Mulvey 47 | Lookup3 1.2 GB/s 9 Bob Jenkins 48 | SuperFastHash 1.2 GB/s 1 Paul Hsieh 49 | CityHash64 1.05 GB/s 10 Pike & Alakuijala 50 | FNV 0.55 GB/s 5 Fowler, Noll, Vo 51 | CRC32 0.43 GB/s 9 52 | MD5-32 0.33 GB/s 10 Ronald L. Rivest 53 | SHA1-32 0.28 GB/s 10 54 | 55 | Q.Score is a measure of quality of the hash function. 56 | It depends on successfully passing SMHasher test set. 57 | 10 is a perfect score. 
58 | */ 59 | 60 | #pragma once 61 | 62 | #if defined (__cplusplus) 63 | extern "C" { 64 | #endif 65 | 66 | 67 | /***************************** 68 | Includes 69 | *****************************/ 70 | #include /* size_t */ 71 | 72 | 73 | /***************************** 74 | Type 75 | *****************************/ 76 | typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 77 | 78 | 79 | 80 | /***************************** 81 | Simple Hash Functions 82 | *****************************/ 83 | 84 | unsigned int XXH32 (const void* input, size_t length, unsigned seed); 85 | unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); 86 | 87 | /* 88 | XXH32() : 89 | Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 90 | The memory between input & input+length must be valid (allocated and read-accessible). 91 | "seed" can be used to alter the result predictably. 92 | This function successfully passes all SMHasher tests. 93 | Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 94 | XXH64() : 95 | Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 96 | */ 97 | 98 | 99 | 100 | /***************************** 101 | Advanced Hash Functions 102 | *****************************/ 103 | typedef struct { long long ll[ 6]; } XXH32_state_t; 104 | typedef struct { long long ll[11]; } XXH64_state_t; 105 | 106 | /* 107 | These structures allow static allocation of XXH states. 108 | States must then be initialized using XXHnn_reset() before first use. 109 | 110 | If you prefer dynamic allocation, please refer to functions below. 111 | */ 112 | 113 | XXH32_state_t* XXH32_createState(void); 114 | XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 115 | 116 | XXH64_state_t* XXH64_createState(void); 117 | XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 118 | 119 | /* 120 | These functions create and release memory for XXH state. 
121 | States must then be initialized using XXHnn_reset() before first use. 122 | */ 123 | 124 | 125 | XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed); 126 | XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 127 | unsigned int XXH32_digest (const XXH32_state_t* statePtr); 128 | 129 | XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 130 | XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 131 | unsigned long long XXH64_digest (const XXH64_state_t* statePtr); 132 | 133 | /* 134 | These functions calculate the xxHash of an input provided in multiple smaller packets, 135 | as opposed to an input provided as a single block. 136 | 137 | XXH state space must first be allocated, using either static or dynamic method provided above. 138 | 139 | Start a new hash by initializing state with a seed, using XXHnn_reset(). 140 | 141 | Then, feed the hash state by calling XXHnn_update() as many times as necessary. 142 | Obviously, input must be valid, meaning allocated and read accessible. 143 | The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 144 | 145 | Finally, you can produce a hash anytime, by using XXHnn_digest(). 146 | This function returns the final nn-bits hash. 147 | You can nonetheless continue feeding the hash state with more input, 148 | and therefore get some new hashes, by calling again XXHnn_digest(). 149 | 150 | When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). 151 | */ 152 | 153 | 154 | #if defined (__cplusplus) 155 | } 156 | #endif 157 | --------------------------------------------------------------------------------